Created
December 25, 2018 06:33
-
-
Save Blufe/ff76dd1c1c01f9e5f0b1bf4665e59626 to your computer and use it in GitHub Desktop.
クリスマスにAdvent Calendarにプレゼントされた記事を調べる
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module github.com/Blufe/scraping_advent_calendar

// goquery is imported directly by the main package, so it is listed as a
// direct requirement rather than being marked "// indirect".
require github.com/PuerkitoBio/goquery v1.5.0
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"fmt" | |
"log" | |
"github.com/PuerkitoBio/goquery" | |
) | |
// AdventCalendar is one calendar from the Qiita Advent Calendar index together
// with the article linked from its day-25 cell.
type AdventCalendar struct {
	// Title is the link text of the calendar on the index page.
	Title string
	// LastDayTitle and LastDayURL are the link text and href of the article
	// found in the day-25 cell. Both stay empty when no article is linked.
	LastDayTitle, LastDayURL string
}
func main() { | |
var list []AdventCalendar | |
for page := 1; ; page++ { | |
doc, err := goquery.NewDocument(fmt.Sprintf("https://qiita.com/advent-calendar/2018/calendars?page=%d", page)) | |
if err != nil { | |
log.Println("Failed to scrape url.") | |
log.Fatalln(err) | |
} | |
elms := doc.Find("table.adventCalendarList > tbody > tr > td > a") | |
if elms.Length() <= 0 { | |
break | |
} | |
elms.Each(func(_ int, s *goquery.Selection) { | |
adventCalendar := AdventCalendar{ | |
Title: s.Text(), | |
} | |
url, _ := s.Attr("href") | |
doc, err := goquery.NewDocument(fmt.Sprintf("https://qiita.com%s", url)) | |
if err != nil { | |
log.Println("Failed to scrape url.") | |
log.Fatalln(err) | |
} | |
elms := doc.Find("td.adventCalendarCalendar_day") | |
if elms.Length() <= 0 { | |
return | |
} | |
elms.Each(func(_ int, s *goquery.Selection) { | |
if s.Find("p.adventCalendarCalendar_date").Text() != "25" { | |
return | |
} | |
elms := s.Find("div.adventCalendarCalendar_comment > a") | |
if elms.Length() <= 0 { | |
return | |
} | |
adventCalendar.LastDayTitle = elms.Text() | |
adventCalendar.LastDayURL, _ = elms.Attr("href") | |
}) | |
list = append(list, adventCalendar) | |
}) | |
} | |
cnt := 0 | |
for idx, adventCalendar := range list { | |
log.Println(fmt.Sprintf("%04d: '%s' > [%s](%s)", | |
idx+1, | |
adventCalendar.Title, | |
adventCalendar.LastDayTitle, | |
adventCalendar.LastDayURL, | |
)) | |
if adventCalendar.LastDayURL != "" { | |
cnt++ | |
} | |
} | |
log.Println(fmt.Sprintf("%d / %d (%.2f%%)", cnt, len(list), (float64(cnt)/float64(len(list)))*100.0)) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment