Skip to content

Instantly share code, notes, and snippets.

@Blufe
Created December 25, 2018 06:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Blufe/ff76dd1c1c01f9e5f0b1bf4665e59626 to your computer and use it in GitHub Desktop.
Save Blufe/ff76dd1c1c01f9e5f0b1bf4665e59626 to your computer and use it in GitHub Desktop.
クリスマスにAdvent Calendarにプレゼントされた記事を調べる
module github.com/Blufe/scraping_advent_calendar
require github.com/PuerkitoBio/goquery v1.5.0 // indirect
package main
import (
"fmt"
"log"
"github.com/PuerkitoBio/goquery"
)
// AdventCalendar holds what the scraper records for one calendar: the
// calendar's own title and the entry linked from its final (25th) day.
type AdventCalendar struct {
	// Title is the text of the calendar's link on the listing page.
	Title string

	// LastDayTitle and LastDayURL are the link text and href of the day-25
	// entry. Both stay empty when no such link was found.
	LastDayTitle, LastDayURL string
}
func main() {
var list []AdventCalendar
for page := 1; ; page++ {
doc, err := goquery.NewDocument(fmt.Sprintf("https://qiita.com/advent-calendar/2018/calendars?page=%d", page))
if err != nil {
log.Println("Failed to scrape url.")
log.Fatalln(err)
}
elms := doc.Find("table.adventCalendarList > tbody > tr > td > a")
if elms.Length() <= 0 {
break
}
elms.Each(func(_ int, s *goquery.Selection) {
adventCalendar := AdventCalendar{
Title: s.Text(),
}
url, _ := s.Attr("href")
doc, err := goquery.NewDocument(fmt.Sprintf("https://qiita.com%s", url))
if err != nil {
log.Println("Failed to scrape url.")
log.Fatalln(err)
}
elms := doc.Find("td.adventCalendarCalendar_day")
if elms.Length() <= 0 {
return
}
elms.Each(func(_ int, s *goquery.Selection) {
if s.Find("p.adventCalendarCalendar_date").Text() != "25" {
return
}
elms := s.Find("div.adventCalendarCalendar_comment > a")
if elms.Length() <= 0 {
return
}
adventCalendar.LastDayTitle = elms.Text()
adventCalendar.LastDayURL, _ = elms.Attr("href")
})
list = append(list, adventCalendar)
})
}
cnt := 0
for idx, adventCalendar := range list {
log.Println(fmt.Sprintf("%04d: '%s' > [%s](%s)",
idx+1,
adventCalendar.Title,
adventCalendar.LastDayTitle,
adventCalendar.LastDayURL,
))
if adventCalendar.LastDayURL != "" {
cnt++
}
}
log.Println(fmt.Sprintf("%d / %d (%.2f%%)", cnt, len(list), (float64(cnt)/float64(len(list)))*100.0))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment