Created
March 14, 2018 02:39
-
-
Save frankcash/7bc32891b686cea0a0cf98ff6ae90b33 to your computer and use it in GitHub Desktop.
Structured Colly Example for Reddit.com/r/programming
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"fmt" | |
"time" | |
"github.com/gocolly/colly" | |
) | |
type item struct { | |
StoryURL string | |
Source string | |
comments string | |
CrawledAt time.Time | |
Comments string | |
Title string | |
} | |
func main() { | |
stories := []item{} | |
// Instantiate default collector | |
c := colly.NewCollector( | |
// Visit only domains: reddit.com | |
colly.AllowedDomains("www.reddit.com"), | |
) | |
// On every a element which has .top-matter attribute call callback | |
// This class is unique to the div that holds all information about a story | |
c.OnHTML(".top-matter", func(e *colly.HTMLElement) { | |
temp := item{} | |
temp.StoryURL = e.ChildAttr("a[data-event-action=title]", "href") | |
temp.Source = "https://www.reddit.com/r/programming/" | |
temp.Title = e.ChildText("a[data-event-action=title]") | |
temp.Comments = e.ChildAttr("a[data-event-action=comments]", "href") | |
temp.CrawledAt = time.Now() | |
stories = append(stories, temp) | |
}) | |
// Before making a request print "Visiting ..." | |
c.OnRequest(func(r *colly.Request) { | |
fmt.Println("Visiting", r.URL.String()) | |
}) | |
c.Visit("https://www.reddit.com/r/programming/") | |
c.Wait() | |
fmt.Println(stories) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment