Skip to content

Instantly share code, notes, and snippets.

@kanzitelli
Created September 11, 2019 15:25
Show Gist options
  • Save kanzitelli/781149e34c0da9352fdd04e3a5710037 to your computer and use it in GitHub Desktop.
Save kanzitelli/781149e34c0da9352fdd04e3a5710037 to your computer and use it in GitHub Desktop.
Crawler/Crawler.go. #2
package crawler
import (
"time"
)
// NewsCrawler is the interface implemented by each news source crawler.
// startCrawler keeps a list of NewsCrawler values and calls Run on each of
// them once per crawling round.
type NewsCrawler interface {
// Run returns the news items produced by this crawler as a slice of
// models.News. It reports no error; an empty or nil slice is the only way
// to signal "nothing collected".
Run() []models.News
}
// NewsFunc is a shorthand for the signature "func() []models.News": a
// function that takes no arguments and returns a slice of news items.
// NOTE(review): no use of NewsFunc is visible in this part of the file;
// presumably the individual crawlers use it — confirm against callers.
type NewsFunc func() []models.News
// Start launches the crawling loop (startCrawler) in a new goroutine and
// returns immediately. The loop crawls the web resources every 3 minutes;
// the first round happens only after the first interval has elapsed, not at
// call time. Start returns nothing and provides no way to stop the goroutine
// once it is running.
func Start() {
go startCrawler()
}
// startCrawler runs the crawling loop. Every 3 minutes it calls Run on each
// registered crawler in order, collects all returned news into a single
// batch and passes that batch to the database client for insertion.
//
// The first round runs after the first interval has elapsed. The function
// never returns, so it is meant to be run in its own goroutine (see Start).
func startCrawler() {
	// Crawlers for the different news sources; each implements NewsCrawler.
	crawlers := []NewsCrawler{
		SecretMag{},
		TheoryAndPractice{},
		TheVillage{},
	}

	// Interval between two crawling rounds.
	const interval = 3 * time.Minute

	ticker := time.NewTicker(interval)
	defer ticker.Stop()

	for range ticker.C {
		// All news collected from every crawler during this round.
		var totalNews []models.News
		for _, cr := range crawlers {
			totalNews = append(totalNews, cr.Run()...)
		}

		// Nothing was collected this round: skip the database call instead
		// of issuing an insert with an empty batch.
		// NOTE(review): presumably NewsInsert wraps a Mongo bulk insert,
		// which rejects empty document slices — confirm against db package.
		if len(totalNews) == 0 {
			continue
		}

		dbClient := db.GetClient()
		// Per the original author's note: an error is expected here (the
		// cause is not stated), but Mongo ignores it and keeps writing the
		// remaining items. Any result of NewsInsert is intentionally not
		// inspected here.
		dbClient.NewsInsert(totalNews)
	}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment