Skip to content

Instantly share code, notes, and snippets.

@mojocn
Created March 24, 2021 10:43
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mojocn/b797fa2c74ad7e70c2fcc7c5172c3e61 to your computer and use it in GitHub Desktop.
Save mojocn/b797fa2c74ad7e70c2fcc7c5172c3e61 to your computer and use it in GitHub Desktop.
Fetch Hacker News
import (
"crypto/tls"
"errors"
"fmt"
"github.com/PuerkitoBio/goquery"
"github.com/sirupsen/logrus"
"gorm.io/gorm"
"net/http"
"strings"
"time"
)
// HnCate names a Hacker News listing page. SpiderHN uses the value as the
// URL path segment of the listing it fetches and stores it on each row.
type HnCate string

// Listing categories accepted by SpiderHN.
var (
	// HnCateNews selects the "news" listing.
	HnCateNews HnCate = "news"
	// HnCateShow selects the "show" listing.
	HnCateShow HnCate = "show"
)
// HackNew is the GORM model for one story link scraped by SpiderHN.
// It embeds gorm.Model for the standard bookkeeping columns.
type HackNew struct {
gorm.Model
// TitleZh is the translated title; SpiderHN fills it from GoogleTranslate
// while it is still empty (presumably Chinese, per the "zh" suffix).
TitleZh string `json:"title_zh" form:"title_zh"`
// TitleEn is the anchor text of the story link with "[" and "]" removed.
TitleEn string `json:"title_en" form:"title_en"`
// Url is the story link; it is indexed because SpiderHN looks rows up by it.
Url string `gorm:"index" json:"url" form:"url"`
// Cate is the listing category the story was scraped from ("news" or "show").
Cate string `json:"cate" comment:"news or show" form:"cate"`
}
// downloadHtml fetches url with a GET request and parses the response body
// into a goquery document.
//
// It returns an error when the request cannot be built or sent, when the
// server answers with any status other than 200 OK, or when goquery fails to
// parse the body. The response body is always closed before returning.
func downloadHtml(url string) (*goquery.Document, error) {
	// NOTE(review): InsecureSkipVerify disables TLS certificate verification,
	// so the cookie below may be sent to an impersonating host. It is kept
	// only to preserve existing behaviour; remove it unless it is required.
	tr := &http.Transport{
		TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
	}
	// The transport is private to this call, so release its idle connections
	// on return instead of leaving them open with nobody to reuse them.
	defer tr.CloseIdleConnections()
	client := &http.Client{Transport: tr, Timeout: time.Second * 60}

	req, err := http.NewRequest(http.MethodGet, url, nil)
	if err != nil {
		return nil, err
	}
	// NOTE(review): hard-coded session cookie; consider loading it from
	// configuration rather than keeping a credential in source.
	req.Header.Set("cookie", "user=neochau&SlKqTK32QSFSiWQu1vGgCr4aqvTx5NxT")
	req.Header.Set("User-Agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36")

	res, err := client.Do(req)
	if err != nil {
		return nil, err
	}
	// Register the close before inspecting the status so that the non-200
	// path does not leak the body and its underlying connection.
	defer res.Body.Close()

	if res.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("unexpected response status %q for GET %s", res.Status, url)
	}
	return goquery.NewDocumentFromReader(res.Body)
}
// hnBaseURL is the origin used to build listing URLs and to resolve relative
// story links.
const hnBaseURL = "https://news.ycombinator.com"

// SpiderHN downloads the Hacker News listing for cate, and for every
// "a.storylink" anchor on the page upserts a HackNew row keyed by the story
// URL.
//
// New stories are stored with their title (square brackets removed), URL and
// category. Rows whose TitleZh is still empty are translated through
// GoogleTranslate (defined elsewhere in the package), followed by a 15 second
// pause between translation calls.
//
// Only a failure to download or parse the listing is returned. Per-story
// failures (lookup, translation, save) are logged and the story is skipped or
// saved without a translation, so one bad entry does not abort the crawl.
func SpiderHN(db *gorm.DB, cate HnCate) error {
	doc, err := downloadHtml(fmt.Sprintf("%s/%s", hnBaseURL, cate))
	if err != nil {
		return err
	}

	// Built once; strips the square brackets from every title.
	brackets := strings.NewReplacer("[", "", "]", "")

	doc.Find("a.storylink").Each(func(_ int, s *goquery.Selection) {
		// A missing href yields "", which absoluteHNLink maps to "" below.
		href, _ := s.Attr("href")
		link := absoluteHNLink(href)
		if link == "" {
			// Without a URL there is no key to upsert on.
			return
		}
		titleEn := brackets.Replace(s.Text())

		row := new(HackNew)
		lookupErr := db.Where("url = ?", link).Take(row).Error
		switch {
		case lookupErr == nil:
			// Existing story: keep the stored fields as they are.
		case errors.Is(lookupErr, gorm.ErrRecordNotFound):
			row.TitleEn = titleEn
			row.Url = link
			row.Cate = string(cate)
		default:
			// Any other failure (e.g. a broken connection) leaves row empty;
			// saving it would insert a blank record, so skip this story.
			logrus.WithError(lookupErr).Error("query")
			return
		}

		if row.TitleZh == "" {
			zh, err := GoogleTranslate(titleEn)
			if err != nil {
				// Leave TitleZh empty so a later run retries the translation.
				logrus.WithError(err).Error("google 翻译错误")
			} else {
				row.TitleZh = zh
			}
			// Throttle calls to the translation service.
			time.Sleep(time.Second * 15)
		}

		if err := db.Save(row).Error; err != nil {
			logrus.WithError(err).Error("upsert")
		}
	})
	return nil
}

// absoluteHNLink turns an href taken from a Hacker News listing into an
// absolute URL. Links that already carry a scheme are returned unchanged,
// protocol-relative links get "https:", and every other form ("/path" or
// "item?id=1") is resolved against hnBaseURL. An empty href yields "".
func absoluteHNLink(href string) string {
	href = strings.TrimSpace(href)
	switch {
	case href == "":
		return ""
	case strings.HasPrefix(href, "//"):
		return "https:" + href
	}
	// A scheme is present when ':' appears before any '/', '?' or '#'.
	if i := strings.IndexAny(href, ":/?#"); i > 0 && href[i] == ':' {
		return href
	}
	return hnBaseURL + "/" + strings.TrimPrefix(href, "/")
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment