Skip to content

Instantly share code, notes, and snippets.

@kisPocok
Created January 29, 2018 14:03
Show Gist options
  • Save kisPocok/45367b38e5c614b302e57e288093910d to your computer and use it in GitHub Desktop.
Save kisPocok/45367b38e5c614b302e57e288093910d to your computer and use it in GitHub Desktop.
go build . && ./grabber -q "nintendo switch" -top "mario,sonic" -price 60000
package main
import (
"crypto/tls"
"flag"
"fmt"
"io/ioutil"
"log"
"net/http"
"regexp"
"strconv"
"strings"
"github.com/PuerkitoBio/goquery"
"github.com/mmcdole/gofeed"
)
// Search defaults and the URL templates of the supported marketplaces.
// Each template takes the (already escaped) search phrase as %s.
const (
	// highPriceLimit is the default price above which an item counts as expensive.
	highPriceLimit = 60000

	teszveszfeed = "http://www.teszvesz.hu/listings/index.php?ob=16&obd=2&q=%s&rss=1"
	// jofogasfeed additionally takes the page number as %d.
	// To search in the descriptions as well, add `pf=b`.
	jofogasfeed = "https://www.jofogas.hu/budapest?q=%s&o=%d"
	vaterafeed  = "http://www.vatera.hu/rss/?ob=16&obd=2&q=%s"
	hardverapro = "https://hardverapro.hu/aprok/szoftver_jatek/%s/?rss=true"
)
// main parses the command-line flags, collects listings from every supported
// marketplace and prints them in up to three sections: the items above the
// price threshold, the items matching each highlight term, and all items.
func main() {
	q, price, top := flagship()
	fmt.Println("Keresendő kifejezés:", q)

	// Crawling
	prodList := NewProductList(q, price)
	prodList = fetchHardverapro(prodList)
	prodList = fetchVatera(prodList)
	prodList = fetchTeszvesz(prodList)
	prodList = fetchJofogas(prodList)

	// High price products first. The threshold is the value of the -price
	// flag; the isExpensive field of the items is computed against the fixed
	// highPriceLimit and would ignore what the user asked for.
	if price > 0 {
		fmt.Println("\nMagasabb aru termekek:")
		prodList.filter(func(p item) bool { return p.price > price }).show()
	}

	// Highlights
	if top != "" {
		prodList.highlights(strings.Split(top, ","))
	}

	// All of
	fmt.Println("\nMinden:")
	prodList.show()
}
// item is a single listing collected by one of the fetch functions.
type item struct {
	// title is the name shown when the item is printed; info is the text the
	// highlight terms are matched against.
	title, info string
	// link is the address of the listing as reported by the source.
	link string
	// source names the marketplace the listing was collected from.
	source string
	// price is the parsed price (the parsers look for "Ft" amounts); it is 0
	// when no price could be parsed.
	price int
	// isExpensive records the result of isExpensive(price) at collection time.
	isExpensive bool
}
// Products carries the search parameters together with the listings that have
// been collected for them so far.
type Products struct {
	query string // search phrase the fetch functions build their URLs from
	price int    // price threshold handed to NewProductList
	list  []item // collected listings, in the order they were appended
}
// filterConsole returns a Products value that holds only the items whose
// isExpensive flag is set. The query and price of the result keep their zero
// values; the receiver is not modified.
func (p Products) filterConsole() Products {
	var expensive Products
	for i := range p.list {
		if !p.list[i].isExpensive {
			continue
		}
		expensive.list = append(expensive.list, p.list[i])
	}
	return expensive
}
// filter returns a Products value that holds the items for which fn reports
// true, in their original order. The query and price of the result keep their
// zero values; the receiver is not modified.
func (p Products) filter(fn func(p item) bool) Products {
	var kept []item
	for _, candidate := range p.list {
		if !fn(candidate) {
			continue
		}
		kept = append(kept, candidate)
	}
	return Products{list: kept}
}
// highlights prints one section per entry of highlights: a header with the
// term and the number of matching items, followed by the matching items.
// Terms without any match produce no output.
func (p Products) highlights(highlights []string) {
	for i := range highlights {
		term := highlights[i]
		matches := p.filter(findText(term))
		if n := len(matches.list); n > 0 {
			fmt.Printf("\n%s (%d):\n", term, n)
			matches.show()
		}
	}
}
// show writes one line per item to standard output: title, price and link.
func (p Products) show() {
	for i := range p.list {
		entry := p.list[i]
		fmt.Printf("- %s, %d ft %s\n", entry.title, entry.price, entry.link)
	}
}
// NewProductList returns a Products value for the search phrase q and the
// price threshold price, with an empty (non-nil) item list.
func NewProductList(q string, price int) Products {
	var products Products
	products.query = q
	products.price = price
	products.list = []item{}
	return products
}
// flagship registers the -q, -price and -top command-line flags, parses the
// command line and returns their values in that order: the search query, the
// price threshold and the comma-separated highlight terms.
func flagship() (string, int, string) {
	var (
		query     string
		threshold int
		top       string
	)
	flag.StringVar(&query, "q", "nintendo switch", "search query")
	flag.IntVar(&threshold, "price", highPriceLimit, "set the price of the expensive products")
	flag.StringVar(&top, "top", "zelda,mario,fallout", "highlighted words, separated with comma")
	flag.Parse()
	return query, threshold, top
}
// fetchHardverapro downloads the hardverapro RSS feed for prodList.query and
// returns prodList with one item appended per feed entry.
//
// Fetching is best effort: when building the request, downloading the feed or
// parsing it fails, the problem is logged and prodList is returned unchanged.
func fetchHardverapro(prodList Products) Products {
	// The phrase goes into the URL path with spaces replaced by underscores.
	q := strings.Replace(prodList.query, " ", "_", -1)
	url := fmt.Sprintf(hardverapro, q)

	// NOTE(review): certificate verification is switched off for this
	// request, which allows the response to be tampered with in transit.
	// Confirm whether the site still needs this.
	tr := &http.Transport{
		TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
	}
	client := &http.Client{Transport: tr}

	request, err := http.NewRequest("GET", url, nil)
	if err != nil {
		log.Printf("hardverapro: building request for %s: %v", url, err)
		return prodList
	}
	request.Header.Set("Accept", "*/*")

	rss, err := client.Do(request)
	if err != nil {
		log.Printf("hardverapro: requesting %s: %v", url, err)
		return prodList
	}
	defer rss.Body.Close()

	if rss.StatusCode != http.StatusOK {
		log.Printf("hardverapro: requesting %s: unexpected status %s", url, rss.Status)
		return prodList
	}

	body, err := ioutil.ReadAll(rss.Body)
	if err != nil {
		log.Printf("hardverapro: reading response of %s: %v", url, err)
		return prodList
	}

	feed, err := gofeed.NewParser().ParseString(string(body))
	if err != nil {
		log.Printf("hardverapro: parsing feed of %s: %v", url, err)
		return prodList
	}

	fmt.Printf("Hardverapro találatok száma: %d\n", len(feed.Items))
	for _, v := range feed.Items {
		// NOTE(review): the Teszvesz title/price parsers are reused here;
		// confirm that this feed uses the same description format.
		price := parseTeszveszPrice(v.Description)
		prodList.list = append(prodList.list, item{
			title:       parseTeszveszTitle(v.Title),
			info:        v.Description,
			link:        v.Link,
			price:       price,
			isExpensive: isExpensive(price),
			source:      "hardverapro",
		})
	}
	return prodList
}
// fetchJofogas scrapes the Jofogas result pages for prodList.query and returns
// prodList with one item appended per listing that has a subject link.
//
// At most maxPages pages are requested; scraping stops earlier at the first
// page without listings. A page that cannot be fetched is logged and ends the
// scrape, so the items gathered so far (from this and other sources) are kept
// instead of aborting the whole program.
func fetchJofogas(prodList Products) Products {
	const maxPages = 5

	// The escaped phrase is the same for every page.
	q := strings.Replace(prodList.query, " ", "%20", -1)

	for page := 1; page <= maxPages; page++ {
		url := fmt.Sprintf(jofogasfeed, q, page)
		// TODO: the formatting of the response header is poor
		doc, err := goquery.NewDocument(url)
		if err != nil {
			log.Printf("jofogas: fetching page %d (%s): %v", page, url, err)
			break
		}

		items := doc.Find(".reListElement")
		fmt.Printf("Jofogas találatok száma: %d\n", len(items.Nodes))
		if len(items.Nodes) < 1 {
			break
		}

		items.Each(func(i int, s *goquery.Selection) {
			t := s.Find("a.subject")
			link, exists := t.Attr("href")
			if !exists {
				// Without a link the listing cannot be shown usefully.
				return
			}
			title := t.Text()
			price := parseJofogasPrice(s.Find("div.priceBox").Text())
			prodList.list = append(prodList.list, item{
				title:       title,
				info:        title,
				link:        link,
				price:       price,
				isExpensive: isExpensive(price),
				source:      "jofogas",
			})
		})
	}
	return prodList
}
// fetchVatera downloads the Vatera RSS feed for prodList.query and returns
// prodList with one item appended per feed entry.
//
// Fetching is best effort: when the feed cannot be downloaded or parsed the
// error is logged and prodList is returned unchanged.
func fetchVatera(prodList Products) Products {
	q := strings.Replace(prodList.query, " ", "+", -1)
	url := fmt.Sprintf(vaterafeed, q)

	feed, err := gofeed.NewParser().ParseURL(url)
	if err != nil {
		log.Printf("vatera: fetching %s: %v", url, err)
		return prodList
	}

	fmt.Printf("Vatera találatok száma: %d\n", len(feed.Items))
	for _, v := range feed.Items {
		price := parseTeszveszPrice(v.Description)
		prodList.list = append(prodList.list, item{
			title:       parseTeszveszTitle(v.Title),
			info:        v.Description,
			link:        v.Link,
			price:       price,
			isExpensive: isExpensive(price),
			source:      "vatera",
		})
	}
	return prodList
}
// fetchTeszvesz downloads the Teszvesz RSS feed for prodList.query and prints
// the number of entries found. The entries themselves are not appended (see
// the disabled loop below), so prodList is always returned unchanged.
//
// When the feed cannot be downloaded or parsed the error is logged instead of
// dereferencing a nil feed.
func fetchTeszvesz(prodList Products) Products {
	q := strings.Replace(prodList.query, " ", "+", -1)
	url := fmt.Sprintf(teszveszfeed, q)

	feed, err := gofeed.NewParser().ParseURL(url)
	if err != nil {
		log.Printf("teszvesz: fetching %s: %v", url, err)
		return prodList
	}

	fmt.Printf("Teszvesz találatok száma: %d\n", len(feed.Items))
	/*
		// TODO removed, because the Vatera feed covers these listings
		for _, v := range feed.Items {
			price := parseTeszveszPrice(v.Description)
			prodList.list = append(prodList.list, item{
				title:       parseTeszveszTitle(v.Title),
				info:        v.Description,
				link:        v.Link,
				price:       price,
				isExpensive: isExpensive(price),
				source:      "teszvesz",
			})
		}
	*/
	return prodList
}
// findText returns a predicate that reports whether the lower-cased form of s
// is found in an item's info text, as decided by exists.
func findText(s string) func(item) bool {
	needle := strings.ToLower(s)
	return func(p item) bool {
		return exists(needle, p.info)
	}
}
// exists reports whether needle occurs in the lower-cased form of s.
//
// needle is interpreted as a regular expression. When it is not a valid
// pattern (for example "c++" or "(x") it is treated as literal text instead
// of panicking, because the terms come straight from the command line.
// A match must be non-empty to count, so an empty needle never matches;
// one-character matches do count.
func exists(needle, s string) bool {
	s = strings.ToLower(s)

	re, err := regexp.Compile(needle)
	if err != nil {
		// An invalid pattern is never empty, so Contains cannot match trivially.
		return strings.Contains(s, needle)
	}
	return re.FindString(s) != ""
}
// teszveszTitleRe captures the text in front of a "]]>" marker. The capture is
// greedy, so it extends to the last marker it can reach.
var teszveszTitleRe = regexp.MustCompile("(.+)]]>")

// parseTeszveszTitle returns the part of s that precedes the "]]>" marker.
// When s holds no such marker (or nothing in front of it) s is returned
// unchanged.
func parseTeszveszTitle(s string) string {
	m := teszveszTitleRe.FindStringSubmatch(s)
	if len(m) < 2 {
		return s
	}
	return m[1]
}
// teszveszPriceRe captures the amount that follows "Jelenlegi ára:" and ends
// at "Ft<br />". The capture is limited to digits and the separators space,
// dot and non-breaking space, so it cannot run into a later "Ft<br />".
var teszveszPriceRe = regexp.MustCompile(`Jelenlegi ára:[\s\x{00A0}]*(\d[\d .\x{00A0}]*)[\s\x{00A0}]*Ft<br />`)

// parseTeszveszPrice extracts the current price from a feed description of
// the form "Jelenlegi ára: 12 500 Ft<br />". Separators inside the amount are
// dropped before conversion. It returns 0 when no price is found or the
// amount does not fit into an int.
func parseTeszveszPrice(s string) (p int) {
	m := teszveszPriceRe.FindStringSubmatch(s)
	if len(m) < 2 {
		return 0
	}

	// Keep the ASCII digits only; everything else in the capture is a separator.
	digits := strings.Map(func(r rune) rune {
		if r >= '0' && r <= '9' {
			return r
		}
		return -1
	}, m[1])

	n, err := strconv.Atoi(digits)
	if err != nil {
		return 0
	}
	return n
}
// jofogasPriceRe captures an amount that is followed by "Ft". The capture is
// limited to digits and the separators space, dot and non-breaking space.
var jofogasPriceRe = regexp.MustCompile(`(\d[\d .\x{00A0}]*)[\s\x{00A0}]*Ft`)

// parseJofogasPrice extracts the price from the text of a price box such as
// " 75 000 Ft ". Surrounding whitespace is not required and separators inside
// the amount are dropped before conversion. It returns 0 when no price is
// found or the amount does not fit into an int.
func parseJofogasPrice(s string) (p int) {
	m := jofogasPriceRe.FindStringSubmatch(s)
	if len(m) < 2 {
		return 0
	}

	// Keep the ASCII digits only; everything else in the capture is a separator.
	digits := strings.Map(func(r rune) rune {
		if r >= '0' && r <= '9' {
			return r
		}
		return -1
	}, m[1])

	n, err := strconv.Atoi(digits)
	if err != nil {
		return 0
	}
	return n
}
// isExpensive reports whether price is strictly above highPriceLimit.
func isExpensive(price int) bool {
	return highPriceLimit < price
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment