Skip to content

Instantly share code, notes, and snippets.

@FZambia
Last active August 29, 2015 14:08
Show Gist options
  • Save FZambia/d225237914bed7d6c6df to your computer and use it in GitHub Desktop.
Save FZambia/d225237914bed7d6c6df to your computer and use it in GitHub Desktop.
Alterportal.ru filter by comment count
package main
import (
"fmt"
"html/template"
"io/ioutil"
"log"
"net/http"
"strconv"
"strings"
"code.google.com/p/go-charset/charset"
_ "code.google.com/p/go-charset/data"
"github.com/PuerkitoBio/goquery"
"github.com/spf13/cobra"
"github.com/toqueteos/webbrowser"
)
// URL is the fmt-style template for a listing page address; its single %d
// verb is filled with the page number.
const URL = "http://alterportal.ru/page/%d/"
// Result is one listing entry collected by processURL.
type Result struct {
// Title is the decoded text of the entry's .ntitle element.
Title string
// Score is the number parsed from the entry's comments link text.
Score int
// Link is the href attribute taken from the entry's title link.
Link string
}
// parseContent is a placeholder: it never inspects content and sends nothing.
// Its only effect is closing collector, which ends any range loop reading
// from that channel.
func parseContent(content []byte, collector chan string) {
	defer close(collector)
}
// decodeString reads str through a charset reader configured for
// windows-1251 and returns everything that reader yields as a string.
// Failing to create the reader or to read from it ends the program through
// log.Fatal.
func decodeString(str string) string {
	src := strings.NewReader(str)

	decoder, err := charset.NewReader("windows-1251", src)
	if err != nil {
		log.Fatal(err)
	}

	decoded, err := ioutil.ReadAll(decoder)
	if err != nil {
		log.Fatal(err)
	}

	return string(decoded)
}
// processURL fetches the listing page at url and sends a *Result on collector
// for every entry whose comment count is at least minScore. collector is
// closed when the page has been processed, so the receiver can range over it.
//
// A failure to fetch or parse the page is fatal (log.Fatal). An entry whose
// comment count cannot be extracted is skipped: entries without a second
// word are ignored silently, and entries with a non-numeric count are logged.
func processURL(url string, minScore int, collector chan *Result) {
	// Close on every return path so the receiver never waits forever.
	defer close(collector)

	doc, err := goquery.NewDocument(url)
	if err != nil {
		log.Fatal(err)
	}

	doc.Find("#dle-content table[width='530']").Each(func(i int, s *goquery.Selection) {
		// The second link inside .slink holds the comments label.
		commentsText := decodeString(s.Find(".slink a").Eq(1).Text())
		parts := strings.Split(commentsText, " ")
		if len(parts) < 2 {
			return
		}

		// The count is the second word with its first and last byte removed.
		// A word shorter than two bytes cannot be sliced that way and would
		// panic with a slice-bounds error, so treat it as "no count".
		token := parts[1]
		if len(token) < 2 {
			return
		}
		score, err := strconv.Atoi(token[1 : len(token)-1])
		if err != nil {
			// One malformed entry should not abort the whole run.
			log.Printf("skipping entry on %s: bad comment count %q: %v", url, token, err)
			return
		}
		if score < minScore {
			return
		}

		title := decodeString(s.Find(".ntitle").Text())
		// The "exists" flag is deliberately dropped: a missing href leaves
		// Link as whatever Attr returned.
		link, _ := s.Find(".ntitle a").Attr("href")
		collector <- &Result{
			Title: title,
			Link:  link,
			Score: score,
		}
	})
}
// load visits pages page_from through page_to (inclusive) one after another
// and gathers every *Result that processURL reports for the given minimum
// score. It returns nil when the range is empty or nothing qualified.
func load(page_from int, page_to int, score int) []*Result {
	var found []*Result

	for page := page_from; page <= page_to; page++ {
		ch := make(chan *Result)
		go processURL(fmt.Sprintf(URL, page), score, ch)

		// Drain this page's channel until processURL closes it.
		for {
			item, open := <-ch
			if !open {
				break
			}
			found = append(found, item)
		}
	}

	return found
}
// browserOutput serves results as an HTML page on localhost:port and asks
// webbrowser to open that address. It registers its handler for "/" on the
// default HTTP mux and then blocks while the server runs. If the server
// cannot start or stops with an error, the program exits through log.Fatal.
func browserOutput(results []*Result, port string) {
	// Both spans are closed with </span>; the earlier markup opened a second
	// span instead of closing the first.
	const page = `<html>
<head>
<style type="text/css">
body {margin: 0 auto; max-width: 640px; background: black; color: #CCC; font-family: Courier New, Courier; line-height: 1.2em;}
.content {padding: 30px 10px 0 10px;}
.entry {margin-bottom: 20px;}
.entry-title a:link, .entry-title a:visited {color: #9df; text-decoration: none;}
.entry-permalink a:link, .entry-permalink a:visited {color: #CCC; text-decoration: none; font-size: 0.8em;}
.entry a:hover {color: #6cf;}
</style>
</head>
<body>
<div class="content">
{{ range . }}
<div class="entry">
<span class="entry-score">{{ .Score }}</span>
<span class="entry-title"><a target="_blank" href="{{ .Link }}">{{ .Title }}</a></span>
</div>
{{ end }}
</div>
</body>
</html>`

	// Parse once, before serving: a template that does not parse is a
	// programming error and should surface immediately, not on each request.
	tmpl := template.Must(template.New("browser").Parse(page))

	viewHandler := func(w http.ResponseWriter, r *http.Request) {
		if err := tmpl.Execute(w, results); err != nil {
			// Part of the response may already be written, so just log.
			log.Printf("rendering results: %v", err)
		}
	}

	http.HandleFunc("/", viewHandler)
	log.Println("HTTP server starting, go to http://localhost:" + port)

	done := make(chan struct{})
	go func() {
		defer close(done)
		if err := http.ListenAndServe(":"+port, nil); err != nil {
			log.Fatal("ListenAndServe: ", err)
		}
	}()

	if err := webbrowser.Open("http://localhost:" + port); err != nil {
		// The page is still reachable by hand, so this is not fatal.
		log.Printf("opening browser: %v", err)
	}

	// Keep the process alive for as long as the server goroutine runs.
	<-done
}
// main defines the command-line interface: --page_from/-f and --page_to/-t
// select the inclusive page range, and --score/-s sets the minimum comment
// count. It loads the matching entries and shows them through browserOutput
// on port 17000. A command or flag error ends the program with a non-zero
// exit status.
func main() {
	var (
		pageFrom int
		pageTo   int
		minScore int
	)

	rootCmd := &cobra.Command{
		Use:   "",
		Short: "Filter Alterportal.ru by comments count",
		Long:  "Filter Alterportal.ru by comments count",
		Run: func(cmd *cobra.Command, args []string) {
			results := load(pageFrom, pageTo, minScore)
			browserOutput(results, "17000")
		},
	}

	flags := rootCmd.Flags()
	flags.IntVarP(&pageFrom, "page_from", "f", 1, "page from to parse")
	flags.IntVarP(&pageTo, "page_to", "t", 2, "page to to parse")
	flags.IntVarP(&minScore, "score", "s", 100, "min score")

	// Execute's error used to be discarded, so failures such as an unknown
	// flag still exited with status 0.
	if err := rootCmd.Execute(); err != nil {
		log.Fatal(err)
	}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment