Last active
August 29, 2015 14:08
-
-
Save FZambia/d225237914bed7d6c6df to your computer and use it in GitHub Desktop.
Alterportal.ru filter by comment count
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"fmt" | |
"html/template" | |
"io/ioutil" | |
"log" | |
"net/http" | |
"strconv" | |
"strings" | |
"code.google.com/p/go-charset/charset" | |
_ "code.google.com/p/go-charset/data" | |
"github.com/PuerkitoBio/goquery" | |
"github.com/spf13/cobra" | |
"github.com/toqueteos/webbrowser" | |
) | |
// URL is the fmt format string for a listing page; its single %d verb is
// filled with the page number.
const URL = "http://alterportal.ru/page/%d/"
// Result describes one entry scraped from a listing page.
type Result struct {
	// Title is the decoded text of the entry's title element.
	Title string
	// Score is the number parsed from the entry's comment counter.
	Score int
	// Link is the href attribute of the entry's title anchor.
	Link string
}
// parseContent is a stub: it does not inspect content and only closes
// collector, so a receiver ranging over the channel finishes immediately.
func parseContent(content []byte, collector chan string) {
	_ = content // currently unused
	close(collector)
}
func decodeString(str string) string { | |
r, err := charset.NewReader("windows-1251", strings.NewReader(str)) | |
if err != nil { | |
log.Fatal(err) | |
} | |
str_bytes, err := ioutil.ReadAll(r) | |
if err != nil { | |
log.Fatal(err) | |
} | |
return string(str_bytes) | |
} | |
func processURL(url string, minScore int, collector chan *Result) { | |
doc, err := goquery.NewDocument(url) | |
if err != nil { | |
log.Fatal(err) | |
} | |
doc.Find("#dle-content table[width='530']").Each(func(i int, s *goquery.Selection) { | |
comments_text := s.Find(".slink a").Eq(1).Text() | |
comments_text_decoded := decodeString(comments_text) | |
parts := strings.Split(comments_text_decoded, " ") | |
if len(parts) < 2 { | |
return | |
} | |
comments := parts[1] | |
score, err := strconv.Atoi(comments[1 : len(comments)-1]) | |
if err != nil { | |
log.Fatal(err) | |
} | |
if score < minScore { | |
return | |
} | |
title := decodeString(s.Find(".ntitle").Text()) | |
link, _ := s.Find(".ntitle a").Attr("href") | |
collector <- &Result{ | |
Title: title, | |
Link: link, | |
Score: score, | |
} | |
}) | |
close(collector) | |
} | |
func load(page_from int, page_to int, score int) []*Result { | |
var results []*Result | |
for i := page_from; i <= page_to; i++ { | |
url := fmt.Sprintf(URL, i) | |
collector := make(chan *Result) | |
go processURL(url, score, collector) | |
for result := range collector { | |
results = append(results, result) | |
} | |
} | |
return results | |
} | |
// opens browser with Subreddits representation | |
func browserOutput(results []*Result, port string) { | |
viewHandler := func(w http.ResponseWriter, r *http.Request) { | |
page := `<html> | |
<head> | |
<style type="text/css"> | |
body {margin: 0 auto; max-width: 640px; background: black; color: #CCC; font-family: Courier New, Courier; line-height: 1.2em;} | |
.content {padding: 30px 10px 0 10px;} | |
.entry {margin-bottom: 20px;} | |
.entry-title a:link, .entry-title a:visited {color: #9df; text-decoration: none;} | |
.entry-permalink a:link, .entry-permalink a:visited {color: #CCC; text-decoration: none; font-size: 0.8em;} | |
.entry a:hover {color: #6cf;} | |
</style> | |
</head> | |
<body> | |
<div class="content"> | |
{{ range . }} | |
<div class="entry"> | |
<span class="entry-score">{{ .Score }}<span> | |
<span class="entry-title"><a target="_blank" href="{{ .Link }}">{{ .Title }}</a><span> | |
</div> | |
{{ end }} | |
</div> | |
</body> | |
</html>` | |
t := template.New("browser") | |
t, _ = t.Parse(page) | |
t.Execute(w, results) | |
} | |
wait := make(chan bool) | |
http.HandleFunc("/", viewHandler) | |
log.Println("HTTP server starting, go to http://localhost:" + port) | |
go func() { | |
if err := http.ListenAndServe(":"+port, nil); err != nil { | |
log.Fatal("ListenAndServe: ", err) | |
} | |
wait <- true | |
}() | |
webbrowser.Open("http://localhost:" + port) | |
<-wait | |
} | |
func main() { | |
var page_from int | |
var page_to int | |
var score int | |
var rootCmd = &cobra.Command{ | |
Use: "", | |
Short: "Filter Alterportal.ru by comments count", | |
Long: "Filter Alterportal.ru by comments count", | |
Run: func(cmd *cobra.Command, args []string) { | |
results := load(page_from, page_to, score) | |
browserOutput(results, "17000") | |
}, | |
} | |
rootCmd.Flags().IntVarP(&page_from, "page_from", "f", 1, "page from to parse") | |
rootCmd.Flags().IntVarP(&page_to, "page_to", "t", 2, "page to to parse") | |
rootCmd.Flags().IntVarP(&score, "score", "s", 100, "min score") | |
rootCmd.Execute() | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment