Skip to content

Instantly share code, notes, and snippets.

@hjhee
Last active November 26, 2017 14:11
Show Gist options
  • Save hjhee/41d6c5579adeac6aa9bed8e74618f6ed to your computer and use it in GitHub Desktop.
Save hjhee/41d6c5579adeac6aa9bed8e74618f6ed to your computer and use it in GitHub Desktop.
Monitor Discuz! posts and send notifications via Pushbullet
package main
import (
"bufio"
"bytes"
"encoding/json"
"fmt"
"io/ioutil"
"net/http"
"net/url"
"os"
"strings"
"sync"
"time"
"github.com/PuerkitoBio/goquery"
"github.com/op/go-logging"
"golang.org/x/text/encoding/simplifiedchinese"
"golang.org/x/text/transform"
)
// Runtime settings for the monitor.
const (
	// filename is the list of URLs to monitor, one per line.
	filename = "source.txt"
	// configFile holds the Pushbullet credentials; refer to PushSetup.
	configFile = "pushbullet.json"
	// period is the polling interval in seconds.
	period = 30
)
var (
	// log is the package-wide logger, registered under the name "dolc".
	log = logging.MustGetLogger("dolc")

	// logFormat lays out each record as: time, short function name, level
	// truncated to four characters, record id in hex, then the message,
	// with the prefix wrapped in color codes.
	logFormat = logging.MustStringFormatter(
		`%{color}%{time:15:04:05.000} %{shortfunc} | %{level:.4s} %{id:03x}%{color:reset} %{message}`,
	)
)
// HTMLType tells the parser how to interpret an HTMLPage.
//
// NOTE(review): the constant descriptions below mention Tieba, which looks
// inherited from another crawler; in the code visible in this file the value
// is only assigned in main (HTMLLocal for file:// URLs, HTMLWebHomepage
// otherwise) and never read — confirm before relying on the other values.
type HTMLType int
const (
// HTMLWebHomepage is the first page of a Tieba post.
// It is the type main assigns to every non-file URL.
HTMLWebHomepage HTMLType = iota
// HTMLWebPage is a page of a Tieba post.
HTMLWebPage
// HTMLJSON is the Lzl Comment in JSON format.
HTMLJSON
// HTMLLocal is a local HTML or JSON file.
// It is the type main assigns to URLs whose scheme is "file".
HTMLLocal
)
// HTMLPage is a unit of work passed to fetchHTMLFromURL and htmlParse.
type HTMLPage struct {
// URL is the address the page is fetched from.
URL *url.URL
// Content is the raw response body stored by fetchHTMLFromURL;
// htmlParse converts it from GBK before parsing.
Content []byte
// Type indicates how the page should be interpreted; see HTMLType.
Type HTMLType
// Disabled field: it was meant to keep the http.Response around so it
// could be closed when parsing finished.
// Response *http.Response
}
// PushSetup holds the Pushbullet settings that main decodes from configFile.
type PushSetup struct {
// Token is sent as the Access-Token header on push requests.
Token string `json:"token"`
// DeviceIden is copied into the device_iden field of every PushPayload.
DeviceIden string `json:"device_iden"`
}
// pushSetup is the package-wide configuration, populated once in main.
var pushSetup PushSetup
// PushPayload is the JSON body POSTed to the Pushbullet pushes endpoint.
type PushPayload struct {
// DeviceIden is filled from pushSetup.DeviceIden.
DeviceIden string `json:"device_iden"`
// PushType is the push kind; monitor always sends "note".
PushType string `json:"type"`
// Title is the title of the newly detected post.
Title string `json:"title"`
// Body is the link to the post, or its text when no link was found.
Body string `json:"body"`
}
// pushReq is the POST request built in main (URL plus Access-Token and
// Content-Type headers) that monitor uses to send pushes.
var pushReq *http.Request
// fetchHTMLFromURL downloads page.URL with an HTTP GET and stores the raw
// response body in page.Content.
//
// The response body is closed on every path, including a failed read. When
// an error is returned page.Content is left untouched.
func fetchHTMLFromURL(page *HTMLPage) error {
	resp, err := http.Get(page.URL.String())
	if err != nil {
		return err
	}
	// Deferred so the connection is released even if ReadAll fails.
	defer resp.Body.Close()

	// Named content rather than bytes so the bytes package is not shadowed.
	content, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return err
	}
	page.Content = content
	return nil
}
// gbk2UTF8 decodes the GBK-encoded bytes in s and returns them as UTF-8.
// It returns a nil slice together with the error when decoding fails.
func gbk2UTF8(s []byte) ([]byte, error) {
	decoded, err := ioutil.ReadAll(
		transform.NewReader(bytes.NewReader(s), simplifiedchinese.GBK.NewDecoder()),
	)
	if err == nil {
		return decoded, nil
	}
	return nil, err
}
// htmlParse extracts the first post link from a fetched listing page.
//
// page.Content is converted from GBK to UTF-8, parsed as HTML, and the first
// element matching the selector "a.s.xst" is taken as the newest post.
//
// It returns, in order:
//   - the text of that element (the post title),
//   - a body for the notification: "https://www.dolc.de/" followed by the
//     element's href, or the element's text when it has no href,
//   - an error if the content could not be decoded or parsed.
//
// When nothing matches the selector both strings are empty and the error is
// nil; callers are expected to treat an empty title as "no post found".
func htmlParse(page *HTMLPage) (string, string, error) {
	content, err := gbk2UTF8(page.Content)
	if err != nil {
		// Previously this error was overwritten by the next assignment.
		return "", "", fmt.Errorf("Error decoding %s: %v", page.URL, err)
	}
	doc, err := goquery.NewDocumentFromReader(bytes.NewReader(content))
	if err != nil {
		return "", "", fmt.Errorf("Error parsing %s: %v", page.URL, err)
	}

	first := doc.Find("a.s.xst").First()
	title := first.Text()
	body, hasLink := first.Attr("href")
	if hasLink {
		// The href is appended to the site root to form the full link.
		body = "https://www.dolc.de/" + body
	} else {
		body = title
	}
	return title, body, nil
}
// monitor polls page every period seconds until done is closed and sends a
// Pushbullet note whenever the title of the first listed post changes.
//
// The first successful poll only records a baseline title; no notification
// is sent for it. A poll that fails to fetch or parse, or that finds no
// post, is logged/skipped and leaves the baseline untouched, so a change
// that happens across a transient failure is still reported.
//
// wg.Done is called when the function returns.
func monitor(done <-chan struct{}, wg *sync.WaitGroup, page *HTMLPage) {
	defer wg.Done()

	ticker := time.NewTicker(time.Second * period)
	defer ticker.Stop()

	var lastTitle string
	for {
		select {
		case <-done:
			return
		case <-ticker.C:
		}

		if err := fetchHTMLFromURL(page); err != nil {
			log.Warningf("fetch %s failed: %s", page.URL, err)
			continue
		}
		title, body, err := htmlParse(page)
		if err != nil {
			log.Warningf("parse %s failed: %s", page.URL, err)
			continue
		}
		if title == "" {
			// No post found on this poll; keep the previous baseline.
			continue
		}

		changed := lastTitle != "" && title != lastTitle
		lastTitle = title
		if !changed {
			continue
		}

		log.Noticef("New Posts: %s\n", title)
		if err := sendPush(title, body); err != nil {
			log.Warningf("POST request failed: %s", err)
		}
	}
}

// sendPush posts a "note" push with the given title and body to Pushbullet.
//
// A fresh request is built from the method, URL and headers of pushReq on
// every call, so concurrent monitors never share or mutate one request, and
// the response body is closed before returning.
func sendPush(title, body string) error {
	payload, err := json.Marshal(PushPayload{
		DeviceIden: pushSetup.DeviceIden,
		PushType:   "note",
		Title:      title,
		Body:       body,
	})
	if err != nil {
		return err
	}

	req, err := http.NewRequest(pushReq.Method, pushReq.URL.String(), bytes.NewReader(payload))
	if err != nil {
		return err
	}
	for key, values := range pushReq.Header {
		req.Header[key] = append([]string(nil), values...)
	}

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	if resp.StatusCode < 200 || resp.StatusCode > 299 {
		return fmt.Errorf("unexpected status %s", resp.Status)
	}
	return nil
}
// main loads the Pushbullet settings from configFile, prepares the push
// request, and starts one monitor goroutine per URL listed in filename.
// It then blocks until every monitor has returned.
func main() {
	logging.SetFormatter(logFormat)
	log.Infof("monitor started!")

	file, err := os.Open(configFile)
	if err != nil {
		log.Fatal("push setup failed:", err)
	}
	err = json.NewDecoder(file).Decode(&pushSetup)
	file.Close()
	if err != nil {
		log.Fatal("push setup failed:", err)
	}

	pushReq, err = http.NewRequest("POST", "https://api.pushbullet.com/v2/pushes", nil)
	if err != nil {
		log.Fatal("http request build failed:", err)
	}
	// Headers are set only after the error check: pushReq is nil on failure.
	pushReq.Header.Set("Access-Token", pushSetup.Token)
	pushReq.Header.Set("Content-Type", "application/json")

	urls, err := loadURLs(filename)
	if err != nil {
		log.Fatalf("Error reading url list: %v", err)
	}

	// closing done to force all goroutines to quit
	// Go Concurrency Patterns: Pipelines and cancellation
	// https://blog.golang.org/pipelines
	// NOTE(review): nothing in this function closes done, so the monitors
	// run until the process is terminated.
	done := make(chan struct{})
	var wg sync.WaitGroup
	for _, u := range urls {
		pageType := HTMLWebHomepage
		if u.Scheme == "file" {
			pageType = HTMLLocal
		}
		wg.Add(1)
		go monitor(done, &wg, &HTMLPage{URL: u, Type: pageType})
	}
	wg.Wait()
}

// loadURLs reads path line by line and returns every line that parses as a
// URL. Blank lines are skipped silently; lines that fail to parse are logged
// and skipped. A failure to open or read the file is returned as an error.
//
// Lines are read with bufio.Scanner, so a single line longer than
// bufio.MaxScanTokenSize is reported as a read error.
func loadURLs(path string) ([]*url.URL, error) {
	in, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer in.Close()

	var urls []*url.URL
	scanner := bufio.NewScanner(in)
	for scanner.Scan() {
		line := strings.TrimSpace(scanner.Text())
		if line == "" {
			continue
		}
		u, err := url.Parse(line)
		if err != nil {
			log.Warningf("[Fetch] Error parsing %s, skipping\n", line)
			continue
		}
		urls = append(urls, u)
	}
	if err := scanner.Err(); err != nil {
		return nil, err
	}
	return urls, nil
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment