Skip to content

Instantly share code, notes, and snippets.

@dittos
Created February 6, 2013 15:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dittos/4723511 to your computer and use it in GitHub Desktop.
Save dittos/4723511 to your computer and use it in GitHub Desktop.
First take on Go.
package main
import (
	"exp/html"
	"fmt"
	"net/http"
	"strings"
)
// MatchFunc is a single rule: it reports whether the given HTML node
// satisfies that rule.
type MatchFunc func(*html.Node) bool
// Matcher is an ordered list of rules. Match accepts a node only when every
// rule in the list accepts it.
type Matcher []MatchFunc
// NewMatcher returns a pointer to an empty Matcher with room for one rule.
func NewMatcher() *Matcher {
	rules := make(Matcher, 0, 1)
	return &rules
}
// Match reports whether n satisfies every rule in m. Rules are evaluated in
// insertion order and evaluation stops at the first rule that rejects n.
// A Matcher with no rules matches every node.
func (m *Matcher) Match(n *html.Node) bool {
	rules := *m
	for i := range rules {
		if rules[i](n) {
			continue
		}
		return false
	}
	return true
}
// AddRule appends f to the end of the rule list held by m.
func (m *Matcher) AddRule(f MatchFunc) {
	extended := append(*m, f)
	*m = extended
}
// Tag adds a rule that accepts only element nodes whose Data equals tag, and
// returns m so that calls can be chained.
func (m *Matcher) Tag(tag string) *Matcher {
	isTag := func(node *html.Node) bool {
		return node.Type == html.ElementNode && node.Data == tag
	}
	m.AddRule(isTag)
	return m
}
// GetAttr returns the value of the first attribute of n whose key equals key,
// or the empty string when n has no such attribute.
func GetAttr(n *html.Node, key string) string {
	attrs := n.Attr
	for i := range attrs {
		if attrs[i].Key != key {
			continue
		}
		return attrs[i].Val
	}
	return ""
}
// Class adds a rule that accepts nodes whose "class" attribute contains class,
// and returns m so that calls can be chained.
//
// The HTML class attribute is a whitespace-separated list of names, so a node
// such as <td class="title hot"> satisfies Class("title"). A value that equals
// class as a whole is accepted as well, which keeps multi-name arguments such
// as Class("a b") (and the empty string) behaving exactly as before.
func (m *Matcher) Class(class string) *Matcher {
	m.AddRule(func(n *html.Node) bool {
		value := GetAttr(n, "class")
		if value == class {
			return true
		}
		// Compare against each individual class name in the attribute.
		for _, name := range strings.Fields(value) {
			if name == class {
				return true
			}
		}
		return false
	})
	return m
}
// FindNode searches the subtree rooted at n depth-first, visiting a node
// before its children and children in sibling order, and returns the first
// node that m matches. It returns nil when nothing in the subtree matches.
func (m *Matcher) FindNode(n *html.Node) *html.Node {
	if m.Match(n) {
		return n
	}
	child := n.FirstChild
	for child != nil {
		if found := m.FindNode(child); found != nil {
			return found
		}
		child = child.NextSibling
	}
	return nil
}
// FindNodes returns every node in the subtree rooted at n that m matches.
// Nodes are reported in depth-first order, a node before its descendants.
// The result is nil when nothing matches.
func (m *Matcher) FindNodes(n *html.Node) (res []*html.Node) {
	var visit func(cur *html.Node)
	visit = func(cur *html.Node) {
		if m.Match(cur) {
			res = append(res, cur)
		}
		for child := cur.FirstChild; child != nil; child = child.NextSibling {
			visit(child)
		}
	}
	visit(n)
	return res
}
// FlattenNode returns the concatenated Data of every text node in the subtree
// rooted at n, in document order. It returns "" when n is nil, so the result
// of a failed lookup can be passed in directly.
func FlattenNode(n *html.Node) string {
	if n == nil {
		return ""
	}
	return string(appendNodeText(nil, n))
}

// appendNodeText appends the text found in the subtree rooted at n to buf and
// returns the extended buffer. Sharing one buffer across the whole recursion
// avoids re-copying the accumulated string at every level. n must not be nil.
func appendNodeText(buf []byte, n *html.Node) []byte {
	if n.Type == html.TextNode {
		return append(buf, n.Data...)
	}
	for c := n.FirstChild; c != nil; c = c.NextSibling {
		buf = appendNodeText(buf, c)
	}
	return buf
}
// main downloads the board page, locates the table with class "boardList" and
// prints the title, link target and comment count of every row that has a
// title cell, skipping rows whose class attribute is exactly "notice".
//
// Any failure to fetch or parse the page is reported and ends the program
// instead of continuing with a nil response or document.
func main() {
	const boardURL = "http://www.ilbe.com/ilbe"

	resp, err := http.Get(boardURL)
	if err != nil {
		// resp is nil here; touching resp.Body would panic.
		fmt.Printf("fetching %s: %v\n", boardURL, err)
		return
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		fmt.Printf("fetching %s: unexpected status %s\n", boardURL, resp.Status)
		return
	}

	doc, err := html.Parse(resp.Body)
	if err != nil {
		fmt.Printf("parsing %s: %v\n", boardURL, err)
		return
	}

	table := NewMatcher().Tag("table").Class("boardList").FindNode(doc)
	if table == nil {
		return
	}

	rows := NewMatcher().Tag("tr").FindNodes(table)
	titleCellMatcher := NewMatcher().Tag("td").Class("title")
	for _, row := range rows {
		if GetAttr(row, "class") == "notice" {
			continue
		}
		titleCell := titleCellMatcher.FindNode(row)
		if titleCell == nil {
			continue
		}
		link := NewMatcher().Tag("a").FindNode(titleCell)
		if link == nil {
			// A title cell without a link has neither title nor URL to print.
			continue
		}
		title := FlattenNode(link)
		url := GetAttr(link, "href")

		// The comment counter is optional; commentCount stays 0 when the
		// span is missing.
		var commentCount int
		commentCountNode := NewMatcher().Tag("span").Class("replyAndTrackback").FindNode(titleCell)
		if commentCountNode != nil {
			// Best effort: the Sscanf error is deliberately ignored, leaving
			// commentCount at 0 when the text is not a decimal integer.
			fmt.Sscanf(FlattenNode(commentCountNode), "%d", &commentCount)
		}
		fmt.Printf("Title=%s\nURL=%s\nCommentCount=%d\n\n", title, url, commentCount)
	}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment