Created
February 6, 2013 15:58
-
-
Save dittos/4723511 to your computer and use it in GitHub Desktop.
First take on Go.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"fmt" | |
"net/http" | |
"exp/html" | |
) | |
type MatchFunc func(*html.Node) bool | |
type Matcher []MatchFunc | |
func NewMatcher() *Matcher { | |
m := new(Matcher) | |
*m = make(Matcher, 0, 1) | |
return m | |
} | |
func (m *Matcher) Match(n *html.Node) bool { | |
for _, f := range *m { | |
if !f(n) { | |
return false | |
} | |
} | |
return true | |
} | |
func (m *Matcher) AddRule(f MatchFunc) { | |
*m = append(*m, f) | |
} | |
func (m *Matcher) Tag(tag string) *Matcher { | |
m.AddRule(func(n *html.Node) bool { | |
if n.Type == html.ElementNode && n.Data == tag { | |
return true | |
} | |
return false | |
}) | |
return m | |
} | |
func GetAttr(n *html.Node, key string) string { | |
for _, attr := range n.Attr { | |
if attr.Key == key { | |
return attr.Val | |
} | |
} | |
return "" | |
} | |
func (m *Matcher) Class(class string) *Matcher { | |
m.AddRule(func(n *html.Node) bool { | |
if GetAttr(n, "class") == class { | |
return true | |
} | |
return false | |
}) | |
return m | |
} | |
func (m *Matcher) FindNode(n *html.Node) (*html.Node) { | |
if m.Match(n) { | |
return n | |
} | |
for c := n.FirstChild; c != nil; c = c.NextSibling { | |
res := m.FindNode(c) | |
if res != nil { | |
return res | |
} | |
} | |
return nil | |
} | |
func (m *Matcher) FindNodes(n *html.Node) (res []*html.Node) { | |
if m.Match(n) { | |
res = append(res, n) | |
} | |
for c := n.FirstChild; c != nil; c = c.NextSibling { | |
res = append(res, m.FindNodes(c)...) | |
} | |
return res | |
} | |
func FlattenNode(n *html.Node) string { | |
if n.Type == html.TextNode { | |
return n.Data | |
} | |
res := "" | |
for c := n.FirstChild; c != nil; c = c.NextSibling { | |
res += FlattenNode(c) | |
} | |
return res | |
} | |
func main() { | |
resp, err := http.Get("http://www.ilbe.com/ilbe") | |
if err != nil { | |
// ... | |
} | |
defer resp.Body.Close() | |
doc, err := html.Parse(resp.Body) | |
if err != nil { | |
// ... | |
} | |
table := NewMatcher().Tag("table").Class("boardList").FindNode(doc) | |
if table != nil { | |
rows := NewMatcher().Tag("tr").FindNodes(table) | |
titleCellMatcher := NewMatcher().Tag("td").Class("title") | |
for _, row := range rows { | |
if GetAttr(row, "class") == "notice" { | |
continue | |
} | |
titleCell := titleCellMatcher.FindNode(row) | |
if titleCell != nil { | |
link := NewMatcher().Tag("a").FindNode(titleCell) | |
title := FlattenNode(link) | |
url := GetAttr(link, "href") | |
commentCountNode := NewMatcher().Tag("span").Class("replyAndTrackback").FindNode(titleCell) | |
var commentCount int | |
fmt.Sscanf(FlattenNode(commentCountNode), "%d", &commentCount) | |
fmt.Printf("Title=%s\nURL=%s\nCommentCount=%d\n\n", title, url, commentCount) | |
} | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment