Last active
July 26, 2016 01:08
-
-
Save cyrusn/a0504c017e6a88ecf625a8e8df33a22b to your computer and use it in GitHub Desktop.
search the Interest Code for given job_ID through O'net online website "http://www.onetonline.org/"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"fmt" | |
"strings" | |
"sync" | |
"github.com/PuerkitoBio/goquery" | |
) | |
// some ids as examples | |
var urls = []string{ | |
"11-3061.00", | |
"11-9013.01", | |
"11-9013.02", | |
"17-1012.00", | |
"17-1022.00", | |
"17-2051.00", | |
"17-2071.00", | |
"19-4099.01", | |
"29-9012.00", | |
"37-2011.00", | |
"41-2011.00", | |
"45-2092.01", | |
"45-3011.00", | |
"47-2031.00", | |
"47-2111.00", | |
"47-2152.02", | |
"47-2221.00", | |
"47-3014.00", | |
"49-9095.00", | |
"51-1011.00", | |
"51-6031.00", | |
"51-9071.00", | |
} | |
type server struct { | |
response chan oNetResult | |
} | |
type oNetResult struct { | |
id string | |
title string | |
interest string | |
} | |
func main() { | |
// var urls = data.Urls | |
var s = newServer() | |
for i := range s.response { | |
fmt.Println(i) | |
} | |
} | |
func newServer() *server { | |
s := server{ | |
response: make(chan oNetResult, len(urls)), | |
} | |
go s.readURL(urls) | |
return &s | |
} | |
func (s *server) readURL(urls []string) { | |
defer close(s.response) | |
var wg sync.WaitGroup | |
for _, url := range urls { | |
wg.Add(1) | |
go func(url string) { | |
defer wg.Done() | |
s.response <- parseDoc(url) | |
}(url) | |
} | |
wg.Wait() | |
} | |
func parseDoc(id string) oNetResult { | |
doc, err := goquery.NewDocument("http://www.onetonline.org/link/summary/" + id) | |
if err != nil { | |
panic(err) | |
} | |
titleQuery := "#allcontent #realcontent #content h2 span.titleb" | |
interestQuery := "#allcontent #realcontent #content p:contains('Interest code:') b" | |
titlePrefix := id + " - " | |
return oNetResult{ | |
id: id, | |
title: strings.TrimPrefix(getElement(doc, titleQuery), titlePrefix), | |
interest: getElement(doc, interestQuery), | |
} | |
} | |
func getElement(doc *goquery.Document, queryString string) (result string) { | |
doc.Find(queryString).Each( | |
func(i int, s *goquery.Selection) { | |
// For each item found, get the band and title | |
result = s.Text() | |
}) | |
return result | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment