Skip to content

Instantly share code, notes, and snippets.

@cyrusn
Last active July 26, 2016 01:08
Show Gist options
  • Save cyrusn/a0504c017e6a88ecf625a8e8df33a22b to your computer and use it in GitHub Desktop.
Save cyrusn/a0504c017e6a88ecf625a8e8df33a22b to your computer and use it in GitHub Desktop.
Look up the Interest Code for each given job ID on the O*NET OnLine website (http://www.onetonline.org/).
package main
import (
	"fmt"
	"net/http"
	"strings"
	"sync"
	"time"

	"github.com/PuerkitoBio/goquery"
)
// urls lists example job IDs to look up. Despite the name, the entries are
// bare IDs rather than full URLs: parseDoc appends each one to the summary
// page address before fetching it.
var urls = []string{
"11-3061.00",
"11-9013.01",
"11-9013.02",
"17-1012.00",
"17-1022.00",
"17-2051.00",
"17-2071.00",
"19-4099.01",
"29-9012.00",
"37-2011.00",
"41-2011.00",
"45-2092.01",
"45-3011.00",
"47-2031.00",
"47-2111.00",
"47-2152.02",
"47-2221.00",
"47-3014.00",
"49-9095.00",
"51-1011.00",
"51-6031.00",
"51-9071.00",
}
// server hands fetched results to a consumer through a channel.
type server struct {
// response receives one oNetResult per requested ID; readURL closes it
// after every fetch goroutine has finished.
response chan oNetResult
}
// oNetResult is the data scraped from one summary page: the job ID that was
// requested, the page title with the ID prefix removed, and the interest code
// text found on the page.
type oNetResult struct {
	id, title, interest string
}
// main starts the background fetch of every ID in urls and prints each result
// as it arrives. The loop ends when the response channel is closed.
func main() {
	srv := newServer()
	for res := range srv.response {
		fmt.Println(res)
	}
}
// newServer creates a server whose response channel is buffered to hold one
// result per entry in urls, and starts readURL in a separate goroutine to
// fill it. The returned server's channel is closed by readURL when done.
func newServer() *server {
	srv := &server{
		response: make(chan oNetResult, len(urls)),
	}
	go srv.readURL(urls)
	return srv
}
// readURL fetches every ID in urls concurrently, one goroutine per ID, and
// sends each parsed result on s.response. It blocks until all fetches have
// completed and then closes s.response so that receivers stop ranging.
func (s *server) readURL(urls []string) {
	defer close(s.response)

	var pending sync.WaitGroup
	pending.Add(len(urls))

	// fetch takes the ID as an argument so each goroutine works on its own copy.
	fetch := func(id string) {
		defer pending.Done()
		s.response <- parseDoc(id)
	}
	for _, id := range urls {
		go fetch(id)
	}

	pending.Wait()
}
// oNetClient is the HTTP client shared by all summary-page requests. The
// timeout keeps a stalled connection from blocking a fetch goroutine, and
// with it the whole program, indefinitely.
var oNetClient = &http.Client{Timeout: 30 * time.Second}

// parseDoc downloads the summary page for the given job ID and extracts the
// job title and interest code from it.
//
// The returned title has the leading "<id> - " prefix removed when present.
// Either field is empty if the corresponding element is not found on the page.
//
// parseDoc panics if the request fails, if the server answers with a status
// other than 200 OK, or if the body cannot be parsed as HTML.
func parseDoc(id string) oNetResult {
	const summaryURL = "http://www.onetonline.org/link/summary/"

	resp, err := oNetClient.Get(summaryURL + id)
	if err != nil {
		panic(fmt.Errorf("fetching summary for %q: %v", id, err))
	}
	defer resp.Body.Close()

	// Reject error pages up front instead of scraping them as if they were
	// a real summary and silently returning empty fields.
	if resp.StatusCode != http.StatusOK {
		panic(fmt.Errorf("fetching summary for %q: unexpected status %s", id, resp.Status))
	}

	doc, err := goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		panic(fmt.Errorf("parsing summary for %q: %v", id, err))
	}

	titleQuery := "#allcontent #realcontent #content h2 span.titleb"
	interestQuery := "#allcontent #realcontent #content p:contains('Interest code:') b"
	titlePrefix := id + " - "

	return oNetResult{
		id:       id,
		title:    strings.TrimPrefix(getElement(doc, titleQuery), titlePrefix),
		interest: getElement(doc, interestQuery),
	}
}
// getElement returns the text content of the last element in doc that matches
// the selector queryString, or the empty string when nothing matches.
func getElement(doc *goquery.Document, queryString string) (result string) {
	matches := doc.Find(queryString)
	// When several elements match, the text of the final one is returned.
	return matches.Last().Text()
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment