Created
April 7, 2016 15:13
-
-
Save KeisukeUtsumi/673b677aca57147eaae3a0589359325b to your computer and use it in GitHub Desktop.
最低賃金を取得するだけの雑実装
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"bufio" | |
"fmt" | |
"io" | |
"net/http" | |
"strconv" | |
"strings" | |
"github.com/PuerkitoBio/goquery" | |
"golang.org/x/text/encoding/japanese" | |
"golang.org/x/text/transform" | |
) | |
// URL is the address of the page whose HTML is fetched.
const URL string = "http://www.mhlw.go.jp/stf/seisakunitsuite/bunya/koyou_roudou/roudoukijun/minimumichiran/" | |
func main() { | |
fmt.Println("最低賃金取得するよ") | |
fmt.Println("とりあえずホームページから取得します") | |
//body := getHomePageHTML() | |
//fmt.Println(body) | |
document, err := goquery.NewDocumentFromReader(getHomePageHTML()) | |
if err != nil { | |
panic(fmt.Errorf("なんかエラーでた[%v]", err)) | |
} | |
var areas []*Area | |
// ほしいのは一個目のTBODYって決め打ち | |
document.Find("tbody").First().Find("tr").EachWithBreak(func(_ int, tr *goquery.Selection) bool { | |
// ヘッダ行は3カラムになってるので無視できる | |
if tr.Children().Length() == 4 { | |
a := &Area{} | |
var c int | |
tr.Find("td").EachWithBreak(func(_ int, td *goquery.Selection) bool { | |
c++ | |
switch c { | |
case 1: | |
// 都道府県名 | |
a.Name = f(td.Text()) | |
case 2: | |
s, e := strconv.Atoi(f(td.Text())) | |
if e != nil { | |
panic(fmt.Errorf("パースミスってるぽよ %v", e)) | |
} | |
a.PriceWithTax = s | |
case 3: | |
s, e := strconv.Atoi(f(td.Text())) | |
if e != nil { | |
panic(fmt.Errorf("パースミスってるぽよ %v", e)) | |
} | |
a.Price = s | |
case 4: | |
// 4カラム目は不要 | |
return false | |
} | |
return true | |
}) | |
areas = append(areas, a) | |
} | |
if len(areas) >= 47 { | |
// 47個集まったら終わってよし | |
return false | |
} | |
return true | |
}) | |
for _, area := range areas { | |
fmt.Printf("都道府県:%s\t最低賃金:%d(%d)円\n", area.Name, area.PriceWithTax, area.Price) | |
} | |
} | |
// Area holds the figures collected for a single prefecture.
type Area struct {
	// Name is the prefecture name.
	Name string
	// Price is filled from the third table column and PriceWithTax from the
	// second one by main; both are printed as yen amounts.
	Price, PriceWithTax int
}
func getHomePageHTML() io.Reader { | |
c := &http.Client{} | |
request, err := http.NewRequest("GET", URL, nil) | |
if err != nil { | |
panic(fmt.Errorf("リクエスト作れなかったぽよ err[%v]", err)) | |
} | |
response, err := c.Do(request) | |
if err != nil { | |
panic(fmt.Errorf("リクエストのエラーぽよ err[%v]", err)) | |
} | |
defer func() { | |
response.Body.Close() | |
}() | |
if response.StatusCode != http.StatusOK { | |
panic(fmt.Errorf("200じゃないレスポンスぽよ err[%v]", err)) | |
} | |
body, err := sjis2utf8(response.Body) | |
if err != nil { | |
panic(fmt.Errorf("body読み込めなかったぽよ err[%v]", err)) | |
} | |
return body | |
} | |
func sjis2utf8(in io.Reader) (io.Reader, error) { | |
scan := bufio.NewScanner(transform.NewReader(in, japanese.ShiftJIS.NewDecoder())) | |
var response string | |
for scan.Scan() { | |
response = response + scan.Text() + "\n" | |
} | |
if err := scan.Err(); err != nil { | |
return nil, err | |
} | |
return strings.NewReader(response), nil | |
} | |
// f cleans up the text of a table cell: it deletes every occurrence of the
// space and parenthesis characters listed below and then strips leading and
// trailing whitespace with strings.TrimSpace.
//
// NOTE(review): several literals below look identical in this rendering
// (three " ", two "(", two ")"). They presumably target distinct code points,
// such as full-width variants, that cannot be told apart here. Confirm in an
// editor that reveals hidden Unicode characters before changing them.
func f(s string) string { | |
s = strings.Replace(s, " ", "", -1) | |
s = strings.Replace(s, " ", "", -1) | |
s = strings.Replace(s, " ", "", -1) | |
s = strings.Replace(s, "(", "", -1) | |
s = strings.Replace(s, "(", "", -1) | |
s = strings.Replace(s, ")", "", -1) | |
s = strings.Replace(s, ")", "", -1) | |
// Remove any remaining surrounding whitespace (tabs, newlines, ...).
s = strings.TrimSpace(s) | |
return s | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment