Skip to content

Instantly share code, notes, and snippets.

@akiraak
Created May 5, 2017 18:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save akiraak/c3b40726779921ece2589bc6b21b648d to your computer and use it in GitHub Desktop.
Save akiraak/c3b40726779921ece2589bc6b21b648d to your computer and use it in GitHub Desktop.
package main
import (
"fmt"
"github.com/PuerkitoBio/goquery"
"io/ioutil"
"strconv"
"strings"
"time"
)
// ImageInfo describes one image rendition attached to a book: where it can be
// fetched from and its integer dimensions.
//
// The field order (url, width, height) is relied on by positional composite
// literals, so it must not be rearranged.
type ImageInfo struct {
	url           string // text of the rendition's URL element
	width, height int    // dimensions as parsed integers; 0 when absent or unparsable
}
// BookInfo collects the values extracted from a single Item element of a
// response document.
type BookInfo struct {
	// asin is the text of the ASIN element(s) found under the item.
	asin string

	// Text of the like-named elements under the item's first ItemAttributes
	// node; each is left empty when the item has no ItemAttributes.
	title, binding, author, publisher, publicationDate string

	// images maps an image label ("SmallImage", "MediumImage", "LargeImage")
	// to the rendition found under that label.
	images map[string]ImageInfo
}
// parseXmls parses every document in xmls, collects one BookInfo per Item
// element found, and sends the combined slice on result.
//
// Exactly one value is sent per call, even when no books were found, so a
// caller that starts N goroutines can receive exactly N times. A document that
// goquery fails to parse is skipped rather than aborting the whole batch.
//
// For each item, an entry is stored for all three image labels; a label that
// is missing from the item yields an ImageInfo with an empty URL and zero
// dimensions.
func parseXmls(result chan []BookInfo, xmls []string) {
	// The image renditions looked up for every item; built once, not per item.
	imageLabels := []string{
		"SmallImage",
		"MediumImage",
		"LargeImage",
	}

	bookInfos := []BookInfo{}
	for _, xml := range xmls {
		dom, err := goquery.NewDocumentFromReader(strings.NewReader(xml))
		if err != nil {
			// dom is nil on failure; skipping avoids a nil dereference and
			// still lets the remaining documents be processed.
			continue
		}
		dom.Find("Item").Each(func(_ int, item *goquery.Selection) {
			bookInfo := BookInfo{asin: item.Find("ASIN").Text()}

			attributes := item.Find("ItemAttributes").First()
			if attributes.Length() > 0 {
				bookInfo.title = attributes.Find("Title").Text()
				bookInfo.binding = attributes.Find("Binding").Text()
				bookInfo.author = attributes.Find("Author").Text()
				bookInfo.publisher = attributes.Find("Publisher").Text()
				bookInfo.publicationDate = attributes.Find("PublicationDate").Text()
			}

			images := make(map[string]ImageInfo, len(imageLabels))
			for _, imageLabel := range imageLabels {
				imageNode := item.Find(imageLabel).First()
				// Atoi errors are deliberately ignored: a missing or
				// non-numeric dimension is recorded as 0.
				width, _ := strconv.Atoi(imageNode.Find("Width").Text())
				height, _ := strconv.Atoi(imageNode.Find("Height").Text())
				images[imageLabel] = ImageInfo{
					url:    imageNode.Find("URL").Text(),
					width:  width,
					height: height,
				}
			}
			bookInfo.images = images

			bookInfos = append(bookInfos, bookInfo)
		})
	}
	result <- bookInfos
}
// getXmls loads xmls/0.xml, xmls/10.xml, ..., xmls/1440.xml (paths relative to
// the working directory) and returns their contents in that order.
//
// Read errors are discarded: whatever bytes ReadFile returned (none for a file
// that cannot be opened) are still appended, so the result always has one
// entry per attempted path.
func getXmls() []string {
	const (
		lastIndex = 1440 // highest file number, inclusive
		step      = 10   // gap between consecutive file numbers
	)

	contents := []string{}
	for n := 0; n <= lastIndex; n += step {
		data, _ := ioutil.ReadFile(fmt.Sprintf("xmls/%d.xml", n))
		contents = append(contents, string(data))
	}
	return contents
}
// divideXmls splits xmls into num contiguous chunks, preserving order, so each
// chunk can be handed to a separate worker.
//
// Chunk sizes differ by at most one: when len(xmls) is not a multiple of num,
// the first len(xmls)%num chunks receive one extra element. When num exceeds
// len(xmls) the trailing chunks are empty. A num below 1 is treated as 1, so
// the call never divides by zero and never drops input.
//
// The returned chunks are views into xmls, not copies. Their capacity is
// clipped so that appending to one chunk cannot overwrite its neighbour.
func divideXmls(xmls []string, num int) [][]string {
	if num < 1 {
		num = 1
	}
	size, extra := len(xmls)/num, len(xmls)%num

	result := make([][]string, 0, num)
	start := 0
	for i := 0; i < num; i++ {
		end := start + size
		if i < extra {
			// Spread the remainder over the leading chunks instead of piling
			// it all onto the last one.
			end++
		}
		result = append(result, xmls[start:end:end])
		start = end
	}
	return result
}
// main loads the XML documents, parses them concurrently in four goroutines,
// and prints the document count, the book count and the elapsed parse time.
func main() {
	allXmls := getXmls()
	chunks := divideXmls(allXmls, 4)

	// The timer covers only the parsing phase, not file loading or splitting.
	begin := time.Now()
	results := make(chan []BookInfo)
	for i := range chunks {
		go parseXmls(results, chunks[i])
	}

	// Every worker sends exactly one slice, so receive once per chunk.
	books := []BookInfo{}
	for received := 0; received < len(chunks); received++ {
		batch := <-results
		books = append(books, batch...)
	}
	elapsed := time.Since(begin)

	fmt.Printf("xml数: %d\n", len(allXmls))
	fmt.Printf("book数: %d\n", len(books))
	fmt.Printf("parse時間: %f秒\n", elapsed.Seconds())
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment