Skip to content

Instantly share code, notes, and snippets.

@akiraak
Last active May 5, 2017 18:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save akiraak/cec2e0acc7a127220b602c91515b18e1 to your computer and use it in GitHub Desktop.
Save akiraak/cec2e0acc7a127220b602c91515b18e1 to your computer and use it in GitHub Desktop.
package main
import (
"fmt"
"github.com/PuerkitoBio/goquery"
"io/ioutil"
"strconv"
"strings"
"time"
)
type ImageInfo struct {
url string
width int
height int
}
type BookInfo struct {
asin string
title string
binding string
author string
publisher string
publicationDate string
images map[string]ImageInfo
}
func parseXmls(xmls []string) []BookInfo {
bookInfos := []BookInfo{}
for _, xml := range xmls {
dom, _ := goquery.NewDocumentFromReader(strings.NewReader(xml))
dom.Find("Item").Each(func(_ int, item *goquery.Selection) {
bookInfo := BookInfo{}
bookInfo.asin = item.Find("ASIN").Text()
attributes := item.Find("ItemAttributes").First()
if attributes.Length() > 0 {
bookInfo.title = attributes.Find("Title").Text()
bookInfo.binding = attributes.Find("Binding").Text()
bookInfo.author = attributes.Find("Author").Text()
bookInfo.publisher = attributes.Find("Publisher").Text()
bookInfo.publicationDate = attributes.Find("PublicationDate").Text()
}
imageLabels := []string{
"SmallImage",
"MediumImage",
"LargeImage",
}
images := map[string]ImageInfo{}
for _, imageLabel := range imageLabels {
xml := item.Find(imageLabel).First()
url := xml.Find("URL").Text()
width, _ := strconv.Atoi(xml.Find("Height").Text())
height, _ := strconv.Atoi(xml.Find("Width").Text())
image := ImageInfo{url, width, height}
images[imageLabel] = image
}
bookInfo.images = images
bookInfos = append(bookInfos, bookInfo)
})
}
return bookInfos
}
func getXmls() []string {
xmls := []string{}
for i := 0; i <= 1440; i += 10 {
path := fmt.Sprintf("xmls/%d.xml", i)
xml, _ := ioutil.ReadFile(path)
xmls = append(xmls, string(xml))
}
return xmls
}
func main() {
xmls := getXmls()
start := time.Now()
bookInfos := parseXmls(xmls)
end := time.Now()
fmt.Printf("xml数: %d\n", len(xmls))
fmt.Printf("book数: %d\n", len(bookInfos))
fmt.Printf("parse時間: %f秒\n", (end.Sub(start)).Seconds())
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment