Skip to content

Instantly share code, notes, and snippets.

@mono0x mono0x/parser.go
Created Jun 21, 2016

Embed
What would you like to do?
package main
import (
"errors"
"io"
"net/url"
"regexp"
"strconv"
"strings"
"time"
"golang.org/x/text/encoding/japanese"
"golang.org/x/text/transform"
"golang.org/x/text/width"
"github.com/PuerkitoBio/goquery"
)
// URLs on www.puroland.co.jp that the parsers in this file build links against.
const (
	// IndexPageURL is the mobile index page. It is also used as the prefix that
	// form actions and character-page hrefs are appended to.
	IndexPageURL = "http://www.puroland.co.jp/chara_gre/mobile/"
	// NextDayPageURLBase is the next-day page; ParseIndexPage appends a TCHK
	// query parameter to it.
	NextDayPageURLBase = "http://www.puroland.co.jp/chara_gre/chara_sentaku_nextday.asp"
)
// Regular expressions shared by the page parsers; compiled once at package level.
var (
	// DateRe matches a date written as <year>年<month>月<day>日, optionally
	// followed by a parenthesised day-of-week kanji. The year, month and day
	// digits are captured in that order.
	DateRe = regexp.MustCompile(`(\d+)年(\d+)月(\d+)日(?:\([日月火水木金土]\))?`)
	// ScheduleItemRe matches an entire string of the form "H:M-H:M text",
	// allowing leading whitespace. It captures the start hour, start minute,
	// end hour, end minute and the remaining text, in that order.
	ScheduleItemRe = regexp.MustCompile(`\A\s*(\d+):(\d+)-(\d+):(\d+)\s*(.+)\z`)
)
// IndexPage is the result returned by ParseIndexPage.
type IndexPage struct {
	// Date is the date extracted from the page header.
	Date time.Time
	// Published is set to true when the page contains a form, from which the
	// two URLs below are built.
	Published bool
	// Secret is set to true when a paragraph on the page contains the notice
	// that today's character information is not published.
	Secret bool
	// MenuPageURL is built from the first form's action and its input values.
	// It is only populated when Published is true.
	MenuPageURL string
	// NextDayPageURL is NextDayPageURLBase with the form's TCHK value as a
	// query parameter. It is only populated when Published is true.
	NextDayPageURL string
}
// MenuPage is the result returned by ParseMenuPage.
type MenuPage struct {
	// Items holds one entry per character link found on the page, in document order.
	Items []MenuPageItem
}
// MenuPageItem describes one character link on the menu page.
type MenuPageItem struct {
	// CharacterName is the text content of the link.
	CharacterName string
	// CharacterPageURL is IndexPageURL followed by the link's href attribute.
	CharacterPageURL string
}
// CharacterPage is the result returned by ParseCharacterPage.
type CharacterPage struct {
	// Date is the date extracted from the first header element of the page.
	Date time.Time
	// CharacterName is the text of the second header element of the page.
	CharacterName string
	// Items holds the schedule entries that matched ScheduleItemRe, in document order.
	Items []CharacterPageItem
}
// CharacterPageItem is a single schedule entry on a character page.
type CharacterPageItem struct {
	// StartAt is the page date advanced by the entry's start hour and minute.
	StartAt time.Time
	// EndAt is the page date advanced by the entry's end hour and minute.
	EndAt time.Time
	// Place is the text that follows the time range in the entry.
	Place string
}
// NextDayPage is the result returned by ParseNextDayPage.
type NextDayPage struct {
	// Date is the date extracted from the first ".newsTop3" element of the page.
	Date time.Time
	// Items holds one entry per selected table cell, in document order.
	Items []NextDayPageItem
}
// NextDayPageItem is a single entry on the next-day page.
type NextDayPageItem struct {
	// CharacterName is the text content of the table cell.
	CharacterName string
}
// GetSecretIndexPageURL returns IndexPageURL with a "para" query parameter
// whose value is date formatted as YYYYMMDD.
func GetSecretIndexPageURL(date time.Time) string {
	const layout = "20060102"
	query := "?para=" + date.Format(layout)
	return IndexPageURL + query
}
// ParseIndexPage parses the index page read from r.
//
// The input is decoded from Shift_JIS and parsed as HTML. The date is taken
// from the first `p[align='center'] font[size='-1']` element. An error is
// returned when the document cannot be built or no date can be extracted.
//
// The result takes one of three shapes:
//   - Secret is true when a <p> element contains the "not published" notice;
//   - only Date is set when the page contains no <form>;
//   - otherwise Published is true and MenuPageURL / NextDayPageURL are built
//     from the first form's action and its inputs.
func ParseIndexPage(r io.Reader) (*IndexPage, error) {
	decodedReader := transform.NewReader(r, japanese.ShiftJIS.NewDecoder())
	doc, err := goquery.NewDocumentFromReader(decodedReader)
	if err != nil {
		return nil, err
	}

	date, err := extractDate(doc.Find("p[align='center'] font[size='-1']").First().Text())
	if err != nil {
		return nil, err
	}

	// Stop scanning paragraphs as soon as the notice is found.
	secret := false
	doc.Find("p").EachWithBreak(func(_ int, s *goquery.Selection) bool {
		if strings.Contains(s.Text(), "本日のキャラクター情報は公開されておりません。P") {
			secret = true
			return false
		}
		return true
	})
	if secret {
		return &IndexPage{
			Date:   date,
			Secret: true,
		}, nil
	}

	form := doc.Find("form").First()
	if form.Length() == 0 {
		return &IndexPage{
			Date: date,
		}, nil
	}

	// Collect every input that carries both a name and a value attribute.
	values := url.Values{}
	form.Find("input").Each(func(_ int, s *goquery.Selection) {
		name, exists := s.Attr("name")
		if !exists {
			return
		}
		value, exists := s.Attr("value")
		if !exists {
			return
		}
		values.Add(name, value)
	})

	return &IndexPage{
		Date:        date,
		Published:   true,
		MenuPageURL: IndexPageURL + form.AttrOr("action", "") + "?" + values.Encode(),
		// Escape TCHK so that reserved characters in the attribute value cannot
		// corrupt the query string; the menu URL above is escaped by Encode.
		NextDayPageURL: NextDayPageURLBase + "?TCHK=" + url.QueryEscape(values.Get("TCHK")),
	}, nil
}
// ParseMenuPage parses the menu page read from r.
//
// The input is decoded from Shift_JIS and parsed as HTML. Every anchor whose
// href starts with "chara_sche.asp?" yields one item: the anchor text becomes
// the character name and the href is appended to IndexPageURL to form the
// character page URL. An error is returned when the document cannot be built.
func ParseMenuPage(r io.Reader) (*MenuPage, error) {
	doc, err := goquery.NewDocumentFromReader(
		transform.NewReader(r, japanese.ShiftJIS.NewDecoder()),
	)
	if err != nil {
		return nil, err
	}

	anchors := doc.Find("a[href^='chara_sche.asp?']")
	page := &MenuPage{
		Items: make([]MenuPageItem, 0, anchors.Length()),
	}
	anchors.Each(func(_ int, anchor *goquery.Selection) {
		href := anchor.AttrOr("href", "")
		page.Items = append(page.Items, MenuPageItem{
			CharacterName:    anchor.Text(),
			CharacterPageURL: IndexPageURL + href,
		})
	})
	return page, nil
}
// ParseCharacterPage parses a character schedule page read from r.
//
// The input is decoded from Shift_JIS and parsed as HTML. The elements matching
// `p[align='center'] font[size='-1']` act as headers: the first supplies the
// date and the second the character name. Each element matching
// `p[align='left'] font[size='-1']` whose text matches ScheduleItemRe yields
// one schedule item; elements that do not match are skipped.
//
// An error is returned when the document cannot be built or no date can be
// extracted from the first header.
func ParseCharacterPage(r io.Reader) (*CharacterPage, error) {
	decodedReader := transform.NewReader(r, japanese.ShiftJIS.NewDecoder())
	doc, err := goquery.NewDocumentFromReader(decodedReader)
	if err != nil {
		return nil, err
	}

	headers := doc.Find("p[align='center'] font[size='-1']")
	date, err := extractDate(headers.First().Text())
	if err != nil {
		return nil, err
	}
	name := headers.Eq(1).Text()

	sections := doc.Find("p[align='left'] font[size='-1']")
	items := make([]CharacterPageItem, 0, sections.Length())
	sections.Each(func(_ int, s *goquery.Selection) {
		// Fold width variants (e.g. full-width digits) to their canonical form,
		// then trim surrounding whitespace. ScheduleItemRe is anchored at the end
		// of the text and "." does not match a newline, so without trimming a
		// trailing line break would silently drop the entry and trailing spaces
		// would end up in Place.
		text := strings.TrimSpace(width.Fold.String(s.Text()))
		submatches := ScheduleItemRe.FindStringSubmatch(text)
		if len(submatches) == 0 {
			return
		}
		startAt, ok := clockOnDate(date, submatches[1], submatches[2])
		if !ok {
			return
		}
		endAt, ok := clockOnDate(date, submatches[3], submatches[4])
		if !ok {
			return
		}
		items = append(items, CharacterPageItem{
			StartAt: startAt,
			EndAt:   endAt,
			Place:   submatches[5],
		})
	})

	return &CharacterPage{
		CharacterName: name,
		Date:          date,
		Items:         items,
	}, nil
}

// clockOnDate returns date advanced by the given hour and minute, both supplied
// as decimal strings. The boolean is false when either string cannot be
// converted to an int.
func clockOnDate(date time.Time, hour, minute string) (time.Time, bool) {
	h, err := strconv.Atoi(hour)
	if err != nil {
		return time.Time{}, false
	}
	m, err := strconv.Atoi(minute)
	if err != nil {
		return time.Time{}, false
	}
	return date.Add(time.Duration(h)*time.Hour + time.Duration(m)*time.Minute), true
}
// ParseNextDayPage parses the next-day page read from r.
//
// The input is decoded from Shift_JIS and parsed as HTML. The date is taken
// from the first ".newsTop3" element, and every cell selected by
// "#newsWrap2 table tr:nth-child(2n) td" contributes one item whose
// CharacterName is the cell text. An error is returned when the document
// cannot be built or no date can be extracted.
func ParseNextDayPage(r io.Reader) (*NextDayPage, error) {
	doc, err := goquery.NewDocumentFromReader(
		transform.NewReader(r, japanese.ShiftJIS.NewDecoder()),
	)
	if err != nil {
		return nil, err
	}

	heading := doc.Find(".newsTop3").First().Text()
	date, err := extractDate(heading)
	if err != nil {
		return nil, err
	}

	cells := doc.Find("#newsWrap2 table tr:nth-child(2n) td")
	page := &NextDayPage{
		Date:  date,
		Items: make([]NextDayPageItem, 0, cells.Length()),
	}
	cells.Each(func(_ int, cell *goquery.Selection) {
		page.Items = append(page.Items, NextDayPageItem{CharacterName: cell.Text()})
	})
	return page, nil
}
// extractDate finds the first date matched by DateRe in s and returns midnight
// of that day in the Asia/Tokyo time zone.
//
// It returns an error when s contains no match, when a captured number cannot
// be converted to an int, or when the year/month/day combination is not a real
// calendar date.
func extractDate(s string) (time.Time, error) {
	submatches := DateRe.FindStringSubmatch(s)
	if len(submatches) == 0 {
		return time.Time{}, errors.New("date not found")
	}
	y, err := strconv.Atoi(submatches[1])
	if err != nil {
		return time.Time{}, err
	}
	m, err := strconv.Atoi(submatches[2])
	if err != nil {
		return time.Time{}, err
	}
	d, err := strconv.Atoi(submatches[3])
	if err != nil {
		return time.Time{}, err
	}

	loc, err := time.LoadLocation("Asia/Tokyo")
	if err != nil {
		// The tz database may be unavailable on the host. Fall back to a fixed
		// UTC+9 zone instead of failing every parse.
		// NOTE(review): assumes a fixed +09:00 offset is acceptable for the
		// dates these pages carry — confirm.
		loc = time.FixedZone("JST", 9*60*60)
	}

	date := time.Date(y, time.Month(m), d, 0, 0, 0, 0, loc)
	// time.Date normalises out-of-range values (month 13 rolls into the next
	// year), so a round-trip mismatch means the input was not a real date.
	if date.Year() != y || date.Month() != time.Month(m) || date.Day() != d {
		return time.Time{}, errors.New("date out of range")
	}
	return date, nil
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.