Created
June 21, 2016 14:12
-
-
Save mono0x/ee4fa555954e4fb148a216c1e8369a63 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"errors" | |
"io" | |
"net/url" | |
"regexp" | |
"strconv" | |
"strings" | |
"time" | |
"golang.org/x/text/encoding/japanese" | |
"golang.org/x/text/transform" | |
"golang.org/x/text/width" | |
"github.com/PuerkitoBio/goquery" | |
) | |
// URLs on puroland.co.jp that the parsers in this file build links against.
const (
	// IndexPageURL is the mobile index page. It is also the base that form
	// actions and relative links found on the scraped pages are appended to.
	IndexPageURL = "http://www.puroland.co.jp/chara_gre/mobile/"
	// NextDayPageURLBase is the next-day page URL without its query string;
	// ParseIndexPage appends a "TCHK" query parameter to it.
	NextDayPageURLBase = "http://www.puroland.co.jp/chara_gre/chara_sentaku_nextday.asp"
)
// Patterns used to pull structured values out of page text. They are compiled
// once at package initialisation.
var (
	// DateRe matches a Japanese calendar date such as "2016年6月21日(火)"
	// anywhere in a string. Submatches 1, 2 and 3 are the year, month and day
	// digits; the parenthesised weekday is optional and is not captured.
	DateRe = regexp.MustCompile(`(\d+)年(\d+)月(\d+)日(?:\([日月火水木金土]\))?`)
	// ScheduleItemRe matches an entire string of the form "HH:MM-HH:MM place".
	// Submatches 1-4 are the start hour, start minute, end hour and end
	// minute; submatch 5 is the remaining text on the line.
	ScheduleItemRe = regexp.MustCompile(`\A\s*(\d+):(\d+)-(\d+):(\d+)\s*(.+)\z`)
)
// IndexPage is the result of ParseIndexPage.
type IndexPage struct {
	// Date is the date printed in the page header, at midnight.
	Date time.Time
	// Published is true when the page contained a form, in which case
	// MenuPageURL and NextDayPageURL are filled in.
	Published bool
	// Secret is true when the page showed the notice that today's character
	// information has not been made public. No other field except Date is set.
	Secret bool
	// MenuPageURL is IndexPageURL followed by the form's action and its
	// named inputs encoded as a query string.
	MenuPageURL string
	// NextDayPageURL is NextDayPageURLBase with the form's TCHK value as query.
	NextDayPageURL string
}
// MenuPage is the result of ParseMenuPage.
type MenuPage struct {
	// Items holds one entry per character link, in document order.
	Items []MenuPageItem
}

// MenuPageItem is a single character link on the menu page.
type MenuPageItem struct {
	// CharacterName is the text of the link.
	CharacterName string
	// CharacterPageURL is IndexPageURL followed by the link's href.
	CharacterPageURL string
}
// CharacterPage is the result of ParseCharacterPage.
type CharacterPage struct {
	// Date is the date printed in the first header of the page, at midnight.
	Date time.Time
	// CharacterName is the text of the second header of the page.
	CharacterName string
	// Items holds the schedule lines that could be parsed, in document order.
	Items []CharacterPageItem
}

// CharacterPageItem is one "HH:MM-HH:MM place" schedule line.
type CharacterPageItem struct {
	// StartAt is the page date plus the line's start hour and minute.
	StartAt time.Time
	// EndAt is the page date plus the line's end hour and minute.
	EndAt time.Time
	// Place is the text that follows the time range on the line.
	Place string
}
// NextDayPage is the result of ParseNextDayPage.
type NextDayPage struct {
	// Date is the date printed on the page, at midnight.
	Date time.Time
	// Items holds one entry per selected table cell, in document order.
	Items []NextDayPageItem
}

// NextDayPageItem is a single entry on the next-day page.
type NextDayPageItem struct {
	// CharacterName is the text of the table cell.
	CharacterName string
}
func GetSecretIndexPageURL(date time.Time) string { | |
return IndexPageURL + "?para=" + date.Format("20060102") | |
} | |
func ParseIndexPage(r io.Reader) (*IndexPage, error) { | |
decodedReader := transform.NewReader(r, japanese.ShiftJIS.NewDecoder()) | |
doc, err := goquery.NewDocumentFromReader(decodedReader) | |
if err != nil { | |
return nil, err | |
} | |
date, err := extractDate(doc.Find("p[align='center'] font[size='-1']").First().Text()) | |
if err != nil { | |
return nil, err | |
} | |
secret := false | |
doc.Find("p").EachWithBreak(func(_ int, s *goquery.Selection) bool { | |
if strings.Contains(s.Text(), "本日のキャラクター情報は公開されておりません。P") { | |
secret = true | |
return false | |
} | |
return true | |
}) | |
if secret { | |
return &IndexPage{ | |
Date: date, | |
Secret: true, | |
}, nil | |
} | |
forms := doc.Find("form") | |
if forms.Length() == 0 { | |
return &IndexPage{ | |
Date: date, | |
}, nil | |
} | |
form := forms.First() | |
values := url.Values{} | |
form.First().Find("input").Each(func(_ int, s *goquery.Selection) { | |
name, exists := s.Attr("name") | |
if !exists { | |
return | |
} | |
value, exists := s.Attr("value") | |
if !exists { | |
return | |
} | |
values.Add(name, value) | |
}) | |
return &IndexPage{ | |
Date: date, | |
Published: true, | |
MenuPageURL: IndexPageURL + form.AttrOr("action", "") + "?" + values.Encode(), | |
NextDayPageURL: NextDayPageURLBase + "?TCHK=" + values.Get("TCHK"), | |
}, nil | |
} | |
func ParseMenuPage(r io.Reader) (*MenuPage, error) { | |
decodedReader := transform.NewReader(r, japanese.ShiftJIS.NewDecoder()) | |
doc, err := goquery.NewDocumentFromReader(decodedReader) | |
if err != nil { | |
return nil, err | |
} | |
links := doc.Find("a[href^='chara_sche.asp?']") | |
items := make([]MenuPageItem, 0, links.Size()) | |
links.Each(func(_ int, s *goquery.Selection) { | |
items = append(items, MenuPageItem{ | |
CharacterName: s.Text(), | |
CharacterPageURL: IndexPageURL + s.AttrOr("href", ""), | |
}) | |
}) | |
return &MenuPage{ | |
Items: items, | |
}, nil | |
} | |
func ParseCharacterPage(r io.Reader) (*CharacterPage, error) { | |
decodedReader := transform.NewReader(r, japanese.ShiftJIS.NewDecoder()) | |
doc, err := goquery.NewDocumentFromReader(decodedReader) | |
if err != nil { | |
return nil, err | |
} | |
headers := doc.Find("p[align='center'] font[size='-1']") | |
date, err := extractDate(headers.First().Text()) | |
if err != nil { | |
return nil, err | |
} | |
name := headers.Eq(1).Text() | |
sections := doc.Find("p[align='left'] font[size='-1']") | |
items := make([]CharacterPageItem, 0, sections.Size()) | |
sections.Each(func(i int, s *goquery.Selection) { | |
submatches := ScheduleItemRe.FindStringSubmatch(width.Fold.String(s.Text())) | |
if len(submatches) == 0 { | |
return | |
} | |
var startAt, endAt time.Time | |
var place string | |
{ | |
h, err := strconv.Atoi(submatches[1]) | |
if err != nil { | |
return | |
} | |
m, err := strconv.Atoi(submatches[2]) | |
if err != nil { | |
return | |
} | |
startAt = date.Add(time.Duration(h)*time.Hour + time.Duration(m)*time.Minute) | |
} | |
{ | |
h, err := strconv.Atoi(submatches[3]) | |
if err != nil { | |
return | |
} | |
m, err := strconv.Atoi(submatches[4]) | |
if err != nil { | |
return | |
} | |
endAt = date.Add(time.Duration(h)*time.Hour + time.Duration(m)*time.Minute) | |
} | |
place = submatches[5] | |
items = append(items, CharacterPageItem{ | |
StartAt: startAt, | |
EndAt: endAt, | |
Place: place, | |
}) | |
}) | |
return &CharacterPage{ | |
CharacterName: name, | |
Date: date, | |
Items: items, | |
}, nil | |
} | |
func ParseNextDayPage(r io.Reader) (*NextDayPage, error) { | |
decodedReader := transform.NewReader(r, japanese.ShiftJIS.NewDecoder()) | |
doc, err := goquery.NewDocumentFromReader(decodedReader) | |
if err != nil { | |
return nil, err | |
} | |
date, err := extractDate(doc.Find(".newsTop3").First().Text()) | |
if err != nil { | |
return nil, err | |
} | |
cells := doc.Find("#newsWrap2 table tr:nth-child(2n) td") | |
items := make([]NextDayPageItem, 0, cells.Size()) | |
cells.Each(func(_ int, s *goquery.Selection) { | |
items = append(items, NextDayPageItem{ | |
CharacterName: s.Text(), | |
}) | |
}) | |
return &NextDayPage{ | |
Date: date, | |
Items: items, | |
}, nil | |
} | |
func extractDate(s string) (time.Time, error) { | |
submatches := DateRe.FindStringSubmatch(s) | |
if len(submatches) == 0 { | |
return time.Time{}, errors.New("date not found") | |
} | |
y, err := strconv.Atoi(submatches[1]) | |
if err != nil { | |
return time.Time{}, err | |
} | |
m, err := strconv.Atoi(submatches[2]) | |
if err != nil { | |
return time.Time{}, err | |
} | |
d, err := strconv.Atoi(submatches[3]) | |
if err != nil { | |
return time.Time{}, err | |
} | |
loc, err := time.LoadLocation("Asia/Tokyo") | |
if err != nil { | |
return time.Time{}, err | |
} | |
return time.Date(y, time.Month(m), d, 0, 0, 0, 0, loc), nil | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment