#!/usr/bin/env ruby
# encoding: utf-8

#
# Web Archiveからスクレイピングして小説家になろうの目次ページをnarou.rbのtoc.yamlと同じ形式で出力するやつ
#

require 'yaml'
require 'nokogiri'
require 'open-uri'


# Download the archived table-of-contents page and parse it into `doc`.
# If the download or the parse raises, the error is reported on stderr and
# `doc` falls back to a stub document, so `doc` is always defined afterwards.
begin
  url = "http://web.archive.org/web/20131127132412/http://ncode.syosetu.com/n7145bl/"
  # URI.open (rather than Kernel#open) is needed to fetch URLs on Ruby 3.0+;
  # the block form closes the stream once the body has been read.
  html = URI.open(url) { |io| io.read }
  doc = Nokogiri.HTML(html)
rescue StandardError => e
  doc = Nokogiri.HTML("<html><body><p>abort</p></body></html>")
  # No `ans` list exists at this point, so the failure is reported on stderr
  # instead of being appended to it (which raised NameError).
  warn "Nokogiri aborted: #{e.class}: #{e.message}"
end

# Extract the novel metadata and the episode list from `doc` and write them
# to toc.yaml (the layout mirrors narou.rb's toc.yaml).
#
# Each "subtitles" entry holds a 1-based index, the href with the Web Archive
# prefix stripped, the chapter name (only on the first episode after a chapter
# heading row, otherwise ""), the subtitle, and the publish / revision dates.
# The file is written once, after all rows were parsed, and only when at
# least one episode was found.
begin
  title = doc.xpath("//div[@class='novel_title']/div/a").inner_text
  author = doc.xpath("//div[@class='novel_writername']/a[@href]").inner_text
  toc_url = "http://ncode.syosetu.com/n7145bl/"
  story = doc.xpath("//div[@class='novel_ex']").inner_text
  ans = []
  chapter = ''
  change_chapter = false

  doc.xpath("//div[@class='novel_sublist']/table/tr").each do |node|
    if node.at(".//td[@class='chapter']")
      # Chapter heading row: remember the name for the next episode row.
      chapter = node.xpath(".//td[@class='chapter']").inner_text
      change_chapter = true
      next
    end

    nsa = node.at(".//td[@class='period_subtitle']/a[@href]")
    long_update = node.at(".//td[@class='long_update']")
    # A row without a subtitle link or a date cell cannot yield an entry;
    # skip it instead of aborting the whole scrape on a nil.
    next unless nsa && long_update

    # Drop the Web Archive prefix so only the site-relative path remains.
    href = nsa.get_attribute("href").sub(%r{/web/20131127132412/http://ncode\.syosetu\.com(.*$)}, '\1')
    subtitle = nsa.inner_text

    span = long_update.at(".//span[@title]")
    if span
      # The span's title attribute holds the revision timestamp followed by
      # " 改稿"; strip that suffix. The cell text holds the original date.
      subupdate = span.get_attribute("title").sub(/(.*) 改稿/, '\1')
      subdate = long_update.inner_text.sub(/^\r\n(.*)\r\n\r\n(改)\r\n/, '\1')
    else
      # No revision marker: the update date equals the publish date.
      subdate = long_update.inner_text.sub(/\r\n(.*)\r\n/, '\1')
      subupdate = subdate
    end

    ans.push({
      "index" => ans.size + 1,
      "href" => href,
      # The chapter name is attached only to the first episode of a chapter.
      "chapter" => (change_chapter ? chapter : ""),
      "subchapter" => "",
      "subtitle" => subtitle,
      "file_subtitle" => subtitle,
      "subdate" => subdate,
      "subupdate" => subupdate,
      "download_time" => Time.now
    })
    change_chapter = false
  end

  if ans.empty?
    # Nothing was scraped (e.g. the fallback document); leave any existing
    # toc.yaml untouched.
    warn "no subtitles found; toc.yaml not written"
  else
    File.open("toc.yaml", 'w') do |file|
      YAML.dump({ "title" => title, "author" => author, "toc_url" => toc_url, "story" => story, "subtitles" => ans }, file)
    end
  end
rescue StandardError => e
  p "something wrong"
  # Surface the actual cause so a failed scrape can be diagnosed.
  warn "#{e.class}: #{e.message}"
end