Created
October 23, 2011 04:47
-
-
Save tdtds/1306887 to your computer and use it in GitHub Desktop.
無償公開されているFate/Zero1を青空文庫形式に変換する
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# | |
# This command is obsolete; see https://github.com/tdtds/aozoragen instead.
# | |
#!/usr/bin/env ruby | |
# -*- coding: utf-8; -*- | |
# | |
# Usage: making a PDF for Kindle using the Aozora-Kindle service.
# % ruby fate-zero1_to_aozora > fate1.txt | |
# % curl --data-urlencode text@fate1.txt -d s=m -o fate1.pdf http://a2k.aill.org/download.cgi | |
# | |
# Converts the web edition of "Fate/Zero 1" into Aozora-style plain text.
#
# Sources are either given explicitly (URLs or paths of locally cached HTML
# files) or, when none are given, discovered from the links on the index page.
class FateZero1
  require 'open-uri'

  # Index page whose `article a` links point at the individual chapters.
  INDEX_URL = 'http://sai-zen-sen.jp/sa/fate-zero/works/'

  # Matches sources that must be fetched over the network ("scheme://...").
  REMOTE_SOURCE = %r{\A[A-Za-z][A-Za-z0-9+\-.]*://}.freeze

  # @param htmls [Array<String, URI>] chapter pages (URLs or local paths);
  #   when empty, the chapter list is scraped from INDEX_URL.
  def initialize(*htmls)
    @htmls = htmls
    return unless @htmls.empty?

    index_uri = URI(INDEX_URL)
    parse(read_source(index_uri, 'r')).search('article a').each do |anchor|
      href = anchor['href']
      next if href.nil? || href.empty?

      # merge resolves absolute paths, relative paths and full URLs alike.
      @htmls << index_uri.merge(href)
    end
  end

  # Walks every chapter and yields the converted text piece by piece.
  #
  # @yieldparam source [String, URI] the page the text came from
  #   (the literal '[cover]' for the cover page)
  # @yieldparam text [String] a chunk of converted text
  # @return [Enumerator] when no block is given
  def aozorize
    return enum_for(:aozorize) unless block_given?

    yield '[cover]', cover
    @htmls.each do |html|
      doc = parse(read_source(html, 'r:utf-8'))
      doc.search('section.book-page-spread').each do |section|
        section.children.each do |elem|
          case elem.name
          when 'hgroup'
            yield html, section_title(detag(elem))
          when 'div'
            if elem['class'] == 'pgroup'
              elem.search('p').each { |prg| yield html, " #{detag(prg)}\n" }
              yield html, "\n"
            else
              # Report unexpected markup so it can be handled later.
              $stderr.puts "#{elem.name}.#{elem['class']}"
            end
          when 'text'
            # Bare text nodes between elements are skipped silently.
          else
            $stderr.puts elem.name
          end
        end
        yield html, "#{page_break}\n"
      end
    end
  end

  # For debugging: saves every source page as NN.html in the current
  # directory. Files that already exist are left untouched.
  def save_cache
    @htmls.each_with_index do |source, i|
      $stderr.print source.to_s
      file = format('%02d.html', i)
      if File.exist?(file)
        $stderr.puts '...skip'
        next
      end
      # Download before creating the file so that a failed fetch does not
      # leave an empty cache file that later runs would skip.
      body = read_source(source, 'r')
      File.write(file, body)
      $stderr.puts " saved to #{file}."
    end
  end

  private

  # Reads a source completely. Remote sources go through open-uri; anything
  # else is treated as a local file path (File.open never spawns a process,
  # unlike Kernel#open with a leading "|").
  def read_source(source, mode)
    location = source.to_s
    if REMOTE_SOURCE.match?(location)
      URI.open(location, mode, &:read)
    else
      File.open(location, mode, &:read)
    end
  end

  # Parses markup with Nokogiri. The gem is loaded on first use so that the
  # cache-only path (save_cache with explicit sources) works without it.
  def parse(markup)
    require 'nokogiri'
    Nokogiri(markup)
  end

  # Text of the cover page, terminated by a page break.
  def cover
    <<~TEXT
      Fate/Zero 1 第四次聖杯戦争秘話
      虚淵玄
      #{page_break}
    TEXT
  end

  # Returns the plain text of +elem+, with the parentheses inside
  # <ruby><rp> rewritten to 《 》 so ruby readings are kept in the output.
  # NOTE: this rewrites the <rp> nodes of +elem+ in place.
  def detag(elem)
    elem.search('ruby rp').each do |rp|
      case rp.text
      when '(', '（'
        rp.content = '《'
      when ')', '）'
        rp.content = '》'
      end
    end
    # Node text drops all tags and decodes entities in one step.
    elem.text.strip
  end

  # Formats a section heading as its own line.
  def section_title(title)
    "#{title}\n"
  end

  # Page-break annotation.
  # NOTE(review): Aozora Bunko annotations are normally written with
  # full-width brackets (［＃改ページ］) - confirm the downstream converter
  # accepts this ASCII form.
  def page_break
    '[#改ページ]'
  end
end
# Script entry point: write the converted text to stdout and report each
# source on stderr the first time text from it is emitted.
if $PROGRAM_NAME == __FILE__
  # To work from local copies, first cache the pages with:
  #   FateZero1.new(*ARGV).save_cache
  last_source = ''
  FateZero1.new(*ARGV).aozorize do |source, chunk|
    $stderr.puts source unless source == last_source
    print chunk
    last_source = source
  end
end
# Local Variables: | |
# mode: ruby | |
# indent-tabs-mode: t | |
# tab-width: 3 | |
# ruby-indent-level: 3 | |
# End: |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
なぜか「嘘」「呑」「剥」が化けている。Unicode的な何かの罠か。