Skip to content

Instantly share code, notes, and snippets.

@udzura
Created December 21, 2012 06:14
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save udzura/4350990 to your computer and use it in GitHub Desktop.
Save udzura/4350990 to your computer and use it in GitHub Desktop.
Suruga System Utility Script - Project NA00S3
# Gem dependencies for the scraper script and the EPUB build.
#
# The explicit HTTPS URL replaces the deprecated `:rubygems` symbol
# shorthand, which Bundler resolves to a plain-HTTP endpoint.
source 'https://rubygems.org'

# Used by the scraper to parse the downloaded HTML.
gem 'nokogiri'
# gem 'pry'

# Build-time tools; the Rakefile shells out to the `maliq` commands.
group :build do
  gem 'maliq'
  gem 'liquid'
end
# -*- coding: utf-8 -*-
# Licensed under MIT
require 'nokogiri'
# require 'pry'
require 'open-uri'
require 'fileutils'
require 'erb'
require 'date'
module SurugaUtils
  # Downloads one page of the web novel, extracts its paragraphs and renders
  # them through TEMPLATE into a Markdown file.
  class NA00S3
    # Mixed in so Nokogiri::XML constants (Element) resolve unqualified here.
    include Nokogiri::XML

    attr_reader :doc, :index

    # uri   - URL of the page to download and parse.
    # index - page number; used in the output file name and the headings.
    #
    # Raises whatever open-uri raises when the download fails.
    def initialize(uri="http://nareru-se.dengeki.com/webnovel/?pagenum=1", index=1)
      # Kernel#open no longer accepts URLs on Ruby 3.0+; opening through the
      # parsed URI object works with every open-uri version, and the block
      # form closes the underlying handle once the body has been read.
      source = URI.parse(uri).open { |io| io.read }
      @doc = Nokogiri::HTML(source)
      @index = index
    end

    # Returns an Array of Strings, one per paragraph. Runs of nodes between
    # <br> elements are merged into a single paragraph; each node's text is
    # stripped before joining.
    def to_readable_paragraphs
      main_nodes.
        # A nil chunk key drops the element, so <br> acts as a separator.
        chunk { |node| line_break?(node) ? nil : true }.
        map { |_key, parts| parts.map { |node| node.text.strip }.join }
    end

    # Renders this page through TEMPLATE and writes it to
    # File.join(dir, file_name % index), creating dir (including missing
    # parents) when needed. Logs the generated path to STDERR.
    #
    # dir       - output directory.
    # file_name - format string receiving the page index.
    def generate_build_files(dir='./', file_name="nareru-se-%02d.md")
      FileUtils.mkdir_p dir
      path = File.join(dir, file_name % index)
      # `body` is consumed by TEMPLATE through the binding. Rendering happens
      # before the file is opened so a failure cannot leave a truncated file.
      body = to_readable_paragraphs.join("\n\n")
      rendered = ERB.new(TEMPLATE).result(binding)
      File.open(path, 'w') { |out| out.write rendered }
      STDERR.puts "generated #{path}"
    end

    # Child nodes of every paragraph directly under the novel container.
    def main_nodes
      doc.css("#main_frame .novel > p").children
    end

    # Longest `alt` text among the images in the left frame, or nil when no
    # image carries an alt attribute.
    def title
      doc.css("#main_frame > #left_frame > img").
        map { |img| img['alt'] }.
        compact.
        max_by(&:length)
    end

    private

    # True when node is a <br> element.
    def line_break?(node)
      node.is_a?(Element) && node.name == 'br'
    end
  end

  # ERB source for the generated Markdown: YAML front matter followed by the
  # headings and the page body. It is evaluated with the binding of
  # NA00S3#generate_build_files, so `title`, `index` and the local `body` are
  # in scope. The gsub removes the four-space source indentation.
  TEMPLATE = <<-EOT.gsub(/^ {4}/, '')
    ---
    language: 'ja'
    unique_identifier:
    - 'http://nareru-se.dengeki.com/'
    title: 'ドラマCD発売記念特別短編「立華ズ・ブートキャンプ」'
    subtitle: '萌えるSE残酷物語'
    creator: '夏海公司'
    date: '<%= Date.today.to_s %>'
    ---
    # <%= title %> - <%= index %>
    ## chapter <%= index %>
    <%= body %>
  EOT
end
# Licensed under MIT
# Page numbers of the web novel that get scraped.
PAGES = 1..7
# printf-style pattern for the generated Markdown file names.
FILE_NAME = "nareru-se-%02d.md".freeze
# One Markdown file name per page, in page order.
MD_FILES = PAGES.map { |index| FILE_NAME % index }.freeze
# Matching XHTML names; only the trailing extension is swapped, so a ".md"
# occurring elsewhere in a name is left alone.
XHTML_FILES = MD_FILES.map { |f| f.sub(/\.md\z/, '.xhtml') }.freeze
# Package the EPUB once every chapter's XHTML, the cover image and the
# stylesheet exist.
file 'nareru.epub' => XHTML_FILES + %w[images/cover.png css/style.css] do
  sh 'maliq_gepub -o nareru.epub'
end

# Each .xhtml depends on the .md with the same base name. The command uses a
# glob, so a single run hands every .md in the working directory to maliq.
rule('.xhtml' => '.md') { sh 'maliq *.md' }
# Download the cover image used by the EPUB.
file 'images/cover.png' do |t|
  mkdir_p "images"
  begin
    sh "wget http://nareru-se.dengeki.com/common/img/top/bg_kv_top.png -O images/cover.png"
  rescue StandardError
    # wget -O creates the target even when the download fails; remove the
    # leftover so the next run retries instead of treating it as up to date.
    rm_f t.name
    raise
  end
end
# Write the small stylesheet bundled into the EPUB.
file 'css/style.css' do
  # The heredoc body is kept flush-left so the written file carries no
  # leading indentation.
  stylesheet = <<-EOCSS
h1, h2, h3 {
color: #8B1A1A;
}
ol {
list-style-type: none;
}
  EOCSS
  mkdir_p "css"
  File.open('css/style.css', 'w') { |out| out.write stylesheet }
end
# Build rule for any missing .md file. It has no source file: the content is
# scraped from the web, and one run regenerates every page in PAGES.
rule('.md') do
  # Required here rather than at the top of the Rakefile, so the scraper is
  # only loaded when a Markdown file actually has to be produced.
  require './my-own-purpose-script'
  PAGES.each do |page|
    scraper = SurugaUtils::NA00S3.new("http://nareru-se.dengeki.com/webnovel/?pagenum=#{page}", page)
    scraper.generate_build_files("./", FILE_NAME)
  end
end
# Running rake with no arguments builds the EPUB.
desc "Scrape web contents and generate epub"
task default: 'nareru.epub'

# Remove every generated file and directory, one path at a time.
desc "Cleanup files"
task :clean do
  generated = MD_FILES + XHTML_FILES + %w[nareru.epub images css]
  generated.each do |path|
    rm_rf path
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment