Skip to content

Instantly share code, notes, and snippets.

@jphager2
Last active September 12, 2016 09:14
Show Gist options
  • Save jphager2/aef5c03fc037440d84137f0d03d08308 to your computer and use it in GitHub Desktop.
Save jphager2/aef5c03fc037440d84137f0d03d08308 to your computer and use it in GitHub Desktop.
Create an EPUB of Works from Pottermore
require 'uri'
require 'erb'
require 'nokogiri'
require 'rpub'
require 'open-uri'
module Potter
# Downloads the Pottermore sitemap, follows every URL that matches
# URL_MATCHER and records the title and article body of each page.
class Fetcher
  # Location of the sitemap that is scanned for article URLs.
  SITEMAP_URL = "https://www.pottermore.com/sitemap.xml".freeze
  # Matches URLs containing a /writing-by-jk-rowling/ path segment; group 1
  # captures whatever follows that segment.
  URL_MATCHER = /.*\/writing-by-jk-rowling\/(.+)/

  # @return [Array<Hash>] pages collected by the most recent #fetch; each hash
  #   has the keys :url, :title, :body and :doc. Empty before the first fetch.
  attr_reader :pages

  def initialize
    @pages = []
  end

  # Resets #pages, reads the sitemap and processes every matching URL.
  # Progress is printed to standard output. Network or HTTP errors raised
  # while downloading are not rescued here.
  def fetch
    @pages = []
    doc = read_document(SITEMAP_URL)
    puts "Fetching writing by JK Rowling"
    doc.css('url loc').each do |url|
      next unless URL_MATCHER.match?(url.text)

      process(url.text)
    end
  end

  private

  # Downloads one page and appends its extracted data to #pages.
  #
  # @param url [String] absolute URL of the page to download
  def process(url)
    print '.'
    doc = read_document(url)
    title = doc.css('h1').text
    body = doc.css(".jkr-writing-artefact__text").inner_html
    pages << {
      url: url, title: title, body: body, doc: doc
    }
  end

  # Downloads +url+ and parses the response with Nokogiri's HTML parser.
  #
  # URI.open is called explicitly: Kernel#open no longer handles URLs on
  # Ruby 3.0+, where a bare `open(url)` looks for a local file instead.
  # The block form closes the underlying IO once the body has been read.
  def read_document(url)
    Nokogiri::HTML(URI.open(url, &:read))
  end
end
# Renders pages to numbered Markdown files under epub/ and then shells out
# to `rpub compile` to build the book from them.
class Writer
  # @return [Array<Hash>] the pages that have a non-empty body, sorted by title
  attr_reader :pages

  # @param pages [Array<Hash>] hashes providing at least :title and :body.
  #   Entries whose body is nil or empty are dropped.
  def initialize(pages)
    @pages = pages
      .reject { |page| page[:body].to_s.empty? }
      .sort_by { |page| page[:title] }
  end

  # Writes one Markdown file per page (epub/00000.md, epub/00001.md, ...)
  # from templates/page.md.erb, then, inside epub/, runs `rpub compile`,
  # removes the Markdown files and moves any *.epub file to the parent
  # directory. Both the template and the epub/ directory must already exist.
  def to_epub
    # The template does not change between pages, so compile it only once.
    renderer = ERB.new(File.read('templates/page.md.erb'))

    pages.each_with_index do |source, chapter|
      # Put a blank line between adjacent tags BEFORE rendering so that the
      # change actually reaches the written file. A copy is used so the
      # caller's strings are left untouched.
      page = source.merge(body: source[:body].to_s.gsub(/></, ">\n\n<"))

      # The template is evaluated with this block's binding, which makes the
      # locals `page` and `chapter` available to it.
      markdown = renderer.result(binding)
      File.write(format('epub/%05d.md', chapter), markdown)
    end

    Dir.chdir('epub') do
      `rpub compile`
      `rm *.md`
      `mv *.epub ..`
    end
  end
end
end
# Script entry point: fetch the pages first, then hand them to the writer
# which produces the EPUB.
scraper = Potter::Fetcher.new
scraper.fetch
Potter::Writer.new(scraper.pages).to_epub
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment