Created
July 29, 2010 16:09
-
-
Save rummelonp/498531 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$KCODE = 'u' | |
require 'rubygems'
require 'open-uri'
require 'tmpdir'
require 'yaml'
require 'xmlsimple'
require 'eeepub'
class String | |
def strip_tags | |
return self.gsub(/<[^>]+?>/i, '') | |
end | |
def each_char | |
return self.scan(/./) do |c| | |
yield(c) | |
end | |
end | |
def char_count | |
n = 0 | |
self.each_char do | |
n += 1 | |
end | |
return n | |
end | |
def substr(from, to) | |
i = 0 | |
s = '' | |
self.each_char do |c| | |
s += c if i >= from && to && i < to | |
i += 1 | |
end | |
return s | |
end | |
end | |
class Numeric | |
RomanTable = { | |
1 => 'I', | |
4 => 'IV', | |
5 => 'V', | |
9 => 'IX', | |
10 => 'X', | |
40 => 'XL', | |
50 => 'L', | |
90 => 'XC', | |
100 => 'C', | |
400 => 'CD', | |
500 => 'D', | |
900 => 'CM', | |
1000 => 'M' | |
} | |
def roman(s = self) | |
return '-' if 3999 < s || 0 >= s | |
k = RomanTable.keys.delete_if{|x| x > s}.sort[-1] | |
return RomanTable[k] + roman(s - k).gsub('-', '') | |
end | |
end | |
class Tumblr2epub | |
URL = 'http://:username.tumblr.com/api/read?num=50&start=:count' | |
USER_DIR = File.expand_path(ENV['HOME']) | |
TEMP_DIR = File.expand_path(ENV['TMPDIR']) | |
def self.read(username, count, type = nil, log = false) | |
i = 0 | |
tumblr = {} | |
posts = [] | |
loop do | |
puts "Reading #{i + 1} page" if log | |
url = URL.gsub(/:username/, username).gsub(/:count/, (i * 50).to_s) | |
url += '&type=' + type if ['quote', 'photo', 'regular'].include?(type) | |
xml_read = lambda do |error_count| | |
begin | |
return xml = XmlSimple.xml_in(open(url).read) | |
rescue => error | |
if error_count < 10 | |
puts error | |
puts "Read failure, Retry" if log | |
sleep 1 | |
xml_read.call(error_count + 1) | |
else | |
raise error | |
end | |
end | |
end | |
xml = xml_read.call(0) | |
tumblr[:tumblelog] = xml['tumblelog'][0] if i == 0 | |
posts.concat(xml['posts'][0]['post']) | |
puts "Readed #{i + 1} page, load #{posts.length} posts" if log | |
if posts.length >= count || posts.length % 50 != 0 | |
break | |
else | |
i += 1 | |
end | |
end | |
tumblr[:posts] = posts | |
return tumblr | |
end | |
def self.load(path, log = false) | |
path = File.expand_path(path) | |
puts "Loading yaml from \"#{path}\"" if log | |
tumblr = YAML.load_file(path) | |
puts "Loaded yaml" if log | |
return tumblr | |
end | |
def self.dump(tumblr, dir = USER_DIR, log = false) | |
puts "Dumping yaml" if log | |
filename = "#{tumblr[:tumblelog]['name'].gsub(/-/, '_')}_#{Time.now.strftime('%Y%M%d%H%M%S')}.yaml" | |
path = "#{File.expand_path(dir)}/#{filename}" | |
YAML.dump(tumblr, File.open(path, 'wb')) | |
puts "Dumped yaml to \"#{path}\"" if log | |
return path | |
end | |
def self.make(tumblr, dir = USER_DIR, log = false) | |
tumblelog = tumblr[:tumblelog] | |
posts = tumblr[:posts] | |
epub = EeePub::Easy.new do | |
title tumblelog['title'] | |
creator tumblelog['name'] | |
date Time.now.strftime('%Y-%M-%d') | |
identifier "http://#{tumblelog['name']}.tumblr.com", :scheme => 'URL' | |
uid "http://#{tumblelog['name']}.tumblr.com" | |
end | |
css_path = make_css | |
epub.assets << css_path | |
css_name = File.basename(css_path) | |
posts.each do |post| | |
if post['type'] == 'photo' | |
photo_url = post['photo-url'].find {|photo_url| photo_url['max-width'] == '500'} | |
epub.assets << Photo.download(photo_url['content'], log) | |
end | |
html = to_html(post) | |
if html | |
epub.sections << | |
[ | |
to_title(post), | |
<<-HTML | |
<?xml version="1.0" encoding="UTF-8"?> | |
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd"> | |
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="ja"> | |
<head> | |
<title>Tumblr</title> | |
</head> | |
<link rel="stylesheet" href="#{css_name}" type="text/css" /> | |
<body> | |
#{html} | |
</body> | |
</html> | |
HTML | |
] | |
end | |
end | |
filename = "#{tumblelog['name'].gsub(/-/, '_')}_#{Time.now.strftime('%Y%M%d%H%M%S')}.epub" | |
path = "#{File.expand_path(dir)}/#{filename}" | |
puts "Making epub" | |
epub.save(path) | |
Photo.clean(log) | |
puts "Make epub to \"#{path}\"" if log | |
return filename | |
end | |
def self.make_css | |
css = <<-CSS | |
body { | |
font: 14px/1.4 Arial, Helvetica, sans-serif; | |
margin: 0; | |
padding: 0; | |
} | |
p, ul, ol, blockquote { | |
margin: 3px 0 1px; | |
} | |
body :first-child, | |
body :last-child, | |
p:last-child, | |
ul:last-child, | |
ol:last-child { | |
margin-bottom: 0 !important; | |
} | |
a img { | |
border-width: 0; | |
} | |
blockquote { | |
border-left: 4px solid #dcdcdc; | |
margin-left: 0 !important; | |
margin-right: 0 !important; | |
padding-left: 10px !important; | |
border-width: 4px !important; | |
border-color: #a8bccf !important; | |
} | |
blockquote blockquote { | |
border-color: #839aaf !important; | |
} | |
blockquote blockquote blockquote { | |
border-color: #6b7d8f !important; | |
} | |
blockquote blockquote blockquote blockquote { | |
border-color: #4c5e6f !important; | |
} | |
blockquote blockquote blockquote blockquote blockquote { | |
border-color: #36434f !important; | |
} | |
CSS | |
path = "#{TEMP_DIR}/style.css" | |
file = File.open(path, 'wb') | |
file.write(css) | |
file.close | |
return path | |
end | |
def self.to_html(post) | |
if post['type'] == 'quote' | |
return "#{post['quote-text'].to_s}\n#{post['quote-source'].to_s}\n" | |
elsif post['type'] == 'photo' | |
photo_url = post['photo-url'].find {|photo_url| photo_url['max-width'] == '500'} | |
image_path = Photo.get(photo_url['content']) | |
if image_path | |
image_path = "#{File.basename(image_path)}" | |
else | |
image_path = photo_url['content'] | |
end | |
return "<a href=\"#{image_path}\"><img src=\"#{image_path}\" /></a>#{post['photo-caption'].to_s}" | |
elsif post['type'] == 'regular' | |
if post['regular-title'] | |
return "<h2>#{post['regular-title'].to_s}</h2>\n#{post['regular-body'].to_s}\n" | |
else | |
return "#{post['regular-body'].to_s}\n" | |
end | |
else | |
return nil | |
end | |
end | |
def self.to_title(post) | |
type = post['type'].capitalize | |
title = nil | |
if post['type'] == 'quote' | |
title = post['quote-text'] | |
elsif post['type'] == 'photo' | |
if post['photo-caption'] | |
title = post['photo-caption'] | |
end | |
elsif post['type'] == 'regular' | |
if post['regular-title'] | |
title = post['regular-title'] | |
else | |
title = post['regular-body'] | |
end | |
end | |
if title | |
title = title.to_s.strip_tags.gsub(/\s{2,}/, ' ').strip | |
if title.char_count > 20 | |
return "#{type} - #{title.substr(0, 20)}..." | |
else | |
return "#{type} - #{title}" | |
end | |
else | |
return type | |
end | |
end | |
class Photo | |
@@images = {} | |
def self.download(url, log = false) | |
puts "Downloading \"#{url}\"" if log | |
ext = File.extname(url) | |
ext = '.png' unless ext | |
photo_download = lambda do |error_count| | |
begin | |
image = open(url) | |
path = "#{TEMP_DIR}/#{rand(256**16).to_s(16)}#{ext}" | |
file = File.open(path, 'wb') | |
file.write(image.read) | |
file.close | |
image.close | |
return path | |
rescue => error | |
if error_count < 10 | |
puts error | |
puts "Download failure, Retry" if log | |
sleep 1 | |
photo_download.call(error_count + 1) | |
else | |
raise error | |
end | |
end | |
end | |
path = photo_download.call(0) | |
puts "Downloaded to \"#{path}\"" if log | |
@@images[url] = path | |
return path | |
end | |
def self.get(url) | |
@@images[url] | |
end | |
def self.clean(log = false) | |
puts "Delete cache files" if log && @@images.length > 0 | |
@@images.each do |url, file| | |
puts "Deleting \"#{file}\"" | |
File.delete(file) | |
end | |
@@images = {} | |
puts "Deleted all cache files" if log | |
return true | |
end | |
end | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment