Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@rummelonp
Created July 29, 2010 16:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rummelonp/498531 to your computer and use it in GitHub Desktop.
Save rummelonp/498531 to your computer and use it in GitHub Desktop.
# $KCODE selects the multibyte mode for strings and regexps on Ruby 1.8.
# From Ruby 1.9 on, strings carry their own encoding and assigning $KCODE
# only produces a "no longer effective" warning, so set it on 1.8 only.
$KCODE = 'u' if RUBY_VERSION < '1.9'
require 'rubygems'
require 'open-uri'
require 'tmpdir'
require 'yaml'
require 'xmlsimple'
require 'eeepub'
# Character-oriented helpers added to every String.
#
# NOTE: #each_char replaces the core String#each_char, so it has to honour
# the same contract (every character is yielded, and an Enumerator is
# returned when no block is given); other code calling each_char depends
# on that.
class String
  # Returns a copy of the string with everything that looks like a markup
  # tag (a "<", one or more non-">" characters, then ">") removed.
  def strip_tags
    gsub(/<[^>]+?>/i, '')
  end

  # Yields each character of the string in order and returns the receiver.
  # Without a block an Enumerator over the characters is returned.
  def each_char
    return enum_for(:each_char) unless block_given?

    # /m lets "." match newlines too; without it line breaks were skipped
    # and every character count/offset built on this method was off.
    scan(/./m) { |c| yield(c) }
  end

  # Returns the number of characters (not bytes) in the string.
  def char_count
    total = 0
    each_char { total += 1 }
    total
  end

  # Returns the characters whose zero-based index i satisfies
  # from <= i < to. An out-of-range +to+ is clamped to the end of the
  # string; a nil/false +to+ yields an empty string.
  def substr(from, to)
    picked = []
    index = 0
    each_char do |c|
      picked << c if index >= from && to && index < to
      index += 1
    end
    # Joining once avoids allocating a new string per character.
    picked.join
  end
end
# Adds Roman-numeral formatting to numbers.
class Numeric
  # Value => numeral for every symbol and subtractive pair, so a greedy
  # walk from the largest value down produces the canonical form.
  RomanTable = {
    1 => 'I',
    4 => 'IV',
    5 => 'V',
    9 => 'IX',
    10 => 'X',
    40 => 'XL',
    50 => 'L',
    90 => 'XC',
    100 => 'C',
    400 => 'CD',
    500 => 'D',
    900 => 'CM',
    1000 => 'M'
  }.freeze

  # Returns the Roman numeral for +s+ (the receiver by default).
  #
  # Values outside 1..3999 cannot be written with this table and yield '-'.
  # A fractional value is converted by its integer part (2.5 => 'II');
  # previously any non-integer raised NoMethodError once the remainder
  # dropped below 1.
  def roman(s = self)
    return '-' if 3999 < s || 0 >= s

    remaining = s.floor
    return '-' if remaining < 1 # 0 < s < 1 has no integer part to render

    numerals = []
    RomanTable.keys.sort.reverse.each do |value|
      while remaining >= value
        numerals << RomanTable[value]
        remaining -= value
      end
    end
    numerals.join
  end
end
# Downloads the posts of a Tumblr blog through the XML read API and turns
# them into an EPUB book (via EeePub). The intermediate data can also be
# dumped to / loaded from YAML.
#
# The "tumblr" value passed between the methods is a Hash with two keys:
# :tumblelog (the first <tumblelog> element of the feed) and :posts (the
# collected <post> elements), both in the shape XmlSimple produces.
class Tumblr2epub
  URL = 'http://:username.tumblr.com/api/read?num=50&start=:count'
  # Default output directory. Falls back to "~" when HOME is not set
  # instead of raising while the class is being loaded.
  USER_DIR = File.expand_path(ENV['HOME'] || '~')
  # Directory for the stylesheet and downloaded photos. Dir.tmpdir honours
  # TMPDIR and falls back to a system default when it is unset
  # (File.expand_path(ENV['TMPDIR']) raised TypeError in that case).
  TEMP_DIR = File.expand_path(Dir.tmpdir)
  # Number of posts requested per page; matches "num=50" in URL.
  PAGE_SIZE = 50
  # How many times a failed network operation is retried before giving up.
  MAX_RETRIES = 10
  # Seconds to wait between retries.
  RETRY_DELAY = 1
  # Post types that may be passed to the API as a filter.
  POST_TYPES = ['quote', 'photo', 'regular'].freeze
  # "max-width" of the photo variant that is embedded into the book.
  PHOTO_WIDTH = '500'

  # Fetches posts of +username+ page by page until at least +count+ posts
  # were collected or the blog has no more posts. Because whole pages are
  # appended, the result may hold more than +count+ posts.
  #
  # username - blog name, substituted into the host name of URL
  # count    - minimum number of posts to collect
  # type     - optional filter; ignored unless it is one of POST_TYPES
  # log      - print progress messages when true
  #
  # Returns a Hash with :tumblelog and :posts. Raises the last network or
  # parse error when a page still fails after MAX_RETRIES retries.
  def self.read(username, count, type = nil, log = false)
    tumblr = {}
    posts = []
    page = 0
    loop do
      puts "Reading #{page + 1} page" if log
      url = URL.gsub(/:username/, username).gsub(/:count/, (page * PAGE_SIZE).to_s)
      url += '&type=' + type if POST_TYPES.include?(type)
      xml = with_retries('Read failure, Retry', log) do
        # URI#read (open-uri) only speaks to URLs, unlike Kernel#open.
        XmlSimple.xml_in(URI.parse(url).read)
      end
      tumblr[:tumblelog] = xml['tumblelog'][0] if page == 0
      # A page past the last post has no <post> children at all.
      page_posts = xml['posts'][0]['post'] || []
      posts.concat(page_posts)
      puts "Readed #{page + 1} page, load #{posts.length} posts" if log
      # A short (or empty) page means the blog is exhausted.
      break if posts.length >= count || page_posts.length < PAGE_SIZE

      page += 1
    end
    tumblr[:posts] = posts
    tumblr
  end

  # Runs the block, retrying it up to MAX_RETRIES times (RETRY_DELAY
  # seconds apart) when it raises a StandardError. The error is always
  # printed; +failure_message+ only when +log+ is true. Returns the block's
  # value, or re-raises the last error once the retries are used up.
  # Shared by .read and Photo.download.
  def self.with_retries(failure_message, log = false)
    failures = 0
    begin
      yield
    rescue => error
      raise if failures >= MAX_RETRIES

      failures += 1
      puts error
      puts failure_message if log
      sleep RETRY_DELAY
      retry
    end
  end

  # Loads a tumblr Hash previously written by .dump from +path+.
  def self.load(path, log = false)
    path = File.expand_path(path)
    puts "Loading yaml from \"#{path}\"" if log
    tumblr =
      if YAML.respond_to?(:safe_load_file)
        # Newer Psych refuses Symbols (the :tumblelog/:posts keys) unless
        # they are permitted; only plain data plus Symbols is accepted, so
        # a crafted file cannot instantiate arbitrary objects.
        YAML.safe_load_file(path, :permitted_classes => [Symbol], :aliases => true)
      else
        # NOTE(review): older YAML libraries deserialize arbitrary objects
        # here; only load files you created yourself.
        YAML.load_file(path)
      end
    puts "Loaded yaml" if log
    tumblr
  end

  # Writes +tumblr+ as YAML into +dir+ using a
  # "<blog name>_<timestamp>.yaml" file name. Returns the full path.
  def self.dump(tumblr, dir = USER_DIR, log = false)
    puts "Dumping yaml" if log
    path = "#{File.expand_path(dir)}/#{output_filename(tumblr[:tumblelog], 'yaml')}"
    # Block form closes (and thereby flushes) the file.
    File.open(path, 'wb') { |file| YAML.dump(tumblr, file) }
    puts "Dumped yaml to \"#{path}\"" if log
    path
  end

  # Builds an EPUB from +tumblr+ and saves it into +dir+. Photos of photo
  # posts are downloaded and embedded; posts of unsupported types are
  # skipped. Downloaded photos are removed again even when the build fails.
  #
  # Returns the file name (not the full path) of the written EPUB.
  def self.make(tumblr, dir = USER_DIR, log = false)
    tumblelog = tumblr[:tumblelog]
    posts = tumblr[:posts]
    epub = EeePub::Easy.new do
      title tumblelog['title']
      creator tumblelog['name']
      date Time.now.strftime('%Y-%m-%d')
      identifier "http://#{tumblelog['name']}.tumblr.com", :scheme => 'URL'
      uid "http://#{tumblelog['name']}.tumblr.com"
    end
    css_path = make_css
    epub.assets << css_path
    css_name = File.basename(css_path)
    begin
      posts.each do |post|
        if post['type'] == 'photo'
          source = photo_source(post)
          # Download first so that to_html can link to the local copy.
          epub.assets << Photo.download(source, log) if source
        end
        html = to_html(post)
        epub.sections << [to_title(post), section_html(css_name, html)] if html
      end
      filename = output_filename(tumblelog, 'epub')
      path = "#{File.expand_path(dir)}/#{filename}"
      puts "Making epub" if log
      epub.save(path)
    ensure
      Photo.clean(log)
    end
    puts "Make epub to \"#{path}\"" if log
    filename
  end

  # Writes the stylesheet shared by all sections to TEMP_DIR/style.css and
  # returns its path.
  def self.make_css
    css = <<-CSS
body {
font: 14px/1.4 Arial, Helvetica, sans-serif;
margin: 0;
padding: 0;
}
p, ul, ol, blockquote {
margin: 3px 0 1px;
}
body :first-child,
body :last-child,
p:last-child,
ul:last-child,
ol:last-child {
margin-bottom: 0 !important;
}
a img {
border-width: 0;
}
blockquote {
border-left: 4px solid #dcdcdc;
margin-left: 0 !important;
margin-right: 0 !important;
padding-left: 10px !important;
border-width: 4px !important;
border-color: #a8bccf !important;
}
blockquote blockquote {
border-color: #839aaf !important;
}
blockquote blockquote blockquote {
border-color: #6b7d8f !important;
}
blockquote blockquote blockquote blockquote {
border-color: #4c5e6f !important;
}
blockquote blockquote blockquote blockquote blockquote {
border-color: #36434f !important;
}
    CSS
    path = "#{TEMP_DIR}/style.css"
    File.open(path, 'wb') { |file| file.write(css) }
    path
  end

  # Returns the XHTML body fragment for +post+, or nil when the post type
  # is not quote/photo/regular (or a photo post has neither image nor
  # caption). Photo posts link to the downloaded copy when Photo.download
  # was called for them before, otherwise to the remote URL.
  def self.to_html(post)
    case post['type']
    when 'quote'
      "#{text_of(post['quote-text'])}\n#{text_of(post['quote-source'])}\n"
    when 'photo'
      caption = text_of(post['photo-caption'])
      source = photo_source(post)
      return(caption.empty? ? nil : caption) unless source

      local_copy = Photo.get(source)
      image_path = local_copy ? File.basename(local_copy) : source
      "<a href=\"#{image_path}\"><img src=\"#{image_path}\" /></a>#{caption}"
    when 'regular'
      body = "#{text_of(post['regular-body'])}\n"
      if post['regular-title']
        "<h2>#{text_of(post['regular-title'])}</h2>\n#{body}"
      else
        body
      end
    end
  end

  # Returns the table-of-contents title for +post+: the capitalized post
  # type, followed by " - " and the first 20 characters of the post's text
  # (tags stripped, whitespace runs collapsed) when it has any.
  # Relies on the String#strip_tags/#char_count/#substr extensions.
  def self.to_title(post)
    type = post['type'].capitalize
    raw =
      case post['type']
      when 'quote' then post['quote-text']
      when 'photo' then post['photo-caption']
      when 'regular' then post['regular-title'] || post['regular-body']
      end
    return type unless raw

    title = text_of(raw).strip_tags.gsub(/\s{2,}/, ' ').strip
    return type if title.empty?

    if title.char_count > 20
      "#{type} - #{title.substr(0, 20)}..."
    else
      "#{type} - #{title}"
    end
  end

  # Internal: flattens a parsed XML value to its text. Element content
  # arrives wrapped in an Array; Array#to_s only concatenated the elements
  # on Ruby 1.8 and returns an inspect string ('["text"]') on later
  # versions, so the parts are joined explicitly.
  # NOTE(review): Hash parts are assumed to keep their text under
  # 'content' (as the photo-url entries do) - confirm against XmlSimple.
  def self.text_of(value)
    parts = value.is_a?(Array) ? value : [value]
    parts.map { |part| part.is_a?(Hash) ? part['content'].to_s : part.to_s }.join
  end

  # Internal: URL of the photo variant to embed - the PHOTO_WIDTH wide one,
  # else the first variant listed - or nil when the post lists none.
  def self.photo_source(post)
    variants = post['photo-url'] || []
    chosen = variants.find { |variant| variant['max-width'] == PHOTO_WIDTH } || variants.first
    chosen && chosen['content']
  end

  # Internal: "<blog name with - replaced by _>_<YYYYmmddHHMMSS>.<ext>".
  # (%m is the month; the former %M put the minute in its place.)
  def self.output_filename(tumblelog, ext)
    "#{tumblelog['name'].gsub(/-/, '_')}_#{Time.now.strftime('%Y%m%d%H%M%S')}.#{ext}"
  end

  # Internal: wraps a body fragment into a complete XHTML document. The
  # heredoc content starts at column 0 because nothing may precede the XML
  # declaration; the stylesheet link belongs inside <head>.
  def self.section_html(css_name, html)
    <<-HTML
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="ja">
<head>
<title>Tumblr</title>
<link rel="stylesheet" href="#{css_name}" type="text/css" />
</head>
<body>
#{html}
</body>
</html>
    HTML
  end

  private_class_method :text_of, :photo_source, :output_filename, :section_html

  # Downloads photos into TEMP_DIR and remembers where each URL was stored
  # so that the generated HTML can reference the local copies.
  class Photo
    # Maps a photo URL to the path of its downloaded copy.
    @@images = {}

    # Downloads +url+ into a randomly named file in TEMP_DIR (keeping the
    # URL's file extension, '.png' when it has none) and returns the path.
    # Raises the last error when the download still fails after the
    # retries.
    def self.download(url, log = false)
      puts "Downloading \"#{url}\"" if log
      # Ignore a query string/fragment when looking for the extension.
      ext = File.extname(url.sub(/[?#].*\z/, ''))
      # File.extname returns '' (never nil) when there is no extension.
      ext = '.png' if ext.empty?
      path = "#{TEMP_DIR}/#{rand(256**16).to_s(16)}#{ext}"
      Tumblr2epub.with_retries('Download failure, Retry', log) do
        # URI#open (open-uri) never treats the feed-supplied string as a
        # file name or "|command" the way Kernel#open would; the block
        # forms close both handles even when the transfer fails.
        URI.parse(url).open do |image|
          File.open(path, 'wb') { |file| file.write(image.read) }
        end
      end
      puts "Downloaded to \"#{path}\"" if log
      @@images[url] = path
      path
    end

    # Returns the local path +url+ was downloaded to, or nil.
    def self.get(url)
      @@images[url]
    end

    # Deletes every downloaded file and forgets all URLs. Returns true.
    def self.clean(log = false)
      puts "Delete cache files" if log && @@images.length > 0
      @@images.each_value do |file|
        puts "Deleting \"#{file}\"" if log
        # A file that is already gone must not abort the cleanup.
        File.delete(file) if File.exist?(file)
      end
      @@images = {}
      puts "Deleted all cache files" if log
      true
    end
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment