Created
November 12, 2008 09:19
-
-
Save arika/24118 to your computer and use it in GitHub Desktop.
exports tDiary data as HTML text
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/ruby
#
# This script dumps diary data from tDiary 2.2.x.
#
# usage: $0 /path/to/tdiary/dir [/path/to/tdiary.conf/dir] > tdiary.dat
#
# Author: akira at arika.org
# License: tDiary's
# Hook for adjusting the tDiary configuration object before diaries are
# dumped. It receives the configuration object as its only argument.
#
# The body is intentionally empty: the commented-out snippets below are
# examples to enable and adapt as needed.
config_override = proc do |config|
  # Example: remove selected plugins that should not run during the dump.
  #
  #   if sp = config.options['sp.selected']
  #     config.options['sp.selected'] =
  #       (sp.split(/\n/) - %w(hatena_star.rb add_bookmark.rb)).join("\n")
  #   end
  #
  # Example: override the image plugin's directory.
  #
  #   config.options['image.dir'] = '/path/to/image/dir'
end
# Hook evaluated (via instance_eval) in the context of the plugin object, so
# the methods defined here replace the plugin's own methods of the same name.
#
# Only subtitle_link is overridden by default. The commented-out definitions
# below are optional examples that emit <typo:...> markup instead of the HTML
# the stock plugins produce; uncomment the ones you need.
plugin_override = proc do
  # Return the subtitle as-is, without wrapping it in an anchor.
  def subtitle_link(date, index, subtitle)
    subtitle
  end

  # Example: replace the "my" link helper.
  #
  #   def my(url, text = '', title = '')
  #     ret = %Q!<typo:tdiarycompat method="my" url="#{h url}"!
  #     ret << %Q! text="#{title}"! unless title.empty?
  #     ret << %Q!>#{text}</typo:tdiarycompat>!
  #   end

  # Example: wrap the image plugin and convert its <img> output.
  #
  #   alias :orig_image :image
  #   def image(*arg)
  #     html = orig_image(*arg)
  #     m = %r!\A(?:<a href="([^"]+)">)?<img class="([^"]+)" src="([^"]+)" alt="([^"]*)" title="[^"]*"(?: width="([^"]+)"(?: height="([^"]+)")?)?>!.match(html)
  #     unless m
  #       warn "unexpected html by image: #{html}"
  #       return html
  #     end
  #     patt = /\A#{Regexp.quote(h(@image_url))}/
  #     ret = %Q!<typo:tdiarycompat method="image"!
  #     ret << %Q! image="#{m[3].sub(patt, '')}"!
  #     ret << %Q! link="#{m[1].sub(patt, '')}"! if m[1]
  #     ret << %Q! place="#{m[2]}"! if m[2]
  #     ret << %Q! title="#{m[4]}"! if m[4]
  #     ret << %Q! width="#{m[5]}"! if m[5]
  #     ret << %Q! height="#{m[6]}"! if m[6]
  #     ret << '/>'
  #     ret
  #   end

  # Example: replace the amazon plugin's output.
  # (A stray "}" after the image attribute in the original snippet has been
  # removed here.)
  #
  #   def amazon_get(asin, show_image = true, label = nil, position = 'amazon')
  #     position = 'box' if position == 'detail'
  #     ret = %Q!<typo:asin asin="#{h asin}" style="#{position}"!
  #     if show_image
  #       size = case @conf['amazon.imgsize']
  #              when 0
  #                'large'
  #              when 2
  #                'small'
  #              else
  #                'medium'
  #              end
  #       ret << %Q! image="#{size}"!
  #     end
  #     if label
  #       ret << %Q!>#{label}</typo:asin>!
  #     else
  #       ret << '/>'
  #     end
  #     ret
  #   end

  # Example: replace the flickr plugin's output.
  #
  #   def flickr(pid, size = nil, place = nil)
  #     size ||= @conf['flickr.default_size'] || 'small'
  #     ret = %Q!<typo:tdiarycompat method="flickr" img="#{h pid.to_s}" size="#{h size}"!
  #     ret << %Q! place="#{h place}"! if place
  #     ret << '/>'
  #   end
end
# Hook applied to every section before it is rendered. It edits the section
# body in place, removing each line-leading "<%= a_plugin_method ... %>" call
# together with the whitespace that follows it.
#
# "a_plugin_method" is a placeholder name: replace it with the plugin call(s)
# you want stripped from the dump.
section_modify = proc do |section|
  # Non-greedy ".*?" stops at the first "%>", so other ERB tags later on the
  # same line are left untouched.
  section.body.gsub!(%r!^\s*<%=\s*a_plugin_method .*?%>\s*!, '')
end
# Command-line handling and load-path setup.
#   ARGV[0] - tDiary installation directory (required)
#   ARGV[1] - directory containing tdiary.conf (optional, defaults to ARGV[0])
td_dir = ARGV.shift
td_conf_dir = ARGV.shift

# Without the first argument File.expand_path(nil) would raise a TypeError;
# print the usage line instead.
unless td_dir
  abort "usage: #{$0} /path/to/tdiary/dir [/path/to/tdiary.conf/dir] > tdiary.dat"
end

td_dir = File.expand_path(td_dir)
# Object#untaint no longer exists on recent Ruby versions, so call it only
# where it is available.
td_dir.untaint if td_dir.respond_to?(:untaint)
$LOAD_PATH.unshift(td_dir)

td_conf_dir = td_dir unless td_conf_dir
td_conf_dir.untaint if td_conf_dir.respond_to?(:untaint)

# The original working directory is recorded here; nothing in the visible
# script switches back to it.
save_cwd = Dir.pwd
# NOTE(review): tDiary presumably locates tdiary.conf relative to the current
# directory, hence the chdir before requiring it - confirm.
Dir.chdir(td_conf_dir)

require 'cgi'
require 'tdiary'
# Subclass of TDiary::TDiaryBase that walks every stored diary and renders
# each section to HTML through the plugin machinery, without using or
# writing tDiary's caches.
class TDiaryProxy < TDiary::TDiaryBase
  # Subtitle of the section most recently rendered by _section_html.
  attr_accessor :_processed_subtitle

  # Accepts the same arguments as the superclass. An optional block is kept
  # and later instance_eval'ed on the plugin object each time plugins are
  # loaded (see _dump_each_section).
  def initialize(*args, &block)
    super
    @ignore_parser_cache = true # don't use parser cache
    @plugin_override = block
    @_processed_subtitle = nil
    calendar # initialize @years
  end

  # Cache handling is disabled: both methods are deliberate no-ops.
  def clear_cache(*args); end
  def store_cache(*args); end

  # Runs the block inside an @io transaction for +date+. The transaction
  # block always returns DIRTY_NONE, whatever the caller's block returns.
  def transaction(date)
    @io.transaction(date) do |*arg|
      yield(*arg)
      DIRTY_NONE
    end
  end

  # Yields a Time for every year/month pair in @years, in ascending order.
  def each_month
    @years.keys.sort.each do |year|
      @years[year].sort.each do |month|
        yield(Time.local(year.to_i, month.to_i))
      end
    end
  end

  # Compiles +rhtml+ with ERB and evaluates the compiled source through the
  # plugin object. When no plugin is loaded, the compiled source itself is
  # returned.
  def _eval_rhtml(rhtml)
    r = ERB.new('<%= rhtml %>').result(binding)
    r = ERB.new(r).src
    return r unless @plugin

    # Object#untaint no longer exists on recent Ruby versions.
    r.untaint if r.respond_to?(:untaint)
    @plugin.eval_src(r, @conf.secure)
  end

  # Invokes the plugins' body_enter_proc for +date+.
  def _body_enter_proc(date)
    _eval_rhtml("<% body_enter_proc(Time.at(#{date.to_i})) %>")
  end

  # Invokes the plugins' body_leave_proc for +date+.
  def _body_leave_proc(date)
    _eval_rhtml("<% body_leave_proc(Time.at(#{date.to_i})) %>")
  end

  # Yields every diary of every month. For each diary the plugins are
  # reloaded, the override block is applied, and the body enter/leave procs
  # are run around the yield.
  def _dump_each_section
    each_month do |month|
      # An instance variable cannot be used as a block parameter on
      # Ruby 1.9+, so take a local and assign it explicitly.
      transaction(month) do |diaries|
        @diaries = diaries
        @diaries.each do |_dstr, diary|
          @date = diary.date
          load_plugins
          @plugin.instance_eval(&@plugin_override) if @plugin_override
          _body_enter_proc(@date)
          yield(diary)
          _body_leave_proc(@date)
        end
      end
    end
  end

  # Converts a section body to rhtml, remembers it in @_current_rhtml and
  # returns it.
  def _section_rhtml(sec)
    @_current_rhtml = sec.body_to_html
  end

  # Renders +rhtml+ for section +sec+ wrapped in the section enter/leave
  # procs and returns the resulting HTML. The subtitle as processed by
  # subtitle_proc is stored in @_processed_subtitle.
  def _section_html(rhtml, sec)
    # The processed subtitle is appended after a marker line and split off
    # again once the template has been evaluated.
    sep = "\n__tdiary_dump_processed_subtitle__#{rand(9999)}\n"
    # Embed the subtitle as a Ruby string literal; "%" is rewritten to the
    # octal escape \045 so no "%" character appears inside the ERB tag.
    subtitle_literal = (sec.subtitle || '').dump.gsub(/%/, '\\\\045')
    eval_body =
      "<%= section_enter_proc(Time.at(#{@date.to_i})) %>" +
      "<% _tdiary_dump_processed_subtitle = subtitle_proc(Time.at(#{@date.to_i}), #{subtitle_literal}) %>" +
      rhtml +
      "<%= section_leave_proc(Time.at(#{@date.to_i})) %>" +
      "#{sep}<%= _tdiary_dump_processed_subtitle %>"
    html, @_processed_subtitle = _eval_rhtml(eval_body).split(/#{Regexp.quote(sep)}/, 2)
    html
  end
end
# Main driver: build one hash per diary (sections, comments, trackbacks) and
# write the whole list to standard output as a Marshal stream.

# CGI.new is given a GET request so it can be constructed from the
# command line.
ENV['REQUEST_METHOD'] = 'GET'
cgi = CGI.new
conf = TDiary::Config.new(cgi)
config_override.call(conf) if config_override

articles = []
tdiary = TDiaryProxy.new(cgi, nil, conf, &plugin_override)

tdiary._dump_each_section do |diary|
  diary_hash = {
    :title => diary.title,
    :date => diary.date,
    :last_modified => diary.last_modified,
    :article => [],
    :comment => [],
    :trackback => [],
    :visible => diary.visible?,
  }

  diary.each_section do |sec|
    section_modify.call(sec) if section_modify

    rhtml = tdiary._section_rhtml(sec)
    # Prepend a named anchor for the section ...
    rhtml = '<a name="p<%= "%02d"%@section_index[@date] %>"></a>' + rhtml
    html = tdiary._section_html(rhtml, sec)
    # ... then move that anchor to just after the first "<p>" of the output.
    html.sub!(%r{\A(<a name="p\d+"></a>)(.*?<p>)}) { "#{$2}#{$1}" }

    diary_hash[:article] << {
      # Strip leading "[category]" tags from the processed subtitle.
      :title => tdiary._processed_subtitle.sub(/^(\[[^\[\]]+\]\s*)+/, ''),
      :body => html,
      :category => sec.categories,
    }
  end

  diary.each_comment do |com|
    comment_hash = {
      :time => com.date,
      :visible => com.instance_eval { @show }, # avoid overriding of "visible?" on tb-show.rb, etc.
    }

    if com.name == 'TrackBack'
      # A trackback body holds URL, blog name, title and excerpt, one per line.
      turl, tblogname, ttitle, tbody = com.body.split(/\n/, 4)
      comment_hash.merge!({
        :blog_name => tblogname,
        :title => ttitle,
        :url => turl,
        :body => tbody,
      })
      type = :trackback
    else
      comment_hash.merge!({
        :author => com.name,
        :mail => com.mail,
        :body => "<p>#{com.body.make_link.gsub(/\n/, '<br>').gsub(/<br><br>\Z/, '')}</p>",
      })
      type = :comment
    end

    diary_hash[type] << comment_hash
  end

  articles << diary_hash
  # break if articles.size > 10
end

# Marshal output is binary data; binmode keeps the stream free of newline or
# encoding translation.
$stdout.binmode
Marshal.dump(articles, $stdout)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment