Skip to content

Instantly share code, notes, and snippets.

@arika
Created November 12, 2008 09:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save arika/24118 to your computer and use it in GitHub Desktop.
Save arika/24118 to your computer and use it in GitHub Desktop.
exports tDiary data as HTML text
#!/usr/bin/ruby
#
# this script dumps diary data from tDiary 2.2.x.
#
# usage: $0 /path/to/tdiary/dir [/path/to/tdiary.conf/dir] > tdiary.dat
#
# Author: akira at arika.org
# License: tDiary's
# Hook that receives the freshly built tDiary configuration object before
# any diary is dumped. As shipped it changes nothing and returns nil.
#
# The commented lines inside are a ready-made example: they remove
# hatena_star.rb and add_bookmark.rb from the newline-separated
# 'sp.selected' option and set the 'image.dir' option. Strip the leading
# "# " from those lines to enable them.
config_override = proc do |config|
  # if sp = config.options['sp.selected']
  #   config.options['sp.selected'] =
  #     (sp.split(/\n/) - %w(hatena_star.rb add_bookmark.rb)).join("\n")
  # end
  # config.options['image.dir'] = '/path/to/image/dir'
  nil
end
# Plugin customisation hook. The proc is handed to TDiaryProxy.new as its
# block and is instance_eval'ed on the plugin object each time plugins are
# loaded for a diary, so every `def` below is evaluated with the plugin
# object as self.
#
# Only subtitle_link is active. The =begin/=end sections are disabled
# examples that emit <typo:...> elements instead of HTML; delete the
# surrounding =begin/=end pair (they must stay at column 0) to enable one.
plugin_override = proc do
# Returns the subtitle text unchanged; date and index are ignored.
# NOTE(review): presumably this replaces a stock subtitle_link that wraps
# the subtitle in a link - confirm against the tDiary plugin in use.
def subtitle_link(date, index, subtitle)
subtitle
end
# Disabled example: render my() as a <typo:tdiarycompat method="my"> element.
# NOTE(review): the title value is written into an attribute named "text"
# and neither title nor text is passed through h - confirm before enabling.
=begin
def my(url, text = '', title = '')
ret = %Q!<typo:tdiarycompat method="my" url="#{h url}"!
ret << %Q! text="#{title}"! unless title.empty?
ret << %Q!>#{text}</typo:tdiarycompat>!
end
=end
# Disabled example: wrap the existing image() method, pick its generated
# <a>/<img> HTML apart with a regexp and re-emit it as a
# <typo:tdiarycompat method="image"/> element, with the escaped @image_url
# prefix stripped from the image and link values. If the HTML does not match
# the regexp, a warning is printed and the original HTML is returned as is.
=begin
alias :orig_image :image
def image(*arg)
html = orig_image(*arg)
m = %r!\A(?:<a href="([^"]+)">)?<img class="([^"]+)" src="([^"]+)" alt="([^"]*)" title="[^"]*"(?: width="([^"]+)"(?: height="([^"]+)")?)?>!.match(html)
unless m
warn "unexpected html by image: #{html}"
return html
end
patt = /\A#{Regexp.quote(h(@image_url))}/
ret = %Q!<typo:tdiarycompat method="image"!
ret << %Q! image="#{m[3].sub(patt, '')}"!
ret << %Q! link="#{m[1].sub(patt, '')}"! if m[1]
ret << %Q! place="#{m[2]}"! if m[2]
ret << %Q! title="#{m[4]}"! if m[4]
ret << %Q! width="#{m[5]}"! if m[5]
ret << %Q! height="#{m[6]}"! if m[6]
ret << '/>'
ret
end
=end
# Disabled example: render amazon_get() as a <typo:asin> element. The
# 'detail' position is mapped to style "box"; @conf['amazon.imgsize'] of 0
# or 2 selects the 'large' or 'small' image, anything else 'medium'.
# NOTE(review): the image attribute line ends with a literal "}" after the
# closing quote, so the output would contain image="..."} - looks like a
# typo; fix before enabling.
=begin
def amazon_get(asin, show_image = true, label = nil, position = 'amazon')
position = 'box' if position == 'detail'
ret = %Q!<typo:asin asin="#{h asin}" style="#{position}"!
if show_image
size = case @conf['amazon.imgsize']
when 0
'large'
when 2
'small'
else
'medium'
end
ret << %Q! image="#{size}"}!
end
if label
ret << %Q!>#{label}</typo:asin>!
else
ret << '/>'
end
ret
end
=end
# Disabled example: render flickr() as a <typo:tdiarycompat method="flickr"/>
# element. size falls back to @conf['flickr.default_size'], then 'small'.
=begin
def flickr(pid, size = nil, place = nil)
size ||= @conf['flickr.default_size'] || 'small'
ret = %Q!<typo:tdiarycompat method="flickr" img="#{h pid.to_s}" size="#{h size}"!
ret << %Q! place="#{h place}"! if place
ret << '/>'
end
=end
end
# Per-section clean-up hook, called with each section before it is rendered.
# It edits section.body in place, deleting every ERB output tag that calls
# a_plugin_method, along with leading whitespace on that line and any
# whitespace that follows the tag.
# Returns what String#gsub! returns: the body if something was removed,
# nil otherwise.
# NOTE(review): "a_plugin_method" looks like a placeholder to be replaced
# with the real plugin method name(s) - confirm.
section_modify = proc do |section|
  plugin_call = %r!^\s*<%=\s*a_plugin_method .*%>\s*!
  section.body.gsub!(plugin_call, '')
end
# Command-line handling and tDiary bootstrap.
#   ARGV[0] - tDiary installation directory (required)
#   ARGV[1] - directory holding tdiary.conf (optional, defaults to ARGV[0])
td_dir = ARGV.shift
td_conf_dir = ARGV.shift

# Without the installation directory File.expand_path(nil) would die with a
# bare TypeError; show the usage line on stderr and exit non-zero instead.
unless td_dir
  abort "usage: #{$0} /path/to/tdiary/dir [/path/to/tdiary.conf/dir] > tdiary.dat"
end

td_dir = File.expand_path(td_dir)
# Object#untaint no longer exists on Ruby 3.2+, so call it only where available.
td_dir.untaint if td_dir.respond_to?(:untaint)
# Put the installation directory first so that `require 'tdiary'` below
# resolves to it.
$LOAD_PATH.unshift(td_dir)

td_conf_dir = td_dir unless td_conf_dir
td_conf_dir.untaint if td_conf_dir.respond_to?(:untaint)

# Remember where we started, then switch to the configuration directory
# before tDiary is loaded. save_cwd is not read again by this script.
save_cwd = Dir.pwd
Dir.chdir(td_conf_dir)

require 'cgi'
require 'tdiary'
# TDiary::TDiaryBase subclass used to walk every stored diary and render
# each section to HTML through the plugin object.
#
# The block given to .new is kept and instance_eval'ed on the plugin object
# each time plugins are loaded for a diary, which lets the caller override
# plugin methods.
class TDiaryProxy < TDiary::TDiaryBase
  # Subtitle string produced by the most recent _section_html call
  # (nil until the first call).
  attr_accessor :_processed_subtitle

  # All arguments (and the block) are forwarded to the superclass
  # constructor; the block is also stored as the plugin override.
  def initialize(*args, &block)
    super
    @ignore_parser_cache = true # don't use parser cache
    @plugin_override = block
    @_processed_subtitle = nil
    calendar # initialize @years
  end

  # Cache hooks are turned into no-ops.
  # NOTE(review): presumably these override cache handling in the
  # superclass so that dumping never touches the cache - confirm.
  def clear_cache(*args); end
  def store_cache(*args); end

  # Runs @io.transaction for +date+, yielding whatever it yields to the
  # caller's block. The inner block always evaluates to DIRTY_NONE.
  # NOTE(review): presumably that tells the IO layer nothing was modified,
  # so nothing is written back - confirm.
  def transaction(date)
    @io.transaction(date) do |*arg|
      yield(*arg)
      DIRTY_NONE
    end
  end

  # Yields a Time for the first day of every month listed in @years,
  # in ascending year/month order.
  def each_month
    @years.keys.sort.each do |year|
      @years[year].sort.each do |month|
        yield(Time.local(year.to_i, month.to_i))
      end
    end
  end

  # Compiles +rhtml+ (an ERB template string) to Ruby source and, when a
  # plugin object is loaded, evaluates it there via eval_src and returns the
  # result. Without a plugin object the compiled source itself is returned.
  def _eval_rhtml(rhtml)
    # The first ERB pass merely interpolates the local variable rhtml.
    r = ERB.new('<%= rhtml %>').result(binding)
    r = ERB.new(r).src
    return r unless @plugin

    # Object#untaint no longer exists on Ruby 3.2+, so call it only where
    # available.
    r.untaint if r.respond_to?(:untaint)
    @plugin.eval_src(r, @conf.secure)
  end

  # Invokes the plugins' body_enter_proc for +date+ (output is discarded).
  def _body_enter_proc(date)
    _eval_rhtml("<% body_enter_proc(Time.at(#{date.to_i})) %>")
  end

  # Invokes the plugins' body_leave_proc for +date+ (output is discarded).
  def _body_leave_proc(date)
    _eval_rhtml("<% body_leave_proc(Time.at(#{date.to_i})) %>")
  end

  # Walks every month and yields each diary of that month to the block.
  # For each diary @date is set, plugins are (re)loaded, the plugin override
  # is applied, and the yield is bracketed by the body enter/leave procs.
  def _dump_each_section
    each_month do |month|
      # An instance variable cannot be a block parameter on Ruby 1.9+, so
      # take a local and assign it explicitly.
      transaction(month) do |diaries|
        @diaries = diaries
        @diaries.each do |_dstr, diary|
          @date = diary.date
          load_plugins
          @plugin.instance_eval(&@plugin_override) if @plugin_override
          _body_enter_proc(@date)
          yield(diary)
          _body_leave_proc(@date)
        end
      end
    end
  end

  # Converts a section body to its template form via body_to_html, remembers
  # it in @_current_rhtml and returns it.
  def _section_rhtml(sec)
    @_current_rhtml = sec.body_to_html
  end

  # Renders one section and returns its HTML.
  #
  # The template is wrapped with section_enter_proc / section_leave_proc
  # output, and subtitle_proc is evaluated in the same pass. The processed
  # subtitle is appended after a random separator line, split off again
  # afterwards and stored in @_processed_subtitle.
  def _section_html(rhtml, sec)
    sep = "\n__tdiary_dump_processed_subtitle__#{rand(9999)}\n"
    # The subtitle is embedded as a dumped string literal with every "%"
    # rewritten to the octal escape \045 - presumably so a "%>" inside the
    # subtitle cannot terminate the surrounding ERB tag.
    eval_body =
      "<%= section_enter_proc(Time.at(#{@date.to_i})) %>" +
      "<% _tdiary_dump_processed_subtitle = subtitle_proc(Time.at(#{@date.to_i}), #{(sec.subtitle || '').dump.gsub(/%/, '\\\\045')}) %>" +
      rhtml +
      "<%= section_leave_proc(Time.at(#{@date.to_i})) %>" +
      "#{sep}<%= _tdiary_dump_processed_subtitle %>"
    html, @_processed_subtitle = _eval_rhtml(eval_body).split(/#{Regexp.quote(sep)}/, 2)
    html
  end
end
# Main dump: builds one hash per diary (title, dates, rendered sections,
# comments, trackbacks), collects them in an array and writes the array to
# standard output with Marshal.

# NOTE(review): REQUEST_METHOD is set presumably so that CGI.new behaves as
# for a normal GET request instead of reading parameters from the terminal.
ENV['REQUEST_METHOD'] = 'GET'
cgi = CGI.new
conf = TDiary::Config.new(cgi)
config_override.call(conf) if config_override

articles = []
tdiary = TDiaryProxy.new(cgi, nil, conf, &plugin_override)
tdiary._dump_each_section do |diary|
  diary_hash = {
    :title => diary.title,
    :date => diary.date,
    :last_modified => diary.last_modified,
    :article => [],
    :comment => [],
    :trackback => [],
    :visible => diary.visible?,
  }

  diary.each_section do |sec|
    section_modify.call(sec) if section_modify
    rhtml = tdiary._section_rhtml(sec)
    # Prepend a per-section anchor; the ERB tag is evaluated later in the
    # plugin context, where @section_index and @date are looked up.
    rhtml = '<a name="p<%= "%02d"%@section_index[@date] %>"></a>' + rhtml
    html = tdiary._section_html(rhtml, sec)
    # Move the anchor to just after the first "<p>" when one follows it on
    # the same line.
    html.sub!(%r{\A(<a name="p\d+"></a>)(.*?<p>)}) { "#{$2}#{$1}" }
    diary_hash[:article] << {
      # Strip leading "[...]" tags from the processed subtitle.
      :title => tdiary._processed_subtitle.sub(/^(\[[^\[\]]+\]\s*)+/, ''),
      :body => html,
      :category => sec.categories,
    }
  end

  diary.each_comment do |com|
    comment_hash = {
      :time => com.date,
      :visible => com.instance_eval { @show }, # avoid overriding of "visible?" on tb-show.rb, etc.
    }
    if com.name == 'TrackBack'
      # A trackback body carries URL, blog name, title and excerpt on
      # successive lines.
      turl, tblogname, ttitle, tbody = com.body.split(/\n/, 4)
      comment_hash.merge!({
        :blog_name => tblogname,
        :title => ttitle,
        :url => turl,
        :body => tbody,
      })
      type = :trackback
    else
      comment_hash.merge!({
        :author => com.name,
        :mail => com.mail,
        :body => "<p>#{com.body.make_link.gsub(/\n/, '<br>').gsub(/<br><br>\Z/, '')}</p>",
      })
      type = :comment
    end
    diary_hash[type] << comment_hash
  end

  articles << diary_hash
  # break if articles.size > 10
end

# Marshal output is binary: switch stdout to binary mode so that no newline
# translation corrupts the dump on platforms with a text-mode stdout.
$stdout.binmode
Marshal.dump(articles, $stdout)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment