arika (owner)

Revisions

gist: 24118 Download_button fork
public
Description:
exports tDiary data as HTML text
Public Clone URL: git://gist.github.com/24118.git
Embed All Files: show embed
tdiary-export.rb #
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
#!/usr/bin/ruby
#
# this script dumps diary data from tDiary 2.2.x.
#
# usage: $0 /path/to/tdiary/dir [/path/to/tdiary.conf/dir] > tdiary.dat
#
# Author: akira at arika.org
# License: tDiary's
 
# Configuration hook: called once with the TDiary::Config instance, right after
# it is created and before any diary is read.  The body is intentionally empty;
# the =begin/=end section is a disabled sample that would
#   * drop hatena_star.rb and add_bookmark.rb from the 'sp.selected' option, and
#   * set the 'image.dir' option to a fixed directory.
# Move lines out of the =begin/=end section to enable them.
config_override = proc do |config|
=begin
if sp = config.options['sp.selected']
config.options['sp.selected'] =
(sp.split(/\n/) - %w(hatena_star.rb add_bookmark.rb)).join("\n")
end
config.options['image.dir'] = '/path/to/image/dir'
=end
end
 
# Plugin-side overrides.  This proc is passed to TDiaryProxy.new as its block
# and is instance_eval'ed on the plugin object every time plugins are loaded
# (once per diary), so methods defined here are defined on that plugin object.
# Only subtitle_link is active; each =begin/=end section below is a disabled
# sample.  The "typo:" tags they emit suggest a Typo blog import target
# (NOTE(review): assumption based on the tag prefix only -- confirm).
plugin_override = proc do
  # Return the subtitle text unchanged.
  # NOTE(review): presumably this keeps the stock link markup out of the
  # exported subtitles -- confirm against tDiary's default subtitle_link.
  def subtitle_link(date, index, subtitle)
    subtitle
  end
 
  # Disabled sample: emit `my` references as <typo:tdiarycompat method="my"> tags.
=begin
def my(url, text = '', title = '')
ret = %Q!<typo:tdiarycompat method="my" url="#{h url}"!
ret << %Q! text="#{title}"! unless title.empty?
ret << %Q!>#{text}</typo:tdiarycompat>!
end
=end
 
  # Disabled sample: wrap the original `image` plugin, parse the HTML it
  # produces and re-emit it as a <typo:tdiarycompat method="image"/> tag with
  # paths made relative to @image_url.  Unparseable HTML is passed through
  # after a warning.
=begin
alias :orig_image :image
def image(*arg)
html = orig_image(*arg)
m = %r!\A(?:<a href="([^"]+)">)?<img class="([^"]+)" src="([^"]+)" alt="([^"]*)" title="[^"]*"(?: width="([^"]+)"(?: height="([^"]+)")?)?>!.match(html)
unless m
warn "unexpected html by image: #{html}"
return html
end
 
patt = /\A#{Regexp.quote(h(@image_url))}/
ret = %Q!<typo:tdiarycompat method="image"!
ret << %Q! image="#{m[3].sub(patt, '')}"!
ret << %Q! link="#{m[1].sub(patt, '')}"! if m[1]
ret << %Q! place="#{m[2]}"! if m[2]
ret << %Q! title="#{m[4]}"! if m[4]
ret << %Q! width="#{m[5]}"! if m[5]
ret << %Q! height="#{m[6]}"! if m[6]
ret << '/>'
 
ret
end
=end
 
  # Disabled sample: emit Amazon references as <typo:asin> tags, mapping
  # @conf['amazon.imgsize'] 0 => large, 2 => small, anything else => medium.
  # NOTE(review): the "}" right after image="#{size}" in this sample looks like
  # a stray character -- check before enabling.
=begin
def amazon_get(asin, show_image = true, label = nil, position = 'amazon')
position = 'box' if position == 'detail'
ret = %Q!<typo:asin asin="#{h asin}" style="#{position}"!
if show_image
size = case @conf['amazon.imgsize']
when 0
'large'
when 2
'small'
else
'medium'
end
ret << %Q! image="#{size}"}!
end
if label
ret << %Q!>#{label}</typo:asin>!
else
ret << '/>'
end
ret
end
=end
 
  # Disabled sample: emit flickr references as
  # <typo:tdiarycompat method="flickr"/> tags; size falls back to
  # @conf['flickr.default_size'] and then to 'small'.
=begin
def flickr(pid, size = nil, place = nil)
size ||= @conf['flickr.default_size'] || 'small'
ret = %Q!<typo:tdiarycompat method="flickr" img="#{h pid.to_s}" size="#{h size}"!
ret << %Q! place="#{h place}"! if place
ret << '/>'
end
=end
end
 
# Per-section hook: receives each section before it is converted to HTML.
# Removes, in place, every ERB call to a_plugin_method that starts a line
# (leading whitespace allowed) together with the whitespace that follows it.
# Yields the result of String#gsub!, i.e. nil when nothing was removed.
section_modify = proc do |section|
  plugin_call = %r!^\s*<%=\s*a_plugin_method .*%>\s*!
  section.body.gsub!(plugin_call, '')
end
 
# Command-line handling and load-path setup.
#   ARGV[0]  tDiary installation directory (required)
#   ARGV[1]  directory holding tdiary.conf (optional, defaults to ARGV[0])
td_dir = ARGV.shift
td_conf_dir = ARGV.shift

# Fail early with the usage line instead of a TypeError from
# File.expand_path(nil) when the mandatory argument is missing.
unless td_dir
  abort "usage: #{$0} /path/to/tdiary/dir [/path/to/tdiary.conf/dir] > tdiary.dat"
end

td_dir = File.expand_path(td_dir)
# Object#untaint was removed in Ruby 3.2; call it only where it still exists.
td_dir.untaint if td_dir.respond_to?(:untaint)
$LOAD_PATH.unshift(td_dir) # lets the later `require 'tdiary'` find the library

td_conf_dir = td_dir unless td_conf_dir
td_conf_dir.untaint if td_conf_dir.respond_to?(:untaint)
save_cwd = Dir.pwd # remembered but not referenced again in the visible script
# The process runs from the configuration directory from here on.
Dir.chdir(td_conf_dir)
 
require 'cgi'
require 'tdiary'
 
# Export helper built on TDiary::TDiaryBase.  It walks every month listed in
# @years, loads the diaries of that month and lets the caller render each
# section to HTML through the plugin object.  Cache hooks are stubbed out and
# the transaction block always returns DIRTY_NONE.
class TDiaryProxy < TDiary::TDiaryBase
  # Subtitle produced by the most recent #_section_html call (nil before that).
  attr_accessor :_processed_subtitle

  # args::  forwarded unchanged to TDiary::TDiaryBase#initialize
  # block:: optional proc that is instance_eval'ed on the plugin object each
  #         time plugins are loaded (see #_dump_each_section)
  def initialize(*args, &block)
    super
    @ignore_parser_cache = true # don't use parser cache
    @plugin_override = block
    @_processed_subtitle = nil
    calendar # initialize @years
  end

  # Cache maintenance is deliberately a no-op in this class.
  def clear_cache(*args); end

  def store_cache(*args); end

  # Runs @io.transaction for +date+, passing whatever it yields on to the
  # caller's block.  The inner block always evaluates to DIRTY_NONE.
  # NOTE(review): presumably DIRTY_NONE tells the IO layer that nothing needs
  # to be written back -- confirm against tDiary's IO implementation.
  def transaction(date)
    @io.transaction(date) do |*arg|
      yield(*arg)
      DIRTY_NONE
    end
  end

  # Yields a local Time for every year/month pair in @years, in sorted order.
  def each_month
    @years.keys.sort.each do |year|
      @years[year].sort.each do |month|
        yield(Time.local(year.to_i, month.to_i))
      end
    end
  end

  # Compiles +rhtml+ to Ruby source with ERB.  When a plugin object is loaded
  # the source is evaluated through @plugin.eval_src and that result is
  # returned; otherwise the generated Ruby source itself is returned.
  def _eval_rhtml(rhtml)
    # The trivial template yields the string form of +rhtml+.
    text = ERB.new('<%= rhtml %>').result(binding)
    src = ERB.new(text).src
    return src unless @plugin

    # Object#untaint was removed in Ruby 3.2; call it only where it still exists.
    src.untaint if src.respond_to?(:untaint)
    @plugin.eval_src(src, @conf.secure)
  end

  # Evaluates the plugin's body_enter_proc for +date+.
  def _body_enter_proc(date)
    _eval_rhtml("<% body_enter_proc(Time.at(#{date.to_i})) %>")
  end

  # Evaluates the plugin's body_leave_proc for +date+.
  def _body_leave_proc(date)
    _eval_rhtml("<% body_leave_proc(Time.at(#{date.to_i})) %>")
  end

  # Yields every diary of every month.  For each diary @date is set, plugins
  # are reloaded, the optional override block is applied to the plugin object
  # and the body enter/leave procs are run around the yield.
  def _dump_each_section
    each_month do |month|
      # Ruby 1.9+ rejects an instance variable as a block parameter
      # (the original `|@diaries|`), so take a local and assign explicitly.
      transaction(month) do |diaries|
        @diaries = diaries
        @diaries.each do |_key, diary|
          @date = diary.date
          load_plugins
          @plugin.instance_eval(&@plugin_override) if @plugin_override
          _body_enter_proc(@date)
          yield(diary)
          _body_leave_proc(@date)
        end
      end
    end
  end

  # Converts the section body to rhtml via sec.body_to_html, remembers it in
  # @_current_rhtml and returns it.
  def _section_rhtml(sec)
    @_current_rhtml = sec.body_to_html
  end

  # Renders +rhtml+ for section +sec+ between section_enter_proc and
  # section_leave_proc and returns the resulting HTML.  The subtitle processed
  # by subtitle_proc is appended after a separator line, split off again and
  # stored in @_processed_subtitle.
  def _section_html(rhtml, sec)
    sep = "\n__tdiary_dump_processed_subtitle__#{rand(9999)}\n"
    # '%' in the dumped subtitle is replaced by its octal escape (\045) so the
    # literal cannot close the surrounding ERB tag.
    eval_body =
      "<%= section_enter_proc(Time.at(#{@date.to_i})) %>" +
      "<% _tdiary_dump_processed_subtitle = subtitle_proc(Time.at(#{@date.to_i}), #{(sec.subtitle || '').dump.gsub(/%/, '\\\\045')}) %>" +
      rhtml +
      "<%= section_leave_proc(Time.at(#{@date.to_i})) %>" +
      "#{sep}<%= _tdiary_dump_processed_subtitle %>"
    html, @_processed_subtitle = _eval_rhtml(eval_body).split(/#{Regexp.quote(sep)}/, 2)
    html
  end
end
 
# Export driver: builds a CGI/config pair, walks every diary through
# TDiaryProxy and writes the collected data to standard output with Marshal.
# The dump is an Array with one Hash per diary:
#   :title, :date, :last_modified, :visible
#   :article   => [{ :title, :body, :category }, ...]                   (one per section)
#   :comment   => [{ :time, :visible, :author, :mail, :body }, ...]
#   :trackback => [{ :time, :visible, :blog_name, :title, :url, :body }, ...]
ENV['REQUEST_METHOD'] = 'GET' # set before CGI.new so the CGI object is built as for a GET request
cgi = CGI.new
conf = TDiary::Config.new(cgi)
config_override.call(conf) if config_override

articles = []

tdiary = TDiaryProxy.new(cgi, nil, conf, &plugin_override)
tdiary._dump_each_section do |diary|
  diary_hash = {
    :title => diary.title,
    :date => diary.date,
    :last_modified => diary.last_modified,
    :article => [],
    :comment => [],
    :trackback => [],
    :visible => diary.visible?,
  }

  diary.each_section do |sec|
    section_modify.call(sec) if section_modify

    rhtml = tdiary._section_rhtml(sec)
    # Prefix a per-section anchor; it is evaluated together with the section.
    rhtml = '<a name="p<%= "%02d"%@section_index[@date] %>"></a>' + rhtml
    html = tdiary._section_html(rhtml, sec)
    # Move the anchor from the very start to just after the first <p>.
    html.sub!(%r{\A(<a name="p\d+"></a>)(.*?<p>)}) { "#{$2}#{$1}" }

    diary_hash[:article] << {
      # Strip the leading run of [bracketed] markers from the processed subtitle.
      :title => tdiary._processed_subtitle.sub(/^(\[[^\[\]]+\]\s*)+/, ''),
      :body => html,
      :category => sec.categories,
    }
  end

  diary.each_comment do |com|
    comment_hash = {
      :time => com.date,
      :visible => com.instance_eval { @show }, # avoid overriding of "visible?" on tb-show.rb, etc.
    }
    if com.name == 'TrackBack'
      # A TrackBack body is split into four newline-separated fields.
      turl, tblogname, ttitle, tbody = com.body.split(/\n/, 4)
      comment_hash.merge!({
        :blog_name => tblogname,
        :title => ttitle,
        :url => turl,
        :body => tbody,
      })
      type = :trackback
    else
      comment_hash.merge!({
        :author => com.name,
        :mail => com.mail,
        :body => "<p>#{com.body.make_link.gsub(/\n/, '<br>').gsub(/<br><br>\Z/, '')}</p>",
      })
      type = :comment
    end
    diary_hash[type] << comment_hash
  end

  articles << diary_hash
  # break if articles.size > 10
end

# Marshal data is binary: switch stdout to binary mode so platforms that
# translate newlines in text mode do not corrupt the dump.
$stdout.binmode
Marshal.dump(articles, $stdout)