#!/usr/bin/env ruby
require 'date'
require 'rexml/document'
include REXML # so that we don't have to prefix everything with REXML::...
# --- Settings: edit these before running -----------------------------------
# The templates in this script look these locals up BY NAME when they are
# rendered (eval + binding), so the variable names must not be changed.
blogger_id = "111111111111"           # goes into the profile URL line of the templates
blog_title = "my blog"                # feed-level title placeholder
author_name = "my name"               # author placeholder (feed and entry templates)
author_email = "noreply@blogger.com"  # author e-mail placeholder (feed and entry templates)
blog_id = "222222222222222"           # goes into every tag:blogger.com,1999:blog-... id
blog_name = "myblog"                  # NOTE(review): not referenced anywhere in the visible script

# The XML file to convert is the first command-line argument. Fail with a
# usage message instead of an opaque TypeError when it is missing.
abort "Usage: #{$PROGRAM_NAME} EXPORT_FILE.xml" if ARGV.empty?

# Block form closes the file handle as soon as the document has been parsed.
doc = File.open(ARGV[0]) { |file| REXML::Document.new(file) }
# Output templates.
#
# Each template is plain text containing #{...} placeholders. Nothing is
# interpolated here (the heredocs are single-quoted); the script later wraps a
# template in %Q{...} and eval-s it, so every placeholder is resolved against
# the local variables in scope at that moment.
# The explicit "\n" prefix keeps the newline each template text starts with.
#
# NOTE(review): the output files are named *.xml, yet no XML element markup is
# visible in these templates -- confirm the markup was not lost from this file.

# One per output file. Placeholders: blog_id, blog_title, author_name,
# blogger_id, author_email, feed_content.
feed_template = "\n" + <<'FEED_TEMPLATE'
tag:blogger.com,1999:blog-#{blog_id}.archive
2008-12-04T11:08:02.017+08:00
#{blog_title}
#{author_name}
http://www.blogger.com/profile/#{blogger_id}
#{author_email}
Blogger
#{feed_content}
FEED_TEMPLATE

# One per post. Placeholders: blog_id, post_id, post_date, post_title,
# author_name, blogger_id, author_email, comments_size.
entry_template = "\n" + <<'ENTRY_TEMPLATE'
tag:blogger.com,1999:blog-#{blog_id}.post-#{post_id}
#{post_date}
#{post_date}
#{post_title}
#{author_name}
http://www.blogger.com/profile/#{blogger_id}
#{author_email}
#{comments_size}
ENTRY_TEMPLATE

# One per comment. Placeholders: blog_id, post_id, comment_id, comment_date,
# and the optional comment_author / comment_author_email / comment_author_url
# (each of those lines renders as empty text when its value is nil).
comment_template = "\n" + <<'COMMENT_TEMPLATE'
tag:blogger.com,1999:blog-#{blog_id}.post-#{post_id}.comment-#{comment_id}
#{comment_date}
#{comment_date}
#{'' + "#{comment_author}" + '' if comment_author}
#{'' + "#{comment_author_email}" + '' if comment_author_email}
#{'' + "#{comment_author_url}" + '' if comment_author_url}
COMMENT_TEMPLATE
# Render every //item (post) and its wp:comment children into text fragments
# and group the fragments into chunks. Each chunk in all_entries later becomes
# one output file.
#
# The templates are rendered with eval against the current binding, so the
# local names assigned below are part of the template contract: do not rename
# them. Only the static template text is evaluated; values read from the XML
# are merely interpolated into it.
max_items_per_file = 50 # import limit: posts + comments per output file
entries = []
all_entries = [entries]
post_id = 0
doc.elements.each("//item") do |element|
  post_title = element.get_text("title")
  # NOTE(review): "+08:00" is appended literally; the parsed time is NOT
  # converted to that offset. Confirm this matches the source data.
  post_date = DateTime.parse(element.get_text("pubDate").to_s).strftime("%Y-%m-%dT%H:%M:%S+08:00")
  # post_content / post_category are not referenced by the templates as they
  # stand in this file, but stay in scope so a template can use them.
  post_content = element.get_text("content:encoded")
  post_category = element.get_text("category")
  post_category = "Blogging" if post_category.to_s.empty? # covers nil and empty text
  post_id += 1

  comments = []
  comment_id = 0
  element.elements.each("wp:comment") do |comment|
    comment_author = comment.get_text("wp:comment_author")
    comment_author_email = comment.get_text("wp:comment_author_email")
    comment_author_url = comment.get_text("wp:comment_author_url")
    # NOTE(review): the element name suggests a GMT timestamp, yet it is
    # labelled +08:00 without conversion -- confirm.
    comment_date = DateTime.parse(comment.get_text("wp:comment_date_gmt").to_s).strftime("%Y-%m-%dT%H:%M:%S+08:00")
    comment_content = comment.get_text("wp:comment_content") # kept in scope for the template
    comment_id += 1
    comments << eval('%Q{' + comment_template + '}', binding)
  end

  # Keep a post together with its comments while staying within the import
  # limit: start a new chunk when this post would overflow the current one.
  # An empty chunk is never abandoned -- a post that exceeds the limit on its
  # own simply fills the current (empty) chunk -- so no empty file is produced
  # when the very first post is oversized.
  if !entries.empty? && entries.size + comments.size + 1 > max_items_per_file
    entries = []
    all_entries << entries
  end

  comments_size = comments.size
  entries << eval('%Q{' + entry_template + '}', binding)
  entries.concat(comments)
end
# Emit one numbered file per chunk: p2blogger0.xml, p2blogger1.xml, ...
all_entries.each_with_index do |chunk, i|
  # feed_template looks feed_content up by name when it is rendered below,
  # so this local has to keep exactly this name.
  feed_content = chunk.reduce("", :+)
  destination = "p2blogger#{i}.xml"
  puts "Writing #{chunk.size} entries to #{destination}"
  # The template is rendered inside the block, i.e. after the file is opened.
  File.open(destination, "w") do |out|
    out << eval('%Q{' + feed_template + '}', binding)
  end
end