#!/usr/bin/env ruby
# frozen_string_literal: true

# Converts a WordPress export file into one or more Blogger import feeds.
#
# Every <item> in the export becomes one post entry followed by its comment
# entries. Entries are split over files named p2blogger<N>.xml (written to the
# current directory) so that a single file holds at most
# MAX_ENTRIES_PER_FEED entries while a post always stays together with its
# comments.
#
# Usage: ruby <this script> wordpress-export.xml

require 'date'
require 'rexml/document'
include REXML # so that we don't have to prefix everything with REXML::...

# --- Configuration: edit these to match the target Blogger account ----------
BLOGGER_ID   = "111111111111"
BLOG_TITLE   = "my blog"
AUTHOR_NAME  = "my name"
AUTHOR_EMAIL = "noreply@blogger.com"
BLOG_ID      = "222222222222222"
BLOG_NAME    = "myblog"

# Upper bound on entries (posts + comments) written to one output file.
MAX_ENTRIES_PER_FEED = 50

# Category used when an item has no (or an empty) <category>.
DEFAULT_CATEGORY = "Blogging"

# NOTE(review): the "+08:00" suffix is a literal; the parsed time is NOT
# converted to that offset, so the source clock fields are emitted unchanged.
# Confirm that this is intended for exports whose dates are in GMT.
TIMESTAMP_FORMAT = "%Y-%m-%dT%H:%M:%S+08:00"

# Parses the given date text and formats it with TIMESTAMP_FORMAT.
#
# @param text [#to_s, nil] date text (nil is treated as an empty string)
# @return [String] formatted timestamp
# @raise [ArgumentError] if the text cannot be parsed as a date
def blogger_timestamp(text)
  DateTime.parse(text.to_s).strftime(TIMESTAMP_FORMAT)
end

# Renders one <wp:comment> element as a feed entry string.
#
# Missing author, e-mail or URL values render as empty strings.
# NOTE(review): the content is placed inside a CDATA section verbatim; a
# comment containing "]]>" would end that section early.
#
# @param comment [REXML::Element] the wp:comment element
# @param post_id [Integer] sequence number of the owning post
# @param comment_id [Integer] sequence number of the comment within the post
# @return [String]
def render_comment(comment, post_id, comment_id)
  author  = comment.get_text("wp:comment_author")
  email   = comment.get_text("wp:comment_author_email")
  url     = comment.get_text("wp:comment_author_url")
  date    = blogger_timestamp(comment.get_text("wp:comment_date_gmt"))
  content = comment.get_text("wp:comment_content")

  " tag:blogger.com,1999:blog-#{BLOG_ID}.post-#{post_id}.comment-#{comment_id} " \
    "#{date} #{date} <![CDATA[#{content}]]> #{author} #{email} #{url} "
end

# Extracts the post-level values of an <item> element.
#
# :content and :category are collected alongside the other fields although the
# entry text as written in render_entry does not reference them.
#
# @param item [REXML::Element] the item element
# @return [Hash] with keys :title, :date, :content and :category
def post_fields(item)
  category = item.get_text("category")
  category = DEFAULT_CATEGORY if category.to_s.empty? # covers nil and ""

  {
    title: item.get_text("title"),
    date: blogger_timestamp(item.get_text("pubDate")),
    content: item.get_text("content:encoded"),
    category: category
  }
end

# Renders the feed entry string for one post.
#
# @param post [Hash] values as returned by post_fields
# @param post_id [Integer] sequence number of the post
# @param comments_size [Integer] number of comments attached to the post
# @return [String]
def render_entry(post, post_id, comments_size)
  " tag:blogger.com,1999:blog-#{BLOG_ID}.post-#{post_id} " \
    "#{post[:date]} #{post[:date]} #{post[:title]} #{AUTHOR_NAME} " \
    "http://www.blogger.com/profile/#{BLOGGER_ID} #{AUTHOR_EMAIL} #{comments_size} "
end

# Converts one <item> into its post entry followed by its comment entries.
#
# @param item [REXML::Element] the item element
# @param post_id [Integer] sequence number of the post
# @return [Array<String>] the post entry first, then one string per comment
def convert_item(item, post_id)
  comments = item.elements.to_a("wp:comment").map.with_index(1) do |comment, comment_id|
    render_comment(comment, post_id, comment_id)
  end
  [render_entry(post_fields(item), post_id, comments.size), *comments]
end

# Converts every <item> of the document; posts are numbered from 1 in
# document order.
#
# @param doc [REXML::Document] parsed export
# @return [Array<Array<String>>] one group (post + comments) per item
def item_groups(doc)
  doc.elements.to_a("//item").map.with_index(1) do |item, post_id|
    convert_item(item, post_id)
  end
end

# Packs groups into batches of at most +limit+ entries without ever splitting
# a group.
#
# A group larger than the limit gets a batch of its own. A new batch is only
# started when the current one already holds something, so an oversized first
# group no longer leaves an empty batch behind.
#
# @param groups [Array<Array>] groups that must stay together
# @param limit [Integer] maximum number of entries per batch
# @return [Array<Array>] batches; always at least one (possibly empty) batch
def batch_groups(groups, limit = MAX_ENTRIES_PER_FEED)
  batches = [[]]
  groups.each do |group|
    current = batches.last
    if !current.empty? && current.size + group.size > limit
      current = []
      batches << current
    end
    current.concat(group)
  end
  batches
end

# Renders the complete feed text for one batch of entries.
#
# @param entries [Array<String>] rendered post and comment entries
# @return [String]
def render_feed(entries)
  feed_content = entries.join

  " tag:blogger.com,1999:blog-#{BLOG_ID}.archive 2008-12-04T11:08:02.017+08:00 " \
    "#{BLOG_TITLE} #{AUTHOR_NAME} http://www.blogger.com/profile/#{BLOGGER_ID} " \
    "#{AUTHOR_EMAIL} Blogger #{feed_content} "
end

# Writes each batch to p2blogger<index>.xml in the current directory and
# reports the entry count of every file on stdout.
#
# @param batches [Array<Array<String>>] batches as returned by batch_groups
# @return [void]
def write_feeds(batches)
  batches.each_with_index do |entries, index|
    destination = "p2blogger#{index}.xml"
    puts "Writing #{entries.size} entries to #{destination}"
    File.write(destination, render_feed(entries))
  end
end

# Parses the export at +path+; the file handle is closed once parsing is done.
#
# @param path [String] path of the WordPress export file
# @return [REXML::Document]
def load_export(path)
  File.open(path) { |file| REXML::Document.new(file) }
end

if __FILE__ == $PROGRAM_NAME
  if ARGV.empty?
    warn "Usage: #{File.basename($PROGRAM_NAME)} wordpress-export.xml"
  else
    write_feeds(batch_groups(item_groups(load_export(ARGV[0]))))
  end
end