Skip to content

Instantly share code, notes, and snippets.

@wr0ngway
Created December 30, 2008 22:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save wr0ngway/41781 to your computer and use it in GitHub Desktop.
Save wr0ngway/41781 to your computer and use it in GitHub Desktop.
#!/usr/bin/env ruby
require 'date'
require 'rexml/document'
include REXML # so that we don't have to prefix everything with REXML::...
# Blogger account and blog settings -- edit these before running the script.
# They are substituted into the Atom templates below.
blogger_id   = '111111111111'        # profile id used in the author <uri>
blog_title   = 'my blog'             # feed <title>
author_name  = 'my name'             # <name> of the feed and post author
author_email = 'noreply@blogger.com' # <email> of the feed and post author
blog_id      = '222222222222222'     # used in tag: ids and www.blogger.com feed URLs
blog_name    = 'myblog'              # host prefix of <blog_name>.blogspot.com
# Parse the WordPress export file named by the first command-line argument.
input_path = ARGV[0]
# Fail with a readable usage line instead of a TypeError from File.open(nil).
abort "Usage: #{$PROGRAM_NAME} <wordpress-export.xml>" if input_path.nil?
# REXML builds the whole tree inside Document.new, so the handle can be closed
# as soon as the block returns instead of staying open until the script exits.
doc = File.open(input_path) { |file| REXML::Document.new(file) }
# Atom feed wrapper for one output file. The text is stored verbatim (no
# interpolation happens here); the #{...} placeholders are filled in later when
# the template is eval'ed as a %Q{} string. Placeholders used: blog_id,
# blog_title, blog_name, author_name, blogger_id, author_email, feed_content.
feed_template = <<'FEED_TEMPLATE'
<?xml version="1.0" encoding="UTF-8" ?>
<feed xmlns="http://www.w3.org/2005/Atom" xmlns:openSearch="http://a9.com/-/spec/opensearchrss/1.0/" xmlns:gd="http://schemas.google.com/g/2005" xmlns:thr="http://purl.org/syndication/thread/1.0">
<id>tag:blogger.com,1999:blog-#{blog_id}.archive</id>
<updated>2008-12-04T11:08:02.017+08:00</updated>
<title type="text">#{blog_title}</title>
<link rel='http://schemas.google.com/g/2005#feed' type='application/atom+xml' href='http://#{blog_name}.blogspot.com/feeds/archive'/>
<link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/#{blog_id}/archive'/>
<link rel='http://schemas.google.com/g/2005#post' type='application/atom+xml' href='http://www.blogger.com/feeds/#{blog_id}/archive'/>
<link rel='alternate' type='text/html' href='http://#{blog_name}.blogspot.com/'/>
<author>
<name>#{author_name}</name>
<uri>http://www.blogger.com/profile/#{blogger_id}</uri>
<email>#{author_email}</email>
</author>
<generator version="7.00" uri="http://www.blogger.com">Blogger</generator>
#{feed_content}
</feed>
FEED_TEMPLATE
# Atom <entry> for a single blog post. Stored verbatim; the #{...} placeholders
# are resolved when the text is eval'ed as a %Q{} string. Placeholders used:
# blog_id, post_id, post_date, post_category, post_title, post_content,
# author_name, blogger_id, author_email, comments_size.
# The blank first line is intentional: the template starts with a newline.
entry_template = <<'ENTRY_TEMPLATE'

<entry>
<id>tag:blogger.com,1999:blog-#{blog_id}.post-#{post_id}</id>
<published>#{post_date}</published>
<updated>#{post_date}</updated>
<category scheme="http://schemas.google.com/g/2005#kind" term="http://schemas.google.com/blogger/2008/kind#post" />
<category scheme="http://www.blogger.com/atom/ns#" term="#{post_category}" />
<title type="text">#{post_title}</title>
<content type="html"><![CDATA[#{post_content}]]></content>
<author>
<name>#{author_name}</name>
<uri>http://www.blogger.com/profile/#{blogger_id}</uri>
<email>#{author_email}</email>
</author>
<thr:total>#{comments_size}</thr:total>
</entry>
ENTRY_TEMPLATE
# Atom <entry> for a single comment, linked to its post via <thr:in-reply-to>.
# Stored verbatim; the #{...} placeholders are resolved when the text is
# eval'ed as a %Q{} string. Placeholders used: blog_id, post_id, comment_id,
# comment_date, comment_content, comment_author, comment_author_email,
# comment_author_url. The three author sub-elements are emitted only when the
# corresponding value is non-nil.
# The blank first line is intentional: the template starts with a newline.
comment_template = <<'COMMENT_TEMPLATE'

<entry>
<id>tag:blogger.com,1999:blog-#{blog_id}.post-#{post_id}.comment-#{comment_id}</id>
<published>#{comment_date}</published>
<updated>#{comment_date}</updated>
<category scheme='http://schemas.google.com/g/2005#kind'
term='http://schemas.google.com/blogger/2008/kind#comment'/>
<title type='text'><![CDATA[#{comment_content}]]></title>
<content type='html'><![CDATA[#{comment_content}]]></content>
<link rel='self'
type='application/atom+xml'
href='http://www.blogger.com/feeds/feh/comments/default/1'/>
<author>
#{'<name>' + "#{comment_author}" + '</name>' if comment_author}
#{'<email>' + "#{comment_author_email}" + '</email>' if comment_author_email}
#{'<uri>' + "#{comment_author_url}" + '</uri>' if comment_author_url}
</author>
<thr:in-reply-to href='http://www.blogger.com/feeds/#{blog_id}/posts/default/#{post_id}'
ref='tag:blogger.com,1999:blog-#{blog_id}.post-#{post_id}'
type='application/atom+xml'/>
</entry>
COMMENT_TEMPLATE
# Convert every <item> (post) of the export, together with its <wp:comment>
# children, into rendered Atom <entry> strings. The strings are collected in
# batches (all_entries is an array of arrays); each batch later becomes one
# output file.
#
# NOTE: the templates are rendered with eval + binding, so the local variable
# names assigned below (post_id, post_date, comment_author, ...) must match the
# #{...} placeholders in entry_template and comment_template. Only the
# templates themselves are evaluated as Ruby; the imported values are merely
# interpolated.

# Largest number of entries (posts + comments) put into one output file.
max_entries_per_file = 50
# Emit the UTC offset that was actually parsed (%:z) rather than stamping every
# time with a fixed +08:00, which mislabelled GMT timestamps and shifted the
# instant by eight hours.
atom_time_format = '%Y-%m-%dT%H:%M:%S%:z'

entries = []
all_entries = [entries]
post_id = 0
doc.elements.each('//item') do |element|
  post_title = element.get_text('title')
  post_date = DateTime.parse(element.get_text('pubDate').to_s).strftime(atom_time_format)
  post_content = element.get_text('content:encoded')
  post_category = element.get_text('category')
  # Posts without a category fall back to a default label.
  post_category = 'Blogging' if post_category.nil? || post_category == ''
  post_id += 1

  comments = []
  comment_id = 0 # comment ids restart at 1 for every post
  element.elements.each('wp:comment') do |comment|
    comment_author = comment.get_text('wp:comment_author')
    comment_author_email = comment.get_text('wp:comment_author_email')
    comment_author_url = comment.get_text('wp:comment_author_url')
    comment_date = DateTime.parse(comment.get_text('wp:comment_date_gmt').to_s).strftime(atom_time_format)
    comment_content = comment.get_text('wp:comment_content')
    comment_id += 1
    comments << eval('%Q{' + comment_template + '}', binding)
  end

  # Keep a post and its comments in the same batch while staying within the
  # import limit. Only roll over when the current batch already holds
  # something; otherwise a single post with many comments would leave an
  # empty batch behind (and an empty output file later).
  if !entries.empty? && entries.size + comments.size + 1 > max_entries_per_file
    entries = []
    all_entries << entries
  end

  comments_size = comments.size
  entries << eval('%Q{' + entry_template + '}', binding)
  entries.concat(comments)
end
# Write each batch of rendered entries to its own numbered feed file
# (p2blogger0.xml, p2blogger1.xml, ...) in the current directory.
all_entries.each_with_index do |batch, i|
  # feed_content is the name the feed template's placeholder expects.
  feed_content = batch.join
  destination = "p2blogger#{i}.xml"
  puts "Writing #{batch.size} entries to #{destination}"
  File.open(destination, 'w') { |out| out.write(eval('%Q{' + feed_template + '}', binding)) }
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment