-
-
Save juniorz/1564581 to your computer and use it in GitHub Desktop.
require 'rubygems'
require 'nokogiri'
require 'cgi'
require 'date'
require 'fileutils'
require 'uri'
# usage: ruby import.rb my-blog.xml
# my-blog.xml is a file from Settings -> Basic -> Export in blogger.

# Fail with a readable message instead of a TypeError from File.read(nil).
abort 'usage: ruby import.rb my-blog.xml' if ARGV.empty?

data = File.read ARGV[0]
doc = Nokogiri::XML(data)

# Both hashes are keyed by the entry's <id> text and filled in by `add`.
@posts = {}
@drafts = {}
# Files one <entry> node from the export.
#
# Posts go into @posts or @drafts (keyed by the entry id) depending on
# `published?`. Comments are attached to the post named by their
# <in-reply-to ref="...">. Templates, settings and pages are ignored.
#
# The entry kind is taken from the <category> whose scheme is the "kind"
# scheme. An entry also carries one <category> per label, and those may come
# first; reading the first category blindly turns a label such as "sql" into
# an unknown kind. If no category carries the kind scheme, the first category
# is used as before.
#
# Raises RuntimeError for an entry without any category or with an
# unrecognised kind.
def add(node)
  kind_scheme = 'http://schemas.google.com/g/2005#kind'

  id = node.search('id').first.content
  categories = node.search('category')
  kind_node = categories.find { |c| c.attr('scheme') == kind_scheme } || categories.first
  raise "entry #{id} has no category" if kind_node.nil?

  type = kind_node.attr('term').split('#').last

  case type
  when 'post'
    if published?(node)
      @posts[id] = Post.new(node)
    else
      @drafts[id] = Post.new(node)
    end
  when 'comment'
    reply_to = node.children.find { |c| c.name == 'in-reply-to' }
    post_id = reply_to && reply_to.attr('ref')
    # A comment may belong to a draft, or to a post that is not in the export.
    post = @posts[post_id] || @drafts[post_id]
    if post
      post.add_comment(Comment.new(node))
    else
      warn "skipping comment #{id}: parent post #{post_id.inspect} not found"
    end
  when 'template', 'settings', 'page'
    # deliberately not imported
  else
    raise "dunno #{type}"
  end
end
# True unless the entry is marked as a draft.
#
# An entry is a draft only when its <app:control><app:draft> element says
# "yes". A missing element, or one whose value is "no", means the entry is
# published, so the element's presence alone is not enough.
def published?(node)
  draft = node.at_css('app|control app|draft', 'app' => 'http://purl.org/atom/app#')
  draft.nil? || draft.content.to_s.strip.downcase != 'yes'
end
# Writes one post as an HTML file with a front-matter header.
#
# post - object responding to title, file_name, header, content and comments
# path - directory to write into; it must already exist
#
# The file is named post.file_name. Post and comment bodies are written as-is
# because they are already HTML. The comment author name is plain text that
# commenters control, so it is HTML-escaped.
def write(post, path='_posts')
  puts "Post [#{post.title}] has #{post.comments.count} comments"
  puts "writing #{post.file_name}"

  page = "#{post.header}\n\n<div class='post'>\n#{post.content}</div>\n"
  page += comments_html(post.comments)

  File.open(File.join(path, post.file_name), 'w') { |file| file.write page }
end

# Renders the comments section, or an empty string when there are no comments.
def comments_html(comments)
  return '' if comments.empty?

  items = comments.map do |comment|
    "<div class='comment'>\n" \
    "<div class='author'>#{CGI.escapeHTML(comment.author.to_s)}</div>\n" \
    "<div class='content'>\n#{comment.content}</div>\n" \
    "</div>\n"
  end

  "<h2>Comments</h2>\n<div class='comments'>\n#{items.join}</div>\n"
end
# One blog post, backed by its <entry> node, plus the comments attached to it.
class Post
  attr_reader :comments

  def initialize(node)
    @node = node
    @comments = []
  end

  # Prepends the comment, so the most recently added comment comes first.
  def add_comment(comment)
    @comments.unshift comment
  end

  # Text of the entry's <title>.
  def title
    @title ||= @node.at_css('title').content
  end

  # Text of the entry's <content> (the post body).
  def content
    @content ||= @node.at_css('content').content
  end

  # Publication date as "YYYY-MM-DD".
  def creation_date
    @creation_date ||= creation_datetime.strftime("%Y-%m-%d")
  end

  # Publication date parsed from <published>.
  # NOTE(review): despite the name this is a Date, so the time of day is
  # dropped - confirm whether the front matter should keep it.
  def creation_datetime
    @creation_datetime ||= Date.parse(@node.search('published').first.content)
  end

  # href of the entry's alternate link, or nil when there is none.
  def permalink
    return @permalink if defined?(@permalink)

    link_node = @node.at_css('link[rel=alternate]')
    @permalink = link_node && link_node.attr('href')
  end

  # Slug used in the file name: the permalink's base name when there is a
  # permalink, otherwise the title's ASCII alphanumeric runs joined by "-".
  # Empty runs are dropped so the slug never starts with "-", and a title with
  # no usable characters falls back to "untitled".
  def param_name
    slug =
      if permalink.nil?
        title.split(/[^a-zA-Z0-9]+/).reject(&:empty?).join('-').downcase
      else
        File.basename(URI(permalink).path, '.*')
      end
    slug.empty? ? 'untitled' : slug
  end

  # "<creation_date>-<param_name>.html"
  def file_name
    %{#{creation_date}-#{param_name}.html}
  end

  # Front-matter block, delimiter lines included. The categories section is
  # left out when the post has no labels.
  def header
    [
      '---',
      %{layout: post},
      %{title: #{yaml_quote(title)}},
      %{date: #{creation_datetime}},
      %{comments: false},
      categories,
      '---'
    ].compact.join("\n")
  end

  # "categories:" followed by one quoted list item per label, or nil when the
  # entry has no labels. Category nodes without a term are skipped.
  def categories
    labels = @node.search('category[scheme="http://www.blogger.com/atom/ns#"]')
                  .map { |t| t.attr('term') }
                  .compact
    return nil if labels.empty?

    (['categories:'] + labels.map { |label| " - #{yaml_quote(label)}" }).join("\n")
  end

  private

  # Wraps text in double quotes, escaping backslashes and double quotes so
  # the result is a valid YAML double-quoted scalar.
  def yaml_quote(text)
    escaped = text.gsub(/[\\"]/) { |char| "\\#{char}" }
    %{"#{escaped}"}
  end
end
# One comment, backed by its <entry> node.
class Comment
  def initialize(node)
    @node = node
  end

  # Text of <author><name>, or "Anonymous" when the entry has no author name.
  def author
    @author ||= begin
      name_node = @node.search('author name').first
      name_node ? name_node.content : 'Anonymous'
    end
  end

  # Text of <content> (the comment body), or an empty string when missing.
  def content
    @content ||= begin
      body_node = @node.search('content').first
      body_node ? body_node.content : ''
    end
  end
end
# Sort every <entry> into @posts / @drafts (comments attach to their post).
doc.search('entry').each { |entry| add entry }

# Each output directory is deleted and recreated before writing, so any files
# already in it are lost.
[
  ['PUBLISHED', @posts, '_posts'],
  ['DRAFT', @drafts, '_drafts']
].each_with_index do |(label, posts, dir), index|
  puts "\n" unless index.zero?
  puts "** Writing #{label} posts"
  FileUtils.rm_rf(dir)
  FileUtils.mkdir_p(dir)
  posts.each_value { |post| write post, dir }
end
Could you post the full error (in a separate gist/pastebin)? I guess you have not installed the nokogiri gem (`gem install nokogiri` - maybe with sudo).
`nokogiri-1.5.0` is installed; I'm assuming that's the correct version?
Here is the full error message:
https://gist.github.com/1669618#file_shell_output
I also included the blog's exported XML (big file, ugh) in case it's of any use.
just fix the line 31 to:
when 'template', 'settings', 'page'
Thank you. Fixed. I'll try to import pages too.
Thanks for this work! It worked well, except that it doesn't handle double quotes or backslashes in titles, such that a "rake generate" will fail with YAML errors. For example, `"` needs to become `\"` and `\` needs to become `\\`.
Thanks a lot. It worked well.
I tried your script to migrate my Blogger blog. I installed ruby via `pacman -S ruby`, and Nokogiri via `gem install nokogiri` on Arch Linux.
But your script fails with the following:
[orschiro@thinkpad Blogger to Github]$ ruby import.rb blog-08-03-2013.xml
WARNING: Nokogiri was built against LibXML version 2.8.0, but has dynamically loaded 2.9.1
import.rb:27:in `add': dunno page (RuntimeError)
from import.rb:119:in `block in <main>'
from /home/orschiro/.gem/ruby/2.0.0/gems/nokogiri-1.6.0/lib/nokogiri/xml/node_set.rb:237:in `block in each'
from /home/orschiro/.gem/ruby/2.0.0/gems/nokogiri-1.6.0/lib/nokogiri/xml/node_set.rb:236:in `upto'
from /home/orschiro/.gem/ruby/2.0.0/gems/nokogiri-1.6.0/lib/nokogiri/xml/node_set.rb:236:in `each'
from import.rb:118:in `<main>'
Can you help me, please?
EDIT: Sorry, this did not happen with your script but with the original script you had forked from.
I'm seeing the same error as @IQAndreas - gisted here: https://gist.github.com/nbieber/a2c469538bdb79d2c520
Thanks for this. It converted almost all of my posts; the last one causes the script to crash Ruby entirely from line 42. I think it's because the post has some non-ascii in the name: "Some Quốc Ngữ History".
I have nokogiri installed, but I can't do it:
import.rb:30:in `add': undefined method `add_comment' for nil:NilClass (NoMethodError)
from import.rb:158:in `block in <main>'
from /var/lib/gems/2.1.0/gems/nokogiri-1.6.6.2/lib/nokogiri/xml/node_set.rb:187:in `block in each'
from /var/lib/gems/2.1.0/gems/nokogiri-1.6.6.2/lib/nokogiri/xml/node_set.rb:186:in `upto'
from /var/lib/gems/2.1.0/gems/nokogiri-1.6.6.2/lib/nokogiri/xml/node_set.rb:186:in `each'
from import.rb:157:in `<main>'
I'm having some trouble with the code, getting
dunno sql
on line #33. I commented out that line (so as to just ignore unknown types) but then get "import.rb:30:in 'add': undefined method 'add_comment' for nil:NilClass".
(I'm not used to Ruby, but I'm assuming this means it's trying to access a property of a null object?) Perhaps this is caused by comments being before their posts, or perhaps being defined for deleted posts?
Is this a problem on my end? Has this code been tested to work with the latest version of the exported Blogger/BlogSpot XML?