public
Last active — forked from juniorz/import.rb

Import a Blogger archive to Jekyll (Octopress version)

  • Download Gist
import.rb
Ruby
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177
require 'rubygems'
require 'nokogiri'
require 'fileutils'
require 'date'
require 'uri'
 
# usage: ruby import.rb my-blog.xml
# my-blog.xml is a file from Settings -> Basic -> Export in blogger.

# Without an argument File.read(nil) would die with an opaque TypeError, so
# stop early with a usage hint instead.
abort "usage: ruby #{$0} my-blog.xml" if ARGV.empty?

# Load and parse the whole export in one go.
data = File.read ARGV[0]
doc = Nokogiri::XML(data)

# Registries keyed by Atom entry id; `add` stores Post objects in them.
@posts = {}
@drafts = {}
 
# Files one Atom <entry> from the Blogger export into the right registry.
#
# The entry kind is the part after '#' in the `term` attribute of the
# entry's first <category> element:
#   * 'post'                 - wrapped in a Post and stored in @posts, or in
#                              @drafts when published?(node) is false
#   * 'comment'              - wrapped in a Comment and attached to the post
#                              named by the `ref` attribute of its
#                              `in-reply-to` child
#   * 'template', 'settings' - ignored
#
# The parent of a comment is looked up at call time, first in @posts and
# then in @drafts. A comment with no `in-reply-to` child, or whose parent is
# in neither registry, is skipped with a warning on stderr rather than
# aborting the whole import.
#
# @param node [Nokogiri::XML::Node] an <entry> element
# @raise [RuntimeError] 'dunno <kind>' for any other entry kind
def add(node)
  id = node.search('id').first.content
  type = node.search('category').first.attr('term').split('#').last

  case type
  when 'post'
    if published?(node)
      @posts[id] = Post.new(node)
    else
      @drafts[id] = Post.new(node)
    end
  when 'comment'
    reply_to = node.children.find { |c| c.name == 'in-reply-to' }
    post_id = reply_to && reply_to.attr('ref')
    parent = @posts[post_id] || @drafts[post_id]

    if parent
      parent.add_comment(Comment.new(node))
    else
      warn "skipping comment #{id}: parent post #{post_id.inspect} not found"
    end
  when 'template', 'settings'
    # Blog-level entries; nothing to import.
  else
    raise "dunno #{type}"
  end
end
 
# Tells whether an entry is publicly visible rather than a draft.
#
# An entry counts as a draft only when it carries an
# <app:control><app:draft> element whose text is "yes" (compared after
# stripping whitespace and ignoring case). A missing element, or one
# holding any other value such as "no", means the entry is published.
#
# @param node [Nokogiri::XML::Node] an <entry> element
# @return [Boolean]
def published?(node)
  draft = node.at_css('app|control app|draft', 'app' => 'http://purl.org/atom/app#')
  draft.nil? || draft.content.strip.downcase != 'yes'
end
 
# Renders one post, followed by any comments attached to it, into an HTML
# file named post.file_name inside `path`.
#
# Two progress lines are printed to stdout first. The file then contains the
# post's front-matter header, a blank line, the body wrapped in
# <div class='post'>, and - only when the post has comments - a
# <div class='comments'> section with one author/content pair per comment.
#
# @param post an object responding to title, file_name, header, content
#   and comments
# @param path [String] directory to write into; it is not created here
def write(post, path='_posts')
  puts "Post [#{post.title}] has #{post.comments.count} comments"

  puts "writing #{post.file_name}"
  destination = File.join(path, post.file_name)

  File.open(destination, 'w') do |out|
    # Front matter, a blank separator line, then the wrapped body.
    out << post.header << "\n\n"
    #out << "<h1>{{ page.title }}</h1>\n"
    out << "<div class='post'>\n" << post.content << "</div>\n"

    unless post.comments.empty?
      out << "<h2>Comments</h2>\n<div class='comments'>\n"
      post.comments.each do |entry|
        out << "<div class='comment'>\n<div class='author'>" << entry.author << "</div>\n"
        out << "<div class='content'>\n" << entry.content << "</div>\n</div>\n"
      end
      out.write "</div>\n"
    end
  end
end
 
# A Blogger post (published or draft) backed by its Atom <entry> node,
# together with the comments attached to it during import.
class Post
  attr_reader :comments

  # @param node the Atom <entry> element describing this post
  def initialize(node)
    @node = node
    @comments = []
  end

  # Attaches a comment. Comments are prepended, so `comments` lists them in
  # the reverse of the order in which they were added.
  def add_comment(comment)
    @comments.unshift comment
  end

  # @return [String] text of the entry's <title> element
  def title
    @title ||= @node.at_css('title').content
  end

  # @return [String] text of the entry's <content> element
  def content
    @content ||= @node.at_css('content').content
  end

  # @return [String] publication date formatted as YYYY-MM-DD
  def creation_date
    @creation_date ||= creation_datetime.strftime("%Y-%m-%d")
  end

  # @return [DateTime] parsed from the entry's first <published> element
  def creation_datetime
    @creation_datetime ||= DateTime.parse(@node.search('published').first.content)
  end

  # @return [String, nil] href of the entry's link[rel=alternate] element,
  #   or nil when the entry has no such link
  def permalink
    return @permalink unless @permalink.nil?

    link_node = @node.at_css('link[rel=alternate]')
    @permalink = link_node && link_node.attr('href')
  end

  # Slug used in the output file name: the permalink's base name without its
  # extension, or, when there is no permalink, the title's ASCII
  # alphanumeric runs lower-cased and joined with '-'.
  def param_name
    if permalink.nil?
      # scan (rather than split) so a title that starts with punctuation
      # does not contribute an empty first piece and hence a leading '-'.
      title.scan(/[a-zA-Z0-9]+/).join('-').downcase
    else
      File.basename(URI(permalink).path, '.*')
    end
  end

  # @return [String] "<YYYY-MM-DD>-<slug>.html"
  def file_name
    %{#{creation_date}-#{param_name}.html}
  end

  # YAML front matter for the generated file, from the opening to the
  # closing '---'. The categories block is left out when the post has none.
  def header
    [
      '---',
      %{layout: post},
      %{title: #{yaml_double_quoted(title)}},
      %{date: #{creation_datetime}},
      %{comments: false},
      categories,
      '---'
    ].compact.join("\n")
  end

  # @return [String, nil] a "categories:" YAML list built from the `term`
  #   attribute of every Blogger-scheme <category> element (elements without
  #   a term are skipped), or nil when there are no such elements
  def categories
    terms = @node.search('category[scheme="http://www.blogger.com/atom/ns#"]')
    unless Array(terms).empty?
      [
        'categories:',
        terms.map { |t| t.attr('term') && " - #{t.attr('term')}" }.compact.join("\n"),
      ].join("\n")
    end
  end

  private

  # Wraps text in double quotes for use as a YAML scalar, backslash-escaping
  # embedded double quotes and backslashes so they cannot end the scalar
  # early or be read as escape sequences.
  def yaml_double_quoted(text)
    escaped = text.gsub(/["\\]/) { |ch| "\\#{ch}" }
    %{"#{escaped}"}
  end
end
 
# A Blogger comment backed by its Atom <entry> node.
class Comment
  # @param node the Atom <entry> element describing this comment
  def initialize(node)
    @node = node
  end

  # @return [String] text of the first element matching 'author name'
  def author
    text_of('author name')
  end

  # @return [String] text of the first <content> element
  def content
    text_of('content')
  end

  private

  # Text content of the first element under the entry matching `selector`.
  def text_of(selector)
    @node.search(selector).first.content
  end
end
 
# Hand every <entry> in the export to `add`, which fills @posts and @drafts.
doc.search('entry').each { |entry| add entry }

puts "** Writing PUBLISHED posts"
# Recreate the output directory so files from an earlier run do not linger.
FileUtils.rm_rf('_posts')
FileUtils.mkdir_p('_posts')

@posts.each_value { |post| write post }

puts "\n"
puts "** Writing DRAFT posts"

FileUtils.rm_rf('_drafts')
FileUtils.mkdir_p('_drafts')

@drafts.each_value { |post| write post, '_drafts' }

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.