# Gist amazedkoumei/6027433 by @amazedkoumei, forked from ngauthier/import.rb.
# Reads a Blogger XML export and writes one file per post, with YAML front
# matter, into the _posts/ directory.
require 'rubygems'
require 'cgi/escape'
require 'date'
require 'fileutils'
require 'nokogiri'
# usage: ruby import.rb my-blog.xml
# my-blog.xml is the file produced by Settings -> Basic -> Export in Blogger.

# Fail with the usage line instead of a TypeError from File.read(nil).
abort 'usage: ruby import.rb my-blog.xml' if ARGV.empty?

data = File.read(ARGV[0])
doc = Nokogiri::XML(data)

# Posts keyed by their entry id; filled in by `add`.
# (The former top-level `@@count = 0` was never read, and class-variable
# access from the top level raises RuntimeError on Ruby 3.0+, so it is gone.)
@posts = {}
# Registers one exported entry in @posts.
#
# node - an entry node responding to #search(selector) and #children.
#
# Entries of kind 'post' are stored in @posts under their id. Entries of kind
# 'comment' are attached to the post named by their in-reply-to child.
# 'template', 'settings' and 'page' entries are ignored.
#
# Raises RuntimeError ("dunno <kind>") for any other kind.
def add(node)
  id = node.search('id').first.content
  # Assumes the first <category> carries the kind term ("...kind#post" etc.);
  # the part after '#' is the kind.
  type = node.search('category').first.attr('term').split('#').last

  case type
  when 'post'
    @posts[id] = Post.new(node)
  when 'comment'
    reply_to = node.children.find { |child| child.name == 'in-reply-to' }
    post = reply_to && @posts[reply_to.attr('ref')]
    if post
      post.add_comment(Comment.new(node))
    else
      # Comments on ignored entries (e.g. pages) have no post in @posts.
      # Skip them instead of crashing on nil.
      warn "skipping comment #{id}: no imported post to attach it to"
    end
  when 'template', 'settings', 'page'
    # Deliberately ignored.
  else
    raise "dunno #{type}"
  end
end
# Writes one post, followed by its comments, to _posts/<post.file_name>.
#
# post - responds to #title, #comments, #file_name, #header and #content.
#        Each comment responds to #author, #content and #creation_datetime.
#
# Posts whose #file_name is nil are reported and skipped. The _posts
# directory must already exist.
def write(post)
  puts "Post [#{post.title}] has #{post.comments.count} comments"

  # Resolve the name once; decide whether to skip before announcing a write.
  file_name = post.file_name
  if file_name.nil?
    puts 'skipping: post has no file name'
    return
  end
  puts "writing #{file_name}"

  File.open(File.join('_posts', file_name), 'w') do |file|
    file.write post.header
    file.write "\n\n"
    file.write "<div class='post'>\n"
    file.write post.content
    file.write "</div>\n"
    file.write "<h2>Comments</h2>\n"
    file.write "<div class='comments'>\n"
    # reverse_each undoes the newest-first order of post.comments.
    post.comments.reverse_each do |comment|
      file.write "<div class='comment'>\n"
      file.write "<div class='author'>"
      # The author name is plain text from the commenter, so escape it.
      # The comment body is written as-is, like the post body.
      file.write CGI.escapeHTML(comment.author)
      file.write "&nbsp;<span>"
      file.write comment.creation_datetime.strftime("%Y/%m/%d %H:%M")
      file.write "</span>\n"
      file.write "</div>\n"
      file.write "<div class='content'>\n"
      file.write comment.content
      file.write "</div>\n"
      file.write "</div>\n"
      file.write "<hr />\n"
    end
    file.write "</div>\n"
  end
end
# Wraps one exported entry of kind "post" and exposes the pieces needed to
# render it as a file with YAML front matter.
class Post
  # Category term that marks an entry as a post; it is not a label.
  KIND_POST_TERM = 'http://schemas.google.com/blogger/2008/kind#post'.freeze

  # Comments attached so far, most recently added first.
  attr_reader :comments

  # node - the entry node; must respond to #search(selector).
  def initialize(node)
    @node = node
    @comments = []
  end

  # Prepends comment, so #comments lists the latest addition first.
  def add_comment(comment)
    @comments.unshift comment
  end

  # Text of the first <title> element.
  def title
    @node.search('title').first.content
  end

  # Text of the first <content> element.
  def content
    @node.search('content').first.content
  end

  # Publication date formatted as YYYY-MM-DD.
  def creation_date
    creation_datetime.strftime("%Y-%m-%d")
  end

  # Publication timestamp parsed from the first <published> element.
  def creation_datetime
    DateTime.parse(@node.search('published').first.content)
  end

  # Comma-joined category terms, excluding the kind marker for posts.
  def labels
    terms = @node.search('category').map { |category| category.attribute('term').value }
    terms.reject { |term| term == KIND_POST_TERM }.join(',')
  end

  # File name of the form "<creation_date>-<last path segment of the
  # alternate link>", or nil when the entry has no rel="alternate" link.
  def file_name
    links = @node.search('link[@rel="alternate"]')
    return nil if links.empty?

    # #attribute is called on the result set itself, as before; this is
    # presumed to read the attribute from the first matching link.
    href = links.attribute('href').value
    # Drop everything up to and including the last '/'.
    slug = href.sub(%r{.*/}, '')
    "#{creation_date}-#{slug}"
  end

  # YAML front matter block for the post, without a trailing newline.
  def header
    [
      '---',
      %{layout: post},
      %{title: #{escape_yaml(title)}},
      %{date: #{creation_datetime.strftime("%Y/%m/%d %H:%M")}},
      %{tags: [#{labels}]},
      %{comments: false},
      '---'
    ].join("\n")
  end

  # Returns str as a YAML single-quoted scalar. Inside single quotes the only
  # escape is a doubled quote, so "Don't" becomes 'Don''t'.
  def escape_yaml(str)
    "'#{str.gsub("'", "''")}'"
  end
end
# Read-only view over one exported entry of kind "comment".
class Comment
  # node - the entry node; must respond to #search(selector).
  def initialize(node)
    @node = node
  end

  # Text of the first element matching 'author name'.
  def author
    first_text('author name')
  end

  # Text of the first <content> element.
  def content
    first_text('content')
  end

  # Publication date formatted as YYYY-MM-DD.
  def creation_date
    published_at = creation_datetime
    published_at.strftime("%Y-%m-%d")
  end

  # Publication timestamp parsed from the first <published> element.
  def creation_datetime
    DateTime.parse(first_text('published'))
  end

  private

  # Content of the first node matching selector.
  def first_text(selector)
    matches = @node.search(selector)
    matches.first.content
  end
end
# Register every <entry> from the export: posts are stored in @posts and
# comments are attached to them by `add`.
doc.search('entry').each { |entry| add entry }

# Start from an empty output directory. rm_rf has just removed any previous
# one, so create it unconditionally.
FileUtils.rm_rf('_posts')
FileUtils.mkdir_p('_posts')

# One output file per collected post; the ids are not needed here.
@posts.each_value { |post| write post }
# End of script.