amazedkoumei/import.rb

## import.rb
require 'rubygems'
require 'nokogiri'
require 'fileutils'
require 'date'

# usage: ruby import.rb my-blog.xml
# my-blog.xml is a file from Settings -> Basic -> Export in blogger.

# Read and parse the Blogger export named by the first command-line argument.
abort 'usage: ruby import.rb my-blog.xml' if ARGV.empty?

data = File.read ARGV[0]
doc = Nokogiri::XML(data)

# Posts keyed by Atom entry id; filled by `add`, iterated when writing files.
# NOTE: the former top-level `@@count = 0` was never read, and assigning a
# class variable at top level raises RuntimeError on Ruby 3.0+, so it is gone.
@posts = {}

# Files one <entry> node of the export into @posts.
#
# The entry kind is the part after '#' in the `term` attribute of the entry's
# first <category>:
#   'post'    - stored in @posts under the entry id
#   'comment' - attached to the post named by the `ref` attribute of its
#               `in-reply-to` child
#   'template', 'settings', 'page' - ignored
#
# A comment whose parent cannot be found is skipped with a warning on stderr
# instead of crashing the import. Because 'page' entries are never stored,
# any comment that replies to a page has no parent in @posts; the same is true
# of a comment that is seen before its post.
#
# node - entry node; must respond to `search` and `children`.
#
# Raises RuntimeError ("dunno <kind>") for any other entry kind.
def add(node)
  id = node.search('id').first.content
  type = node.search('category').first.attr('term').split('#').last
  case type
  when 'post'
    @posts[id] = Post.new(node)
  when 'comment'
    reply_to = node.children.find { |c| c.name == 'in-reply-to' }
    parent = reply_to && @posts[reply_to.attr('ref')]
    if parent
      parent.add_comment(Comment.new(node))
    else
      warn "skipping comment #{id}: parent post not found"
    end
  when 'template', 'settings', 'page'
    # deliberately not converted
  else
    raise 'dunno ' + type
  end
end

# Renders one post, followed by its comments, into a file under `_posts/`.
#
# post - object responding to title, comments, file_name, header and content;
#        each comment responds to author, content and creation_datetime.
#
# Two progress lines go to stdout first. When post.file_name is nil the
# method returns right after them and no file is created.
def write(post)
  puts "Post [#{post.title}] has #{post.comments.count} comments"

  name = post.file_name
  puts "writing #{name}"
  return if name.nil?

  File.open(File.join('_posts', name), 'w') do |out|
    # Header, a blank line, then the post body in its own <div>.
    out << post.header << "\n\n"
    #out << "<h1>{{ page.title }}</h1>\n"
    out << "<div class='post'>\n" << post.content << "</div>\n"

    out << "<h2>Comments</h2>\n" << "<div class='comments'>\n"
    # Walk the comment list back to front.
    post.comments.reverse_each do |entry|
      stamp = "%Y/%m/%d %H:%M"
      out << "<div class='comment'>\n"
      out << "<div class='author'>" << entry.author
      out << "&nbsp;<span>" << entry.creation_datetime.strftime(stamp) << "</span>\n"
      out << "</div>\n"
      out << "<div class='content'>\n" << entry.content << "</div>\n"
      out << "</div>\n"
      out << "<hr />\n"
    end
    out.write "</div>\n"
  end
end

# One post read from an entry node of the export, together with the comments
# attached to it. Produces the output file name and the '---'-delimited
# header block for the post.
class Post
  # Category `term` that marks the entry as a post; it is not a label.
  KIND_TERM = 'http://schemas.google.com/blogger/2008/kind#post'.freeze

  attr_reader :comments

  # node - entry node; must respond to `search(selector)`.
  def initialize(node)
    @node = node
    @comments = []
  end

  # Puts comment at the front of the list, so `comments` is in reverse
  # insertion order.
  def add_comment(comment)
    @comments.unshift comment
  end

  # Text of the first <title> element.
  def title
    @node.search('title').first.content
  end

  # Text of the first <content> element.
  def content
    @node.search('content').first.content
  end

  # Publication date formatted as YYYY-MM-DD.
  def creation_date
    creation_datetime.strftime("%Y-%m-%d")
  end

  # Publication time parsed from the first <published> element.
  def creation_datetime
    DateTime.parse(@node.search('published').first.content)
  end

  # Comma-joined `term` values of every <category> except the post-kind marker.
  def labels
    terms = @node.search('category').map { |e| e.attribute("term").value }
    terms.reject { |term| term == KIND_TERM }.join(",")
  end

  # "<creation_date>-<last path segment of the alternate link>", or nil when
  # the entry has no link with rel="alternate".
  def file_name
    url_node = @node.search('link[@rel="alternate"]')
    return nil if url_node.empty?

    href = url_node.attribute("href").value
    # Drop everything up to and including the last '/'.
    slug = href.sub(%r{.*/}, '')
    %{#{creation_date}-#{slug}}
  end

  # Header block: layout, quoted title, date, tags and `comments: false`
  # between two '---' lines, joined with newlines (no trailing newline).
  def header
    [
      '---',
      %{layout: post},
      %{title: #{escape_yaml(title)}},
      %{date: #{creation_datetime.strftime("%Y/%m/%d %H:%M")}},
      %{tags: [#{labels}]},
      %{comments: false},
      '---'
    ].join("\n")
  end

  # Wraps str in single quotes for use as a YAML scalar.
  #
  # Embedded single quotes are doubled, which is how a single-quoted YAML
  # scalar escapes them; without this a title such as "It's" would close the
  # scalar early and leave the header unparseable.
  def escape_yaml(str)
    doubled = str.gsub("'", "''")
    "'#{doubled}'"
  end
end

# A single comment, backed by the entry node it was read from.
class Comment
  # node - entry node; must respond to `search(selector)`.
  def initialize(node)
    @node = node
  end

  # Text of the first element matching 'author name'.
  def author
    first_text('author name')
  end

  # Text of the first <content> element.
  def content
    first_text('content')
  end

  # Publication time parsed from the first <published> element.
  def creation_datetime
    published = first_text('published')
    DateTime.parse(published)
  end

  # Publication date formatted as YYYY-MM-DD.
  def creation_date
    stamp = creation_datetime
    stamp.strftime('%Y-%m-%d')
  end

  private

  # Content of the first node returned by searching for selector.
  def first_text(selector)
    matches = @node.search(selector)
    matches.first.content
  end
end

# Sort every <entry> of the parsed export into @posts via `add`.
# (The unused local `entries` that used to be assigned here is gone.)
doc.search('entry').each do |entry|
  add entry
end

# Start from an empty output directory on every run.
FileUtils.rm_rf('_posts')
FileUtils.mkdir_p('_posts')

# One output file per collected post; the hash keys are not needed here.
@posts.each_value do |post|
  write post
end
	require 'rubygems'
	require 'nokogiri'
	require 'fileutils'
	require 'date'

	# usage: ruby import.rb my-blog.xml
	# my-blog.xml is a file from Settings -> Basic -> Export in blogger.

	# Read and parse the Blogger export named by the first command-line argument.
	abort 'usage: ruby import.rb my-blog.xml' if ARGV.empty?

	data = File.read ARGV[0]
	doc = Nokogiri::XML(data)

	# Posts keyed by Atom entry id; filled by `add`, iterated when writing files.
	# NOTE: the former top-level `@@count = 0` was never read, and assigning a
	# class variable at top level raises RuntimeError on Ruby 3.0+, so it is gone.
	@posts = {}

	# Files one <entry> node of the export into @posts.
	#
	# The entry kind is the part after '#' in the `term` attribute of the entry's
	# first <category>:
	#   'post'    - stored in @posts under the entry id
	#   'comment' - attached to the post named by the `ref` attribute of its
	#               `in-reply-to` child
	#   'template', 'settings', 'page' - ignored
	#
	# A comment whose parent cannot be found is skipped with a warning on stderr
	# instead of crashing the import. Because 'page' entries are never stored,
	# any comment that replies to a page has no parent in @posts; the same is true
	# of a comment that is seen before its post.
	#
	# node - entry node; must respond to `search` and `children`.
	#
	# Raises RuntimeError ("dunno <kind>") for any other entry kind.
	def add(node)
	  id = node.search('id').first.content
	  type = node.search('category').first.attr('term').split('#').last
	  case type
	  when 'post'
	    @posts[id] = Post.new(node)
	  when 'comment'
	    # Block parameters use plain pipes; the backslash-escaped form does not parse.
	    reply_to = node.children.find { |c| c.name == 'in-reply-to' }
	    parent = reply_to && @posts[reply_to.attr('ref')]
	    if parent
	      parent.add_comment(Comment.new(node))
	    else
	      warn "skipping comment #{id}: parent post not found"
	    end
	  when 'template', 'settings', 'page'
	    # deliberately not converted
	  else
	    raise 'dunno ' + type
	  end
	end

	# Renders one post, followed by its comments, into a file under `_posts/`.
	#
	# post - object responding to title, comments, file_name, header and content;
	#        each comment responds to author, content and creation_datetime.
	#
	# Two progress lines go to stdout first. When post.file_name is nil the
	# method returns right after them and no file is created.
	def write(post)
	  puts "Post [#{post.title}] has #{post.comments.count} comments"

	  name = post.file_name
	  puts "writing #{name}"
	  return if name.nil?

	  # Block parameters use plain pipes; the backslash-escaped form does not parse.
	  File.open(File.join('_posts', name), 'w') do |out|
	    # Header, a blank line, then the post body in its own <div>.
	    out << post.header << "\n\n"
	    out << "<div class='post'>\n" << post.content << "</div>\n"

	    out << "<h2>Comments</h2>\n" << "<div class='comments'>\n"
	    # Walk the comment list back to front.
	    post.comments.reverse_each do |entry|
	      out << "<div class='comment'>\n"
	      out << "<div class='author'>" << entry.author
	      # The separator is written as the HTML entity so it survives as text.
	      out << "&nbsp;<span>" << entry.creation_datetime.strftime("%Y/%m/%d %H:%M") << "</span>\n"
	      out << "</div>\n"
	      out << "<div class='content'>\n" << entry.content << "</div>\n"
	      out << "</div>\n"
	      out << "<hr />\n"
	    end
	    out.write "</div>\n"
	  end
	end

	# One post read from an entry node of the export, together with the comments
	# attached to it. Produces the output file name and the '---'-delimited
	# header block for the post.
	class Post
	  # Category `term` that marks the entry as a post; it is not a label.
	  KIND_TERM = 'http://schemas.google.com/blogger/2008/kind#post'.freeze

	  attr_reader :comments

	  # node - entry node; must respond to `search(selector)`.
	  def initialize(node)
	    @node = node
	    @comments = []
	  end

	  # Puts comment at the front of the list, so `comments` is in reverse
	  # insertion order.
	  def add_comment(comment)
	    @comments.unshift comment
	  end

	  # Text of the first <title> element.
	  def title
	    @node.search('title').first.content
	  end

	  # Text of the first <content> element.
	  def content
	    @node.search('content').first.content
	  end

	  # Publication date formatted as YYYY-MM-DD.
	  def creation_date
	    creation_datetime.strftime("%Y-%m-%d")
	  end

	  # Publication time parsed from the first <published> element.
	  def creation_datetime
	    DateTime.parse(@node.search('published').first.content)
	  end

	  # Comma-joined `term` values of every <category> except the post-kind marker.
	  def labels
	    terms = @node.search('category').map { |e| e.attribute("term").value }
	    terms.reject { |term| term == KIND_TERM }.join(",")
	  end

	  # "<creation_date>-<last path segment of the alternate link>", or nil when
	  # the entry has no link with rel="alternate".
	  def file_name
	    url_node = @node.search('link[@rel="alternate"]')
	    return nil if url_node.empty?

	    href = url_node.attribute("href").value
	    # Drop everything up to and including the last '/'.
	    slug = href.sub(%r{.*/}, '')
	    %{#{creation_date}-#{slug}}
	  end

	  # Header block: layout, quoted title, date, tags and `comments: false`
	  # between two '---' lines, joined with newlines (no trailing newline).
	  def header
	    [
	      '---',
	      %{layout: post},
	      %{title: #{escape_yaml(title)}},
	      %{date: #{creation_datetime.strftime("%Y/%m/%d %H:%M")}},
	      %{tags: [#{labels}]},
	      %{comments: false},
	      '---'
	    ].join("\n")
	  end

	  # Wraps str in single quotes for use as a YAML scalar.
	  #
	  # Embedded single quotes are doubled, which is how a single-quoted YAML
	  # scalar escapes them; without this a title such as "It's" would close the
	  # scalar early and leave the header unparseable.
	  def escape_yaml(str)
	    doubled = str.gsub("'", "''")
	    "'#{doubled}'"
	  end
	end

	# A single comment, backed by the entry node it was read from.
	class Comment
	  # node - entry node; must respond to `search(selector)`.
	  def initialize(node)
	    @node = node
	  end

	  # Text of the first element matching 'author name'.
	  def author
	    first_text('author name')
	  end

	  # Text of the first <content> element.
	  def content
	    first_text('content')
	  end

	  # Publication time parsed from the first <published> element.
	  def creation_datetime
	    published = first_text('published')
	    DateTime.parse(published)
	  end

	  # Publication date formatted as YYYY-MM-DD.
	  def creation_date
	    stamp = creation_datetime
	    stamp.strftime('%Y-%m-%d')
	  end

	  private

	  # Content of the first node returned by searching for selector.
	  def first_text(selector)
	    matches = @node.search(selector)
	    matches.first.content
	  end
	end

	# Sort every <entry> of the parsed export into @posts via `add`.
	# Block parameters use plain pipes; the backslash-escaped form does not parse.
	doc.search('entry').each do |entry|
	  add entry
	end

	# Start from an empty output directory on every run.
	FileUtils.rm_rf('_posts')
	FileUtils.mkdir_p('_posts')

	# One output file per collected post; the hash keys are not needed here.
	@posts.each_value do |post|
	  write post
	end