#!/usr/bin/env ruby
# Input: WordPress XML export file.
# Outputs: a series of Textile files ready to be included in a Jekyll site,
# and comments.yml, which contains all approved comments with the metadata
# needed for a Disqus import.
require 'rubygems'
require 'hpricot'
require 'clothred'
require 'date'
require 'time'
require 'yaml'
# Script configuration. Each value can be overridden positionally on the
# command line (export file, output directory, original blog domain); when an
# argument is omitted the original hard-coded default is used.

# Path of the WordPress XML export to read.
WORDPRESS_XML_FILE_PATH = ARGV[0] || "/home/marko/Documents/wordpress.2010-01-01.xml"
# Directory that receives the generated .textile files and comments.yml.
OUTPUT_PATH = ARGV[1] || "/tmp/export"
# Scheme and host of the original blog, used to rebuild post permalinks.
ORIGINAL_DOMAIN = ARGV[2] || "http://example.com"
# A single WordPress post, built from one <item> element of the XML export.
#
# It extracts the title, dates, slug and body, rebuilds the post's original
# permalink, converts the body to Textile via ClothRed, and can write itself
# out as a Jekyll post file and append its approved comments to comments.yml.
class Post
  # Characters that must be backslash-escaped inside a double-quoted YAML scalar.
  YAML_ESCAPES = {
    "\\" => "\\\\",
    "\"" => "\\\"",
    "\n" => "\\n",
    "\r" => "\\r",
    "\t" => "\\t"
  }

  attr_accessor :title, :post_date, :created_at, :slug, :url, :content, :textile_content
  attr_accessor :hpricot_element

  # item - the Hpricot element for one <item> of the export.
  def initialize(item)
    @hpricot_element = item
    @title = item.search("title").first.inner_text
    @post_date = item.search("wp:post_date").first.inner_text
    @created_at = Date.parse(post_date)
    @slug = item.search("wp:post_name").first.inner_text
    # The permalink is <domain>/YYYY/MM/DD/<slug>; the date part is the first
    # ten characters of wp:post_date_gmt with dashes turned into slashes.
    date_path = item.search("wp:post_date_gmt").first.inner_text[0, 10].gsub(/-/, "/")
    @url = ORIGINAL_DOMAIN + "/" + date_path + "/" + @slug
    @content = item.search("content:encoded").first.inner_text
    @textile_content = ClothRed.new(content).to_textile
  end

  # Returns the full text of the Jekyll post: YAML front matter followed by
  # the Textile body. The title is written as a double-quoted YAML scalar so
  # that titles containing ": ", "#", quotes and similar stay valid YAML.
  def to_jekyll
    buf = ""
    buf << "---\n"
    buf << "layout: post\n"
    buf << "title: #{yaml_quote(title)}\n"
    buf << "---\n\n"
    buf << textile_content
  end

  # Writes the post to <root_path>/<created_at>-<slug>.textile.
  # Returns self so calls can be chained.
  def save(root_path)
    File.open("#{root_path}/#{created_at}-#{slug}.textile", "w") { |file| file.write(to_jekyll) }
    self
  end

  # Appends every approved comment of this post (wp:comment_approved == "1")
  # to <path>/comments.yml.
  def save_comments(path)
    approved = @hpricot_element.search("wp:comment").select do |c|
      c.search("wp:comment_approved").inner_text == "1"
    end
    File.open("#{path}/comments.yml", "a") do |yaml_file|
      approved.each { |el| Comment.new(self, el).write_to(yaml_file) }
    end
  end

  class << self
    # Builds a Post from element and saves it under path.
    # Returns the saved Post, or nil when element is not an Hpricot::Elem.
    def parse(element, path)
      return nil unless element.is_a?(Hpricot::Elem)
      Post.new(element).save(path)
    end
  end

  private

  # Wraps text in double quotes, escaping the characters YAML treats
  # specially inside a double-quoted scalar.
  def yaml_quote(text)
    "\"" + text.to_s.gsub(/[\\"\n\r\t]/) { |ch| YAML_ESCAPES[ch] } + "\""
  end
end
# One WordPress comment, reduced to the fields that get dumped to
# comments.yml. The object is serialised with to_yaml, so its instance
# variables are what ends up in the YAML output.
class Comment
  attr_accessor :author_name, :author_email, :author_url, :content, :post

  # post    - the owning post; only its url is read.
  # element - the element for one wp:comment node.
  def initialize(post, element)
    @post_url = post.url + "/"
    @author_name = text_of(element, "wp:comment_author")
    @author_email = text_of(element, "wp:comment_author_email")
    @author_url = text_of(element, "wp:comment_author_url")
    @content = text_of(element, "wp:comment_content") || ""
    gmt_stamp = text_of(element, "wp:comment_date_gmt")
    # The export stores the timestamp without a zone, so tag it as GMT.
    @created_at = Time.parse("#{gmt_stamp} GMT")
  end

  # Writes this comment as a YAML document followed by a blank line.
  # Comments with empty content are skipped.
  def write_to(file)
    return if @content.size == 0
    file.write(to_yaml + "\n")
  end

  private

  # Inner text of the first node matching tag inside element.
  def text_of(element, tag)
    element.search(tag).first.inner_text
  end
end
# main
# Create the output directory if it is missing, so the writes below do not fail.
Dir.mkdir(OUTPUT_PATH) unless File.directory?(OUTPUT_PATH)
# Block form closes the export file once Hpricot has parsed it.
doc = File.open(WORDPRESS_XML_FILE_PATH) { |xml| Hpricot(xml) }
# Start from an empty comments.yml; each post appends its comments to it.
File.open("#{OUTPUT_PATH}/comments.yml", "w") { |f| }
(doc / "item").each do |item|
  post = Post.parse(item, OUTPUT_PATH)
  # Post.parse returns nil for anything that is not an Hpricot::Elem.
  next if post.nil?
  post.save_comments(OUTPUT_PATH)
end
#!/usr/bin/env ruby
# Takes comments.yml generated by wordpressxml2jekyll.rb and posts them to your Disqus forum.
# sudo gem install disqus
# YAML and Time.parse are used below, so load them explicitly instead of
# relying on another library having pulled them in.
require 'time'
require 'yaml'
require 'disqus'
require 'disqus/api'

# Path of the comments file; may be overridden by the first command-line argument.
COMMENTS_YAML_FILE = ARGV[0] || '/tmp/export/comments.yml'

# NOTE(review): a user API key is committed here in plain text. Prefer setting
# DISQUS_API_KEY in the environment; the literal is kept only as a fallback so
# existing invocations keep working, and it should be revoked and removed.
Disqus::defaults[:api_key] = ENV["DISQUS_API_KEY"] || "N4wWciM45UAfBJe6QbylR0mfQ340WH7kdEKlBi7q5Tb0QeAKHOxP7wC6W5WyJWWz"

# Uses the first forum returned for the account and fetches its forum API key.
forum_id = Disqus::Api.get_forum_list["message"].first["id"]
fak = Disqus::Api.get_forum_api_key(:forum_id => forum_id)["message"]

File.open(COMMENTS_YAML_FILE) do |yf|
  # NOTE(review): YAML.each_document and #ivars look like the older Syck
  # YAML API; confirm they exist on the Ruby version this runs on.
  YAML.each_document(yf) do |c|
    post_url = c.ivars["post_url"]
    thread = Disqus::Api.get_thread_by_url(:forum_api_key => fak, :url => post_url)
    thread_info = thread["message"]
    # Without a thread for this URL there is nothing to attach the comment
    # to; report it and move on instead of crashing on nil["id"].
    if thread_info.nil?
      warn "No Disqus thread found for #{post_url}; skipping comment"
      next
    end
    Disqus::Api.create_post(:forum_api_key => fak,
                            :thread_id => thread_info["id"],
                            :message => c.ivars["content"],
                            :author_name => c.ivars["author_name"],
                            :author_email => c.ivars["author_email"],
                            :author_url => c.ivars["author_url"],
                            :created_at => Time.parse(c.ivars["created_at"].to_s).strftime("%Y-%m-%dT%H:%M"))
  end
end
This comment has been minimized.
This comment has been minimized.
This looks awesome! |
This comment has been minimized.
This comment has been minimized.
@neozheng I don't know, my blog posts don't have them. But I think that it would. If you try it please let me know. |
This comment has been minimized.
This comment has been minimized.
@markoa just tried. perfectly transformed the file contents but not the names. so much better than the built-in converter which breaks everything. |
This comment has been minimized.
This comment has been minimized.
@neozhang since slug and content are read pretty much the same way I suspect something special needs to be done in the File.open call, but I haven't really worked with such issues before. Feel free to fork the gist and send more comments. |
This comment has been minimized.
This comment has been minimized.
The title in the YAML Front Matter is not properly escaped. If the title of the post is, for example, "command not found: clear", the YAML Front Matter becomes
`title: command not found: clear`
but it should be
`title: "command not found: clear"`
I fixed that in my gist http://gist.github.com/500506
This comment has been minimized.
Amazingly useful - thank you!