Created
September 26, 2011 23:56
-
-
Save benben/1243825 to your computer and use it in GitHub Desktop.
pimped octopress wordpress migrator script
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'rubygems' | |
require 'sequel' | |
require 'fileutils' | |
require 'yaml' | |
require 'active_support/inflector' | |
# pimped by http://github.com/benben
# original script from here: https://github.com/mojombo/jekyll/blob/master/lib/jekyll/migrators/wordpress.rb
# some parts stolen from here: http://vitobotta.com/how-to-migrate-from-wordpress-to-jekyll/#importing-from-wordpress
# NOTE: This converter requires the Sequel and MySQL gems.
# The MySQL gem can be difficult to install on OS X. Once you have MySQL
# installed, running the following commands should work:
# $ sudo gem install sequel
# $ sudo gem install mysql -- --with-mysql-config=/usr/local/mysql/bin/mysql_config
module Jekyll
  # Migrates published WordPress posts from a MySQL database into Markdown
  # files with YAML front matter under source/_posts.
  module WordPress
    # A `<pre lang="X">...</pre>` block. Capture 1 is the language, capture 2
    # the code with the whitespace just inside the tags stripped. Only paired
    # tags match, so plain `<pre>` blocks are left untouched.
    PRE_BLOCK = /<pre lang="([^"]+)">[ \t]*\n?(.*?)\s*<\/pre>/m

    # WordPress taxonomy name => front-matter key. Terms of any other taxonomy
    # are skipped rather than evaluated as code.
    TAXONOMY_KEYS = { 'category' => 'categories', 'post_tag' => 'tags' }.freeze

    # Reads a MySQL database via Sequel and writes one Markdown file for each
    # row of <table_prefix>posts with post_status = 'publish' and
    # post_type = 'post'. Draft posts are excluded because they are not
    # guaranteed to have valid dates.
    #
    # dbname       - name of the WordPress database.
    # user         - database user.
    # pass         - database password.
    # host         - database host (default 'localhost').
    # table_prefix - WordPress table prefix (default 'wp_'); it is interpolated
    #                into the SQL, so it must come from a trusted source.
    #
    # Files are written relative to the current working directory.
    def self.process(dbname, user, pass, host = 'localhost', table_prefix = 'wp_')
      db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')

      FileUtils.mkdir_p("source/_posts")

      posts_query = <<-EOS
        SELECT post_title, post_name, post_date, post_content, post_excerpt,
               post_author, post_status, ID, guid
        FROM #{table_prefix}posts
        WHERE post_status = 'publish' AND post_type = 'post'
      EOS

      # The %d placeholder is filled with the post ID for each post.
      terms_query = <<-EOS
        SELECT t.taxonomy, term.name, term.slug
        FROM #{table_prefix}term_relationships AS tr
        INNER JOIN #{table_prefix}term_taxonomy AS t ON t.term_taxonomy_id = tr.term_taxonomy_id
        INNER JOIN #{table_prefix}terms AS term ON term.term_id = t.term_id
        WHERE tr.object_id = %d
        ORDER BY tr.term_order
      EOS

      # Look each author up at most once; a missing user row yields nil, which
      # drops the 'author' key from the front matter instead of raising.
      author_names = Hash.new do |cache, author_id|
        row = db["SELECT display_name FROM #{table_prefix}users WHERE ID = #{author_id.to_i} LIMIT 1"].first
        cache[author_id] = row && row[:display_name]
      end

      db[posts_query].each do |post|
        date = post[:post_date]
        status = post[:post_status]

        # Jekyll-compatible file name: <year>-<month>-<day>-<slug>.markdown
        name = "%02d-%02d-%02d-%s.markdown" % [date.year, date.month, date.day, post[:post_name]]

        # Turn <pre lang="x"> blocks into fenced code blocks, keeping the code
        # on its own lines between the fences.
        content = post[:post_content].to_s.gsub(PRE_BLOCK, "``` \\1\n\\2\n```")

        terms = { 'categories' => [], 'tags' => [] }
        db[terms_query % post[:ID]].each do |term|
          key = TAXONOMY_KEYS[term[:taxonomy]]
          terms[key] << term[:name] if key
        end

        # Build the front matter, dropping nil and empty-string fields.
        data = {
          'layout'        => 'post',
          'title'         => post[:post_title].to_s,
          'excerpt'       => post[:post_excerpt].to_s,
          'wordpress_id'  => post[:ID],
          'wordpress_url' => post[:guid],
          'date'          => date,
          'comments'      => true,
          'author'        => author_names[post[:post_author]],
          'categories'    => terms['categories'],
          'tags'          => terms['tags']
        }.delete_if { |_key, value| value.nil? || value == '' }.to_yaml

        # The query above only returns published posts, so '_drafts' is only
        # reached if the query is widened; create the directory either way.
        target_dir = "source/#{status == 'publish' ? '_posts' : '_drafts'}"
        FileUtils.mkdir_p(target_dir)

        # to_yaml already emits the opening '---'; the second one closes the
        # front matter.
        File.open(File.join(target_dir, name), "w") do |f|
          f.puts data
          f.puts "---"
          f.puts content
        end
      end
    end
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment