public
Last active

Import a WordPress database and generate markdown files for Jekyll

  • Download Gist
wordpress_importer.rb
Ruby
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88
# based on the import script by icebreaker, which is based on mojombo's
# https://github.com/mojombo/jekyll/blob/master/lib/jekyll/migrators/wordpress.rb
# https://gist.github.com/303570
# edited to rewrite image URLs to use my CloudFront URL
 
require 'rubygems'
require 'sequel'
require 'fileutils'
 
# $ export DB=my_wpdb
# $ export USER=dbuser
# $ export PASS=dbpass
# $ ruby -r './lib/jekyll/migrators/wordpress' -e 'Jekyll::WordPress.process( "#{ENV["DB"]}", "#{ENV["USER"]}", "#{ENV["PASS"]}")'
 
# NOTE: This converter requires Sequel and the MySQL gems.
# The MySQL gem can be difficult to install on OS X. Once you have MySQL
# installed, running the following commands should work:
# $ sudo gem install sequel
# $ sudo gem install mysql -- --with-mysql-config=/usr/local/mysql/bin/mysql_config
 
module Jekyll
module WordPress
 
# Reads a MySQL database via Sequel and creates a post file for each
# post in wp_posts that has post_status = 'publish'.
# This restriction is made because 'draft' posts are not guaranteed to
# have valid dates.
QUERY = "select post_title, post_name, post_date, post_content, post_excerpt, ID, guid from wp_posts where post_status = 'publish' and post_type = 'post'"
# Fetch all tags for a given POST ID
TAGS_QUERY = "select tm.term_id,tm.name from wp_term_relationships tr
inner join wp_term_taxonomy tt on tr.term_taxonomy_id = tt.term_taxonomy_id
inner join wp_terms tm on tm.term_id=tt.term_id
where tr.object_id=%d and tt.taxonomy = 'post_tag'";
 
def self.process(dbname = '', user='', pass='', host = 'localhost', domain = '')
db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host)
 
FileUtils.mkdir_p "_posts"
 
db[QUERY].each do |post|
# Get required fields and construct Jekyll compatible name
title = post[:post_title]
slug = post[:post_name]
date = post[:post_date]
content = post[:post_content]
name = "%02d-%02d-%02d-%s.markdown" % [date.year, date.month, date.day, slug]
 
# Get associated taxonomy terms (tags)
# We replace + with nothing and transform to lower case
# TODO: figure out what other characters would fuck up YAML
tags = []
db[TAGS_QUERY % post[:ID]].each do |tag|
tags << tag[:name].to_s.gsub('+','').downcase
end
 
# Process content to rewrite some URLs
if domain
content = self.transformUrls(domain,content)
end
 
# Get the relevant fields as a hash, delete empty fields and convert
# to YAML for the header
data = {
'layout' => 'post',
'title' => title.to_s,
'excerpt' => post[:post_excerpt].to_s,
'tags' => tags
}.delete_if { |k,v| v.nil? || v == ''}.to_yaml
 
# Write out the data and content to file
File.open("_posts/#{name}", "w") do |f|
f.puts data
f.puts "---"
f.puts content
end
end
 
end
 
# Process the content and replace URLs pointing to wp-content/uploads/
# with CloudFront CNAME'd URL turbo.paulstamatiou.com/uploads/
def self.transformUrls(domain,content)
baseurl = "%s/wp-content/uploads/" % domain
return content.gsub(baseurl,"turbo.paulstamatiou.com/uploads/")
end
 
end
end

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.