Skip to content

Instantly share code, notes, and snippets.

@talarczykco
Created March 14, 2015 16:08
Show Gist options
  • Save talarczykco/ce53f8a2c7ef1b1f1499 to your computer and use it in GitHub Desktop.
Save talarczykco/ce53f8a2c7ef1b1f1499 to your computer and use it in GitHub Desktop.
#!/usr/bin/env ruby
require 'mysql2'
require 'stringex'
require 'preamble'
# The first command-line argument must name an existing directory; it is
# where the generated files are written. Otherwise print the usage line
# and exit with status 1.
outdir = ARGV.first
unless outdir && File.directory?(outdir)
  puts 'Usage: wordpress-to-jekyll.rb <outdir>'
  exit 1
end
# Connection settings for the WordPress database. The values below look
# like placeholders to be filled in before running.
db_settings = {
  host: 'YOURHOST',
  username: 'YOURUSER',
  password: 'YOURPASSWORD',
  database: 'YOURDATABASE'
}
client = Mysql2::Client.new(db_settings)
# Fetch every published post, ordered by ID.
#
# A heredoc is used for the SQL text: Ruby has no triple-quoted string, so
# the previous """...""" form only worked because adjacent string literals
# ("" "..." "") are concatenated by the parser.
query_posts = client.query(<<-SQL)
  SELECT ID,
         guid,
         post_category,
         post_content,
         post_date,
         post_title
  FROM wp_posts
  WHERE post_type = 'post'
    AND post_status = 'publish'
  ORDER BY ID
SQL
# Fetch one (post ID, category name) row per category attached to each
# published post.
#
# The query no longer joins wp_postmeta: no column of that table was used,
# and the inner join dropped the categories of any post that has no
# postmeta rows. GROUP BY keeps each (ID, name) pair unique; ORDER BY makes
# the order of each post's categories deterministic.
query_categories = client.query(<<-SQL)
  SELECT p.ID,
         t.name
  FROM wp_posts AS p
  INNER JOIN wp_term_relationships AS tr ON p.ID = tr.object_id
  INNER JOIN wp_term_taxonomy AS tt ON tr.term_taxonomy_id = tt.term_taxonomy_id
  INNER JOIN wp_terms AS t ON tt.term_id = t.term_id
  WHERE p.post_type = 'post'
    AND p.post_status = 'publish'
    AND tt.taxonomy = 'category'
  GROUP BY p.ID, t.name
  ORDER BY p.ID, t.name
SQL
# Map each post ID to the list of its category names, in result order.
# Posts that appear in no row get no entry, so a lookup for them is nil.
categories = query_categories.each_with_object({}) do |term, names_by_post|
  (names_by_post[term['ID']] ||= []) << term['name']
end
# Garbled sequences seen when UTF-8 text has been decoded as Windows-1252,
# paired with the character that was intended. Everything is written with
# \u escapes so the table stays intact even if this file is re-encoded.
#
# NOTE(review): the left-hand sequences are reconstructed from the
# Windows-1252 reading of each character's UTF-8 bytes; confirm them against
# the actual database content.
MOJIBAKE_FIXES = [
  ["\u00C3\u00A4", "\u00E4"],       # a with diaeresis
  ["\u00C3\u00A9", "\u00E9"],       # e with acute
  ["\u00C3\u00B6", "\u00F6"],       # o with diaeresis
  ["\u00E2\u20AC\u201C", "\u2013"], # en dash
  ["\u00E2\u20AC\u201D", "\u2014"], # em dash
  ["\u00E2\u20AC\u02DC", "\u2018"], # left single quotation mark
  ["\u00E2\u20AC\u2122", "\u2019"], # right single quotation mark
  ["\u00E2\u20AC\u0153", "\u201C"], # left double quotation mark
  ["\u00E2\u20AC\u009D", "\u201D"], # right double quotation mark (third byte kept as a control char)
  # Right double quotation mark whose third byte was lost. This is a prefix
  # of the entries above, so it must stay last.
  ["\u00E2\u20AC", "\u201D"]
].freeze

# Write one Markdown file with front matter into outdir for every post that
# has a non-blank title and a non-blank body.
query_posts.each do |row|
  title = row['post_title'].to_s
  content = row['post_content'].to_s
  next unless title =~ /\S/ && content =~ /\S/

  # Quotes in titles confuse the front matter, so drop them.
  title = title.delete(%q("'))

  # Link to local images. The dots are escaped so only the literal host matches.
  content = content.gsub(%r{https?://www\.example\.com/wp-content/uploads/}, '/images/')

  # Turn double hyphens into an en dash.
  # NOTE(review): this also rewrites "--" inside HTML comments such as
  # <!--more-->; confirm that is acceptable.
  content = content.gsub('--', "\u2013")

  # Repair incorrectly re-encoded UTF-8 sequences.
  MOJIBAKE_FIXES.each { |garbled, intended| content = content.gsub(garbled, intended) }

  # Remove carriage returns.
  content = content.delete("\r")

  # Build the YYYY-MM-DD-foo-bar-baz slug used for the file name.
  slug = [row['post_date'].strftime('%Y-%m-%d'), title.to_url].join('-')

  metadata = {
    'id' => row['ID'],
    'categories' => categories[row['ID']],
    'title' => title,
    'layout' => 'post',
    'wpurl' => "http://www.example.com/?p=#{row['ID']}",
    'slug' => slug
  }

  # Write the file into the output directory given on the command line.
  Preamble.new(metadata, content).save(File.join(outdir, "#{slug}.md"))
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment