Created
March 14, 2015 16:08
-
-
Save talarczykco/ce53f8a2c7ef1b1f1499 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
require 'mysql2' | |
require 'stringex' | |
require 'preamble' | |
# Destination directory for the generated post files, taken from the first
# command-line argument. It must already exist.
outdir = ARGV[0]

# Usage errors belong on stderr, not stdout. Kernel#abort prints the message
# to stderr and terminates the script with exit status 1.
abort 'Usage: wordpress-to-jekyll.rb <outdir>' unless outdir && File.directory?(outdir)
# Connection settings for the WordPress MySQL database. The values are
# placeholders and have to be replaced with real ones before running.
db_settings = {
  :host     => 'YOURHOST',
  :username => 'YOURUSER',
  :password => 'YOURPASSWORD',
  :database => 'YOURDATABASE'
}
client = Mysql2::Client.new(db_settings)
# Every published row of type 'post' from wp_posts, ordered by ID.
# A heredoc replaces the Python-style triple quotes, which Ruby only accepts
# because they parse as empty string literals adjacent to the real one.
query_posts = client.query(<<-SQL)
  select ID,
         guid,
         post_category,
         post_content,
         post_date,
         post_title
    from wp_posts
   where post_type = 'post'
     and post_status = 'publish'
   order by ID
SQL
# One row per (post ID, category name) pair for every published post.
#
# The former INNER JOIN on wp_postmeta has been dropped: none of its columns
# were selected or filtered on, yet it excluded every post that has no
# postmeta rows (losing that post's categories) and multiplied the rows of
# every post that has several. GROUP BY still collapses duplicate names per
# post, and ORDER BY makes the row order explicit instead of relying on the
# server's grouping behaviour.
query_categories = client.query(<<-SQL)
  SELECT p.ID,
         t.name
    FROM wp_posts AS p
   INNER JOIN wp_term_relationships AS tr ON p.ID = tr.object_id
   INNER JOIN wp_term_taxonomy AS tt ON tr.term_taxonomy_id = tt.term_taxonomy_id
   INNER JOIN wp_terms AS t ON tt.term_id = t.term_id
   WHERE p.post_type = 'post'
     AND p.post_status = 'publish'
     AND tt.taxonomy = 'category'
   GROUP BY p.ID, t.name
   ORDER BY p.ID, t.name
SQL
# Lookup table from post ID to the array of category names attached to that
# post, in the order the rows come back from query_categories. Posts without
# any category row have no entry, so a lookup for them yields nil.
categories = query_categories.each_with_object({}) do |row, by_post|
  (by_post[row['ID']] ||= []) << row['name']
end
# Absolute URL prefix of the WordPress upload directory; it is rewritten to
# the local /images/ path. The dots are escaped so they only match literal dots.
UPLOADS_URL = %r{https?://www\.example\.com/wp-content/uploads/}

# Replacement table applied to every post body, in order.
#
# Apart from the '--' entry, each left-hand side is the character sequence
# that appears when the UTF-8 bytes of the right-hand character are decoded
# as Windows-1252. The sequences are spelled with \u escapes so that editors
# or copy/paste cannot silently "repair" them into no-op replacements.
#
# Order matters: the bare two-character prefix is a prefix of all the dash and
# quote sequences, so it has to be tried last.
#
# NOTE(review): the '--' rule also rewrites double hyphens inside HTML
# comments such as <!--more--> -- confirm that this is acceptable.
MOJIBAKE_FIXES = [
  ["\u00C3\u00A4",       "\u00E4"], # a with diaeresis
  ["\u00C3\u00A9",       "\u00E9"], # e with acute
  ["\u00C3\u00B6",       "\u00F6"], # o with diaeresis
  ['--',                 "\u2013"], # typed double hyphen -> en dash
  ["\u00E2\u20AC\u201C", "\u2013"], # en dash
  ["\u00E2\u20AC\u201D", "\u2014"], # em dash
  ["\u00E2\u20AC\u02DC", "\u2018"], # left single quotation mark
  ["\u00E2\u20AC\u2122", "\u2019"], # right single quotation mark
  ["\u00E2\u20AC\u0153", "\u201C"], # left double quotation mark
  ["\u00E2\u20AC\u009D", "\u201D"], # right double quotation mark
  ["\u00E2\u20AC",       "\u201D"]  # same, when the trailing control character was lost
].freeze

# Write one Markdown file with front matter per published post into outdir.
query_posts.each do |row|
  # Skip posts whose title or body is missing or contains only whitespace.
  # `=~` returns nil for a nil receiver, so NULL columns are skipped instead
  # of raising.
  next unless row['post_content'] =~ /\S/ && row['post_title'] =~ /\S/

  # Quotes in titles confuse the front matter, so remove both kinds.
  title = row['post_title'].delete(%q("'))

  # Point uploaded images at the local copy, repair the mis-encoded
  # characters, and remove carriage returns.
  body = row['post_content'].gsub(UPLOADS_URL, '/images/')
  MOJIBAKE_FIXES.each { |broken, fixed| body.gsub!(broken, fixed) }
  body.delete!("\r")

  # File name stem in the form YYYY-MM-DD-foo-bar-baz.
  slug = "#{row['post_date'].strftime('%Y-%m-%d')}-#{title.to_url}"

  metadata = {
    'id'         => row['ID'],
    'categories' => categories[row['ID']],
    'title'      => title,
    'layout'     => 'post',
    'wpurl'      => "http://www.example.com/?p=#{ row['ID'] }",
    'slug'       => slug
  }

  # The file goes into the output directory given on the command line.
  Preamble.new(metadata, body).save(File.join(outdir, "#{slug}.md"))
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment