Skip to content

Instantly share code, notes, and snippets.

@lukeholder
Created May 27, 2013 10:20
Show Gist options
  • Save lukeholder/5656358 to your computer and use it in GitHub Desktop.
Save lukeholder/5656358 to your computer and use it in GitHub Desktop.
Ruby script that reads a WordPress export (WXR) file and loads its posts into a SQLite database
#!/bin/env ruby
# encoding: utf-8
require 'rubygems'
require 'bundler/setup'
# require your gems as usual
require 'sequel'
require 'nokogiri'
require 'time'
require 'active_support/core_ext/string'
# require 'reverse_markdown'
# Converts plain text into minimal HTML paragraphs.
#
# Line endings are normalised to "\n", every run of two or more newlines
# becomes a paragraph break, and a remaining single newline becomes a <br>
# (unless the line already ends in ">" or the next line starts with "<").
# The whole result is wrapped in one outer <p>...</p> pair.
#
# The argument is never modified: the result is always a new String, so
# frozen strings and strings the caller still needs are safe to pass in.
#
# @param text [String, nil] raw text; nil is treated as an empty string
# @return [String] the HTML-formatted copy of +text+
def simple_format(text)
  start_tag = "<p>"
  source = text.nil? ? '' : text.to_str

  # Non-bang gsub returns fresh strings, leaving the caller's object alone.
  body = source
         .gsub(/\r\n?/, "\n")                          # \r\n and \r -> \n
         .gsub(/\n\n+/, "</p>\n\n#{start_tag}")        # 2+ newlines -> paragraph
         .gsub(/([^>])(\n)([^\n<])/, '\1<br>\2\3')     # 1 newline -> line break

  "#{start_tag}#{body}</p>"
end
# Open (or create) the on-disk SQLite database file recipes.db.
DB = Sequel.sqlite('recipes.db')

# Start from a clean table on every run. drop_table? does nothing when the
# table is absent, so the first run does not abort on a missing table.
DB.drop_table?(:recipes)

puts "Creating recipes table"
DB.create_table :recipes do
  primary_key :id
  String :title
  Integer :title_length
  String :tags, :text => true
  String :raw_body, :text => true
  String :status
  DateTime :date_time
  String :old_url_a
  String :old_url_b
  String :slug
end

# Dataset used to insert rows into the recipes table.
recipes = DB[:recipes]

# Parse inside the block form of File.open so the handle is closed as soon as
# the document has been read.
items = File.open("tenina.wordpress.2013-05-27.xml") do |f|
  Nokogiri::XML(f).xpath("//channel//item")
end

# NOTE(review): every <item> is imported, whatever its wp:post_type is.
# One transaction for the whole import: a failure part-way through rolls back
# every row instead of leaving a half-filled table.
DB.transaction do
  items.each do |item|
    post_id = item.at_xpath('wp:post_id').text.to_i
    title = item.at_xpath('title').text.titleize

    categories = item.xpath('category[@domain="category"]')
                     .map { |c| c.text.singularize.titleize }
                     .reject { |c| c == 'Uncategorized' }
    post_tags = item.xpath('category[@domain="post_tag"]')
                    .map { |t| t.text.singularize.titleize }
    # Concatenate (rather than nest) the two lists so that uniq also removes a
    # tag that has the same name as a category.
    tags = (post_tags + categories).uniq.join(',')

    status = item.at_xpath('wp:status').text.singularize.titleize
    body = simple_format(item.at_xpath("content:encoded").text)
    date = item.at_xpath('wp:post_date').text
    old_url_a = item.at_xpath('link').text
    old_url_b = item.at_xpath('guid').text

    # NOTE(review): the original passed a bare :slug with no value, which does
    # not parse. wp:post_name is assumed to be the intended source -- confirm.
    # If it is missing or blank, a slug is derived from the title instead.
    post_name = item.at_xpath('wp:post_name')
    slug = post_name ? post_name.text : ''
    slug = title.parameterize if slug.empty?

    recipes.insert(
      :id => post_id,
      :title => title,
      :status => status,
      :raw_body => body,
      :title_length => title.length,
      :tags => tags,
      :date_time => date,
      :old_url_a => old_url_a,
      :old_url_b => old_url_b,
      :slug => slug
    )
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment