Skip to content

Instantly share code, notes, and snippets.

@d6rkaiz
Forked from melwin/wp-xml-import.rb
Created March 11, 2012 05:55
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save d6rkaiz/2015210 to your computer and use it in GitHub Desktop.
Save d6rkaiz/2015210 to your computer and use it in GitHub Desktop.
# -*- encoding: utf-8 -*-
require 'fileutils'
require 'date'
require 'yaml'
require 'rexml/document'
include REXML
# Converts a WordPress XML export (WXR) into one Markdown file per published
# post, each with a YAML front-matter header, written to "_posts".
#
# Usage: ruby wp-xml-import.rb wordpress-export.xml

# HTML entities decoded in post bodies. They are replaced in a single pass so
# that an escaped entity such as "&amp;lt;" becomes "&lt;" and not "<".
HTML_ENTITIES = {
  '&lt;' => '<',
  '&gt;' => '>',
  '&quot;' => '"',
  '&amp;' => '&'
}.freeze

# Per-line rewrites, applied in order. The [code]/[php] shortcodes (and their
# <code>/<php> tag forms) become fenced code blocks; a single-line <li>
# becomes a "* " bullet. The order matters: the "open and close on the same
# line" rules must run before the "open only" / "close only" rules.
LINE_RULES = [
  [/(\[|<)(php|code)(\]|>)(.*)(\[|<)\/(php|code)(\]|>)/, "\n```\n\\4\n```\n"],
  [/(\[|<)(php|code)(\]|>)(.*)$/, "\n```\n\\4"],
  [/\[(php|code) ([^=]*)=\"([^\"]*)\"\](.*)\[\/(php|code)\]/, "\n```\n\\4\n```"],
  [/\[(php|code) ([^=]*)=\"([^\"]*)\"\](.*)$/, "\n```\n\\4"],
  [/^(.*)(\[|<)\/(php|code)(\]|>)/, "\\1\n```"],
  [/^(\s*)<li>([^<]*)<\/li>/, '* \2']
].freeze

# Returns the text of the child element +name+, or "" when the element is
# missing or empty (REXML returns nil for an element without text).
def element_text(elements, name)
  node = elements[name]
  node ? node.text.to_s : ''
end

# Applies every rule in LINE_RULES to a single line of post content.
def convert_line(line)
  LINE_RULES.reduce(line) { |text, (pattern, replacement)| text.gsub(pattern, replacement) }
end

# Converts the HTML body of a post to Markdown.
#
# @param html [String] raw post body
# @return [String] Markdown text
def html_to_markdown(html)
  lines = html.split(/\n/).map { |line| convert_line(line) }
  # Bare <ul>/<ol> wrapper lines carry no content once <li> became bullets.
  lines = lines.reject { |line| line =~ /^<\/?[uo]l>$/ }

  text = lines.join(" \n")
  text = text.gsub(/<p>(.*)<\/p>/, '\1')
  text = text.gsub(/&(?:lt|gt|quot|amp);/, HTML_ENTITIES)
  text = text.gsub(/<br \/>/, " \n")
  text = text.gsub(/<strong>([^<]*)<\/strong>/, '**\1**')
  text = text.gsub(/<b>([^<]*)<\/b>/, '**\1**')
  text = text.gsub(/<a href=[\"\']([^\"\']*)[\"\']>([^<]*)<\/a>/, '[\2](\1)')
  # <hN> and <hN class=...> both become an ATX heading with N hashes.
  (1..6).each do |level|
    text = text.gsub(/<h#{level}(?: class=[^>]*)?>([^<]*)<\/h#{level}>/, "#{'#' * level} \\1")
  end
  text
end

# Builds the output file name, e.g. "2012-03-11-my-post.markdown".
def post_filename(date, slug)
  "#{date.strftime('%Y-%m-%d')}-#{slug}.markdown"
end

# Formats the post date for the front matter, zero-padded ("2012-03-11 05:05").
def front_matter_date(date)
  date.strftime('%Y-%m-%d %H:%M')
end

# Collects the category and the tags of a post from its <category> elements.
# When a post has several categories, the last one wins.
#
# @return [Array(String, Array<String>)] category nicename ("" if none) and tag nicenames
def categories_and_tags(post)
  category = ''
  tags = []
  post.each('category') do |node|
    nicename = node.attributes['nicename'].to_s
    case node.attributes['domain']
    when 'category' then category = nicename
    when 'post_tag' then tags << nicename
    end
  end
  [category, tags]
end

# Renders the YAML front matter (starting with "---"); nil and empty-string
# fields are left out.
def front_matter(title, date, category, tags)
  fields = {
    'layout' => 'post',
    'title' => title,
    'date' => front_matter_date(date),
    'comments' => false,
    'categories' => category,
    'tags' => tags
  }
  fields.reject { |_key, value| value.nil? || value == '' }.to_yaml
end

# Reads the WordPress export at +path+ and writes one Markdown file per
# published post into +out_dir+ (created if needed).
def import_wordpress_export(path, out_dir = '_posts')
  # Block form closes the input file; Document.new parses it completely
  # before returning.
  doc = File.open(path) { |io| REXML::Document.new(io) }
  FileUtils.mkdir_p(out_dir)

  doc.elements.each("rss/channel/item[wp:status = 'publish' and wp:post_type = 'post']") do |item|
    post = item.elements
    date = DateTime.parse(element_text(post, 'wp:post_date'))
    name = post_filename(date, element_text(post, 'wp:post_name'))
    content = html_to_markdown(element_text(post, 'content:encoded').encode('UTF-8'))
    category, tags = categories_and_tags(post)

    puts "Converting: #{name}"
    File.open(File.join(out_dir, name), 'w') do |f|
      f.puts front_matter(element_text(post, 'title'), date, category, tags)
      f.puts '---'
      f.puts content
    end
  end
end

if ARGV.empty?
  warn "Usage: ruby #{$PROGRAM_NAME} wordpress-export.xml"
else
  import_wordpress_export(ARGV[0])
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment