Skip to content

Instantly share code, notes, and snippets.

@bashcoder
Forked from czottmann/wordpress-to-tumblr.rb
Created March 12, 2013 23:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bashcoder/5147976 to your computer and use it in GitHub Desktop.
Save bashcoder/5147976 to your computer and use it in GitHub Desktop.
#!/usr/local/bin/ruby
require "hpricot"
require "net/http"
require "uri"
# Ports the posts of a WordPress export file to Tumblr via the "api/write"
# endpoint. Every successfully created post is appended to FILE_URLS_TO_TIDS
# as "<original link>;<Tumblr response body>", so the script can be re-run and
# will skip links that are already listed there.

# Tumblr credentials, sent as the :email / :password form fields.
# NOTE(review): these are hard-coded and transmitted over plain HTTP below;
# consider loading them from the environment and using an HTTPS endpoint.
TUMBLR_USER = "user@example.com".freeze
TUMBLR_PASS = "thepasswordlol".freeze
# Sent as the :group form field of every write request.
TUMBLR_DOMAIN = "mytumblrsubdomain.tumblr.com".freeze
# Base URL (with trailing slash) used to rebuild a post link when the export
# yields an empty one.
ORIGINAL_DOMAIN = "http://myoriginaldomain.example.com/".freeze
# Text file holding one "<original link>;<Tumblr response body>" line per
# ported post.
FILE_URLS_TO_TIDS = "old-new.txt".freeze

# Links recorded by earlier runs. The mapping file does not exist before the
# first successful post, and lines without a ";" (blank or otherwise malformed)
# carry no link, so both situations are tolerated instead of raising.
ported_urls = []
if File.exist?(FILE_URLS_TO_TIDS)
  File.readlines(FILE_URLS_TO_TIDS).each do |line|
    entry = line.strip.match(/^(.*);/)
    ported_urls << entry[1] if entry
  end
end

# Read the export in one go so that no file handle is left open while posting.
doc = Hpricot(File.read("wordpress.2009-10-10.xml"))

(doc/"item").each do |item|
  # Only items whose wp:post_type is "post" are ported; check this before doing
  # any other work on the item.
  next if item.search("wp:post_type").first.inner_text != "post"

  title = item.search("title").first.inner_text
  postdate = item.search("wp:post_date").first.inner_text
  link = item.search("link").first.inner_text

  if link.strip.empty?
    # Rebuild the link as <ORIGINAL_DOMAIN>YYYY/MM/DD/<post_name>/ when the
    # export gives none. NOTE(review): assumes the old blog used date-based
    # permalinks of exactly this shape - confirm for your blog.
    date_path = item.search("wp:post_date_gmt").first.inner_text[0, 10].gsub(/-/, "/")
    post_name = item.search("wp:post_name").first.inner_text
    link = "#{ORIGINAL_DOMAIN}#{date_path}/#{post_name}/"
  end

  next if ported_urls.include?(link)

  comments = item.search("wp:comment_approved").count { |ct| ct.inner_text == "1" }
  pingbacks = item.search("wp:comment_type").count { |ct| ct.inner_text == "pingback" }
  has_comments = (comments > 0 && comments > pingbacks)
  is_private = (item.search("wp:status").first.inner_text == "private")
  tags = item.search("category[@domain='tag']").map(&:inner_text).uniq

  content = item.search("content:encoded").first.inner_text
  # My WP blog was using Disqus. I'd like to retain the comments.
  content += "\n\n<script type='text/javascript'>var disqus_url = '#{link}';</script>" if has_comments
  next if content.strip.empty?

  puts "- #{title} [#{comments}/#{pingbacks}]"
  puts " #{link}"

  response = Net::HTTP.post_form(
    URI.parse("http://www.tumblr.com/api/write"),
    {
      :email => TUMBLR_USER,
      :password => TUMBLR_PASS,
      :type => "regular",
      :date => postdate,
      :title => title,
      :body => content,
      :tags => tags.join(","),
      :format => "markdown",
      :group => TUMBLR_DOMAIN,
      :private => is_private ? 1 : 0
    }
  )

  # Do not record failed requests: otherwise the error body would be stored as
  # the Tumblr id and the post would be skipped on every later run.
  unless response.is_a?(Net::HTTPSuccess)
    warn " ! not ported (HTTP #{response.code}): #{response.body.to_s.strip}"
    next
  end

  # Strip surrounding whitespace so each entry stays on a single line.
  tumblr_id = response.body.to_s.strip

  # The block form closes the file on exit; no explicit close is needed.
  File.open(FILE_URLS_TO_TIDS, "a") do |f|
    f.puts "#{link};#{tumblr_id}"
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment