carlo (owner)

Revisions

gist: 210359 Download_button fork
public
Description:
Parses a WordPress XML export file and imports its posts into Tumblr. See http://blog.zottmann.org/post/213103984/the-big-blog-move-on-2009 for details.
Public Clone URL: git://gist.github.com/210359.git
Embed All Files: show embed
wordpress-to-tumblr.rb #
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#!/usr/local/bin/ruby
 
require "hpricot"
require "net/http"
require "uri"
 
# --- Configuration --------------------------------------------------------
# Credentials and target blog used for every Tumblr API request.
TUMBLR_USER = "user@example.com"
TUMBLR_PASS = "thepasswordlol"
TUMBLR_DOMAIN = "mytumblrsubdomain.tumblr.com"
# Base URL of the old WordPress site (with trailing slash).
ORIGINAL_DOMAIN = "http://myoriginaldomain.example.com/"
# Bookkeeping file: one "original link;tumblr id" line per imported post.
FILE_URLS_TO_TIDS = "old-new.txt"

# Links of posts that were already imported by a previous run. The
# bookkeeping file does not exist before the first run, and it may contain
# blank or malformed lines; both cases are tolerated instead of raising.
ported_urls =
  if File.exist?(FILE_URLS_TO_TIDS)
    File.readlines(FILE_URLS_TO_TIDS).map { |line| line.strip[/^(.*);/, 1] }.compact
  else
    []
  end

# Parse the WordPress export. The file is read in one go so that no open
# file handle is left behind once parsing is done.
doc = Hpricot( File.read("wordpress.2009-10-10.xml") )
 
# Returns the inner text of the first element matching +selector+ beneath
# +node+, or an empty string when nothing matches, so that items lacking an
# element do not raise on nil.
def first_text(node, selector)
  element = node.search(selector).first
  element ? element.inner_text : ""
end

# Walk every <item> of the export and post each not-yet-imported blog post to
# Tumblr. After each successful write, "original link;response body" is
# appended to FILE_URLS_TO_TIDS so an interrupted run can be resumed.
(doc/"item").each do |item|
  # Check the post type first so no further work is done for non-post items.
  next if first_text(item, "wp:post_type") != "post"

  link = first_text(item, "link")
  # An empty link would give every post the same key in the bookkeeping
  # file, so rebuild it as ORIGINAL_DOMAIN + YYYY/MM/DD/post_name/.
  # NOTE(review): assumes the old blog used date-based permalinks - confirm.
  if link.strip.empty?
    date_path = first_text(item, "wp:post_date_gmt")[0, 10].gsub(/-/, "/")
    link = ORIGINAL_DOMAIN + date_path + "/" + first_text(item, "wp:post_name") + "/"
  end

  next if ported_urls.include?(link)

  comments = item.search("wp:comment_approved").select { |ct| ct.inner_text == "1" }.size
  pingbacks = item.search("wp:comment_type").select { |ct| ct.inner_text == "pingback" }.size

  is_private = ( first_text(item, "wp:status") == "private" )
  has_comments = ( comments > 0 && comments > pingbacks )
  tags = item.search("category[@domain='tag']").collect(&:inner_text).uniq

  title = first_text(item, "title")
  postdate = first_text(item, "wp:post_date")
  content = first_text(item, "content:encoded")

  # My WP blog was using Disqus. I'd like to retain the comments.
  content += "\n\n<script type='text/javascript'>var disqus_url = '#{link}';</script>" if has_comments

  next if content.strip.empty?

  puts "- #{title} [#{comments}/#{pingbacks}]"
  puts " #{link}"

  # NOTE(review): the credentials travel over plain HTTP here - consider
  # switching to HTTPS if the endpoint supports it.
  response = Net::HTTP.post_form(
    URI.parse("http://www.tumblr.com/api/write"),
    {
      :email => TUMBLR_USER,
      :password => TUMBLR_PASS,
      :type => "regular",
      :date => postdate,
      :title => title,
      :body => content,
      :tags => tags.join(","),
      :format => "markdown",
      :group => TUMBLR_DOMAIN,
      :private => is_private ? 1 : 0
    }
  )

  # Only record the post as imported when the request succeeded; otherwise
  # an error message would be stored as the id and the post would be skipped
  # on every later run.
  unless response.is_a?(Net::HTTPSuccess)
    warn " ! not imported (HTTP #{response.code}): #{response.body.to_s.strip}"
    next
  end

  tumblr_id = response.body.to_s.strip
  # The block form of File.open closes the file by itself.
  File.open(FILE_URLS_TO_TIDS, "a") do |f|
    f.puts "#{link};#{tumblr_id}"
  end
  # Remember the link so a duplicate item in the same export is not re-posted.
  ported_urls << link
end