wr0ngway (owner)

Revisions

gist: 41781 Download_button fork
public
Public Clone URL: git://gist.github.com/41781.git
Embed All Files: show embed
wordpress2blogger.rb #
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#!/usr/bin/env ruby
require 'date'
require 'rexml/document'
include REXML # so that we don't have to prefix everything with REXML::...
 
# Blog-specific settings. The Atom templates in this script reference these
# locals by name when they are evaluated, so the names must not change.
# Replace the placeholder values with those of the target Blogger blog.
blogger_id = "111111111111"           # appears in http://www.blogger.com/profile/<blogger_id>
blog_title = "my blog"                # feed-level <title>
author_name = "my name"               # <author><name> of the feed and of every post
author_email = "noreply@blogger.com"  # <author><email> of the feed and of every post
blog_id = "222222222222222"           # appears in tag: ids and blogger.com/feeds/<blog_id>/ links
blog_name = "myblog"                  # subdomain in http://<blog_name>.blogspot.com/

# The WordPress export to convert is the first command-line argument.
export_path = ARGV[0]
abort "Usage: #{$PROGRAM_NAME} <wordpress-export.xml>" if export_path.nil?

# REXML::Document.new parses the whole stream in its constructor, so the block
# form of File.open can close the handle immediately afterwards (the previous
# File.new handle was never closed).
doc = File.open(export_path) { |export_file| REXML::Document.new(export_file) }
 
# Atom skeleton for one output file. The text is stored uninterpolated: the
# #{...} placeholders are resolved later, when the script wraps this string in
# %Q{...} and evals it against a binding that has blog_id, blog_title,
# blog_name, author_name, blogger_id, author_email and feed_content in scope.
# Note that the feed-level <updated> value is a fixed timestamp.
feed_template = <<'FEED_TEMPLATE'
<?xml version="1.0" encoding="UTF-8" ?>
<feed xmlns="http://www.w3.org/2005/Atom" xmlns:openSearch="http://a9.com/-/spec/opensearchrss/1.0/" xmlns:gd="http://schemas.google.com/g/2005" xmlns:thr="http://purl.org/syndication/thread/1.0">
<id>tag:blogger.com,1999:blog-#{blog_id}.archive</id>
<updated>2008-12-04T11:08:02.017+08:00</updated>
<title type="text">#{blog_title}</title>
<link rel='http://schemas.google.com/g/2005#feed' type='application/atom+xml' href='http://#{blog_name}.blogspot.com/feeds/archive'/>
<link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/#{blog_id}/archive'/>
<link rel='http://schemas.google.com/g/2005#post' type='application/atom+xml' href='http://www.blogger.com/feeds/#{blog_id}/archive'/>
<link rel='alternate' type='text/html' href='http://#{blog_name}.blogspot.com/'/>
<author>
<name>#{author_name}</name>
<uri>http://www.blogger.com/profile/#{blogger_id}</uri>
<email>#{author_email}</email>
</author>
<generator version="7.00" uri="http://www.blogger.com">Blogger</generator>
#{feed_content}
</feed>
FEED_TEMPLATE
 
# Atom <entry> for a single post. Stored uninterpolated; the #{...}
# placeholders are resolved when the script evals this text inside %Q{...}
# with blog_id, post_id, post_date, post_category, post_title, post_content,
# author_name, blogger_id, author_email and comments_size in scope.
# The template starts with a newline so consecutive entries are separated.
entry_template = "\n" + <<'ENTRY_TEMPLATE'
<entry>
<id>tag:blogger.com,1999:blog-#{blog_id}.post-#{post_id}</id>
<published>#{post_date}</published>
<updated>#{post_date}</updated>
<category scheme="http://schemas.google.com/g/2005#kind" term="http://schemas.google.com/blogger/2008/kind#post" />
<category scheme="http://www.blogger.com/atom/ns#" term="#{post_category}" />
<title type="text">#{post_title}</title>
<content type="html"><![CDATA[#{post_content}]]></content>
<author>
<name>#{author_name}</name>
<uri>http://www.blogger.com/profile/#{blogger_id}</uri>
<email>#{author_email}</email>
</author>
<thr:total>#{comments_size}</thr:total>
</entry>
ENTRY_TEMPLATE
 
# Atom <entry> for a single comment. Stored uninterpolated; the #{...}
# placeholders are resolved when the script evals this text inside %Q{...}
# with blog_id, post_id, comment_id, comment_date, comment_content,
# comment_author, comment_author_email and comment_author_url in scope.
# Each <author> child is emitted only when its value is non-nil. The comment
# body is used for both <title> and <content>.
comment_template = "\n" + <<'COMMENT_TEMPLATE'
<entry>
<id>tag:blogger.com,1999:blog-#{blog_id}.post-#{post_id}.comment-#{comment_id}</id>
<published>#{comment_date}</published>
<updated>#{comment_date}</updated>
<category scheme='http://schemas.google.com/g/2005#kind'
term='http://schemas.google.com/blogger/2008/kind#comment'/>
<title type='text'><![CDATA[#{comment_content}]]></title>
<content type='html'><![CDATA[#{comment_content}]]></content>
<link rel='self'
type='application/atom+xml'
href='http://www.blogger.com/feeds/feh/comments/default/1'/>
<author>
#{'<name>' + "#{comment_author}" + '</name>' if comment_author}
#{'<email>' + "#{comment_author_email}" + '</email>' if comment_author_email}
#{'<uri>' + "#{comment_author_url}" + '</uri>' if comment_author_url}
</author>
<thr:in-reply-to href='http://www.blogger.com/feeds/#{blog_id}/posts/default/#{post_id}'
ref='tag:blogger.com,1999:blog-#{blog_id}.post-#{post_id}'
type='application/atom+xml'/>
</entry>
COMMENT_TEMPLATE
 
# Maximum number of Atom entries (posts plus comments) written to one file.
import_limit = 50

# Formats a date string (or REXML text node) as an Atom timestamp. The UTC
# offset parsed from the input is emitted via %:z; inputs without an offset,
# such as wp:comment_date_gmt, are parsed as +00:00. Previously a literal
# "+08:00" was appended regardless of the parsed offset, which mislabelled
# every timestamp whose source offset was not +08:00.
# Raises (from DateTime.parse) when the text is empty or not a date.
atom_timestamp = lambda do |raw|
  DateTime.parse(raw.to_s).strftime("%Y-%m-%dT%H:%M:%S%:z")
end

entries = []             # batch currently being filled
all_entries = [entries]  # every batch, in output order
post_id = 0

# Convert every <item> of the export into one post entry followed by its
# comment entries. The local variable names below are part of the template
# contract: entry_template and comment_template look them up through `binding`.
# NOTE: eval is applied to the script's own template strings only; values
# taken from the export are interpolated, never evaluated as code.
doc.elements.each("//item") do |element|
  post_title = element.get_text("title")
  post_date = atom_timestamp.call(element.get_text("pubDate"))
  post_content = element.get_text("content:encoded")
  # Only the first <category> is used; posts without one fall back to "Blogging".
  post_category = element.get_text("category")
  post_category = "Blogging" if post_category.nil? || post_category == ""
  post_id += 1

  comments = []
  comment_id = 0
  element.elements.each("wp:comment") do |comment|
    comment_author = comment.get_text("wp:comment_author")
    comment_author_email = comment.get_text("wp:comment_author_email")
    comment_author_url = comment.get_text("wp:comment_author_url")
    comment_date = atom_timestamp.call(comment.get_text("wp:comment_date_gmt"))
    comment_content = comment.get_text("wp:comment_content")
    comment_id += 1
    comments << eval('%Q{' + comment_template + '}', binding)
  end

  # Keep a post together with its comments while staying within the import
  # limit. Only roll over when the current batch already holds something:
  # a single post whose comments alone exceed the limit would otherwise leave
  # an empty batch (and an empty output file) behind.
  if !entries.empty? && entries.size + comments.size + 1 > import_limit
    entries = []
    all_entries << entries
  end

  comments_size = comments.size
  entries << eval('%Q{' + entry_template + '}', binding)
  entries.concat(comments)
end
 
 
# Write each batch to its own file in the current directory:
# p2blogger0.xml, p2blogger1.xml, ...
all_entries.each_with_index do |batch, index|
  # feed_template looks up `feed_content` by name through `binding`,
  # so this local must keep exactly that name.
  feed_content = batch.join
  out_path = "p2blogger#{index}.xml"
  puts "Writing #{batch.size} entries to #{out_path}"
  File.open(out_path, "w") do |out|
    rendered_feed = eval('%Q{' + feed_template + '}', binding)
    out.write(rendered_feed)
  end
end