Skip to content

Instantly share code, notes, and snippets.

@neuromusic
Created December 5, 2012 20:12
Show Gist options
  • Save neuromusic/4219079 to your computer and use it in GitHub Desktop.
Save neuromusic/4219079 to your computer and use it in GitHub Desktop.
script which migrates a blog from Drupal 6 to WordPress 3.4 using sqlalchemy & wordpress_xmlrpc
#!/usr/bin/python
import json
import datetime
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
from wordpress_xmlrpc import Client, WordPressPost, WordPressComment, WordPressTerm
from wordpress_xmlrpc.methods import media, posts, users, comments, taxonomies
from wordpress_xmlrpc.compat import xmlrpc_client
# works with...
# - wordpress_xmlrpc v2.2+ (comment timestamps fail < 2.2)
# - sqlalchemy 0.7.8
# Connect to the Drupal 6 database with SQLAlchemy.
# NOTE(review): the credentials, database name and MAMP socket path are
# hard-coded in the URL below; adjust them for the local environment.
_engine = create_engine('mysql+mysqldb://root:root@localhost/drupal_archive?unix_socket=/Applications/MAMP/tmp/mysql/mysql.sock&charset=utf8', echo=False)
# Declarative base bound to the engine; the mapped classes in this file use
# 'autoload' so their columns are read from the database tables.
_Base = declarative_base(_engine)
# Connect to WordPress over XML-RPC (endpoint URL, user name, password).
_client = Client('http://localhost/wordpress/xmlrpc.php', 'admin', 'password')
# Fetched once when the module is loaded; author_id_from_name() searches
# this list by display name.
_wp_authors = _client.call(users.GetAuthors())
# classes for building the sqlalchemy ORM from tables
class Nodes(_Base):
    """Drupal ``node`` table; blog posts are selected from it by type and status."""

    __tablename__ = 'node'
    # Columns are reflected from the database instead of being declared here.
    __table_args__ = dict(autoload=True)
class NodeRevisions(_Base):
    """Drupal ``node_revisions`` table; holds title, body and teaser per revision."""

    __tablename__ = 'node_revisions'
    # Columns are reflected from the database instead of being declared here.
    __table_args__ = dict(autoload=True)
class Comments(_Base):
    """Drupal ``comments`` table; reader comments attached to nodes."""

    __tablename__ = 'comments'
    # Columns are reflected from the database instead of being declared here.
    __table_args__ = dict(autoload=True)
class FieldBlogImage(_Base):
    """CCK field table ``content_field_blog_image``; images attached to blog nodes."""

    __tablename__ = 'content_field_blog_image'
    # Columns are reflected from the database instead of being declared here.
    __table_args__ = dict(autoload=True)
class Files(_Base):
    """Drupal ``files`` table; path and MIME type of each uploaded file."""

    __tablename__ = 'files'
    # Columns are reflected from the database instead of being declared here.
    __table_args__ = dict(autoload=True)
class Terms(_Base):
    """Drupal ``term_data`` table; names of the taxonomy terms (tags & categories)."""

    __tablename__ = 'term_data'
    # Columns are reflected from the database instead of being declared here.
    __table_args__ = dict(autoload=True)
class TermNodes(_Base):
    """Drupal ``term_node`` table; each row links a node id to a term id."""

    __tablename__ = 'term_node'
    # Columns are reflected from the database instead of being declared here.
    __table_args__ = dict(autoload=True)
# functions for the migration
def grab_drupal_blog(session, author_names=None):
    """Read the published blog nodes from Drupal into a nested dictionary.

    parameters:
    - session: sqlalchemy session bound to the drupal database
    - author_names: optional dict mapping a drupal uid to the author's
      display name; defaults to {1: 'Hodgkin', 2: 'Huxley'}
    returns:
    - dict keyed by node id; each value is a dict with the keys 'author',
      'created', 'changed', 'title', 'content', 'excerpt', 'timestamp',
      'images' (dict keyed by delta), 'categories' (list of term names)
      and 'comments' (dict keyed by comment id)
    """
    if author_names is None:
        author_names = {1: 'Hodgkin', 2: 'Huxley'}
    blog_nodes = session.query(Nodes).filter(Nodes.status > 0, Nodes.type=='blog')
    blog = {}
    for item in blog_nodes:
        # grab the latest revision
        latest = session.query(NodeRevisions).get(item.vid)
        blog[item.nid] = {
            # An unmapped uid yields '' so the key always exists; an empty
            # name matches no author in author_id_from_name() unless a
            # display name is itself empty.
            'author': author_names.get(item.uid, ''),
            # fromtimestamp() gives naive datetimes in the local timezone.
            'created': datetime.datetime.fromtimestamp(item.created),
            'changed': datetime.datetime.fromtimestamp(item.changed),
            'title': latest.title,
            'content': latest.body,
            'excerpt': latest.teaser,
            'timestamp': datetime.datetime.fromtimestamp(latest.timestamp),
            'images': _collect_images(session, item.nid),
            'categories': _collect_categories(session, item.nid),
            'comments': _collect_comments(session, item.nid),
        }
    return blog


def _collect_images(session, nid):
    """Return {delta: image info dict} for the image files attached to node nid."""
    images = {}
    rows = session.query(FieldBlogImage).join(Nodes, Nodes.nid==FieldBlogImage.nid).filter(FieldBlogImage.nid==nid)
    for row in rows:
        file_info = None
        if row.field_blog_image_fid is not None:
            file_info = session.query(Files).get(row.field_blog_image_fid)
        if file_info is None:
            # Field row without a usable file record: nothing to upload.
            continue
        images[row.delta] = {
            'title': row.field_blog_image_title,
            'alt': row.field_blog_image_alt,
            'delta': row.delta,
            # Only the base name is kept; the directory part is dropped.
            'filename': file_info.filepath.split('/')[-1],
            'mimetype': file_info.filemime,
        }
    return images


def _collect_categories(session, nid):
    """Return the list of term names attached to node nid."""
    links = session.query(TermNodes).join(Nodes, Nodes.nid==TermNodes.nid).filter(TermNodes.nid==nid)
    return [session.query(Terms).get(link.tid).name for link in links]


def _collect_comments(session, nid):
    """Return {comment id: comment info dict} for the comments on node nid."""
    found = {}
    rows = session.query(Comments).join(Nodes, Nodes.nid==Comments.nid).filter(Comments.nid==nid)
    for row in rows:
        found[row.cid] = {
            'subject': row.subject,
            'content': row.comment,
            'author_IP': row.hostname,
            'author': row.name,
            'author_email': row.mail,
            'author_url': row.homepage,
            'date': datetime.datetime.fromtimestamp(row.timestamp),
        }
    return found
def loadSession():
    """ connect to drupal database
    returns:
    - sqlalchemy session handler bound to the module-level engine
    """
    # Build a session factory for the engine and hand back one new session.
    Session = sessionmaker(bind=_engine)
    return Session()
def author_id_from_name(name):
    """ look up a wp author id by display name (case-insensitive)
    parameter:
    - name: string of author display name
    returns:
    - id of the first author in _wp_authors whose display name matches,
      or 0 when no author matches
    """
    matching_ids = (
        wp_author.id
        for wp_author in _wp_authors
        if name.lower() == wp_author.display_name.lower()
    )
    # The generator is lazy, so the scan stops at the first match.
    return next(matching_ids, 0)
def upload_image(image):
    """ upload one image file to wordpress & return the upload response
    (the file is read from ./images/, so all images must be copied there first)
    parameter:
    - image: dict with the keys 'filename' and 'mimetype'
    returns:
    - whatever the media.UploadFile call returns
    """
    payload = {'name': image['filename'], 'type': image['mimetype']}
    local_path = "./images/" + payload['name']
    print("uploading image... %s" % (payload['name']))
    with open(local_path, 'rb') as handle:
        # Wrap the raw bytes so they are sent as XML-RPC binary data.
        payload['bits'] = xmlrpc_client.Binary(handle.read())
        return _client.call(media.UploadFile(payload))
def upload_comment(comment,post_id):
    """ add a comment to a wordpress post
    parameters:
    - comment: dict with the keys 'subject', 'content', 'date', 'author',
      'author_IP', 'author_email' and 'author_url'
    - post_id: id of the wordpress post that receives the comment
    returns:
    - the result of the final comments.EditComment call
    """
    new_comment = WordPressComment()
    # The subject is folded into the body as a bold first line.
    new_comment.content = '<b>%s</b>\n%s' % (comment['subject'], comment['content'])
    new_comment.date_created = comment['date']
    comment_id = _client.call(comments.NewComment(post_id, new_comment))
    # The author details are applied with a second call that edits the
    # freshly created comment.
    # NOTE(review): presumably NewComment attributes the comment to the
    # logged-in XML-RPC user and ignores these fields -- confirm.
    new_comment.author_ip = comment['author_IP']
    new_comment.author_url = comment['author_url']
    new_comment.author_email = comment['author_email']
    new_comment.author = comment['author']
    print("adding comment... %s,%s" % (comment['date'], comment['author']))
    return _client.call(comments.EditComment(comment_id, new_comment))
def upload_post(post):
    """ add a post (and its images) to wordpress
    parameter:
    - post: dict with the keys 'title', 'content', 'excerpt', 'created',
      'categories', 'images' and (optionally) 'author'
    returns:
    - whatever the posts.NewPost call returns (used by the caller as the
      id of the new post)
    """
    # upload all of the images, in delta order
    image_html = []
    for index in sorted(post['images']):
        image = post['images'][index]
        response = upload_image(image)
        # Empty values become '' so the markup never contains "None".
        title = image['title'] or ''
        # Use the image's own alt text; fall back to the title when it is empty.
        alt = image['alt'] or title
        # NOTE(review): the wp-image-616 class and the 1024x682 size are
        # hard-coded for every image -- confirm that is intended.
        html = '<a href="%s"><img class="alignnone size-large wp-image-616" title="%s" src="%s" alt="%s" width="1024" height="682" /></a>' % (response['url'], title, response['url'], alt)
        image_html.append(html)
    # add post to wordpress
    new_post = WordPressPost()
    new_post.title = post['title']
    new_post.content = '\n'.join([post['content']] + image_html) # appends images to end of post
    new_post.excerpt = post['excerpt']
    new_post.date = post['created']
    new_post.post_status = 'publish'
    new_post.comment_status = True
    new_post.ping_status = True
    # The drupal terms are all imported as wordpress tags.
    new_post.terms_names = {
        'post_tag': post['categories'],
    }
    # A post without a known author is looked up with an empty name
    # instead of raising KeyError.
    new_post.user = author_id_from_name(post.get('author') or '')
    print("submitting post... %s: %s" % (new_post.date, new_post.title))
    return _client.call(posts.NewPost(new_post))
if __name__ == "__main__":
    # connect to drupal 6 db
    session = loadSession()
    # pull blog content from drupal 6 db
    blog = grab_drupal_blog(session)

    def dthandler(obj):
        """JSON fallback: datetimes become ISO strings, anything else null."""
        if isinstance(obj, datetime.datetime):
            return obj.isoformat()
        return None

    # dump blog content into json file
    with open('export_blog.json','wb') as f:
        json.dump(blog, f, indent=4, sort_keys=True, default=dthandler)
    # submit blog posts to wordpress in ascending node id order, and each
    # post's comments in ascending comment id order, so the upload order is
    # deterministic instead of following dict iteration order
    for node_id in sorted(blog):
        post = blog[node_id]
        post_id = upload_post(post)
        for comment_id in sorted(post['comments']):
            comment_info = post['comments'][comment_id]
            upload_comment(comment_info, post_id)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment