Skip to content

Instantly share code, notes, and snippets.

@psychemedia
Created August 30, 2010 12:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save psychemedia/557335 to your computer and use it in GitHub Desktop.
Save psychemedia/557335 to your computer and use it in GitHub Desktop.
import string
from xml.dom import minidom
#based on http://code.activestate.com/recipes/551792-convert-wordpress-export-file-to-multiple-html-fil/
# Extracts the internal link structure of a WordPress blog from its export
# file: every pingback a post received from another post on the same site
# becomes one edge "linking post" -> "linked post", written both as a
# Graphviz DOT digraph and as a two-column CSV edge list.
infile='wpexport.xml'
# NB the WP export file seemed incomplete - I had to add the atom namespace
# xmlns:atom="http://www.w3.org/2005/Atom/"
dotfile='internalstructure.dot'
csvfile='internalstructure.csv'
# Only pingbacks whose author URL contains this string count as internal links.
siteprefix='http://blog.ouseful.info'


def _first_text(parent, tag):
    """Return the data of the first child of the first *tag* element, or None.

    None is returned when *parent* has no such descendant element or when the
    element is empty (``firstChild`` is None), so callers can skip incomplete
    records instead of crashing on them.
    """
    elements = parent.getElementsByTagName(tag)
    if not elements or elements[0].firstChild is None:
        return None
    return elements[0].firstChild.data


def _slug(url):
    """Return the text after the last '/' in *url*, ignoring outer slashes."""
    return url.strip('/').rpartition('/')[2]


def _internal_pingbacks(dom, site):
    """Yield a (source_slug, target_slug) pair for each internal pingback.

    *target_slug* comes from the <link> of the <item> that received the
    pingback; *source_slug* comes from the pingback's author URL. Items
    without a link and pingbacks without an author URL are skipped.
    """
    for item in dom.getElementsByTagName('item'):
        link = _first_text(item, 'link')
        if link is None:
            continue
        target = _slug(link)
        for comment in item.getElementsByTagName('wp:comment'):
            # Ordinary comments have an empty (or missing) comment_type.
            if _first_text(comment, 'wp:comment_type') != 'pingback':
                continue
            url = _first_text(comment, 'wp:comment_author_url')
            if url is None or site not in url:
                continue
            yield _slug(url), target


def write_blog_structure(infile=infile, dotfile=dotfile, csvfile=csvfile,
                         site=siteprefix):
    """Write the internal pingback graph of a WordPress export file.

    Parameters:
        infile: path of the WordPress export XML to read.
        dotfile: path of the Graphviz DOT file to write.
        csvfile: path of the CSV edge list to write.
        site: substring a pingback URL must contain to count as internal.

    Returns:
        The list of (source_slug, target_slug) edges that were written.

    Raises:
        Whatever ``minidom.parse`` or ``open`` raise for an unreadable or
        malformed input file or an unwritable output path.
    """
    dom = minidom.parse(infile)
    edges = list(_internal_pingbacks(dom, site))
    # ``with`` guarantees both outputs are closed even if a write fails.
    with open(dotfile, 'w') as dot, open(csvfile, 'w') as edge_list:
        dot.write('digraph blogstruct{')
        for source, target in edges:
            dot.write('"%s"->"%s"\n' % (source, target))
            edge_list.write('"%s","%s"\n' % (source, target))
        dot.write('}')
    return edges


if __name__ == '__main__':
    write_blog_structure()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment