Created

Embed URL

HTTPS clone URL

SSH clone URL

You can clone with HTTPS or SSH.

Download Gist
View gist:338292
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51
########################
# wp2toto #
# by Joseph Weissman #
########################
 
# config
 
author = "user"
wpfile = "user_blog_export.xml"
outdir = "../articles/"
 
 
import pprint
import time
from time import strftime
import xml.dom.minidom
from xml.dom.minidom import Node
 
def extract_string(xml_node, tag_name):
string = ""
n = xml_node.getElementsByTagName(tag_name)
for node in n:
for node2 in node.childNodes:
# if tag_name == "content:encoded":
# print node2.nodeType
if node2.nodeType == Node.TEXT_NODE or node2.nodeType == Node.CDATA_SECTION_NODE:
string += node2.data
return string
doc = xml.dom.minidom.parse(wpfile)
wp_date_format = "%a, %d %b %Y"
toto_date_format = "%y-%m-%d"
for node in doc.getElementsByTagName("item"):
yaml = ""
title = extract_string(node, "title").replace("/","").replace(" "," ").replace(":"," --")
date = extract_string(node, "pubDate")[0:17].strip()
content = extract_string(node, "content:encoded").strip()
if not title == "" and not date.endswith("-0001") and not content == "":
converted_date = time.strftime(toto_date_format, time.strptime(date, wp_date_format) )
filename = "20"+converted_date+"-"+title.replace(" ","-")+".txt"
yaml += "title: "+title+"\n"
yaml += "author: "+author+"\n"
yaml += "date: 20"+converted_date.replace("-","/")+"\n" #" ("+date+")\n"
yaml += "\n\n\n"
yaml += content +"\n"
# yaml += "--------------\n\n"
print yaml
# filename = "example.dat" # debug
file = open(outdir+filename.lower(), "w")
file.writelines(yaml.encode('ascii', 'ignore'))
file.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Something went wrong with that request. Please try again.