Skip to content

Instantly share code, notes, and snippets.

@ongspxm
Created August 30, 2016 04:05
Show Gist options
  • Save ongspxm/4f100a7c89061c91539f2828ecf0107e to your computer and use it in GitHub Desktop.
Save ongspxm/4f100a7c89061c91539f2828ecf0107e to your computer and use it in GitHub Desktop.
Extract posts from WordPress to Jekyll posts
import xml.etree.ElementTree as etree
# Convert every <item> of a WordPress export file (data.xml) into a Jekyll
# post written to _posts/<date>-<slug>.md.
#
# The script sticks to syntax that is valid on both Python 2 and Python 3.
# The _posts directory is not created here and must already exist.
ns = {
    'wp': 'http://wordpress.org/export/1.2/',
    'content': 'http://purl.org/rss/1.0/modules/content/',
}
blog = etree.parse('data.xml').getroot().find('channel')

for item in blog.findall('item'):
    title = item.find('title').text
    slug = item.find('wp:post_name', ns).text
    # An empty <content:encoded/> element has text None; write an empty body
    # rather than the literal string "None".
    content = item.find('content:encoded', ns).text or ''
    # Keep only the part before the first space (the date portion, assuming
    # the usual "YYYY-MM-DD HH:MM:SS" layout of wp:post_date).
    date = item.find('wp:post_date', ns).text.split(' ')[0]

    # Split the item's <category> elements into tags and categories.
    # The loop variable must not reuse the name of either result list,
    # otherwise the list is clobbered by the Element being iterated.
    tags = []
    categories = []
    for node in item.findall('category'):
        nicename = node.get('nicename')
        if not nicename:
            # Nothing usable to record; a None here would break the join.
            continue
        if node.get('domain') == 'post_tag':
            tags.append(nicename)
        else:
            categories.append(nicename)

    # Jekyll front matter followed by a blank line and the post body.
    front_matter = [
        '---',
        'title: %s' % title,
        'category: %s' % ' '.join(categories),
        'tags: %s' % ' '.join(tags),
        '---',
    ]
    document = '\n'.join(front_matter) + '\n\n' + content

    fname = '_posts/%s-%s.md' % (date, slug)
    # Encode once at the I/O boundary and write bytes, so header and body are
    # both UTF-8 regardless of interpreter version or locale defaults.
    with open(fname, 'wb') as f:
        f.write(document.encode('utf-8'))
    print(fname)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment