Created
August 30, 2016 04:05
-
-
Save ongspxm/4f100a7c89061c91539f2828ecf0107e to your computer and use it in GitHub Desktop.
Extract posts from WordPress to Jekyll posts
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Convert a WordPress XML export (data.xml) into Jekyll posts.
#
# Every <item> in the export's <channel> becomes one Markdown file named
# _posts/<date>-<slug>.md with a YAML front-matter header (title, category,
# tags) followed by the post body.
import io
import xml.etree.ElementTree as etree

# Namespace prefixes used by the WordPress export format.
ns = {'wp': 'http://wordpress.org/export/1.2/', 'content': 'http://purl.org/rss/1.0/modules/content/'}


def render_post(item):
    """Build the Jekyll file name and file text for one export ``<item>``.

    Args:
        item: an ElementTree element holding ``title``, ``wp:post_name``,
            ``wp:post_date``, ``content:encoded`` and any number of
            ``category`` children.

    Returns:
        A ``(fname, text)`` tuple: the target path under ``_posts/`` and the
        complete text of the post (front matter, blank line, body).
    """
    title = item.find('title').text
    slug = item.find('wp:post_name', ns).text
    # An empty <content:encoded/> yields None; write an empty body rather
    # than the literal string "None".
    content = item.find('content:encoded', ns).text or u''
    # wp:post_date is "<date> <time>"; only the date part goes in the name.
    date = item.find('wp:post_date', ns).text.split(' ')[0]

    # <category> elements carry both tags (domain="post_tag") and real
    # categories (any other domain). The loop variable must not reuse the
    # name of either list, otherwise the categories are lost.
    tags = []
    categories = []
    for node in item.findall('category'):
        if node.get('domain') == 'post_tag':
            tags.append(node.get('nicename'))
        else:
            categories.append(node.get('nicename'))

    header = [
        u'---',
        u'title: %s' % title,
        u'category: %s' % u' '.join(categories),
        u'tags: %s' % u' '.join(tags),
        u'---',
    ]
    fname = '_posts/%s-%s.md' % (date, slug)
    return fname, u'\n'.join(header) + u'\n\n' + content


def main():
    """Convert every item of ``data.xml`` and print each file written."""
    blog = etree.parse('data.xml').getroot().find('channel')
    for item in blog.findall('item'):
        fname, text = render_post(item)
        # Encode the whole file (header and body) as UTF-8 in one place so
        # non-ASCII titles do not fail on write.
        with io.open(fname, 'w', encoding='utf-8') as f:
            f.write(text)
        print(fname)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment