Skip to content

Instantly share code, notes, and snippets.

@dreid
Created January 21, 2011 09:45
Show Gist options
  • Save dreid/789478 to your computer and use it in GitHub Desktop.
Save dreid/789478 to your computer and use it in GitHub Desktop.
Import an Atom feed into Blogofile Markdown files.
#!/usr/bin/env python
import os
import re
import sys
import urllib
import datetime
from xml.etree import ElementTree as ET
# Skeleton of one generated post: a "---"-delimited front-matter header
# (title, date, categories, permalink) followed by the post body.  It is
# filled in with the %-operator and a dict carrying the keys named below.
MARKDOWN_TEMPLATE = "\n".join((
    "",
    "---",
    'title: "%(title)s"',
    "date: %(date)s",
    "categories: %(categories)s",
    "permalink: %(permalink)s",
    "---",
    "%(content)s",
    "",
))
# ElementTree addresses a namespaced element as "{namespace-uri}localname",
# so every tag looked up below carries the Atom namespace prefix.
ATOM_NS = "{http://www.w3.org/2005/Atom}"

# Fully qualified names of the Atom elements this script reads.
ENTRY = "%sentry" % ATOM_NS
TITLE = "%stitle" % ATOM_NS
DATE = "%spublished" % ATOM_NS
CONTENT = "%scontent" % ATOM_NS
CATEGORY = "%scategory" % ATOM_NS
LINK = "%slink" % ATOM_NS
def fix_title(t):
    """Return title *t* with the characters this script strips from filenames.

    Apostrophes and periods are removed and every "/" becomes "-", so the
    result cannot introduce an extra path component when it is embedded in
    a filename.

    Args:
        t: The post title as a string.

    Returns:
        The cleaned-up title.
    """
    replacements = (
        ("'", ''),
        ('.', ''),
        ('/', '-'),
    )
    for old, new in replacements:
        t = t.replace(old, new)
    return t
def categories(entry):
    """Return the entry's category terms as one comma-separated string.

    Args:
        entry: An ElementTree element whose direct children are searched
            for Atom ``category`` elements.

    Returns:
        The ``term`` attribute of each category child, in document order,
        joined with ",".  Categories with a missing or empty ``term`` are
        skipped; the result is "" when there are none.
    """
    terms = [cat.get('term') for cat in entry.findall(CATEGORY)]
    return ','.join(term for term in terms if term)
def permalink(entry):
    """Return the host-relative path of the entry's "alternate" link.

    Args:
        entry: An ElementTree element whose direct children are searched
            for Atom ``link`` elements.

    Returns:
        The ``href`` of the first alternate link with its first three
        "/"-separated pieces (scheme, empty piece and host of an absolute
        URL) dropped and a leading "/" added, e.g.
        ``http://example.com/2011/01/post.html`` -> ``/2011/01/post.html``.
        ``None`` when the entry has no usable alternate link.
    """
    for link in entry.findall(LINK):
        # RFC 4287 section 4.2.7.2: a link element without a rel attribute
        # is to be treated as rel="alternate".
        if link.get('rel', 'alternate') != 'alternate':
            continue
        href = link.get('href')
        if href:
            return '/' + '/'.join(href.split('/')[3:])
    return None
def content(c):
    """Strip layout-only markup from an entry's HTML body.

    Removes opening and closing ``div`` and ``span`` tags (their inner text
    is kept), ``<img width='1' ...>`` tags that mention
    ``googleusercontent``, and every `` style="..."`` attribute.

    Args:
        c: The HTML text of the entry, or ``None`` / "" for an empty entry.

    Returns:
        The cleaned HTML; "" when *c* is empty or ``None``.
    """
    if not c:
        return ''
    # Each pattern is confined to a single tag with [^>]* (or to a single
    # attribute value with [^"]*).  That way a match cannot run on into the
    # following markup, and tags broken across lines are still matched.
    nuke_patterns = (
        r'<div[^>]*>',
        r"<img width='1'[^>]*?googleusercontent[^>]*>",
        r'</div>',
        r'<span[^>]*>',
        r'</span>',
        r' style="[^"]*"',
    )
    for pattern in nuke_patterns:
        c = re.sub(pattern, '', c)
    return c
def convert(url):
    """Fetch the Atom feed at *url* and write one markdown post per entry.

    Each entry is rendered through ``MARKDOWN_TEMPLATE`` and written, UTF-8
    encoded, to ``_posts/NNNN - <title>.markdown``.  The ``_posts``
    directory is created in the current working directory if it does not
    exist.  Files are numbered from 0001 in the reverse of feed order.

    Args:
        url: Location of the Atom feed; handed to ``urlopen``.

    Raises:
        ValueError: If an entry's published date does not start with
            ``YYYY-MM-DDTHH:MM:SS``.
    """
    # Function-scope imports keep this block self-contained: urlopen lives
    # in a different module on Python 2 and Python 3.
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib import urlopen  # Python 2
    from contextlib import closing

    # closing() releases the HTTP response on both Python versions.
    with closing(urlopen(url)) as feed:
        tree = ET.parse(feed)

    posts = []
    for entry in tree.findall(ENTRY):
        # .text is None for an empty element; treat that as an empty string.
        title = entry.find(TITLE).text or ''
        # Only the first 19 characters ("YYYY-MM-DDTHH:MM:SS") are parsed,
        # so fractional seconds and any "Z" / "+hh:mm" suffix are ignored
        # whether or not the timestamp contains a ".".
        published = datetime.datetime.strptime(
            entry.find(DATE).text[:19], "%Y-%m-%dT%H:%M:%S")
        # The template wraps the title in double quotes, so backslashes and
        # double quotes inside it must be escaped to keep the header valid.
        quoted_title = title.replace('\\', '\\\\').replace('"', '\\"')
        posts.append((title, MARKDOWN_TEMPLATE % {
            'title': quoted_title,
            'date': published.strftime('%Y/%m/%d %H:%M:%S'),
            'content': content(entry.find(CONTENT).text or ''),
            'categories': categories(entry),
            'permalink': permalink(entry),
        }))
    posts.reverse()

    if not os.path.exists('_posts'):
        os.mkdir('_posts')

    for number, (title, text) in enumerate(posts, 1):
        path = '_posts/%04d - %s.markdown' % (number, fix_title(title))
        # Binary mode plus an explicit encode produces the same UTF-8 bytes
        # on Python 2 and 3; the with-block closes every file.
        with open(path, 'wb') as out:
            out.write(text.encode('utf-8'))
if __name__ == '__main__':
    # Exactly one argument is expected: the URL of the Atom feed to import.
    if len(sys.argv) != 2:
        sys.exit('usage: %s ATOM_FEED_URL' % sys.argv[0])
    convert(sys.argv[1])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment