Generates an RSS item from kramdown and metadata sourcefiles
#!/usr/bin/env python
#
# rssmaker sourcefile | Kyle Barbour | Summer 2013
# Generates an RSS item from kramdown and metadata sourcefiles
#
# The sourcefile should be formatted as follows:
#
# Title of RSS entry
# * http://urlforpost.org
#     Text of RSS entry. Text can be in any format supported by Markdown.
#
#     Newlines are OK, but the next paragraph needs to be similarly indented
#     with a standard four space tab.
# guid: guid (optional)
#
#
# Title of second, already-processed RSS entry. Titles can extend multiple
# lines so long as there is no whitespace around them.
# * http://urlforpost.org
#     Entries are separated by two newlines. rssmaker only processes the first
#     entry, assuming subsequent entries have already been processed, as this
#     one has.
# Date: RFC 2822 compliant date
# guid: guid
#
# etc.
#
# rssmaker finds the link by testing for whether '* ' begins a line. If a guid
# is not specified, one is created using the date.
#
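# Usage (a sketch; "feed.txt" here is only an example sourcefile name):
#
#     rssmaker feed.txt
#
# This prepends the new <item> to feed.xml (the sourcefile path with its
# extension replaced by .xml) and stamps the processed entry in feed.txt with
# its Date and, if none was given, a guid.
#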
# To do: Allow a default URL, allow reprocessing of entries, allow
# different destinations and default destinations, choose whether CDATA is
# necessary or not, check oldfeed for guid. Rewrite using Beautiful Soup?
#
##############################################################################

import os
import sys
import re
import argparse
from subprocess import Popen, PIPE
from email.utils import formatdate
from datetime import datetime
from xml.sax.saxutils import escape
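# Note: escape() is imported but not currently used; the item description is
# wrapped in CDATA instead (see the CDATA item in the to-do list above).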

# Options
parser = argparse.ArgumentParser(description="Generates an RSS item from "
                                 "kramdown and metadata sourcefiles.")
parser.add_argument('source', type=str, help="Location of RSS sourcefile.")
args = parser.parse_args()

# Default data
timestamp = formatdate(localtime=True)
autoguid = datetime.now().strftime("%Y-%m-%d-%H%M%S")
rsslist = args.source
rssfeed = "%s.xml" % os.path.splitext(rsslist)[0]
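# The feed is written next to the sourcefile, with an .xml extension (for
# example, a sourcefile named feed.txt produces feed.xml).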
newitem = {
    'title': '',
    'link': '',
    'desc': '',
    'date': timestamp,
    'guid': autoguid,
    'perm': 'false',
}
hasguid = False
newsource = ""
oldsource = ""

# Read RSS data from top of sourcefile
source = ""
with open(rsslist, 'r') as sourcefile:
    for line in sourcefile:
        source += line
newsource, oldsource = source.split('\n\n\n', 1)
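# Note: the unpacking above assumes the sourcefile already contains the entry
# separator (two blank lines); with a single, never-processed entry and no
# separator, split() returns one piece and this raises ValueError.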

# Check that the item hasn't already been processed
if re.search('\nDate:', newsource):
    sys.exit("All items have been processed.")

# Process the new item
print newsource
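# Everything before the '* ' link line belongs to the (possibly multi-line)
# title; the link line ends the title.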
readlines = iter(newsource.splitlines())
for line in readlines:
    if line.startswith('*'):
        newitem['link'] = line.split('*')[1].strip()
        break
    else:
        newitem['title'] = ' '.join((newitem['title'], line))
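# The remaining lines form the description, except for an optional 'guid:'
# line; a guid that is itself a URL is marked as a permalink.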
for line in readlines:
    if line.startswith('guid:'):
        newitem['guid'] = line.split('guid:')[1].strip()
        hasguid = True
        if newitem['guid'].startswith('http://'):
            newitem['perm'] = 'true'
        print line
    else:
        newitem['desc'] = "\n".join([newitem['desc'], line.strip()])

# Clean up extra whitespace
newitem['title'] = newitem['title'].strip()

# Run kramdown on the description via the 'kr' command
kr = Popen('kr', stdin=PIPE, stdout=PIPE)
newitem['desc'] = kr.communicate(newitem['desc'])[0]
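# ('kr' is assumed to be a kramdown command on the PATH that reads Markdown
# on stdin and writes HTML to stdout.)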

# Define RSS item output
item = """\
<item>
<title>%(title)s</title>
<link>%(link)s</link>
<description><![CDATA[%(desc)s]]></description>
<pubDate>%(date)s</pubDate>
<guid isPermaLink="%(perm)s">%(guid)s</guid>
</item>
"""
rssitem = item % newitem
print "Generated item, adding to RSS feed at: " + timestamp + "\n%s" % rssitem

# Read the RSS feed, find the first item in it
oldfeed = ""
with open(rssfeed, 'r') as feedfile:
    for line in feedfile:
        oldfeed += line
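
# The new item goes immediately before the first existing <item>; if the feed
# has no items yet, it goes just before </channel>.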
# Generate the new feed and write it
if '<item>' in oldfeed:
    regex = "(<item>\n)"
else:
    regex = "(</channel>\n)"
header, delimiter, rest = re.split(regex, oldfeed, 1)
rest = delimiter + rest
header = re.sub(r'(<lastBuildDate>).*(</lastBuildDate>)',
                r'\1' + timestamp + r'\2', header)
newfeed = "".join((header, rssitem, rest))
with open(rssfeed, 'w') as feedfile:
    feedfile.write(newfeed)

# Mark sourcefile as processed
with open(rsslist, 'w') as sourcefile:
    if hasguid:
        sourcefile.write(''.join((newsource, "\nDate: ", timestamp, '\n\n\n',
                                  oldsource)))
        print "Timestamped item."
    else:
        sourcefile.write(''.join((newsource, "\nguid: ", newitem['guid'],
                                  "\nDate: ", timestamp, '\n\n\n', oldsource)))
        print "Timestamped item and generated guid."