Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Generates an RSS item from kramdown and metadata sourcefiles
#!/usr/bin/env python
#
# rssmaker sourcefile | Kyle Barbour | Summer 2013
# Generates an RSS item from kramdown and metadata sourcefiles
#
# The sourcefile should be formatted as follows:
#
# Title of RSS entry
# * http://urlforpost.org
# Text of RSS entry. Text can be in any format supported by Markdown.
#
# Newlines are OK, but the next paragraph needs to be similarly indented
# with a standard four space tab.
# guid: guid (optional)
#
#
# Title of second, already-processed RSS entry. Titles can extend multiple
# lines so long as there is no whitespace around them.
# * http://urlforpost.org
# Entries are separated by two blank lines (three consecutive newline
# characters). rssmaker only processes the first entry, assuming subsequent
# entries have already been processed, as this one has.
# Date: RFC 2822 compliant date
# guid: guid
#
# etc.
#
# rssmaker finds the link by testing whether a line begins with '*'. If a
# guid is not specified, one is created from the current date and time.
#
# Future directions: allow a default URL, allow reprocessing of entries,
# allow different and default destinations, choose whether CDATA is
# necessary, and check oldfeed for an existing guid. Rewrite using
# Beautiful Soup?
#
##############################################################################
import os
import sys
import re
import argparse
from subprocess import Popen, PIPE
from email.Utils import formatdate
from datetime import datetime
from xml.sax.saxutils import escape
# Command-line interface: one positional argument naming the sourcefile.
parser = argparse.ArgumentParser(
    description=("Generates an RSS item from kramdown and metadata "
                 "sourcefiles."))
parser.add_argument(
    'source',
    type=str,
    help="Location of RSS sourcefile.",
)
args = parser.parse_args()
# Defaults for the run.
# Both stamps are taken once here so every later use agrees on the time.
timestamp = formatdate(localtime=True)
autoguid = datetime.now().strftime("%Y-%m-%d-%H%M%S")
rsslist = args.source
# The feed lives beside the sourcefile: same path, extension swapped for .xml.
rssfeed = os.path.splitext(rsslist)[0] + ".xml"
# Fields of the item being built; date/guid/perm keep these values unless the
# sourcefile entry overrides the guid.
newitem = dict(
    title='',
    link='',
    desc='',
    date=timestamp,
    guid=autoguid,
    perm='false',
)
hasguid = False
newsource = ""
oldsource = ""
# Read the whole sourcefile and split off the first (newest) entry.
with open(rsslist, 'r') as sourcefile:
    source = sourcefile.read()
# Entries are separated by two blank lines ('\n\n\n'). partition() always
# yields three parts, so a sourcefile holding a single entry (no separator)
# gives an empty oldsource instead of failing to unpack.
newsource, separator, oldsource = source.partition('\n\n\n')
if not separator:
    # Without a separator the entry runs to end-of-file; drop trailing
    # newlines so the metadata appended later cannot form a spurious
    # entry separator in the middle of this entry.
    newsource = newsource.rstrip('\n')
# Check that the item hasn't already been processed: processed entries carry
# a "Date:" line.
if re.search('\nDate:', newsource):
    sys.exit("All items have been processed.")
# Process the new item: title lines, then the '*' link line, then the
# description with an optional "guid:" line.
# (Single-argument print(...) behaves the same under Python 2 and 3.)
print(newsource)
readlines = iter(newsource.splitlines())
# Everything before the link line is the (possibly multi-line) title.
titlelines = []
for line in readlines:
    if line.startswith('*'):
        # Drop only the leading marker; splitting on '*' would truncate a
        # URL that itself contains an asterisk.
        newitem['link'] = line[1:].strip()
        break
    titlelines.append(line)
# The same iterator is reused, so this loop resumes right after the link line.
desclines = []
for line in readlines:
    if line.startswith('guid:'):
        # Slice off the prefix rather than splitting on it, so a guid that
        # contains "guid:" (e.g. "urn:guid:123") is kept whole.
        newitem['guid'] = line[len('guid:'):].strip()
        hasguid = True
        # A guid that is a web URL is advertised as a permalink.
        if newitem['guid'].startswith(('http://', 'https://')):
            newitem['perm'] = 'true'
        print(line)
    else:
        desclines.append(line.strip())
# Each description line is preceded by a newline, matching the text that is
# handed on to the Markdown converter.
newitem['desc'] = ''.join('\n' + text for text in desclines)
# Join title lines with spaces and clean up extra whitespace.
newitem['title'] = ' '.join(titlelines).strip()
# Pipe the description through the external `kr` command (presumably a
# kramdown wrapper -- confirm) and keep what it writes to stdout. No escaping
# is performed at this step.
kr = Popen('kr', stdin=PIPE, stdout=PIPE)
kr_output = kr.communicate(newitem['desc'])
newitem['desc'] = kr_output[0]
# Define RSS item output
item = """\
<item>
<title>%(title)s</title>
<link>%(link)s</link>
<description><![CDATA[%(desc)s]]></description>
<pubDate>%(date)s</pubDate>
<guid isPermaLink="%(perm)s">%(guid)s</guid>
</item>
"""
# Work on a copy: newitem['guid'] is written back to the sourcefile verbatim
# later, so the escaped forms must not leak into it.
itemfields = dict(newitem)
# Plain-text fields go into element content, so '&', '<' and '>' must be
# escaped or the feed stops being well-formed XML.
for field in ('title', 'link', 'guid'):
    itemfields[field] = escape(itemfields[field])
# The description is wrapped in CDATA; a literal ']]>' inside it would end the
# section early, so split that sequence across two CDATA sections.
itemfields['desc'] = itemfields['desc'].replace(']]>', ']]]]><![CDATA[>')
rssitem = item % itemfields
print("Generated item, adding to RSS feed at: " + timestamp + "\n%s" % rssitem)
# Read the existing RSS feed.
with open(rssfeed, 'r') as feedfile:
    oldfeed = feedfile.read()
# Find the insertion point: before the first existing item, or, for a feed
# with no items yet, before the closing channel tag. Testing for the exact
# delimiter (rather than the bare word "item") avoids choosing a delimiter
# that then fails to split the feed.
for delimiter in ("<item>\n", "</channel>\n"):
    header, found, rest = oldfeed.partition(delimiter)
    if found:
        break
else:
    # Nothing has been written yet, so bailing out here leaves both the feed
    # and the sourcefile untouched.
    sys.exit("No <item> or </channel> line found in feed: " + rssfeed)
rest = delimiter + rest
# Refresh lastBuildDate in the channel header. A callable replacement keeps
# the timestamp from being interpreted as part of a regex template.
header = re.sub(r'(<lastBuildDate>).*(</lastBuildDate>)',
                lambda match: match.group(1) + timestamp + match.group(2),
                header)
# Generate the new feed and write it
newfeed = "".join((header, rssitem, rest))
with open(rssfeed, 'w') as feedfile:
    feedfile.write(newfeed)
# Stamp the processed entry and write the sourcefile back.
# An entry that supplied its own guid only gains a Date line; otherwise the
# guid used for the item is recorded as well.
if hasguid:
    stamp = ("\nDate: ", timestamp)
    notice = "Timestamped item."
else:
    stamp = ("\nguid: ", newitem['guid'], "\nDate: ", timestamp)
    notice = "Timestamped item and generated guid."
with open(rsslist, 'w') as sourcefile:
    # Processed entry first, then the separator and the older entries.
    sourcefile.write(''.join((newsource,) + stamp + ('\n\n\n', oldsource)))
    print(notice)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.