Skip to content

Instantly share code, notes, and snippets.

@kylebarbour
Created December 26, 2016 22:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kylebarbour/511fb88684280e9fda6de4e69ba45ad0 to your computer and use it in GitHub Desktop.
Generates an RSS item from kramdown and metadata sourcefiles
#!/usr/bin/env python3
#
# rssmaker sourcefile | Kyle Barbour | Summer 2013
# Generates an RSS item from kramdown and metadata sourcefiles
#
# The sourcefile should be formatted as follows:
#
# Title of RSS entry
# * http://urlforpost.org
# Text of RSS entry. Text can be in any format supported by Markdown.
#
# Newlines are OK, but the next paragraph needs to be similarly indented
# with a standard four space tab.
# guid: guid (optional)
#
#
# Title of second, already-processed RSS entry. Titles can extend multiple
# lines so long as there is no whitespace around them.
# * http://urlforpost.org
# Entries are separated by two newlines. rssmaker only processes the first
# entry, assuming subsequent entries have already been processed, as this
# one has.
# Date: RFC 2822 compliant date
# guid: guid
#
# etc.
#
# rssmaker finds the link by testing for whether '* ' begins a line. If a guid
# is not specified, one is created using the date.
#
# Directions: Allow default URL, allow reprocessing of entries, allow
# different destinations and default destinations, choose whether CDATA is
# necessary or not, check oldfeed for guid. Rewrite using Beautiful Soup?
#
##############################################################################
import os
import sys
import re
import argparse
from subprocess import Popen, PIPE
from email.utils import formatdate  # email.Utils was removed in Python 3
from datetime import datetime
from xml.sax.saxutils import escape

# Template for a single RSS <item>. The description sits inside a CDATA
# section, so only the title and link need XML escaping.
ITEM_TEMPLATE = """\
<item>
<title>%(title)s</title>
<link>%(link)s</link>
<description><![CDATA[%(desc)s]]></description>
<pubDate>%(date)s</pubDate>
<guid isPermaLink="%(perm)s">%(guid)s</guid>
</item>
"""


def parse_item(newsource, timestamp, autoguid):
    """Parse the first (unprocessed) entry of the sourcefile.

    newsource -- text of one entry: title line(s), then a '* url' line,
                 then the kramdown body and an optional 'guid:' line.
    timestamp -- RFC 2822 date to use as the item's pubDate.
    autoguid  -- fallback guid used when the entry carries none.

    Returns (item, hasguid): item is a dict with the keys consumed by
    ITEM_TEMPLATE; hasguid reports whether the entry supplied its own guid.
    """
    item = {
        'title': '',
        'link': '',
        'desc': '',
        'date': timestamp,
        'guid': autoguid,
        'perm': 'false',
    }
    hasguid = False
    lines = iter(newsource.splitlines())
    # Everything before the '* url' line is the (possibly multi-line) title.
    for line in lines:
        if line.startswith('*'):
            # partition (not split) keeps a URL intact even if it contains '*'
            item['link'] = line.partition('*')[2].strip()
            break
        item['title'] = ' '.join((item['title'], line))
    # Everything after the link is description text, plus an optional guid.
    for line in lines:
        if line.startswith('guid:'):
            item['guid'] = line.partition('guid:')[2].strip()
            hasguid = True
            # A URL-shaped guid is flagged as a permalink (RSS 2.0 semantics)
            if item['guid'].startswith('http://'):
                item['perm'] = 'true'
        else:
            item['desc'] = "\n".join((item['desc'], line.strip()))
    # Title accumulation leaves a leading space; trim it.
    item['title'] = item['title'].strip()
    return item, hasguid


def render_description(desc):
    """Run kramdown (the external 'kr' binary) over the raw description."""
    kr = Popen('kr', stdin=PIPE, stdout=PIPE, universal_newlines=True)
    return kr.communicate(desc)[0]


def render_item(item):
    """Format an item dict as an RSS <item> element.

    Title and link are XML-escaped; the description is emitted verbatim
    inside its CDATA section.
    """
    safe = dict(item, title=escape(item['title']), link=escape(item['link']))
    return ITEM_TEMPLATE % safe


def splice_feed(oldfeed, rssitem, timestamp):
    """Insert rssitem at the top of the feed and refresh lastBuildDate.

    The new item goes immediately before the first existing <item>, or
    before </channel> when the feed has no items yet.
    """
    if 'item' in oldfeed:
        marker = r"(<item>\n)"
    else:
        marker = r"(</channel>\n)"
    header, delimiter, rest = re.split(marker, oldfeed, maxsplit=1)
    rest = delimiter + rest
    # \g<1> (not \1) so a replacement starting with a digit cannot be
    # misread as a higher group number.
    header = re.sub(r'(<lastBuildDate>).*(</lastBuildDate>)',
                    r'\g<1>' + timestamp + r'\g<2>', header)
    return "".join((header, rssitem, rest))


def main():
    """CLI entry point: process the first entry and update feed + sourcefile."""
    parser = argparse.ArgumentParser(description="Generates an RSS item from "
                                     "kramdown and metadata sourcefiles.")
    parser.add_argument('source', type=str, help="Location of RSS sourcefile.")
    args = parser.parse_args()

    # Default data
    timestamp = formatdate(localtime=True)
    autoguid = datetime.now().strftime("%Y-%m-%d-%H%M%S")
    rsslist = args.source
    rssfeed = "%s.xml" % os.path.splitext(rsslist)[0]

    # Read RSS data from top of sourcefile. partition (not split) tolerates
    # a file holding a single, never-processed entry with no separator.
    with open(rsslist, 'r') as sourcefile:
        source = sourcefile.read()
    newsource, _, oldsource = source.partition('\n\n\n')

    # Check that the item hasn't already been processed
    if re.search(r'\nDate:', newsource):
        sys.exit("All items have been processed.")

    # Process the new item
    newitem, hasguid = parse_item(newsource, timestamp, autoguid)
    newitem['desc'] = render_description(newitem['desc'])
    rssitem = render_item(newitem)
    print("Generated item, adding to RSS feed at: " + timestamp +
          "\n%s" % rssitem)

    # Read the RSS feed, splice in the new item, and write it back
    with open(rssfeed, 'r') as feedfile:
        oldfeed = feedfile.read()
    newfeed = splice_feed(oldfeed, rssitem, timestamp)
    with open(rssfeed, 'w') as feedfile:
        feedfile.write(newfeed)

    # Mark sourcefile as processed
    with open(rsslist, 'w') as sourcefile:
        if hasguid:
            sourcefile.write(''.join((newsource, "\nDate: ", timestamp,
                                      '\n\n\n', oldsource)))
            print("Timestamped item.")
        else:
            sourcefile.write(''.join((newsource, "\nguid: ", newitem['guid'],
                                      "\nDate: ", timestamp, '\n\n\n',
                                      oldsource)))
            print("Timestamped item and generated guid.")


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment