Created December 26, 2016 at 22:39
-
-
Save kylebarbour/511fb88684280e9fda6de4e69ba45ad0 to your computer and use it in GitHub Desktop.
Generates an RSS item from kramdown and metadata sourcefiles
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# | |
# rssmaker sourcefile | Kyle Barbour | Summer 2013 | |
# Generates an RSS item from kramdown and metadata sourcefiles | |
# | |
# The sourcefile should be formatted as follows: | |
# | |
# Title of RSS entry | |
# * http://urlforpost.org | |
# Text of RSS entry. Text can be in any format supported by Markdown. | |
# | |
# Newlines are OK, but the next paragraph needs to be similarly indented | |
# with a standard four space tab. | |
# guid: guid (optional) | |
# | |
# | |
# Title of second, already-processed RSS entry. Titles can extend multiple | |
# lines so long as there is no whitespace around them. | |
# * http://urlforpost.org | |
# Entries are separated by two newlines. rssmaker only processes the first | |
# entry, assuming subsequent entries have already been processed, as this | |
# one has. | |
# Date: RFC 2822 compliant date | |
# guid: guid | |
# | |
# etc. | |
# | |
# rssmaker finds the link by testing for whether '* ' begins a line. If a guid | |
# is not specified, one is created using the date. | |
# | |
# Directions: Allow default URL, allow reprocessing of entries, allow | |
# different destinations and default destinations, choose whether CDATA is | |
# necessary or not, check oldfeed for guid. Rewrite using Beautiful Soup? | |
# | |
############################################################################## | |
import argparse
import os
import re
import sys
from datetime import datetime
# email.Utils is a deprecated Python 2 alias that was removed in Python 3;
# the lowercase module name exists on Python 2.5+ and Python 3 alike.
from email.utils import formatdate
from subprocess import PIPE, Popen
from xml.sax.saxutils import escape
# Command-line interface: the single positional argument is the path to
# the kramdown/metadata sourcefile to process.
arg_parser = argparse.ArgumentParser(
    description="Generates an RSS item from kramdown and metadata sourcefiles.")
arg_parser.add_argument('source', type=str, help="Location of RSS sourcefile.")
args = arg_parser.parse_args()
# Per-run defaults: an RFC 2822 timestamp for pubDate, a date-derived
# fallback guid, and the feed path derived from the sourcefile name.
timestamp = formatdate(localtime=True)
autoguid = datetime.now().strftime("%Y-%m-%d-%H%M%S")
rsslist = args.source
rssfeed = os.path.splitext(rsslist)[0] + ".xml"
# Skeleton of the item under construction; 'guid' (and 'perm') may be
# overridden while parsing the sourcefile.
newitem = {'title': '', 'link': '', 'desc': '', 'date': timestamp,
           'guid': autoguid, 'perm': 'false'}
hasguid = False
newsource = ""
oldsource = ""
# Read RSS data from top of sourcefile.  Reading the file in one call
# replaces the original quadratic line-by-line string concatenation.
with open(rsslist, 'r') as sourcefile:
    source = sourcefile.read()
# Entries are separated by two blank lines ('\n\n\n'); only the first
# (newest, unprocessed) entry is split off.  Raises ValueError when the
# separator is missing, i.e. the sourcefile has no fully-delimited entry.
newsource, oldsource = source.split('\n\n\n', 1)
# Check that the item hasn't already been processed: a processed entry
# carries a "Date:" line.  A plain substring test is equivalent to the
# original re.search on this fixed literal, and avoids the regex.
if '\nDate:' in newsource:
    sys.exit("All items have been processed.")
# Process the new item.  The entry has the shape:
#   Title (possibly spanning several lines)
#   * http://link
#   body paragraphs and an optional "guid:" metadata line
# A single iterator is shared by both loops below: the first loop
# consumes lines up to and including the link line, and the second
# resumes exactly where the first stopped.
print newsource
readlines = iter(newsource.splitlines())
# Accumulate title lines until the '* link' line is reached.
for line in readlines:
    if line.startswith('*'):
        newitem['link'] = line.split('*')[1].strip()
        break
    else:
        newitem['title'] = ' '.join((newitem['title'], line))
# Remaining lines are either the optional "guid:" metadata or body text.
for line in readlines:
    if line.startswith('guid:'):
        newitem['guid'] = line.split('guid:')[1].strip()
        hasguid = True
        # A guid that is itself a URL doubles as the item's permalink.
        if newitem['guid'].startswith('http://'):
            newitem['perm'] = 'true'
        print line
    else:
        # Body lines are joined with newlines; leading/trailing
        # whitespace (the source's four-space indent) is stripped.
        newitem['desc'] = "\n".join([newitem['desc'], line.strip()])
# Clean up extra whitespace accumulated by the title join above.
newitem['title'] = newitem['title'].strip()
# Run kramdown on the description: pipe it through the external 'kr'
# command to convert Markdown to HTML.
# NOTE(review): communicate() is handed a str, which only works on
# Python 2 (Python 3 would need bytes or text mode) -- confirm the
# target interpreter.  The exit status is not checked, and despite the
# original comment, xml.sax.saxutils.escape is imported but never
# applied to the output here.
kr = Popen('kr', stdin=PIPE, stdout=PIPE)
newitem['desc'] = kr.communicate(newitem['desc'])[0]
# Template for one RSS <item>; %-interpolated from the newitem dict.
# The description is wrapped in CDATA so kramdown's HTML passes through
# unescaped.
item_template = """\
<item>
<title>%(title)s</title>
<link>%(link)s</link>
<description><![CDATA[%(desc)s]]></description>
<pubDate>%(date)s</pubDate>
<guid isPermaLink="%(perm)s">%(guid)s</guid>
</item>
"""
rssitem = item_template % newitem
print("Generated item, adding to RSS feed at: " + timestamp + "\n%s" % rssitem)
# Read the existing RSS feed so the new item can be spliced in.  As with
# the sourcefile, a single read() replaces the original quadratic
# line-by-line concatenation.
with open(rssfeed, 'r') as feedfile:
    oldfeed = feedfile.read()
# Generate the new feed and write it.  The new item goes immediately
# before the first existing <item>, or just before </channel> when the
# feed has no items yet.  Test for the exact delimiter the split below
# uses so the two can never disagree: the original bare 'item' substring
# check could be satisfied by e.g. an item title containing "item" and
# then crash in re.split.
if '<item>\n' in oldfeed:
    regex = "(<item>\n)"
else:
    regex = "(</channel>\n)"
# The delimiter is captured so it survives the split; re-attach it in
# front of the remainder.  (maxsplit must be a keyword argument: the
# positional form is deprecated as of Python 3.13.)
header, delimiter, rest = re.split(regex, oldfeed, maxsplit=1)
rest = delimiter + rest
# Refresh <lastBuildDate> in the feed header with this run's timestamp.
header = re.sub(r'(<lastBuildDate>).*(</lastBuildDate>)',
                r'\1' + timestamp + r'\2', header)
newfeed = "".join((header, rssitem, rest))
with open(rssfeed, 'w') as feedfile:
    feedfile.write(newfeed)
# Mark sourcefile as processed: stamp the entry just handled with a
# Date: line (plus a generated guid: line when the source had none),
# then write the whole sourcefile back.
if hasguid:
    stamped = newsource + "\nDate: " + timestamp
    status = "Timestamped item."
else:
    stamped = newsource + "\nguid: " + newitem['guid'] + "\nDate: " + timestamp
    status = "Timestamped item and generated guid."
with open(rsslist, 'w') as sourcefile:
    sourcefile.write(stamped + '\n\n\n' + oldsource)
print(status)
Sign up for free to join this conversation on GitHub.
Already have an account? Sign in to comment.