Generates an RSS item from kramdown and metadata sourcefiles
#!/usr/bin/env python
#
# rssmaker sourcefile | Kyle Barbour | Summer 2013
# Generates an RSS item from kramdown and metadata sourcefiles
#
# The sourcefile should be formatted as follows:
#
# Title of RSS entry
# * http://urlforpost.org
#     Text of RSS entry. Text can be in any format supported by Markdown.
#
#     Newlines are OK, but the next paragraph needs to be similarly indented
#     with a standard four space tab.
# guid: guid (optional)
#
#
# Title of second, already-processed RSS entry. Titles can extend multiple
# lines so long as there is no whitespace around them.
# * http://urlforpost.org
#     Entries are separated by two newlines. rssmaker only processes the first
#     entry, assuming subsequent entries have already been processed, as this
#     one has.
# Date: RFC 2822 compliant date
# guid: guid
#
# etc.
#
# rssmaker finds the link by testing for whether '* ' begins a line. If a guid
# is not specified, one is created using the date.
#
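# Usage (a sketch; "feed.txt" here is only an example sourcefile name):
#
#     rssmaker feed.txt
#
# This prepends the new <item> to feed.xml (the sourcefile path with its
# extension replaced by .xml) and stamps the processed entry in feed.txt with
# its Date and, if none was given, a guid.
#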
# To do: Allow a default URL, allow reprocessing of entries, allow
# different destinations and default destinations, choose whether CDATA is
# necessary or not, check oldfeed for guid. Rewrite using Beautiful Soup?
#
##############################################################################

import os
import sys
import re
import argparse
from subprocess import Popen, PIPE
from email.utils import formatdate
from datetime import datetime
from xml.sax.saxutils import escape
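# Note: escape() is imported but not currently used; the item description is
# wrapped in CDATA instead (see the CDATA item in the to-do list above).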

# Options
parser = argparse.ArgumentParser(description="Generates an RSS item from "
                                 "kramdown and metadata sourcefiles.")
parser.add_argument('source', type=str, help="Location of RSS sourcefile.")
args = parser.parse_args()

# Default data
timestamp = formatdate(localtime=True)
autoguid = datetime.now().strftime("%Y-%m-%d-%H%M%S")
rsslist = args.source
rssfeed = "%s.xml" % os.path.splitext(rsslist)[0]
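# The feed is written next to the sourcefile, with an .xml extension (for
# example, a sourcefile named feed.txt produces feed.xml).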
newitem = {
    'title': '',
    'link': '',
    'desc': '',
    'date': timestamp,
    'guid': autoguid,
    'perm': 'false',
}
hasguid = False
newsource = ""
oldsource = ""

# Read RSS data from top of sourcefile
source = ""
with open(rsslist, 'r') as sourcefile:
    for line in sourcefile:
        source += line
newsource, oldsource = source.split('\n\n\n', 1)
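# Note: the unpacking above assumes the sourcefile already contains the entry
# separator (two blank lines); with a single, never-processed entry and no
# separator, split() returns one piece and this raises ValueError.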

# Check that the item hasn't already been processed
if re.search('\nDate:', newsource):
    sys.exit("All items have been processed.")

# Process the new item
print newsource
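# Everything before the '* ' link line belongs to the (possibly multi-line)
# title; the link line ends the title.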
readlines = iter(newsource.splitlines())
for line in readlines:
    if line.startswith('*'):
        newitem['link'] = line.split('*')[1].strip()
        break
    else:
        newitem['title'] = ' '.join((newitem['title'], line))
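# The remaining lines form the description, except for an optional 'guid:'
# line; a guid that is itself a URL is marked as a permalink.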
for line in readlines:
    if line.startswith('guid:'):
        newitem['guid'] = line.split('guid:')[1].strip()
        hasguid = True
        if newitem['guid'].startswith('http://'):
            newitem['perm'] = 'true'
        print line
    else:
        newitem['desc'] = "\n".join([newitem['desc'], line.strip()])

# Clean up extra whitespace
newitem['title'] = newitem['title'].strip()

# Run kramdown on the description via the 'kr' command
kr = Popen('kr', stdin=PIPE, stdout=PIPE)
newitem['desc'] = kr.communicate(newitem['desc'])[0]
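# ('kr' is assumed to be a kramdown command on the PATH that reads Markdown
# on stdin and writes HTML to stdout.)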

# Define RSS item output
item = """\
<item>
<title>%(title)s</title>
<link>%(link)s</link>
<description><![CDATA[%(desc)s]]></description>
<pubDate>%(date)s</pubDate>
<guid isPermaLink="%(perm)s">%(guid)s</guid>
</item>
"""
rssitem = item % newitem
print "Generated item, adding to RSS feed at: " + timestamp + "\n%s" % rssitem

# Read the RSS feed, find the first item in it
oldfeed = ""
with open(rssfeed, 'r') as feedfile:
    for line in feedfile:
        oldfeed += line
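
# The new item goes immediately before the first existing <item>; if the feed
# has no items yet, it goes just before </channel>.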
# Generate the new feed and write it
if '<item>' in oldfeed:
    regex = "(<item>\n)"
else:
    regex = "(</channel>\n)"
header, delimiter, rest = re.split(regex, oldfeed, 1)
rest = delimiter + rest
header = re.sub(r'(<lastBuildDate>).*(</lastBuildDate>)',
                r'\1' + timestamp + r'\2', header)
newfeed = "".join((header, rssitem, rest))
with open(rssfeed, 'w') as feedfile:
    feedfile.write(newfeed)

# Mark sourcefile as processed
with open(rsslist, 'w') as sourcefile:
    if hasguid:
        sourcefile.write(''.join((newsource, "\nDate: ", timestamp, '\n\n\n',
                                  oldsource)))
        print "Timestamped item."
    else:
        sourcefile.write(''.join((newsource, "\nguid: ", newitem['guid'],
                                  "\nDate: ", timestamp, '\n\n\n', oldsource)))
        print "Timestamped item and generated guid."