JohnEarnest/bam.py

## bam.py
#!/usr/bin/python

# A script for fetching the latest episode of
# the podcast "My Brother, My Brother and Me" automagically

# every podcast hosts the XML RSS feed somewhere different,
# but feedburner appears to be a fairly popular aggregator.
# unfortunately, there's no way to just fetch the N most recent entries;
# for long-running podcasts the rss blob can get big.
print('fetching rss feed...')
try:
    import httplib  # Python 2
except ImportError:
    import http.client as httplib  # same module under its Python 3 name
c = httplib.HTTPConnection('feeds.feedburner.com')
try:
    c.request('GET', '/mbmbam?format=xml')
    payload = c.getresponse().read()
finally:
    c.close()  # release the socket even if the request fails

# see https://cyber.harvard.edu/rss/rss.html
# we want the most-recent 'item' in the 'channel' element.
print('parsing rss payload...')
import xml.etree.ElementTree as tree, re
root = tree.fromstring(payload)
channel = root.find('channel')
# compare against None explicitly: an Element with no children tests false.
newest = channel.find('item') if channel is not None else None
if newest is None:
    raise SystemExit('no episodes found in the rss feed')
enclosure = newest.find('enclosure')
if enclosure is None or 'url' not in enclosure.attrib:
    raise SystemExit('newest episode has no downloadable enclosure')
# findtext() yields '' for a missing or empty element instead of None,
# so the len() and re.sub() calls below never see None.
title = newest.findtext('title', '')
desc = re.sub(r'</?p[^>]*>', '', newest.findtext('description', ''))  # strip <p>-style tags

# url may contain trailing arguments like below, so strip anything after a '?':
# http://traffic.libsyn.com/mbmbam/MyBrotherMyBrotherandMe459.mp3?dest-id=18443
url = re.sub(r'\?.*$', '', enclosure.attrib['url'])

# pass the buck to wget to actually download the file!
# the banner is built as one string so it prints the same under Python 2 and 3.
print('\n'.join(['', title, '-' * len(title), desc, '']))
import subprocess
# the url comes from the remote feed, so hand wget an argument list (no shell
# is involved) and use '--' so the url can never be parsed as an option.
subprocess.call(['wget', '--no-clobber', '--', url])
print('done!')
	#!/usr/bin/python

	# A script for fetching the latest episode of
	# the podcast "My Brother, My Brother and Me" automagically

	# every podcast hosts the XML RSS feed somewhere different,
	# but feedburner appears to be a fairly popular aggregator.
	# unfortunately, there's no way to just fetch the N most recent entries;
	# for long-running podcasts the rss blob can get big.
	print('fetching rss feed...')
	try:
		import httplib  # Python 2
	except ImportError:
		import http.client as httplib  # same module under its Python 3 name
	c = httplib.HTTPConnection('feeds.feedburner.com')
	try:
		c.request('GET', '/mbmbam?format=xml')
		payload = c.getresponse().read()
	finally:
		c.close()  # release the socket even if the request fails

	# see https://cyber.harvard.edu/rss/rss.html
	# we want the most-recent 'item' in the 'channel' element.
	print('parsing rss payload...')
	import xml.etree.ElementTree as tree, re
	root = tree.fromstring(payload)
	channel = root.find('channel')
	# compare against None explicitly: an Element with no children tests false.
	newest = channel.find('item') if channel is not None else None
	if newest is None:
		raise SystemExit('no episodes found in the rss feed')
	enclosure = newest.find('enclosure')
	if enclosure is None or 'url' not in enclosure.attrib:
		raise SystemExit('newest episode has no downloadable enclosure')
	# findtext() yields '' for a missing or empty element instead of None,
	# so the len() and re.sub() calls below never see None.
	title = newest.findtext('title', '')
	desc = re.sub(r'</?p[^>]*>', '', newest.findtext('description', ''))  # strip <p>-style tags

	# url may contain trailing arguments like below, so strip anything after a '?':
	# http://traffic.libsyn.com/mbmbam/MyBrotherMyBrotherandMe459.mp3?dest-id=18443
	url = re.sub(r'\?.*$', '', enclosure.attrib['url'])

	# pass the buck to wget to actually download the file!
	# the banner is built as one string so it prints the same under Python 2 and 3.
	print('\n'.join(['', title, '-' * len(title), desc, '']))
	import subprocess
	# the url comes from the remote feed, so hand wget an argument list (no shell
	# is involved) and use '--' so the url can never be parsed as an option.
	subprocess.call(['wget', '--no-clobber', '--', url])
	print('done!')