EnigmaCurry/archive_podcast.py

## archive_podcast.py
"""Convert podcast feeds into an aria2 download script

 - Set up the feed output directories and URLs (edit the `feeds` list at the bottom of this file)
 - Run: python archive_podcast.py > aria2.txt
 - Run: aria2c -i aria2.txt

Aria2 will download all the episodes and supports resuming of partial downloads

"""
import feedparser
import dateutil.parser
import os

def feed_to_aria(feed, output_dir, filename_format="{date} - {entry.title}.mp4"):
    """Print an aria2 input-file section for every entry of a podcast feed.

    Everything is written to stdout: a short comment header, then for each
    entry one URI line followed by indented option lines (``out=<path>`` and
    ``continue``). Diagnostics go to stderr so they never end up inside the
    generated script when stdout is redirected to a file.

    Args:
        feed: Feed location, passed unchanged to ``feedparser.parse``.
        output_dir: Directory used as the prefix of every ``out=`` path. It is
            created up front if it does not exist (best effort).
        filename_format: ``str.format`` template for the file name. It
            receives ``date`` (the entry's ``published`` value re-formatted as
            ``%Y-%m-%d_%H:%M:%S``) and ``entry`` (the feed entry itself).

    Raises:
        IndexError: An entry has no link whose ``rel`` is ``'enclosure'``.
        KeyError: The enclosure link has no ``href``.
        AttributeError: An entry has no ``links`` attribute.
    """
    import sys  # local import: only needed for the stderr diagnostics below

    print("# Aria2 download script")
    print("# download via: ")
    print("#    aria2c -i {name of this file}")
    try:
        # exist_ok covers the common "directory already there" case.
        os.makedirs(output_dir, exist_ok=True)
    except OSError as err:
        # Directory creation stays best effort, but a real failure is reported
        # instead of being swallowed (and Ctrl-C is no longer caught here).
        sys.stderr.write(
            "Could not create directory {}: {}\n".format(output_dir, err))
    rss = feedparser.parse(feed)
    # Walk the entries in the reverse of the order the feed lists them.
    for entry in reversed(rss.entries):
        date = dateutil.parser.parse(entry.published).strftime("%Y-%m-%d_%H:%M:%S")
        # NOTE: a path separator inside entry.title becomes part of the path.
        path = os.path.join(output_dir, filename_format.format(date=date, entry=entry))
        try:
            # .get() so a link without a 'rel' key is skipped, not fatal.
            enclosure = [
                link for link in entry.links if link.get('rel') == 'enclosure'
            ][0]['href']
        except (AttributeError, IndexError, KeyError):
            sys.stderr.write(
                "Could not find enclosure link for: {}\n".format(path))
            raise
        print(enclosure)
        print("  out={}".format(path))
        print("  continue")

if __name__ == "__main__":
    # Each item pairs an output directory with the URL of the feed whose
    # episodes should be saved there. Replace the placeholders before running.
    feeds = (
        ("path/to/feed1", "http://example.com/feed/1"),
        ("path/to/feed2", "http://example.com/feed/2"),
        ("path/to/feed3", "http://example.com/feed/3"),
    )

    # Write one aria2 script section per feed, in the order listed above.
    for target_dir, url in feeds:
        feed_to_aria(url, target_dir)
	"""Convert podcast feeds into an aria2 download script

	- Set up the feed output directories and URLs (edit the `feeds` list at the bottom of this file)
	- Run: python archive_podcast.py > aria2.txt
	- Run: aria2c -i aria2.txt

	Aria2 will download all the episodes and supports resuming of partial downloads

	"""
	import feedparser
	import dateutil.parser
	import os

	def feed_to_aria(feed, output_dir, filename_format="{date} - {entry.title}.mp4"):
	    """Print an aria2 input-file section for every entry of a podcast feed.

	    Everything is written to stdout: a short comment header, then for each
	    entry one URI line followed by indented option lines (``out=<path>``
	    and ``continue``). Diagnostics go to stderr so they never end up inside
	    the generated script when stdout is redirected to a file.

	    Args:
	        feed: Feed location, passed unchanged to ``feedparser.parse``.
	        output_dir: Directory used as the prefix of every ``out=`` path. It
	            is created up front if it does not exist (best effort).
	        filename_format: ``str.format`` template for the file name. It
	            receives ``date`` (the entry's ``published`` value re-formatted
	            as ``%Y-%m-%d_%H:%M:%S``) and ``entry`` (the feed entry itself).

	    Raises:
	        IndexError: An entry has no link whose ``rel`` is ``'enclosure'``.
	        KeyError: The enclosure link has no ``href``.
	        AttributeError: An entry has no ``links`` attribute.
	    """
	    import sys  # local import: only needed for the stderr diagnostics below

	    print("# Aria2 download script")
	    print("# download via: ")
	    print("# aria2c -i {name of this file}")
	    try:
	        # exist_ok covers the common "directory already there" case.
	        os.makedirs(output_dir, exist_ok=True)
	    except OSError as err:
	        # Directory creation stays best effort, but a real failure is
	        # reported instead of being swallowed.
	        sys.stderr.write(
	            "Could not create directory {}: {}\n".format(output_dir, err))
	    rss = feedparser.parse(feed)
	    # Walk the entries in the reverse of the order the feed lists them.
	    for entry in reversed(rss.entries):
	        date = dateutil.parser.parse(entry.published).strftime("%Y-%m-%d_%H:%M:%S")
	        path = os.path.join(output_dir, filename_format.format(date=date, entry=entry))
	        try:
	            # .get() so a link without a 'rel' key is skipped, not fatal.
	            enclosure = [
	                link for link in entry.links if link.get('rel') == 'enclosure'
	            ][0]['href']
	        except (AttributeError, IndexError, KeyError):
	            sys.stderr.write(
	                "Could not find enclosure link for: {}\n".format(path))
	            raise
	        print(enclosure)
	        print(" out={}".format(path))
	        print(" continue")

	if __name__ == "__main__":
	    # Each item pairs an output directory with the URL of the feed whose
	    # episodes should be saved there. Replace the placeholders before running.
	    feeds = [
	        ("path/to/feed1","http://example.com/feed/1"),
	        ("path/to/feed2","http://example.com/feed/2"),
	        ("path/to/feed3","http://example.com/feed/3")
	    ]

	    # Write one aria2 script section per feed, in the order listed above.
	    for name, feed_url in feeds:
	        feed_to_aria(feed_url, name)