Last active
March 9, 2020 15:51
-
-
Save EnigmaCurry/55c288a95a891409527f38370924cb7f to your computer and use it in GitHub Desktop.
Basic podcast archiving script
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Convert podcast feeds into an aria2 download script.

- Set up the feed output directories and URLs
- Run: python archive_podcast.py > aria2.txt
- Run: aria2c -i aria2.txt

Aria2 will download all the episodes and supports resuming of partial downloads.
"""
import feedparser | |
import dateutil.parser | |
import os | |
def feed_to_aria(feed, output_dir, filename_format="{date} - {entry.title}.mp4"):
    """Print aria2 input-file entries for every episode of a podcast feed.

    Everything is written to stdout so the caller can redirect it to a file
    and hand that file to ``aria2c -i``. ``output_dir`` is created on a
    best-effort basis; failing to create it does not abort the run.

    Args:
        feed: Feed location, passed unchanged to ``feedparser.parse``.
        output_dir: Directory the episode files should be written into.
        filename_format: ``str.format`` template for each episode's file
            name. It receives ``date`` (the entry's published time rendered
            as ``%Y-%m-%d_%H:%M:%S``) and ``entry`` (the feed entry itself).

    Raises:
        AttributeError, IndexError, KeyError: If an entry has no usable
            enclosure link. A description naming the target path is written
            to stderr before the exception propagates.
    """
    import sys  # local import: only needed for diagnostics on stderr

    print("# Aria2 download script")
    print("# download via: ")
    print("# aria2c -i {name of this file}")

    # Best-effort directory creation: an already-existing directory is fine,
    # and any other failure is reported instead of being silently swallowed.
    try:
        os.makedirs(output_dir, exist_ok=True)
    except OSError as err:
        print(
            "Could not create output directory {}: {}".format(output_dir, err),
            file=sys.stderr,
        )

    rss = feedparser.parse(feed)
    # Walk the entries in the reverse of feed order
    # (presumably oldest first, as feeds usually list newest first -- confirm).
    for entry in reversed(rss.entries):
        date = dateutil.parser.parse(entry.published).strftime("%Y-%m-%d_%H:%M:%S")
        path = os.path.join(output_dir, filename_format.format(date=date, entry=entry))
        try:
            # Links without a "rel" key are skipped rather than aborting the
            # lookup; the first enclosure link's href is the download URL.
            enclosure = [
                link for link in entry.links if link.get("rel") == "enclosure"
            ][0]["href"]
        except (AttributeError, IndexError, KeyError):
            # Report on stderr: stdout is the aria2 script and must stay clean.
            print(
                "Could not find enclosure link for: {}".format(path),
                file=sys.stderr,
            )
            raise
        # One aria2 entry: the URL followed by its per-download option lines.
        print(enclosure)
        print(" out={}".format(path))
        print(" continue")
if __name__ == "__main__":
    # Each pair is (output directory, feed URL); edit these to choose which
    # podcasts are archived and where their episodes are saved.
    feeds = [
        ("path/to/feed1", "http://example.com/feed/1"),
        ("path/to/feed2", "http://example.com/feed/2"),
        ("path/to/feed3", "http://example.com/feed/3"),
    ]
    # Emit the aria2 entries for every configured feed, in list order.
    for target_dir, url in feeds:
        feed_to_aria(feed=url, output_dir=target_dir)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment