''' Download podcast episodes from an RSS feed '''
import feedparser
import argparse
import requests
import sys
from rich.console import Console
from os import listdir
from os.path import isfile, join
from pathlib import Path
from pathlib import PurePath
# Globals
# RSS feed URLs to process; one URL string per podcast.
# NOTE(review): the list literal was left unclosed and its entries are not
# present in this file - re-add the feed URLs here before running.
podcasts = [
]
# Shared rich console used for all status output in this script
console = Console()
# References
def parse_rss_url(rss_url):
    ''' Hand the RSS feed URL to feedparser and return the parsed feed object it produces '''
    return feedparser.parse(rss_url)
# Single-pass translation table for episode titles:
#   - spaces become underscores
#   - : - ? + @ , | " are dropped (| was added for the EFF podcast title)
#   - / and \ are dropped as well, because a path separator in the title would
#     make the joined download path point into a different directory
_FILENAME_TRANSLATION = str.maketrans(' ', '_', ':-?+@,|"/\\')


def format_filenames(title):
    ''' Build a filesystem-friendly .mp3 filename from an episode title

    :input: title  Episode title string taken from the RSS feed metadata
    :returns: The title with ".mp3" appended, spaces replaced by "_" and the
              characters : - ? + @ , | " / \\ removed
    '''
    # The extension is appended before translating, so the result always ends
    # in ".mp3" ("." is not one of the translated characters)
    return (title + '.mp3').translate(_FILENAME_TRANSLATION)
def map_files(directory):
    ''' Accept a directory and list the names of all files directly underneath it

    :input: directory  pathlib.Path of the directory to scan (not recursive)
    :returns: List of filename strings (name only, no directory part);
              sub-directories are not included
    '''
    # Names, not Path objects, are returned so they can be compared directly
    # with the strings produced by format_filenames()
    return [child.name for child in directory.iterdir() if child.is_file()]
# (connect, read) timeouts in seconds so a stalled server cannot hang the run
_DOWNLOAD_TIMEOUT = (10, 60)


def download_episodes(feed_dict, existing_files, podcast_absolute_path, warnings):
    ''' Download every not-yet-downloaded audio episode of a feed to the destination folder

    :input: feed_dict              Parsed feed object; its .entries are the episodes
    :input: existing_files         Iterable of filenames already present in the folder
    :input: podcast_absolute_path  pathlib.Path of the folder episodes are written to
    :input: warnings               When truthy, report episodes skipped as already downloaded
    :returns: None

    A failed download (connection error, timeout, HTTP error status) is
    reported on the console and skipped; nothing is written for it, so the
    episode is retried on the next run.
    '''
    # Set membership is O(1); it is also updated as files are written so the
    # same filename is not fetched twice within one run
    already_downloaded = set(existing_files)
    # .entries is easy way to access all individual episodes in an RSS feed
    for episode in feed_dict.entries:
        # .links usually has multiple links, a text/html link with description
        # and an audio/mpeg link with the actual audio file
        for link in episode.get('links', []):
            # .get() so links without a type or href are skipped, not a KeyError
            if link.get('type') != 'audio/mpeg' or not link.get('href'):
                continue
            # Format the filename by stripping non-compliant characters
            filename = format_filenames(episode.title)
            # Generate joined path for output
            download_path = podcast_absolute_path.joinpath(filename)
            if filename in already_downloaded:
                # Only mention skipped episodes when warnings are enabled
                if warnings:
                    console.print(f'[-] WARN - Episode already downloaded [green]{filename}[/green] skipping...', style="bold red")
                continue
            console.print(f'[+] INFO - New episode, downloading [blue]{filename}[/blue]', style="bold green")
            try:
                response = requests.get(link['href'], timeout=_DOWNLOAD_TIMEOUT)
                # Without this an HTTP error page would be saved as the .mp3
                # and then be treated as "already downloaded" forever
                response.raise_for_status()
            except requests.RequestException as error:
                # markup=False: the error text may contain square brackets
                console.print(f'[!] ERROR - Download failed for {filename}: {error}', style="bold yellow", markup=False)
                continue
            # The file is only created once the full response has been received
            with open(download_path, 'wb') as f:
                f.write(response.content)
            already_downloaded.add(filename)
def check_directory(directory):
    ''' Return the given path when it is an existing directory, otherwise None '''
    return directory if directory.is_dir() else None
def make_directory(absolute_path):
    ''' Create podcast directory

    :input: absolute_path  pathlib.Path of the directory to create
    :returns: absolute_path when the directory exists afterwards, or None when
              its parent directory does not exist
    '''
    try:
        # Parents are deliberately not created, so a wrong parent path is
        # reported instead of silently building a new tree; exist_ok makes a
        # repeated call on an existing directory harmless
        absolute_path.mkdir(exist_ok=True)
    except FileNotFoundError:
        console.print(f'[!] Parent directory was not found for provided folder name: {absolute_path.name}', style="bold yellow")
        return None
    return absolute_path
def main():
    ''' Download all podcasts episodes not already downloaded

    Reads --directory (parent folder for all podcast folders) and --warnings
    from the command line, then for every feed URL in the module-level
    `podcasts` list: parses the feed, makes sure a folder named after the feed
    title exists under the parent folder, and downloads the episodes that are
    not in that folder yet.

    Exits with status 1 when --directory is missing or is not an existing
    directory. A feed without a title, or whose folder cannot be created, is
    reported and skipped.
    '''
    parser = argparse.ArgumentParser()
    parser.add_argument("--directory", type=Path, action="store", dest="parent_directory", help="Parent directory for your local Podcast files")
    parser.add_argument("--warnings", action="store_true", dest="warnings", help="Enable warning messages")
    args = parser.parse_args()
    parent_directory = args.parent_directory
    warnings = args.warnings
    # --directory is required for anything useful to happen
    if not parent_directory:
        parser.print_help()
        sys.exit(1)
    if check_directory(parent_directory) is None:
        console.print('[!] Directory supplied is not a valid existing directory', style="bold yellow")
        sys.exit(1)
    # Iterate over all of the defined podcast RSS URLs
    for podcast in podcasts:
        # Process the RSS feed and get a list of entries for the podcast
        feed = parse_rss_url(podcast)
        # The feed title names the podcast folder; a feed that could not be
        # fetched or parsed has no title, so skip it instead of raising KeyError
        podcast_directory_name = feed.get('feed', {}).get('title')
        if not podcast_directory_name:
            console.print(f'[!] No feed title found for {podcast} skipping...', style="bold yellow")
            continue
        # Create a Pathlib path object for the directory (not an absolute path yet)
        podcast_directory = Path(podcast_directory_name)
        # Join the podcast directory name with parent provided by user
        podcast_absolute_path = parent_directory.joinpath(podcast_directory)
        # If the podcast directory does not exist, create it
        if check_directory(podcast_absolute_path) is None:
            make_directory(podcast_absolute_path)
            # Re-check on disk rather than relying on make_directory's return value
            if check_directory(podcast_absolute_path) is None:
                console.print(f'[!] Could not create directory {podcast_absolute_path} skipping...', style="bold yellow")
                continue
        # List what is already there (empty for a freshly created directory)
        existing_files = map_files(podcast_absolute_path)
        download_episodes(feed, existing_files, podcast_absolute_path, warnings)
# Run the downloader only when executed as a script, not when imported
if __name__ == "__main__":
    main()
