Skip to content

Instantly share code, notes, and snippets.

@noman-land
Last active September 1, 2023 05:32
Show Gist options
  • Save noman-land/a27b48f82acf3e17964c90e3422d03ca to your computer and use it in GitHub Desktop.
Save noman-land/a27b48f82acf3e17964c90e3422d03ca to your computer and use it in GitHub Desktop.
Download all No Such Thing As A Fish episodes to current folder.
####
#
# Download all No Such Thing As A Fish episodes to current folder
#
# How to use: `python get_episodes.py`
#
# THIS WILL DOWNLOAD OVER 20 GIGABYTES OF MP3s
#
####
from datetime import datetime
from pathlib import Path
import feedparser
import urllib.request
import sys
def now():
    """Return the current local wall-clock time as an ``HH:MM:SS`` string."""
    current = datetime.now()
    return f'{current:%H:%M:%S}'
def log(episode_num, *msg):
    """Print a timestamped progress line for a single episode.

    Args:
        episode_num: Episode number shown in the line prefix.
        *msg: Values printed after the prefix, space-separated by ``print``.
    """
    prefix = f'-- {now()} -- Episode {episode_num} --'
    print(prefix, *msg)
def is_episode(episode):
    """Return True when a feed entry carries a non-empty episode number.

    The entry may expose ``itunes_episode`` either as an attribute or as a
    mapping key; the rest of this script reads it with item access, so both
    forms are accepted here.

    Args:
        episode: A feed entry (attribute-style object or mapping).

    Returns:
        bool: True if ``itunes_episode`` is present and truthy, else False.
    """
    number = getattr(episode, 'itunes_episode', None)
    # Plain mappings do not expose keys as attributes; fall back to .get().
    if number is None and hasattr(episode, 'get'):
        number = episode.get('itunes_episode')
    return bool(number)
def is_audio(media):
    """Return True when a media entry is marked as audio.

    Args:
        media: Mapping describing one media item of a feed entry.

    Returns:
        bool: True if the entry's ``medium`` equals ``'audio'``. Entries
        without a ``medium`` key are treated as non-audio instead of
        raising ``KeyError``.
    """
    return media.get('medium') == 'audio'
def make_audio_file_path(episode_num):
    """Build the local file name (``<episode_num>.mp3``) for an episode."""
    return '{}.mp3'.format(episode_num)
def get_audio_url(episode):
    """Return the URL of the first audio item attached to a feed entry.

    Args:
        episode: Feed entry whose ``media_content`` is an iterable of media
            mappings.

    Returns:
        The ``url`` value of the first media item accepted by ``is_audio``.

    Raises:
        ValueError: If none of the entry's media items is audio.
    """
    # Only the first match is needed, so stop scanning as soon as it is found.
    audio = next(filter(is_audio, episode['media_content']), None)
    if audio is None:
        raise ValueError('Episode has no audio entry in its media_content.')
    return audio['url']
def get_episode_num(episode):
    """Return the entry's ``itunes_episode`` value converted to ``int``."""
    raw_number = episode['itunes_episode']
    return int(raw_number)
def download_episode_audio(episode):
    """Download one episode's audio into the current folder, unless present.

    The audio is first written to ``<episode_num>.mp3.part`` and only renamed
    to ``<episode_num>.mp3`` once the transfer has finished. An interrupted
    or failed download therefore never leaves a truncated ``.mp3`` behind
    that a later run would skip as "already downloaded".

    Args:
        episode: Feed entry providing ``itunes_episode`` and
            ``media_content``.
    """
    episode_num = get_episode_num(episode)
    audio_url = get_audio_url(episode)
    audio_path = Path(make_audio_file_path(episode_num))

    if audio_path.exists():
        log(episode_num, 'Already downloaded audio. Skipping.')
        return

    log(episode_num, f'Downloading audio at {audio_url}.')
    partial_path = audio_path.with_name(audio_path.name + '.part')
    try:
        urllib.request.urlretrieve(audio_url, str(partial_path))
        # Publish the file under its final name only after a full download.
        partial_path.replace(audio_path)
    finally:
        # Still present only if the download or rename did not complete
        # (including Ctrl+C); remove it so no partial data is left behind.
        if partial_path.exists():
            partial_path.unlink()
# Build an opener that sends a browser-like User-Agent header.
opener = urllib.request.build_opener()
opener.addheaders = [(
    'User-Agent',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36'
)]
# Install it as the process-wide default; without this call the opener is
# never used and urllib.request.urlretrieve() sends the default headers.
urllib.request.install_opener(opener)

# RSS feed listing the podcast's episodes.
rss_feed_url = 'https://audioboom.com/channels/2399216.rss'
def get_rss_episodes():
    """Return a lazy iterator over the feed's numbered episodes.

    The feed at ``rss_feed_url`` is parsed, its entries are walked in
    reverse of feed order, and only entries accepted by ``is_episode``
    are yielded.
    """
    feed = feedparser.parse(rss_feed_url)
    entries_reversed = reversed(feed['entries'])
    return filter(is_episode, entries_reversed)
# All of this is idempotent
# Run it as many times as you like
# It will fill in any gaps
if __name__ == '__main__':
    try:
        # Fetch every episode from the feed into the current folder,
        # one download at a time.
        for entry in get_rss_episodes():
            download_episode_audio(entry)
    except KeyboardInterrupt:
        # Ctrl+C: report the interruption and exit with status 0.
        print('\n[CTRL + C DETECTED] Script interrupted. Cleaning up.')
        sys.exit(0)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment