Skip to content

Instantly share code, notes, and snippets.

Last active Jul 31, 2021
What would you like to do?
Scrape A Prairie Home Companion website for MP3s.
#!/usr/bin/env python3
from bs4 import BeautifulSoup;
import urllib3;
import urllib.parse as urlparse;
import time;
import random;
class Main:
    """Crawl a list of index pages and yield the audio sources found on each show page.

    The crawl is two levels deep: every URL in ``self.urls`` is treated as an
    index page whose ``<a class="mod_header">`` links point at show pages, and
    every show page is searched for ``<div data-playlist="#story-playlist">``
    elements whose ``data-src`` attribute is yielded.
    """

    def __init__(self):
        """Create the HTTP connection pool and the list of index pages to crawl."""
        # NOTE(review): the start URL is blank in this copy of the source and
        # the numbered pages are bare relative names; both presumably had an
        # absolute site prefix originally -- fill in before running.
        startURL = ""
        self.http = urllib3.PoolManager()
        self.urls = [startURL]
        # Index pages 2 through 114 inclusive.
        self.urls.extend(f"{index}.html" for index in range(2, 115))

    def getAllAudio(self):
        """Lazily yield every audio source, walking index pages then show pages.

        Pages are fetched only as the generator is consumed, so a caller can
        pause between items to rate-limit the crawl.
        """
        for url in self.urls:
            for show in self.findAllShowPages(url):
                yield from self.getAudioPlayers(show)

    def _fetchSoup(self, url):
        """GET *url* and return its body parsed with the 'html.parser' backend."""
        response = self.http.request("GET", url)
        # urllib3 exposes the response body as bytes on ``.data``.
        return BeautifulSoup(response.data, 'html.parser')

    def findAllShowPages(self, url):
        """Return the absolute URLs of all show pages linked from index page *url*.

        Links are the ``href`` values of ``<a class="mod_header">`` elements,
        resolved against *url*.
        """
        soup = self._fetchSoup(url)
        links = soup.find_all(name='a', attrs={'class': 'mod_header'}, recursive=True)
        # Skip anchors without an href: urljoin(url, None) would hand back the
        # index page itself and it would then be crawled as a show page.
        return [
            urlparse.urljoin(url, link.get('href'))
            for link in links
            if link.get('href')
        ]

    def getAudioPlayers(self, showPage):
        """Return the ``data-src`` values of the story-playlist players on *showPage*."""
        showsoup = self._fetchSoup(showPage)
        players = showsoup.find_all(
            name='div', attrs={'data-playlist': "#story-playlist"}, recursive=True
        )
        # Drop players that carry no data-src so callers never receive None.
        return [
            audio.get('data-src')
            for audio in players
            if audio.get('data-src')
        ]
# Script entry point: print every audio source found by the crawler.
if __name__ == "__main__":
    for x in Main().getAllAudio():
        print(x)
        # Wait a little bit so that we don't get banned.
        # NOTE(review): the original delay is not visible in this copy of the
        # source; a random 1-3 second pause is an assumed placeholder.
        time.sleep(random.uniform(1, 3))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment