Create a gist now

Instantly share code, notes, and snippets.

CBC Radio 2 broadcast log history download Python script
#!/usr/bin/env python
# Python 2.7.x
import argparse
import csv
import datetime
import sys
from collections import defaultdict
# Requires requests and beautifulsoup4: pip install beautifulsoup4 requests
import requests
from bs4 import BeautifulSoup
# Constants
# strptime/strftime pattern shared by the --start/--end arguments, the
# 'broadcastdate' request parameter and the 'date' CSV column.
date_format = '%Y-%m-%d'
# How many days before the end date the search starts when --start is omitted.
default_days_back = 7
# Address of the broadcast-log page; it is requested once per day in the range.
# NOTE(review): this is an empty string, so the request has no valid target —
# the real endpoint appears to have been lost and must be filled in before the
# script can work.
cbc_radio_2_broadcast_logs_url = ''
# Command-line interface: an optional artist filter plus an optional date range.
parser = argparse.ArgumentParser(
    description='Search CBC Radio 2 Broadcast logs')
parser.add_argument(
    '--search-artist',
    dest='search_artist',
    default='',
    help='Artist to filter by; if not supplied, all entries will be shown.')
# The default look-back window is interpolated into the --start help text.
parser.add_argument(
    '--start',
    help=('Start date. If not supplied, will search back {} days '
          'from the end date').format(default_days_back))
parser.add_argument(
    '--end',
    help='End date. If not supplied, the current date is used.')
# Parses sys.argv; argparse exits the process itself on invalid usage.
args = parser.parse_args()
# Resolve the date range to search.
# datetime.datetime is used (rather than datetime.date) since only it has a
# .strptime() class method.
if args.end:
    end_date = datetime.datetime.strptime(args.end, date_format)
else:
    # No --end supplied: fall back to the current date, as the --end help
    # text states.
    end_date = datetime.datetime.now()
if args.start:
    start_date = datetime.datetime.strptime(args.start, date_format)
else:
    # No --start supplied: go back the default number of days from the end.
    start_date = end_date - datetime.timedelta(days=default_days_back)
# An inverted range would silently produce no rows, so reject it up front.
if start_date > end_date:
    raise ValueError('Start={} cannot be after end={}'.format(start_date, end_date))
# CSV output to stdout.
# A '#'-prefixed banner naming the covered date range precedes the data rows.
# (A single parenthesised argument prints identically under Python 2.)
print('# Results from {} to {}.'.format(
    start_date.strftime(date_format),
    end_date.strftime(date_format)))
# Column order of every emitted row; also used to filter scraped attributes.
fields = [
    'date',
    'time',
    'label',
    'artist',
    'composer',
    'album',
    'title',
    'duration',
]
csv_writer = csv.DictWriter(sys.stdout, fields)
# Walk the range one day at a time (both endpoints included), fetch that day's
# broadcast log and emit one CSV row per entry whose artist matches the filter.

# Normalize the filter once (for case insensitivity). An empty filter is a
# substring of every artist, so all entries are emitted in that case.
artist_filter = args.search_artist.strip().lower()
date = start_date
while date <= end_date:
    broadcast_date = date.strftime(date_format)
    r = requests.get(cbc_radio_2_broadcast_logs_url, params={'broadcastdate': broadcast_date})
    # NOTE(review): no parser is named here, so Beautiful Soup picks whichever
    # one is installed - consider pinning one for reproducible parsing.
    soup = BeautifulSoup(r.text)
    # Could also use: soup.find_all('div', class_='logShowEntry')
    entries = soup.select('div.logShowEntry')
    for entry in entries:
        # Entry attributes:
        # Assuming the <dt> names and <dd> values come in matching order.
        # NOTE: There is not always a 'composer' entry, so assume that not all
        # of the dt/dd entries are available. Convert to map.
        # ALSO: "Choral Concert" entries typically have many more attributes
        # that will be ignored.
        # Use a defaultdict in case not all attributes are available.
        attributes = defaultdict(str)
        # zip() pairs names with values and stops at the shorter list, so a
        # count mismatch cannot raise IndexError.
        for dt, dd in zip(entry.find_all('dt'), entry.find_all('dd')):
            # Sometimes, an attribute is empty, so just skip over it.
            # (label is an example)
            if dt.string is None or dd.string is None:
                continue
            attributes[dt.string.strip()] = dd.string.strip()
        # Capture the artist before encoding so the match is done on text.
        artist = attributes['artist']
        # Data not in attributes: Assumptions about where data will be.
        # May break over time.
        attributes['date'] = broadcast_date
        attributes['time'] = entry.find('div', class_='logEntryTime').string.strip()
        attributes['title'] = entry.find('h3').string.strip()
        # Filter attributes to only the defined field names.
        # Encode values as UTF-8 since CSV library doesn't support Unicode.
        row = {k: v.encode('utf-8') for (k, v) in attributes.iteritems() if k in fields}
        # Simple case-insensitive substring match on the artist.
        if artist_filter in artist.lower():
            csv_writer.writerow(row)
    date = date + datetime.timedelta(1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment