Skip to content

Instantly share code, notes, and snippets.

@andering
Last active January 12, 2020 02:33
Show Gist options
  • Save andering/2136a7266bb02e7029b94ae77d7a983e to your computer and use it in GitHub Desktop.
Save andering/2136a7266bb02e7029b94ae77d7a983e to your computer and use it in GitHub Desktop.
from urllib import request
from bs4 import BeautifulSoup
from pprint import pprint
from pathlib import Path
import re, os
# Matches the first run of eight digits in an episode link; that run becomes
# the file-name prefix.
regexp_get_id = re.compile("[0-9]{8}")
# Matches each run of characters other than ASCII letters and digits.
regexp_remove_non_alphanumeric = re.compile(r'[^a-zA-Z0-9]+')
# Feed whose <item> entries are downloaded.
quote_page = 'http://feeds.gty.org/gtyradiobroadcast'


def build_filename(title, link):
    """Return the local file name for one feed item.

    The name has the form ``<id>_<title>.mp3`` where ``<id>`` is the first run
    of eight digits found in *link* and ``<title>`` is *title* lower-cased with
    every run of non-alphanumeric characters replaced by a single underscore.

    Returns ``None`` when *link* contains no eight-digit run, so the caller can
    skip the item instead of failing on an unmatched regex.
    """
    match = regexp_get_id.search(link)
    if match is None:
        return None
    safe_title = regexp_remove_non_alphanumeric.sub('_', title.lower())
    return match.group(0) + '_' + safe_title + '.mp3'


def download_feed(feed_url=quote_page, target_dir=None):
    """Download the file linked from every ``<item>`` of the feed.

    Args:
        feed_url: URL of the feed to read. Defaults to ``quote_page``.
        target_dir: Directory that receives the files. Defaults to the
            ``PODCAST`` directory inside the current user's home directory.
            It is created if it does not exist.

    Items that lack a title, a ``feedburner:origlink`` element, or an
    eight-digit id in their link are reported and skipped. Network and
    file-system errors from ``urlopen`` / ``urlretrieve`` are not caught.
    """
    if target_dir is None:
        target_dir = os.path.join(Path.home(), 'PODCAST')
    # urlretrieve does not create missing parent directories.
    os.makedirs(target_dir, exist_ok=True)

    # Parse inside the ``with`` so the HTTP response is closed afterwards.
    with request.urlopen(feed_url) as page:
        soup = BeautifulSoup(page, 'lxml')

    for one_mp3 in soup.find_all('item'):
        title_tag = one_mp3.title
        link_tag = one_mp3.find('feedburner:origlink')
        if title_tag is None or link_tag is None:
            print('Skipping item without title or feedburner:origlink')
            continue

        link = link_tag.text
        filename = build_filename(title_tag.text, link)
        if filename is None:
            print('Skipping ' + link + ' (no 8-digit id in link)')
            continue

        filepath = os.path.join(target_dir, filename)
        print(link + ' -> ' + filepath)
        request.urlretrieve(link, filepath)


if __name__ == '__main__':
    download_feed()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment