Skip to content

Instantly share code, notes, and snippets.

@contrequarte
Forked from xflr6/iceportal.py
Last active November 10, 2023 06:29
Show Gist options
  • Save contrequarte/236a3ad965a4fbd5aefe29b6e2d2201c to your computer and use it in GitHub Desktop.
Save contrequarte/236a3ad965a4fbd5aefe29b6e2d2201c to your computer and use it in GitHub Desktop.
Download all available audio books from ICE portal
"""Download all available audio books from DB ICE Portal."""
import json
import os
import urllib.parse
import urllib.request
# Root URL that the default page and audio-book endpoint URLs are joined onto.
BASE = 'http://iceportal.de/api1/rs/'
def load_json(url: str, *, verbose: bool = True, timeout: float = 30.0):
    """Fetch *url* and return its body decoded as JSON.

    Args:
        url: Address to request; passed unchanged to
            ``urllib.request.urlopen``.
        verbose: When true, print *url* before requesting it.
        timeout: Seconds to wait on a blocking network operation before
            giving up, so that a stalled connection cannot hang the
            caller forever.

    Returns:
        The decoded JSON document.

    Raises:
        urllib.error.URLError: If the request fails.
        json.JSONDecodeError: If the response body is not valid JSON.
    """
    if verbose:
        print(url)
    with urllib.request.urlopen(url, timeout=timeout) as response:
        return json.load(response)
def get_page(href: str, *,
             base: str = urllib.parse.urljoin(BASE, 'page/')):
    """Return the decoded JSON document for the page addressed by *href*.

    Leading slashes are dropped from *href* so that it is resolved
    relative to *base* instead of replacing the path of *base*.  The
    requested URL is not printed.
    """
    relative_href = href.lstrip('/')
    page_url = urllib.parse.urljoin(base, relative_href)
    document = load_json(page_url, verbose=False)
    return document
def retrieve(source, target, *,
             base: str = urllib.parse.urljoin(BASE, 'audiobooks/path/')) -> None:
    """Download the file described by *source* into the local file *target*.

    *source* (minus leading slashes) is resolved against *base* and fetched
    as JSON; the ``path`` entry of that document is resolved against *base*
    again to obtain the actual download URL.

    The data is first written to ``<target>.part`` and only moved to
    *target* once the transfer has finished.  An interrupted or failed
    download therefore never leaves a truncated file under the final name,
    where a caller checking for the existence of *target* would mistake it
    for a finished download.

    Args:
        source: Path of the file as listed by the portal.
        target: Local file name to store the download under.
        base: URL that *source* and the returned ``path`` are joined onto.

    Raises:
        KeyError: If the JSON document has no ``path`` entry.
        Exception: Whatever the download raises is re-raised after the
            partial file has been removed.
    """
    sheet = urllib.parse.urljoin(base, source.lstrip('/'))
    path = load_json(sheet)['path']
    url = urllib.parse.urljoin(base, path)

    partial = '{}.part'.format(target)
    try:
        urllib.request.urlretrieve(url, filename=partial)
    except BaseException:
        # Also covers Ctrl-C: discard the incomplete data, then propagate.
        try:
            os.remove(partial)
        except FileNotFoundError:
            pass  # nothing had been written yet
        raise
    os.replace(partial, target)
# Characters that are rewritten or dropped when a title becomes a directory
# name: '.' turns into '_', the others are removed.
_DIRNAME_TABLE = str.maketrans({
    '.': '_',
    '"': None,
    '?': None,
    '&': None,
    '/': None,
    '|': None,
})


def _safe_dirname(title: str) -> str:
    """Return the part of *title* before the first ':' with unsafe characters fixed."""
    head, _, _ = title.partition(':')
    return head.translate(_DIRNAME_TABLE)


def _target_name(path: str, serial=None) -> str:
    """Return the local file name for *path*, tagged with *serial* if given.

    The serial number goes directly in front of the file extension.  Only
    the final path component is considered, so further dots in the name
    are left alone and a name without any extension still gets the serial
    appended.
    """
    basename = path.rpartition('/')[2]
    if serial is None:
        return basename
    stem, extension = os.path.splitext(basename)
    return '{}{}{}'.format(stem, serial, extension)


# Walk every teaser group of the audio-book overview page and download the
# files of each listed item into a directory named after the item's page.
audiobooks = get_page('hoerbuecher')
for group in audiobooks['teaserGroups']:
    for item in group['items']:
        page = get_page(item['navigation']['href'])
        content_type = page['contentType']
        print('', item['title'], sep='\n')
        print(content_type)

        dirname = _safe_dirname(page['title'])
        os.makedirs(dirname, exist_ok=True)

        # Podcast files get their serial number added to the name; without
        # it the download stopped after the first episode.
        numbered = content_type == 'podcast'
        for entry in page['files']:
            path = entry['path']
            serial = entry['serialNumber'] if numbered else None
            target = os.path.join(dirname, _target_name(path, serial))
            print(target)
            # Files that are already present are not downloaded again.
            if not os.path.exists(target):
                retrieve(path, target)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment