# contrequarte/iceportal.py

## iceportal.py
"""Download all available audio books from DB ICE Portal."""

import json
import os
import urllib.parse
import urllib.request

BASE = 'http://iceportal.de/api1/rs/'


def load_json(url: str, *, verbose: bool = True):
    """Fetch *url* and return its body decoded as JSON.

    When ``verbose`` is true (the default) the URL is printed to stdout
    before the request is made.
    """
    if verbose:
        print(url)
    with urllib.request.urlopen(url) as response:
        payload = response.read()
    # The response is already closed here; parsing works on the raw bytes.
    return json.loads(payload)


def get_page(href: str, *,
             base: str = urllib.parse.urljoin(BASE, 'page/')):
    """Return the parsed JSON document for the page at *href*.

    Leading slashes are removed from ``href`` before it is joined onto
    ``base``, so the reference is resolved below ``base`` instead of
    replacing its path.  The request URL is not printed.
    """
    relative = href.lstrip('/')
    page_url = urllib.parse.urljoin(base, relative)
    return load_json(page_url, verbose=False)


def retrieve(source, target, *,
             base: str = urllib.parse.urljoin(BASE, 'audiobooks/path/')) -> None:
    """Download the file described by *source* and store it at *target*.

    ``source`` (leading slashes stripped) is joined onto ``base`` and
    loaded as JSON; the ``'path'`` entry of that document is joined onto
    ``base`` again to obtain the URL that is actually downloaded.

    The data is first written to ``<target>.part`` and only moved to
    ``target`` once the transfer has finished.  Callers that skip files
    which already exist therefore never mistake an interrupted download
    for a complete one.

    Raises whatever ``load_json``, ``urlretrieve`` or ``os.replace``
    raise; the temporary file is removed before the error propagates.
    """
    sheet = urllib.parse.urljoin(base, source.lstrip('/'))
    path = load_json(sheet)['path']
    url = urllib.parse.urljoin(base, path)

    partial = os.fsdecode(target) + '.part'
    try:
        urllib.request.urlretrieve(url, filename=partial)
        os.replace(partial, target)
    except BaseException:
        # BaseException so that Ctrl-C also cleans up the partial file.
        if os.path.exists(partial):
            os.remove(partial)
        raise


def _safe_dirname(title):
    """Turn a page title into a directory name.

    Dots become underscores, the characters ``" ? & / |`` are dropped and
    everything from the first ``:`` onwards is cut off.  If nothing is
    left, ``'untitled'`` is used so that ``os.makedirs`` gets a usable path.
    """
    cleaned = title.translate(str.maketrans('.', '_', '"?&/|'))
    head = cleaned.partition(':')[0]
    return head or 'untitled'


def _target_name(path, serial, content_type):
    """Return the local file name for the remote *path*.

    For podcasts the serial number is inserted right before the file
    extension; other content types keep the remote file name unchanged.
    """
    filename = path.rpartition('/')[2]
    if content_type != 'podcast':
        return filename
    # Only the extension separator is touched, so additional dots in the
    # file name do not each receive a copy of the serial number.
    stem, ext = os.path.splitext(filename)
    return '{0}{1}{2}'.format(stem, serial, ext)


audiobooks = get_page('hoerbuecher')


# Walk every teaser group, fetch each item's page and download its files
# into a directory named after the page title.
for group in audiobooks['teaserGroups']:
    for item in group['items']:
        page = get_page(item['navigation']['href'])
        content_type = page['contentType']
        print('', item['title'], sep='\n')
        print(content_type)

        dirname = _safe_dirname(page['title'])
        os.makedirs(dirname, exist_ok=True)

        for entry in page['files']:
            # Podcast files get their serial number in the name so that
            # every episode ends up in its own file (presumably the remote
            # names alone are not unique -- confirm against the API).
            name = _target_name(entry['path'], entry['serialNumber'],
                                content_type)
            target = os.path.join(dirname, name)
            print(target)
            # Files already present from an earlier run are not fetched again.
            if not os.path.exists(target):
                retrieve(entry['path'], target)
	"""Download all available audio books from DB ICE Portal."""

	import json
	import os
	import urllib.parse
	import urllib.request

	BASE = 'http://iceportal.de/api1/rs/'


	def load_json(url: str, *, verbose: bool = True):
	    """Fetch *url* and return its body decoded as JSON.

	    When ``verbose`` is true (the default) the URL is printed to stdout
	    before the request is made.
	    """
	    if verbose:
	        print(url)
	    with urllib.request.urlopen(url) as f:
	        doc = json.load(f)
	    return doc


	def get_page(href: str, *,
	             base: str = urllib.parse.urljoin(BASE, 'page/')):
	    """Return the parsed JSON document for the page at *href*.

	    Leading slashes are removed from ``href`` before it is joined onto
	    ``base``, so the reference is resolved below ``base`` instead of
	    replacing its path.  The request URL is not printed.
	    """
	    url = urllib.parse.urljoin(base, href.lstrip('/'))
	    return load_json(url, verbose=False)


	def retrieve(source, target, *,
	             base: str = urllib.parse.urljoin(BASE, 'audiobooks/path/')) -> None:
	    """Download the file described by *source* and store it at *target*.

	    ``source`` (leading slashes stripped) is joined onto ``base`` and
	    loaded as JSON; the ``'path'`` entry of that document is joined onto
	    ``base`` again to obtain the URL that is actually downloaded.

	    The data is first written to ``<target>.part`` and only moved to
	    ``target`` once the transfer has finished, so an interrupted
	    download never leaves a truncated file under the final name.
	    The temporary file is removed before any error propagates.
	    """
	    sheet = urllib.parse.urljoin(base, source.lstrip('/'))
	    path = load_json(sheet)['path']
	    url = urllib.parse.urljoin(base, path)

	    partial = os.fsdecode(target) + '.part'
	    try:
	        urllib.request.urlretrieve(url, filename=partial)
	        os.replace(partial, target)
	    except BaseException:
	        # BaseException so that Ctrl-C also cleans up the partial file.
	        if os.path.exists(partial):
	            os.remove(partial)
	        raise


	def _safe_dirname(title):
	    """Turn a page title into a directory name.

	    Dots become underscores, the characters ``" ? & / |`` are dropped and
	    everything from the first ``:`` onwards is cut off.  If nothing is
	    left, ``'untitled'`` is used so that ``os.makedirs`` gets a usable path.
	    """
	    cleaned = title.translate(str.maketrans('.', '_', '"?&/|'))
	    head = cleaned.partition(':')[0]
	    return head or 'untitled'


	def _target_name(path, serial, content_type):
	    """Return the local file name for the remote *path*.

	    For podcasts the serial number is inserted right before the file
	    extension; other content types keep the remote file name unchanged.
	    """
	    filename = path.rpartition('/')[2]
	    if content_type != 'podcast':
	        return filename
	    # Only the extension separator is touched, so additional dots in the
	    # file name do not each receive a copy of the serial number.
	    stem, ext = os.path.splitext(filename)
	    return '{0}{1}{2}'.format(stem, serial, ext)


	audiobooks = get_page('hoerbuecher')


	# Walk every teaser group, fetch each item's page and download its files
	# into a directory named after the page title.
	for group in audiobooks['teaserGroups']:
	    for item in group['items']:
	        page = get_page(item['navigation']['href'])
	        content_type = page['contentType']
	        print('', item['title'], sep='\n')
	        print(content_type)

	        dirname = _safe_dirname(page['title'])
	        os.makedirs(dirname, exist_ok=True)

	        for entry in page['files']:
	            # Podcast files get their serial number in the name so that
	            # every episode ends up in its own file (presumably the remote
	            # names alone are not unique -- confirm against the API).
	            name = _target_name(entry['path'], entry['serialNumber'],
	                                content_type)
	            target = os.path.join(dirname, name)
	            print(target)
	            # Files already present from an earlier run are not fetched again.
	            if not os.path.exists(target):
	                retrieve(entry['path'], target)