ceres-c/mlol_audiobook_downloader.py

## mlol_audiobook_downloader.py
#! /bin/python3

import os
from urllib.parse import unquote

import httpx
import lxml.html

# Script configuration, read by the functions and the download loop below.
unit_id = 100009140 # Numeric unit id, can be found in URL as "id" parameter
# Session cookie sent with every request. The value is empty here;
# presumably a valid session value has to be filled in before running - TODO confirm
cookies = {'ASP.NET_SessionId': ''} # Alphanumeric cookie
# Directory containing this script; the 'downloaded' folder lives inside it
current_path = os.path.dirname(__file__)

def create_download_folder():
	"""Create the 'downloaded' folder inside ``current_path`` if it is missing.

	An already existing folder is left untouched.

	Raises:
		FileExistsError: If 'downloaded' exists but is not a directory.
		OSError: If the folder cannot be created.
	"""
	# A single call with exist_ok=True replaces the exists()/mkdir() pair,
	# which could fail if the folder appeared between the check and the creation.
	os.makedirs(os.path.join(current_path, 'downloaded'), exist_ok=True)

def get_chapters(cookies, unit_id):
	"""Fetch the player page of a unit and return the URLs of its chapters.

	Args:
		cookies: Cookies sent with the request.
		unit_id: Numeric unit id, sent as the ``unid`` query parameter.

	Returns:
		A list with the ``data-src`` attribute of every chapter link found
		in the player page.

	Raises:
		Exception: If the server answers with an HTTP error status, or if
			no chapter link is found in the page.
	"""
	url = 'https://bergamo.medialibrary.it/media/mmPlayer.aspx'
	params = (
		('unid', unit_id),
		('type', '720'), # Can't be modified, otherwise will get some error about javascript not being supported
		('sbox', 1), # Same as above
	)
	res = httpx.get(url, cookies=cookies, params=params)
	# Compare the status code explicitly: httpx.Response does not define a
	# truth value, so `not res` never flagged a failed request.
	if res.status_code >= 400:
		raise Exception(f"Couldn't download chapter data, got HTTP error {res.status_code}")
	root = lxml.html.fromstring(res.text)
	chapters = root.xpath('//div[@class="trackscontainer hidden"]/ol/*/a')

	if not chapters:
		raise Exception('Did not find any chapter. Is the unit id correct?')

	return [chapter.get('data-src') for chapter in chapters]

def download_chapter(cookies, index, aspx_url):
	"""Download a single chapter and write it into the 'downloaded' folder.

	The local file is named ``'<index> - <name>'``, where ``<name>`` is the
	URL-decoded text after the last '/' of the response URL.

	Args:
		cookies: Cookies sent with the request.
		index: Chapter number, used as prefix of the local file name.
		aspx_url: Chapter URL, relative to the site's ``/media/`` path.

	Raises:
		Exception: If the server answers with an HTTP error status.
		OSError: If the local file cannot be written.
	"""
	url = f'https://bergamo.medialibrary.it/media/{aspx_url}'
	res = httpx.get(url, cookies=cookies) # Suboptimal download in ram, but can safely assume (TM) files are small enough
	# Without this check the body of an error response would be silently
	# saved to disk as if it were a chapter.
	if res.status_code >= 400:
		raise Exception(f"Couldn't download chapter {index}, got HTTP error {res.status_code}")

	# NOTE(review): taking the name from res.url presumably relies on the
	# request being redirected to the real file - confirm that the installed
	# httpx version follows redirects by default.
	filename = unquote(str(res.url).rsplit('/', 1)[-1])
	safe_name = filename.replace('..', '').replace('/', '').replace('\\', '') # Poor man's sanitization
	local_path = os.path.join(current_path, 'downloaded', '{} - {}'.format(index, safe_name))

	with open(local_path, 'wb') as out_file:
		out_file.write(res.content)

# Script body: fetch the chapter list, make sure the output folder exists,
# then download the chapters one by one, numbering them from 1.
chapters = get_chapters(cookies, unit_id)
create_download_folder()
total = len(chapters)
for number, chapter_url in enumerate(chapters, start=1):
	# '\r' returns to the start of the line so the counter is overwritten in place
	print(f'Downloading file {number}/{total}', end='\r')
	download_chapter(cookies, number, chapter_url)
	#! /bin/python3

	import os
	from urllib.parse import unquote

	import httpx
	import lxml.html

	# Script configuration, read by the functions and the download loop below.
	unit_id = 100009140 # Numeric unit id, can be found in URL as "id" parameter
	# Session cookie sent with every request. The value is empty here;
	# presumably a valid session value has to be filled in before running - TODO confirm
	cookies = {'ASP.NET_SessionId': ''} # Alphanumeric cookie
	# Directory containing this script; the 'downloaded' folder lives inside it
	current_path = os.path.dirname(__file__)

	def create_download_folder():
		"""Create the 'downloaded' folder inside ``current_path`` if it is missing.

		An already existing folder is left untouched.

		Raises:
			FileExistsError: If 'downloaded' exists but is not a directory.
			OSError: If the folder cannot be created.
		"""
		# A single call with exist_ok=True replaces the exists()/mkdir() pair,
		# which could fail if the folder appeared between the check and the creation.
		os.makedirs(os.path.join(current_path, 'downloaded'), exist_ok=True)

	def get_chapters(cookies, unit_id):
		"""Fetch the player page of a unit and return the URLs of its chapters.

		Args:
			cookies: Cookies sent with the request.
			unit_id: Numeric unit id, sent as the ``unid`` query parameter.

		Returns:
			A list with the ``data-src`` attribute of every chapter link found
			in the player page.

		Raises:
			Exception: If the server answers with an HTTP error status, or if
				no chapter link is found in the page.
		"""
		url = 'https://bergamo.medialibrary.it/media/mmPlayer.aspx'
		params = (
			('unid', unit_id),
			('type', '720'), # Can't be modified, otherwise will get some error about javascript not being supported
			('sbox', 1), # Same as above
		)
		res = httpx.get(url, cookies=cookies, params=params)
		# Compare the status code explicitly: httpx.Response does not define a
		# truth value, so `not res` never flagged a failed request.
		if res.status_code >= 400:
			raise Exception(f"Couldn't download chapter data, got HTTP error {res.status_code}")
		root = lxml.html.fromstring(res.text)
		chapters = root.xpath('//div[@class="trackscontainer hidden"]/ol/*/a')

		if not chapters:
			raise Exception('Did not find any chapter. Is the unit id correct?')

		return [chapter.get('data-src') for chapter in chapters]

	def download_chapter(cookies, index, aspx_url):
		"""Download a single chapter and write it into the 'downloaded' folder.

		The local file is named ``'<index> - <name>'``, where ``<name>`` is the
		URL-decoded text after the last '/' of the response URL.

		Args:
			cookies: Cookies sent with the request.
			index: Chapter number, used as prefix of the local file name.
			aspx_url: Chapter URL, relative to the site's ``/media/`` path.

		Raises:
			Exception: If the server answers with an HTTP error status.
			OSError: If the local file cannot be written.
		"""
		url = f'https://bergamo.medialibrary.it/media/{aspx_url}'
		res = httpx.get(url, cookies=cookies) # Suboptimal download in ram, but can safely assume (TM) files are small enough
		# Without this check the body of an error response would be silently
		# saved to disk as if it were a chapter.
		if res.status_code >= 400:
			raise Exception(f"Couldn't download chapter {index}, got HTTP error {res.status_code}")

		# NOTE(review): taking the name from res.url presumably relies on the
		# request being redirected to the real file - confirm that the installed
		# httpx version follows redirects by default.
		filename = unquote(str(res.url).rsplit('/', 1)[-1])
		safe_name = filename.replace('..', '').replace('/', '').replace('\\', '') # Poor man's sanitization
		local_path = os.path.join(current_path, 'downloaded', '{} - {}'.format(index, safe_name))

		with open(local_path, 'wb') as out_file:
			out_file.write(res.content)

	# Script body: fetch the chapter list, make sure the output folder exists,
	# then download the chapters one by one, numbering them from 1.
	chapters = get_chapters(cookies, unit_id)
	create_download_folder()
	total = len(chapters)
	for number, chapter_url in enumerate(chapters, start=1):
		# '\r' returns to the start of the line so the counter is overwritten in place
		print(f'Downloading file {number}/{total}', end='\r')
		download_chapter(cookies, number, chapter_url)