# ayyybe/reconstruct-epub.py

## reconstruct-epub.py
#!/usr/bin/env python3
import os
import shutil
import zipfile
import tempfile
try:
    import readline # input gets limited to 1024 characters for some reason if this isn't imported (macos only?)
except ImportError:
    pass
import urllib.request
import xml.etree.ElementTree as ET
from threading import Thread

# EPUB spec:
# https://www.w3.org/publishing/epub3/epub-spec.html

# this script is not fully spec compliant and does not support epubs with multiple renditions/root files, and also doesn't check any optional META-INF stuff (encryption.xml, manifest.xml, metadata.xml, rights.xml, signatures.xml)
# that being said, i have yet to find any epubs that actually use any of these features

def dl(path):
	"""Download one EPUB resource into the staging directory.

	Reads the module-level names ``root`` (base URL of the unpacked EPUB) and
	``staging`` (local directory that mirrors the EPUB layout).

	Args:
		path: resource path relative to the EPUB root, using '/' separators.

	Returns:
		The local path the resource was written to (``staging`` joined with
		``path``).

	Raises:
		ValueError: if ``path`` would resolve to a location outside the
			staging directory (e.g. it starts with '/' or climbs out via '..').
		Errors raised by ``urllib.request.urlretrieve`` propagate unchanged.
	"""
	print('Downloading ' + path)
	# URLs always use '/', so join by hand instead of os.path.join, which would
	# insert the platform separator (a backslash on Windows).
	url = root.rstrip('/') + '/' + path
	staging_path = os.path.join(staging, path)

	# The path ultimately comes from a remote manifest, so make sure it cannot
	# make us write outside of the staging directory.
	staging_root = os.path.abspath(staging)
	resolved = os.path.abspath(staging_path)
	if resolved == staging_root or os.path.commonpath([staging_root, resolved]) != staging_root:
		raise ValueError('Refusing to write outside of the staging directory: ' + path)

	os.makedirs(os.path.dirname(staging_path), exist_ok=True)
	urllib.request.urlretrieve(url, staging_path)
	return staging_path

if __name__ == '__main__':
	# Interactive entry point: prompts for the EPUB root URL, an optional cookie
	# and the output filename, mirrors the remote EPUB's files into a temporary
	# staging directory, then packs them into the destination file.
	# NOTE: `root` and `staging` are module-level names that dl() reads.
	from concurrent.futures import ThreadPoolExecutor

	root = input('EPUB Root URL: ') or 'https://education.wiley.com/content/Hughes_Hallett_Calculus_7e/ebook/epub/9781119320494/'
	cookie = input('Cookie: ') or ''
	dest = os.path.abspath(input('Destination filename: ') or 'book.epub')
	staging = tempfile.mkdtemp()

	# try/finally so the staging directory is removed even if a step fails
	try:
		# install a global opener so urlretrieve() in dl() sends the cookie
		opener = urllib.request.build_opener()
		opener.addheaders = [('cookie', cookie)]
		urllib.request.install_opener(opener)

		print('\n====================\n')

		print('EPUB Root URL: ' + root)
		print('Cookie: ' + cookie)
		print('Destination filename: ' + dest)
		print('Staging directory: ' + staging)

		input('\nPress any key to begin download')
		print('')

		# required mimetype file
		dl('mimetype')

		# required container.xml file, also contains path to rendition/rootfile
		container = ET.parse(dl('META-INF/container.xml')).getroot()
		rootfile_path = None
		for el in container.iter():
			# compare local names so the lookup works with or without an XML
			# namespace and does not depend on the element's position
			if el.tag.rsplit('}', 1)[-1] == 'rootfile' and el.get('full-path'):
				rootfile_path = el.get('full-path')
				break
		if rootfile_path is None:
			raise SystemExit('No rootfile entry found in META-INF/container.xml')
		root_dir = os.path.dirname(rootfile_path)
		print('Found rootfile: ' + rootfile_path)

		# download rootfile & collect all linked resources
		rootfile = ET.parse(dl(rootfile_path)).getroot()
		resource_paths = []
		for el in rootfile.iter():
			if el.tag.rsplit('}', 1)[-1] != 'item':
				continue
			href = el.get('href')
			if not href:
				continue
			if '://' in href:
				# absolute URLs do not live under the EPUB root, so they cannot be staged
				print('Skipping remote resource ' + href)
				continue
			# hrefs are relative to the rootfile's directory; join with '/'
			# because these are URL paths, not local filesystem paths
			resource_paths.append(root_dir + '/' + href if root_dir else href)

		# a bounded pool instead of one thread per manifest item
		with ThreadPoolExecutor(max_workers=16) as pool:
			pending = [(resource, pool.submit(dl, resource)) for resource in resource_paths]

		# leaving the pool's `with` block waits for every download; report the
		# failures so an incomplete EPUB does not go unnoticed
		failures = 0
		for resource, future in pending:
			error = future.exception()
			if error is not None:
				failures += 1
				print('Failed to download ' + resource + ': ' + str(error))
		if failures:
			print('WARNING: ' + str(failures) + ' resource(s) could not be downloaded, the EPUB will be incomplete')

		# zip everything up
		print('Creating epub...')
		os.makedirs(os.path.dirname(dest), exist_ok=True)
		with zipfile.ZipFile(dest, 'w', zipfile.ZIP_DEFLATED) as epub:
			# the mimetype file has to be the first entry and stored uncompressed
			epub.write(os.path.join(staging, 'mimetype'), 'mimetype', compress_type=zipfile.ZIP_STORED)
			for folder, subdirs, filenames in os.walk(staging):
				subdirs.sort()
				for filename in sorted(filenames):
					full_path = os.path.join(folder, filename)
					arcname = os.path.relpath(full_path, staging)
					if arcname != 'mimetype':
						epub.write(full_path, arcname)
	finally:
		# delete staging dir
		print('Cleaning up...')
		shutil.rmtree(staging)

	print('\nDone! EPUB has been reconstructed at ' + dest)
	#!/usr/bin/env python3
	import os
	import shutil
	import zipfile
	import tempfile
	try:
	import readline # input gets limited to 1024 characters for some reason if this isn't imported (macos only?)
	except ImportError:
	pass
	import urllib.request
	import xml.etree.ElementTree as ET
	from threading import Thread

	# EPUB spec:
	# https://www.w3.org/publishing/epub3/epub-spec.html

	# this script is not fully spec compliant and does not support epubs with multiple renditions/root files, and also doesn't check any optional META-INF stuff (encryption.xml, manifest.xml, metadata.xml, rights.xml, signatures.xml)
	# that being said, i have yet to find any epubs that actually use any of these features

	def dl(path):
		"""Download one EPUB resource into the staging directory.

		Reads the module-level names ``root`` (base URL of the unpacked EPUB)
		and ``staging`` (local directory that mirrors the EPUB layout).

		Args:
			path: resource path relative to the EPUB root, using '/' separators.

		Returns:
			The local path the resource was written to (``staging`` joined
			with ``path``).

		Raises:
			ValueError: if ``path`` would resolve to a location outside the
				staging directory (e.g. it starts with '/' or climbs out via '..').
			Errors raised by ``urllib.request.urlretrieve`` propagate unchanged.
		"""
		print('Downloading ' + path)
		# URLs always use '/', so join by hand instead of os.path.join, which
		# would insert the platform separator (a backslash on Windows).
		url = root.rstrip('/') + '/' + path
		staging_path = os.path.join(staging, path)

		# The path ultimately comes from a remote manifest, so make sure it
		# cannot make us write outside of the staging directory.
		staging_root = os.path.abspath(staging)
		resolved = os.path.abspath(staging_path)
		if resolved == staging_root or os.path.commonpath([staging_root, resolved]) != staging_root:
			raise ValueError('Refusing to write outside of the staging directory: ' + path)

		os.makedirs(os.path.dirname(staging_path), exist_ok=True)
		urllib.request.urlretrieve(url, staging_path)
		return staging_path

	if __name__ == '__main__':
		# Interactive entry point: prompts for the EPUB root URL, an optional
		# cookie and the output filename, mirrors the remote EPUB's files into a
		# temporary staging directory, then packs them into the destination file.
		# NOTE: `root` and `staging` are module-level names that dl() reads.
		from concurrent.futures import ThreadPoolExecutor

		root = input('EPUB Root URL: ') or 'https://education.wiley.com/content/Hughes_Hallett_Calculus_7e/ebook/epub/9781119320494/'
		cookie = input('Cookie: ') or ''
		dest = os.path.abspath(input('Destination filename: ') or 'book.epub')
		staging = tempfile.mkdtemp()

		# try/finally so the staging directory is removed even if a step fails
		try:
			# install a global opener so urlretrieve() in dl() sends the cookie
			opener = urllib.request.build_opener()
			opener.addheaders = [('cookie', cookie)]
			urllib.request.install_opener(opener)

			print('\n====================\n')

			print('EPUB Root URL: ' + root)
			print('Cookie: ' + cookie)
			print('Destination filename: ' + dest)
			print('Staging directory: ' + staging)

			input('\nPress any key to begin download')
			print('')

			# required mimetype file
			dl('mimetype')

			# required container.xml file, also contains path to rendition/rootfile
			container = ET.parse(dl('META-INF/container.xml')).getroot()
			rootfile_path = None
			for el in container.iter():
				# compare local names so the lookup works with or without an XML
				# namespace and does not depend on the element's position
				if el.tag.rsplit('}', 1)[-1] == 'rootfile' and el.get('full-path'):
					rootfile_path = el.get('full-path')
					break
			if rootfile_path is None:
				raise SystemExit('No rootfile entry found in META-INF/container.xml')
			root_dir = os.path.dirname(rootfile_path)
			print('Found rootfile: ' + rootfile_path)

			# download rootfile & collect all linked resources
			rootfile = ET.parse(dl(rootfile_path)).getroot()
			resource_paths = []
			for el in rootfile.iter():
				if el.tag.rsplit('}', 1)[-1] != 'item':
					continue
				href = el.get('href')
				if not href:
					continue
				if '://' in href:
					# absolute URLs do not live under the EPUB root, so they cannot be staged
					print('Skipping remote resource ' + href)
					continue
				# hrefs are relative to the rootfile's directory; join with '/'
				# because these are URL paths, not local filesystem paths
				resource_paths.append(root_dir + '/' + href if root_dir else href)

			# a bounded pool instead of one thread per manifest item
			with ThreadPoolExecutor(max_workers=16) as pool:
				pending = [(resource, pool.submit(dl, resource)) for resource in resource_paths]

			# leaving the pool's `with` block waits for every download; report
			# the failures so an incomplete EPUB does not go unnoticed
			failures = 0
			for resource, future in pending:
				error = future.exception()
				if error is not None:
					failures += 1
					print('Failed to download ' + resource + ': ' + str(error))
			if failures:
				print('WARNING: ' + str(failures) + ' resource(s) could not be downloaded, the EPUB will be incomplete')

			# zip everything up
			print('Creating epub...')
			os.makedirs(os.path.dirname(dest), exist_ok=True)
			with zipfile.ZipFile(dest, 'w', zipfile.ZIP_DEFLATED) as epub:
				# the mimetype file has to be the first entry and stored uncompressed
				epub.write(os.path.join(staging, 'mimetype'), 'mimetype', compress_type=zipfile.ZIP_STORED)
				for folder, subdirs, filenames in os.walk(staging):
					subdirs.sort()
					for filename in sorted(filenames):
						full_path = os.path.join(folder, filename)
						arcname = os.path.relpath(full_path, staging)
						if arcname != 'mimetype':
							epub.write(full_path, arcname)
		finally:
			# delete staging dir
			print('Cleaning up...')
			shutil.rmtree(staging)

		print('\nDone! EPUB has been reconstructed at ' + dest)