modeco80/ia_collection_dl.py

## ia_collection_dl.py
#!/usr/bin/env python3

# Script to download a collection from archive.org using aria2c.
# Useful when torrents aren't an option, but it also works on collections
# that provide them.

import xml.etree.ElementTree as ElementTree
import tempfile
import requests
import subprocess
import argparse
import urllib.parse

class IACollection():
    """List the files of an archive.org collection and download them with aria2c.

    Typical use: construct with the collection identifier, call
    PopulateFileList() to fetch the file listing, then Download().
    """

    def __init__(self, collection: str):
        # Each entry is a dict with 'url' (download URL) and 'path' (the name
        # from the file listing, reused as aria2c's output path).
        self.files = []
        self.collection = collection

    def IADownloadUrl(self, path):
        """Return the archive.org download URL for `path` in this collection.

        `path` is percent-encoded with urllib.parse.quote, which keeps '/'
        as-is so nested paths stay nested.
        """
        return f'https://archive.org/download/{self.collection}/{urllib.parse.quote(path)}'

    def _AddFilesFromXml(self, xmlData):
        """Append one entry to self.files per named <file> child of the XML root.

        `xmlData` may be bytes or str. Children with a different tag, or
        <file> elements without a 'name' attribute, are skipped.
        """
        xmlRoot = ElementTree.fromstring(xmlData)
        for xmlChild in xmlRoot:
            if xmlChild.tag != 'file':
                continue
            name = xmlChild.attrib.get('name')
            if not name:
                continue
            self.files.append({
                'url': self.IADownloadUrl(name),
                'path': name
            })

    def PopulateFileList(self):
        """Fetch `<collection>_files.xml` and add its files to self.files.

        Raises requests.HTTPError when the listing cannot be fetched (for
        example an unknown collection), instead of trying to parse an error
        page as XML.
        """
        xmlRes = requests.get(self.IADownloadUrl(f'{self.collection}_files.xml'), timeout=60)
        xmlRes.raise_for_status()
        # Hand the parser the raw bytes so it honours the encoding declared in
        # the XML itself rather than the charset requests guessed for .text.
        self._AddFilesFromXml(xmlRes.content)

    def Download(self):
        """Write an aria2c input list for self.files and run aria2c on it.

        Failures to start aria2c, or a non-zero exit status, are reported with
        a message and not re-raised. Other exceptions (such as
        KeyboardInterrupt) propagate.
        """
        with tempfile.NamedTemporaryFile() as tmpList:
            # aria2c input-file format: a URI line followed by indented
            # option lines; 'out' sets the output file name.
            for file in self.files:
                tmpList.write(f'{file["url"]}\n out={file["path"]}\n'.encode())
            # aria2c reads the file by name, so the data must be on disk.
            tmpList.flush()
            # "--follow-torrent=false" is intended to stop aria2 from using the
            # download list to start downloading from the torrent, and
            # "--continue=true" allows continuation of partial downloads.
            aria2Args = [
                'aria2c',
                '-x', '6',
                '--follow-torrent=false',
                '--continue=true',
                f'--input-file={tmpList.name}',
            ]
            aria2CommandLine = ' '.join(aria2Args)
            print(f'Executing aria2c with "{aria2CommandLine}"')
            try:
                # Argument list, no shell: the temp path is passed verbatim
                # even if it contains spaces or shell metacharacters.
                subprocess.run(aria2Args, check=True)
            except (OSError, subprocess.CalledProcessError):
                print('aria2c failed to run :(')

def main():
    """Read --collection from the command line, then list and download it."""
    argParser = argparse.ArgumentParser(
        description='Download a Internet Archive collection using aria2c.',
    )
    argParser.add_argument(
        '--collection',
        dest='collection',
        required=True,
        type=str,
        help='Collection to download from. Duh!',
    )
    collectionName = argParser.parse_args().collection

    # Fetch the file listing first; Download() works from the populated list.
    downloader = IACollection(collectionName)
    downloader.PopulateFileList()
    downloader.Download()

# Run the downloader only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
	#!/usr/bin/env python3

	# Script to download a collection from archive.org using aria2c.
	# Useful when torrents aren't an option, but it also works on collections
	# that provide them.

	import xml.etree.ElementTree as ElementTree
	import tempfile
	import requests
	import subprocess
	import argparse
	import urllib.parse

	class IACollection():
	    """List the files of an archive.org collection and download them with aria2c.

	    Typical use: construct with the collection identifier, call
	    PopulateFileList() to fetch the file listing, then Download().
	    """

	    def __init__(self, collection: str):
	        # Each entry is a dict with 'url' (download URL) and 'path' (the name
	        # from the file listing, reused as aria2c's output path).
	        self.files = []
	        self.collection = collection

	    def IADownloadUrl(self, path):
	        """Return the archive.org download URL for `path` in this collection.

	        `path` is percent-encoded with urllib.parse.quote, which keeps '/'
	        as-is so nested paths stay nested.
	        """
	        return f'https://archive.org/download/{self.collection}/{urllib.parse.quote(path)}'

	    def _AddFilesFromXml(self, xmlData):
	        """Append one entry to self.files per named <file> child of the XML root.

	        `xmlData` may be bytes or str. Children with a different tag, or
	        <file> elements without a 'name' attribute, are skipped.
	        """
	        xmlRoot = ElementTree.fromstring(xmlData)
	        for xmlChild in xmlRoot:
	            if xmlChild.tag != 'file':
	                continue
	            name = xmlChild.attrib.get('name')
	            if not name:
	                continue
	            self.files.append({
	                'url': self.IADownloadUrl(name),
	                'path': name
	            })

	    def PopulateFileList(self):
	        """Fetch `<collection>_files.xml` and add its files to self.files.

	        Raises requests.HTTPError when the listing cannot be fetched (for
	        example an unknown collection), instead of trying to parse an error
	        page as XML.
	        """
	        xmlRes = requests.get(self.IADownloadUrl(f'{self.collection}_files.xml'), timeout=60)
	        xmlRes.raise_for_status()
	        # Hand the parser the raw bytes so it honours the encoding declared in
	        # the XML itself rather than the charset requests guessed for .text.
	        self._AddFilesFromXml(xmlRes.content)

	    def Download(self):
	        """Write an aria2c input list for self.files and run aria2c on it.

	        Failures to start aria2c, or a non-zero exit status, are reported with
	        a message and not re-raised. Other exceptions (such as
	        KeyboardInterrupt) propagate.
	        """
	        with tempfile.NamedTemporaryFile() as tmpList:
	            # aria2c input-file format: a URI line followed by indented
	            # option lines; 'out' sets the output file name.
	            for file in self.files:
	                tmpList.write(f'{file["url"]}\n out={file["path"]}\n'.encode())
	            # aria2c reads the file by name, so the data must be on disk.
	            tmpList.flush()
	            # "--follow-torrent=false" is intended to stop aria2 from using the
	            # download list to start downloading from the torrent, and
	            # "--continue=true" allows continuation of partial downloads.
	            aria2Args = [
	                'aria2c',
	                '-x', '6',
	                '--follow-torrent=false',
	                '--continue=true',
	                f'--input-file={tmpList.name}',
	            ]
	            aria2CommandLine = ' '.join(aria2Args)
	            print(f'Executing aria2c with "{aria2CommandLine}"')
	            try:
	                # Argument list, no shell: the temp path is passed verbatim
	                # even if it contains spaces or shell metacharacters.
	                subprocess.run(aria2Args, check=True)
	            except (OSError, subprocess.CalledProcessError):
	                print('aria2c failed to run :(')

	def main():
	    """Read --collection from the command line, then list and download it."""
	    parser = argparse.ArgumentParser(description='Download a Internet Archive collection using aria2c.')
	    parser.add_argument('--collection', dest='collection', required=True, type=str, help='Collection to download from. Duh!')
	    args = parser.parse_args()
	    # Fetch the file listing first; Download() works from the populated list.
	    collection = IACollection(args.collection)
	    collection.PopulateFileList()
	    collection.Download()

	# Run the downloader only when this file is executed as a script, not on import.
	if __name__ == "__main__":
	    main()