Minimalistic Python3 command line download manager for uploaded.net files, uses asyncio for concurrent download tasks. Supports DLCs
#!/usr/bin/env python3 | |
import argparse | |
import asyncio | |
import requests | |
import os | |
import sys | |
import re | |
import time | |
import signal | |
import configparser | |
import subprocess | |
import tempfile | |
home = os.getenv('HOME') | |
config = configparser.ConfigParser() | |
config.read(os.path.join(home, '.downer.conf')) | |
try: | |
login_data = { | |
'id': config['UPLOADED']['id'], | |
'pw': config['UPLOADED']['pw'], | |
} | |
except KeyError: | |
login_data = None | |
try: | |
download_dir = config['UPLOADED']['dir'] | |
except KeyError: | |
download_dir = os.path.join(home, 'Downloads') | |
colors = { | |
'resume': '\033[93m', | |
'working': '\033[94m', | |
'error': '\033[91m', | |
'done': '\033[92m', | |
'new': '\033[93m', | |
'end': '\033[0m', | |
} | |
progress = {} | |
chunk_size = 4096 | |
running = 0 | |
workers = 1 | |
signal.signal(signal.SIGINT, lambda *_: sys.exit(0)) # die with style... not 100% working yet.. investigate | |
session = requests.Session() | |
url_pattern = r'https?://(uploaded\.net/file|ul\.to)/.+' | |
downloads = [] | |
def current_millis(): | |
return int(round(time.time() * 1000)) | |
def human_readable_size(num, suffix='B'): | |
for unit in ('', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi'): | |
if abs(num) < 1024.0: | |
return "%3.1f%s%s" % (num, unit, suffix) | |
num /= 1024.0 | |
return "%.1f%s%s" % (num, 'Yi', suffix) | |
def login(): | |
"""start a session and login using the login credentials""" | |
r = session.post('https://uploaded.net/io/login', data=login_data, headers={'Referer': 'https://uploaded.net/'}) | |
if r.text != '{loc:"me"}': | |
exit('Invalid login') | |
def resolve_dlc(dlc): | |
"""resolve all urls for a dlc container""" | |
r = requests.post('http://dcrypt.it/decrypt/paste', data={'content': dlc}) | |
json = r.json() | |
if 'success' not in json: | |
exit('error resolving DLC') | |
return [link for link in r.json()['success']['links'] if re.match(url_pattern, link)] | |
def resolve_link(in_url): | |
"""get the real download url for the public link""" | |
return in_url # FIXME not needed anymore, allow_redirects fixed it | |
time.sleep(0.1) | |
r = session.get(in_url, allow_redirects=True) | |
try: | |
out_url = re.findall('<form method="post" action="(.+?)"', r.text)[0] | |
print('{} -> {}'.format(in_url, out_url)) | |
except IndexError: | |
exit('error resolving link: {}'.format(in_url)) | |
return out_url | |
def resolve_file_info(url): | |
"""get the file name and content length for the download""" | |
r = session.head(url, allow_redirects=True) | |
# print(r.headers) | |
file_name = re.findall('filename="([^"]+)"', r.headers['content-disposition'])[0] | |
file_length_total = int(r.headers['content-length']) | |
return file_name, file_length_total | |
def resolve_uploaded_folder(url): | |
print('resolving uploaded folder url {}'.format(url)) | |
r = session.get(url, allow_redirects=True) | |
urls = re.findall('href="(file/[^""]+)"', r.text) | |
return ['https://uploaded.net/{}'.format(url) for url in urls] | |
def resolve_linkcrypt(url): | |
print('resolving linkcrypt url {}'.format(url)) | |
with tempfile.NamedTemporaryFile(delete=False) as f: | |
f.write(bytes(''' | |
var page = require('webpage').create(); | |
page.open('{}', function () {{ | |
console.log(page.content); | |
phantom.exit(); | |
}}); | |
'''.format(url), 'UTF-8')) | |
result = subprocess.check_output(['phantomjs', f.name]) | |
os.unlink(f.name) | |
matches = re.search('<a href="(http://linkcrypt.ws/container/[^"]+)" target="_blank" alt="Click">', result.decode()) | |
if matches: | |
dlc_url = matches.groups()[0] | |
r = requests.get(dlc_url, allow_redirects=True) | |
return resolve_dlc(r.text) | |
@asyncio.coroutine | |
def read_source(download): | |
try: | |
yield from _read_source(download) | |
except: | |
sys.exit(0) | |
@asyncio.coroutine | |
def _read_source(download): | |
"""download a single download""" | |
global running | |
running += 1 | |
r = session.post(download['real_url'], headers=download['headers'], stream=True) | |
download['status'] = 'working' | |
# yield chunks of data | |
with open(download['file_path'], download['file_mode']) as fp: | |
for chunk in r.iter_content(chunk_size): | |
fp.write(chunk) | |
download['progress'] += len(chunk) | |
yield | |
if download['progress'] == download['file_length_total']: | |
download['status'] = 'done' | |
else: | |
download['status'] = 'error' # resume? | |
running -= 1 | |
# start a new download worker | |
if running < workers and len(downloads) > 0: | |
queue_next_download() | |
@asyncio.coroutine | |
def print_progress(): | |
"""clear the screen and print out the progress for every download""" | |
last_millis = current_millis() | |
last_total_progress = 0 | |
while True: | |
all_done = True | |
os.system('clear') | |
downloads_print = [] | |
total_length = 0 | |
total_progress = 0 | |
for download in downloads: | |
total_length += download['file_length_total'] | |
total_progress += download['progress'] | |
percent = download['progress'] / download['file_length_total'] | |
percent_str = '{:.2%}'.format(percent) | |
downloads_print.append('{color}{percent:>7} of {size:>9} {file_name} {end}'.format( | |
percent=percent_str, file_name=download['file_name'], color=colors[download['status']], size=human_readable_size(download['file_length_total']), end=colors['end'])) | |
if download['status'] == 'working': | |
all_done = False # at least one download is not finished | |
now_millis = current_millis() | |
diff_millis = now_millis - last_millis | |
diff_progress = total_progress - last_total_progress | |
last_millis = now_millis | |
last_total_progress = total_progress | |
rate = 1000 * diff_progress / diff_millis | |
rate = human_readable_size(rate) | |
total_percent = total_progress / total_length | |
print('Downloading {num_downloads} files to {download_dir} ({num_workers} workers) progress: {percent} of {size} ({rate}/s)'.format( | |
num_downloads=len(downloads), download_dir=download_dir, num_workers=workers, percent='{:.2%}'.format(total_percent), size=human_readable_size(total_length), rate=rate)) | |
print('\n'.join(downloads_print)) | |
if all_done is True: | |
print('all done') | |
exit() | |
yield from asyncio.sleep(1) | |
def queue_next_download(): | |
"""queue the next download""" | |
try: | |
while True: # skip finished downloads | |
download = next(downloads_gen) | |
if download['status'] in ('new', 'resume'): | |
asyncio.async(read_source(download)) | |
return | |
except StopIteration: # no more downloads left | |
pass | |
def add_download(url, dlc=None): | |
"""add a public uploaded url to the list of downloads | |
resolve the real url, file size... | |
""" | |
real_url = resolve_link(url) | |
print(real_url) | |
file_name, file_length_total = resolve_file_info(real_url) | |
file_path = os.path.join(download_dir, file_name) | |
headers = { | |
'User-agent': 'Mozilla/5.0', | |
} | |
try: | |
# resume | |
downloaded_size = os.path.getsize(file_path) | |
status = 'resume' | |
# file is already complete | |
if downloaded_size >= file_length_total: | |
status = 'done' | |
headers['Range'] = 'bytes={}-'.format(downloaded_size) | |
file_mode = 'ab' | |
except FileNotFoundError: | |
# start a new download | |
status = 'new' | |
downloaded_size = 0 | |
file_mode = 'wb' | |
downloads.append({ | |
'url': url, | |
'dlc': dlc, | |
'real_url': real_url, | |
'progress': downloaded_size, | |
'file_name': file_name, | |
'file_length_total': file_length_total, | |
'file_path': file_path, | |
'headers': headers, | |
'downloaded_size': downloaded_size, | |
'file_mode': file_mode, | |
'status': status, | |
}) | |
if __name__ == "__main__": | |
# create the file .downer.conf in your home dir: | |
''' | |
[UPLOADED] | |
id = XXXXXXX | |
pw = XXXXXX | |
dir = /home/XXXXX/Downloads | |
''' | |
parser = argparse.ArgumentParser(description='uploaded') | |
parser.add_argument('--workers', '-w', help='number of simultaneus downloads', type=int, default=3) | |
parser.add_argument('urls', nargs='+', help='list of urls or .dlc files to download (can be mixed). eg: http://uploaded.net/file/abcdefgh foo.dlc') | |
parser.add_argument('--id', help='login id') | |
parser.add_argument('--pw', help='login password') | |
parser.add_argument('--download_dir', '-d', help='download directory') | |
args = parser.parse_args() | |
if args.id and args.pw: | |
login_data = { | |
'id': args.id, | |
'pw': args.pw, | |
} | |
if not login_data: | |
exit('error: please provide valid login credentials!') | |
if args.download_dir: | |
download_dir = args.download_dir | |
os.system('clear') | |
print('resolving urls...') | |
workers = args.workers | |
urls = args.urls | |
login() | |
for url in urls: | |
if url.endswith('.dlc'): | |
dlc = url | |
for url in resolve_dlc(open(dlc).read()): | |
add_download(url, dlc=dlc) | |
elif re.match('https://uploaded.net/f/\w+', url): | |
for url in resolve_uploaded_folder(url): | |
add_download(url) | |
elif re.match('http://linkcrypt.ws/\w+', url): | |
for url in resolve_linkcrypt(url): | |
add_download(url) | |
else: | |
if not re.match(url_pattern, url): | |
exit('invalid filename {}'.format(url)) | |
add_download(url) | |
downloads.sort(key=lambda download: download['file_name']) | |
downloads_gen = (download for download in downloads) | |
# start the first n workers | |
for i in range(workers): | |
queue_next_download() | |
loop = asyncio.get_event_loop() | |
loop.run_until_complete(print_progress()) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment