Created
April 2, 2020 08:22
-
-
Save caiwan/985ae492606967a50ae91bde84d8bda3 to your computer and use it in GitHub Desktop.
Download assets from GoogleDrive
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
taken from this StackOverflow answer: https://stackoverflow.com/a/39225039 | |
""" | |
import argparse
import json
import os
import pathlib
import sys
from hashlib import md5

import requests
import tqdm
# Command-line interface.
#
# Without --cmd the script downloads every asset listed in the descriptor
# file; with --cmd it edits or prints the descriptor instead (the command
# names are dispatched in the ``__main__`` block at the bottom of the file).
parser = argparse.ArgumentParser(
    description="Downloads and verifies files from Google drive."
)

# nargs=1 makes ``args.input`` a one-element list, not a plain string.
parser.add_argument("input", type=str, nargs=1, help="Asset descriptor file")

parser.add_argument(
    "--force",
    "-f",
    dest="is_force_download",
    action="store_true",
    default=False,
    help="Force download all files.",
)

parser.add_argument(
    "--update-hash",
    "-u",
    dest="is_update_hash",
    action="store_true",
    default=False,
    help="Update file hashes.",
)

parser.add_argument(
    "--cmd",
    "-c",
    dest="command",
    type=str,
    default="",
    # Adjacent literals instead of a triple-quoted string: no source
    # indentation leaks into the help text.
    help=(
        "Operation to be committed (add, remove, list) on list file. "
        "NOTE: Does not do any checks, just updates list."
    ),
)

# Note the short flag is -a, not -f (which is taken by --force).
parser.add_argument(
    "--file-id",
    "-a",
    dest="file_id",
    type=str,
    default="",
    help="File id to add with -c add",
)

parser.add_argument(
    "--index",
    "-i",
    dest="item_id",
    type=int,
    default=0,
    help="Item index to be removed with -c remove.",
)

parser.add_argument(
    "--destination",
    "-d",
    dest="destination",
    type=str,
    default="",
    help="File destination path to be added with -c add",
)

# Parsed at import time; the functions below receive ``args`` explicitly, and
# the ``__main__`` block reads this module-level value.
args = parser.parse_args()
def download_file_from_google_drive(id, destination):
    """Download the Google Drive file ``id`` and store it at ``destination``.

    The file is requested once; if that response carries a
    ``download_warning*`` cookie (presumably Google's confirmation step for
    files it will not serve directly), the request is repeated with the
    cookie value passed as the ``confirm`` parameter.

    Args:
        id: Google Drive file id. The name shadows the builtin but is kept
            so that existing keyword callers keep working.
        destination: Path of the file the content is written to.

    Raises:
        requests.HTTPError: If the final response has an error status, so
            an error page is never saved in place of the asset.
    """
    URL = "https://docs.google.com/uc?export=download"

    # The context manager releases the pooled connections even when the
    # download fails half-way.
    with requests.Session() as session:
        response = session.get(URL, params={"id": id}, stream=True)
        token = get_confirm_token(response)

        if token:
            # The first (unread, streamed) response is no longer needed.
            response.close()
            params = {"id": id, "confirm": token}
            response = session.get(URL, params=params, stream=True)

        response.raise_for_status()
        # Must run inside the ``with``: the body is streamed lazily.
        save_response_content(response, destination)
def get_confirm_token(response):
    """Return the value of the first ``download_warning*`` cookie, if any.

    Args:
        response: Object exposing a ``cookies`` mapping.

    Returns:
        The cookie value of the first cookie whose name starts with
        ``download_warning``, or ``None`` when there is no such cookie.
    """
    matching_values = (
        cookie_value
        for cookie_name, cookie_value in response.cookies.items()
        if cookie_name.startswith("download_warning")
    )
    return next(matching_values, None)
def save_response_content(response, destination):
    """Stream the body of ``response`` into the file ``destination``.

    A tqdm progress bar labelled with ``destination`` tracks the number of
    bytes written.

    Args:
        response: Streamed HTTP response providing ``headers`` and
            ``iter_content``.
        destination: Path of the file to (over)write.
    """
    CHUNK_SIZE = 65536

    # The header may be missing; tqdm accepts total=None and then shows a
    # running counter without a percentage instead of the lookup failing.
    content_length = response.headers.get("content-length")
    total_size = int(content_length) if content_length is not None else None

    with open(destination, "wb") as f, tqdm.tqdm(
        desc=destination,
        total=total_size,
        unit="B",
        unit_scale=True,
        unit_divisor=1024,
    ) as progress:
        for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
            if chunk:  # filter out keep-alive new chunks
                f.write(chunk)
                # Count real bytes so the bar matches ``total_size``.
                progress.update(len(chunk))
def file_md5(file):
    """Return the hexadecimal MD5 digest of the file at path ``file``.

    The file is read in 64 KiB blocks so it never has to fit in memory.
    """
    block_size = 65536
    digest = md5()
    with open(file, "rb") as stream:
        # iter() with a sentinel stops at the first empty read (end of file).
        for block in iter(lambda: stream.read(block_size), b""):
            digest.update(block)
    return digest.hexdigest()
def download_or_update_all(assets, args):
    """Download every asset whose local copy is missing or out of date.

    Args:
        assets: List of dicts with the keys ``destination``, ``file_id``
            and ``hash``.
        args: Object providing ``is_force_download`` and ``is_update_hash``.

    Returns:
        The same ``assets`` list; entries get a refreshed ``hash`` when
        ``args.is_update_hash`` is set and the asset was downloaded.
    """
    for entry in assets:
        target = entry["destination"]

        # Make sure the directory that will hold the file exists.
        parent_dir = os.path.dirname(target)
        pathlib.Path(parent_dir).mkdir(parents=True, exist_ok=True)

        # An existing file with the recorded hash is left alone unless a
        # forced download was requested.
        is_current = (
            not args.is_force_download
            and os.path.exists(target)
            and file_md5(target) == entry["hash"]
        )
        if is_current:
            print("{}: Skipping".format(target))
            continue

        download_file_from_google_drive(entry["file_id"], target)

        if args.is_update_hash:
            entry["hash"] = file_md5(target)

    return assets
def operation_add(assets, args):
    """Append a new asset entry built from ``args`` and return ``assets``.

    The entry takes ``args.file_id`` and ``args.destination`` and starts
    with an empty ``hash``. The list is modified in place.
    """
    new_entry = {
        "file_id": args.file_id,
        "destination": args.destination,
        "hash": "",
    }
    assets.append(new_entry)
    return assets
def operation_remove(assets, args):
    """Delete the entry at index ``args.item_id`` and return ``assets``.

    The list is modified in place; no bounds check is performed here.
    """
    position = args.item_id
    del assets[position]
    return assets
def operation_list(assets, args):
    """Print every asset with its index and return ``assets`` unchanged.

    Each asset is shown as its index followed by one indented
    ``key: value`` line per field and a trailing blank line. ``args`` is
    accepted only so the signature matches the other operations.
    """
    for index, entry in enumerate(assets):
        print("{:4}: ".format(index))
        for key, value in entry.items():
            print("{:>6} {:<12}: {}".format("", key, value))
        print("")
    return assets
if __name__ == "__main__": | |
for input_file in args.input: | |
if not os.path.exists(input_file): | |
print("File {} does not exists".format(args.input)) | |
exit(-1) | |
# TODO: Create file if not exists and add operation is going to be performed | |
input_file = args.input[0] | |
assets = [] | |
with open(input_file, "rb") as f: | |
assets = json.load(f) | |
is_update_file = args.is_update_hash | |
if args.command: | |
function_table = { | |
"add": operation_add, | |
"remove": operation_remove, | |
"list": operation_list, | |
} | |
if args.command not in function_table: | |
print("invlaid command") | |
parser.print_help() | |
exit(1) | |
pass | |
assets = function_table[args.command](assets, args) | |
is_update_file = True | |
else: | |
assets = download_or_update_all(assets, args) | |
if is_update_file: | |
with open(input_file, "w") as f: | |
json.dump(assets, f, indent=2) | |
pass |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment