Skip to content

Instantly share code, notes, and snippets.

@caiwan
Created April 2, 2020 08:22
Show Gist options
  • Save caiwan/985ae492606967a50ae91bde84d8bda3 to your computer and use it in GitHub Desktop.
Save caiwan/985ae492606967a50ae91bde84d8bda3 to your computer and use it in GitHub Desktop.
Download assets from GoogleDrive
"""
Download and verify Google Drive assets listed in a JSON descriptor file.

The download logic is taken from this Stack Overflow answer:
https://stackoverflow.com/a/39225039
"""
import requests
import argparse
import json
import os
import pathlib
from hashlib import md5
import tqdm
# Command-line interface: one positional descriptor file plus optional flags.
parser = argparse.ArgumentParser(
    description="Downloads and verifies files from Google drive."
)

# nargs=1 makes argparse store a one-element list in ``args.input``.
parser.add_argument("input", nargs=1, type=str, help="Asset descriptor file")

# Boolean switches.
parser.add_argument(
    "--force",
    "-f",
    action="store_true",
    default=False,
    dest="is_force_download",
    help="Force download all files.",
)
parser.add_argument(
    "--update-hash",
    "-u",
    action="store_true",
    default=False,
    dest="is_update_hash",
    help="Update file hashes.",
)

# List-editing command and the values it consumes.
parser.add_argument(
    "--cmd",
    "-c",
    type=str,
    default="",
    dest="command",
    help=(
        "Operation to be committed (add, remove, list) on list file.\n"
        "NOTE: Does not do any checks, just updates list."
    ),
)
parser.add_argument(
    "--file-id",
    "-a",
    type=str,
    default="",
    dest="file_id",
    help="File id to to add with -c add",
)
parser.add_argument(
    "--index",
    "-i",
    type=int,
    default=0,
    dest="item_id",
    help="Item index to be removed with -c remove.",
)
parser.add_argument(
    "--destination",
    "-d",
    type=str,
    default="",
    dest="destination",
    help="File destination path to be added with -c add",
)

# Parsed at module level, so the command line is consumed as soon as this
# file is executed or imported.
args = parser.parse_args()
def download_file_from_google_drive(id, destination):
    """Download a Google Drive file and save it to ``destination``.

    If the first response carries a cookie whose name starts with
    ``download_warning``, the request is repeated with that cookie's value
    passed as the ``confirm`` parameter, and the second response is saved.

    Args:
        id: Google Drive file id. The name shadows the builtin but is kept
            so existing keyword callers continue to work.
        destination: Path of the local file to write.

    Raises:
        requests.HTTPError: If the response to be saved has an error status.
    """
    URL = "https://docs.google.com/uc?export=download"

    # The context manager closes the session (and its pooled connections)
    # even when the download fails part-way through.
    with requests.Session() as session:
        response = session.get(URL, params={"id": id}, stream=True)
        token = get_confirm_token(response)
        if token:
            params = {"id": id, "confirm": token}
            response = session.get(URL, params=params, stream=True)

        # Fail loudly instead of writing an HTTP error page to disk as if it
        # were the requested asset.
        response.raise_for_status()
        save_response_content(response, destination)
def get_confirm_token(response):
    """Return the confirmation token stored in the response cookies.

    The token is the value of the first cookie whose name starts with
    ``download_warning``. Returns ``None`` when no such cookie is present.
    """
    matching_values = (
        cookie_value
        for cookie_name, cookie_value in response.cookies.items()
        if cookie_name.startswith("download_warning")
    )
    return next(matching_values, None)
def save_response_content(response, destination):
    """Stream the body of ``response`` into the file at ``destination``.

    A tqdm progress bar labelled with ``destination`` tracks the number of
    bytes written.

    Args:
        response: Object exposing ``headers`` and ``iter_content`` (the
            streamed response returned by ``requests``).
        destination: Path of the file to create or overwrite.
    """
    CHUNK_SIZE = 65536

    # The Content-Length header may be missing; tqdm accepts total=None and
    # then shows a running byte counter without a percentage.
    content_length = response.headers.get("content-length")
    total_size = int(content_length) if content_length is not None else None

    with open(destination, "wb") as f, tqdm.tqdm(
        desc=destination,
        total=total_size,
        unit="B",
        unit_scale=True,
        unit_divisor=1024,
    ) as progress:
        for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
            if chunk:  # filter out keep-alive new chunks
                f.write(chunk)
                # Advance by real bytes so the bar matches ``total_size``
                # even when the final chunk is shorter than CHUNK_SIZE.
                progress.update(len(chunk))
def file_md5(file):
    """Return the hexadecimal MD5 digest of the file at path ``file``.

    The file is read in fixed-size binary chunks so it never has to be held
    in memory as a whole.
    """
    CHUNK_SIZE = 65536
    digest = md5()
    with open(file, "rb") as stream:
        # iter() with a sentinel keeps calling read() until it returns b"",
        # which is what a binary file yields at end-of-file.
        for block in iter(lambda: stream.read(CHUNK_SIZE), b""):
            digest.update(block)
    return digest.hexdigest()
def download_or_update_all(assets, args):
    """Download every asset whose local copy is missing or out of date.

    For each entry the destination's parent directory is created first. The
    download is skipped when forcing is off, the destination exists, and its
    MD5 equals the entry's stored ``hash``. With ``args.is_update_hash`` set,
    the entry's ``hash`` is replaced by the MD5 of the downloaded file.

    Args:
        assets: Iterable of dicts with ``destination``, ``file_id`` and
            ``hash`` keys; entries are updated in place.
        args: Object providing ``is_force_download`` and ``is_update_hash``.

    Returns:
        The same ``assets`` object that was passed in.
    """
    for entry in assets:
        target = entry["destination"]
        parent_dir = os.path.dirname(target)
        pathlib.Path(parent_dir).mkdir(parents=True, exist_ok=True)

        # Short-circuiting keeps the hash computation off the forced and
        # missing-file paths.
        already_current = (
            not args.is_force_download
            and os.path.exists(target)
            and file_md5(target) == entry["hash"]
        )
        if already_current:
            print("{}: Skipping".format(target))
            continue

        download_file_from_google_drive(entry["file_id"], target)
        if args.is_update_hash:
            entry["hash"] = file_md5(target)

    return assets
def operation_add(assets, args):
    """Append a new asset entry built from ``args`` and return the list.

    The entry takes ``args.file_id`` and ``args.destination`` and starts with
    an empty ``hash``. ``assets`` is modified in place.
    """
    new_entry = {
        "file_id": args.file_id,
        "destination": args.destination,
        "hash": "",
    }
    assets.append(new_entry)
    return assets
def operation_remove(assets, args):
    """Delete the entry at index ``args.item_id`` and return the list.

    ``assets`` is modified in place; no bounds check is done here, so the
    indexing error of the container propagates to the caller.
    """
    index = args.item_id
    del assets[index]
    return assets
def operation_list(assets, args):
    """Print every asset entry under its index and return ``assets``.

    Each entry is shown as its right-aligned index, one aligned
    ``key: value`` line per field, and a trailing blank line. ``args`` is
    accepted only to match the signature of the other operations.
    """
    for position, entry in enumerate(assets):
        print("{:4}: ".format(position))
        for field_name, field_value in entry.items():
            print("{:>6} {:<12}: {}".format("", field_name, field_value))
        print("")
    return assets
if __name__ == "__main__":
    # "input" is declared with nargs=1, so argparse delivers a one-element
    # list; unwrap it once instead of looping over it.
    input_file = args.input[0]
    if not os.path.exists(input_file):
        # Report the offending path itself, not the whole argument list.
        print("File {} does not exist".format(input_file))
        # SystemExit is what the site-provided exit() raises, but it is
        # always available (e.g. also under ``python -S``).
        raise SystemExit(-1)
    # TODO: Create file if not exists and add operation is going to be performed

    with open(input_file, "rb") as f:
        assets = json.load(f)

    # The descriptor is written back when hashes were refreshed or when a
    # command changed the list.
    is_update_file = args.is_update_hash

    if args.command:
        # Dispatch table: command name -> handler taking (assets, args).
        function_table = {
            "add": operation_add,
            "remove": operation_remove,
            "list": operation_list,
        }
        if args.command not in function_table:
            print("invalid command")
            parser.print_help()
            raise SystemExit(1)
        assets = function_table[args.command](assets, args)
        # "list" only prints; it must not force a rewrite of the descriptor.
        if args.command != "list":
            is_update_file = True
    else:
        assets = download_or_update_all(assets, args)

    if is_update_file:
        with open(input_file, "w") as f:
            json.dump(assets, f, indent=2)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment