Skip to content

Instantly share code, notes, and snippets.

@mka142
Last active March 19, 2023 00:31
Show Gist options
  • Save mka142/c363c9acca4ff41fa792a668527206d9 to your computer and use it in GitHub Desktop.
Save mka142/c363c9acca4ff41fa792a668527206d9 to your computer and use it in GitHub Desktop.
#based on
# @immuntasir
#immuntasir/drive_script_gen.ipynb
# Script that generates an sh file which downloads all files under a given Google Drive parent folder id and saves them in the same folder tree.
#Executing
# python gdrive_download_folder.py -s <folder_id> -f output_file_name
# Import the Libraries
import argparse
from collections import deque

from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
# Command-line interface.
#   -s/--source : id of the Google Drive folder to mirror (found in the folder's URL).
#   -f/--file   : base name reused for the generated "<name>.sh" script, the wget
#                 log file and the top-level directory of the downloaded tree.
#   -o/--output : directory in which that top-level directory is created.
# --source and --file are required: without them the script would only fail
# later (after the authentication step) when None is concatenated to a string.
parser = argparse.ArgumentParser(
    description="Generate a bash script that downloads a Google Drive folder tree."
)
parser.add_argument(
    "-s",
    "--source",
    type=str,
    required=True,
    help="id of the Google Drive folder (see the folder's URL)",
)
parser.add_argument(
    "-f",
    "--file",
    type=str,
    required=True,
    help="base name of the generated .sh script and of the download directory",
)
parser.add_argument(
    "-o",
    "--output",
    type=str,
    # The literal text "$PWD" is written into the generated script, so it is
    # expanded by the shell when the script runs, not by Python here.
    default="$PWD",
    help="directory in which the download directory is created (default: $PWD)",
)
args = parser.parse_args()

# Id of the Google Drive folder whose contents will be listed.
parent_folder_id = args.source
# Base name for the generated script and the download directory.
file_name = args.file
# Parent directory in which the contents of the Drive folder will be stored.
parent_folder_dir = args.output
# Authenticate against Google Drive, reusing credentials cached in
# "mycreds.txt" when they exist.
gauth = GoogleAuth()
gauth.LoadCredentialsFile("mycreds.txt")

has_saved_credentials = gauth.credentials is not None
if not has_saved_credentials:
    # Nothing cached: run the local-webserver flow (a browser window will
    # open; log in using the appropriate account).
    gauth.LocalWebserverAuth()
elif gauth.access_token_expired:
    # Cached credentials exist but the access token expired: refresh it.
    gauth.Refresh()
else:
    # Cached credentials are still valid: initialize with them.
    gauth.Authorize()

# Persist the (possibly new or refreshed) credentials for the next run.
gauth.SaveCredentialsFile("mycreds.txt")

drive = GoogleDrive(gauth)
# Root directory of the downloaded tree: "<output>/<file_name>/".
# endswith() is used instead of indexing [-1] so that an empty --output value
# does not raise IndexError; an empty value yields the relative path
# "<file_name>/".
if parent_folder_dir and not parent_folder_dir.endswith("/"):
    parent_folder_dir += "/"
parent_folder_dir += file_name + "/"

# Base wget command used for every file. This might change in the future due
# to changes in Google Drive.
#   * FILE_ID and FILE_NAME are placeholders substituted per file when the
#     script is written.
#   * The value deliberately starts and ends with a double quote character;
#     the code that emits the command strips them with wget_text[1:-1].
#   * wget appends its log output to "logfile<file_name>" (-a).
wget_text = (
    '"wget -a logfile' + file_name + " "
    "--load-cookies /tmp/cookies.txt "
    '"https://docs.google.com/uc?export=download&confirm='
    "$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies "
    "--no-check-certificate "
    "'https://docs.google.com/uc?export=download&id=FILE_ID' -O- "
    "| sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\\1\\n/p')"
    '&id=FILE_ID" -O "FILE_NAME" && rm -rf /tmp/cookies.txt"'
)
# Characters that keep a special meaning inside a double-quoted bash string;
# each one is backslash-escaped before a Drive title is used in a path.
_BASH_DQ_ESCAPES = str.maketrans(
    {"\\": "\\\\", '"': '\\"', "$": "\\$", "`": "\\`"}
)


def _safe_path_component(title):
    """Return *title* made safe for use as one path component in the script.

    Drive titles are untrusted text that ends up inside double-quoted strings
    of the generated bash script, so:
      * "/" is replaced by "_" (it would otherwise be read as a directory
        separator),
      * a title that is exactly "." or ".." has its dots replaced by "_" so it
        cannot point at the current or parent directory,
      * backslash, double quote, "$" and backtick are backslash-escaped so the
        shell treats them literally.
    """
    component = title.replace("/", "_")
    if component in (".", ".."):
        component = component.replace(".", "_")
    return component.translate(_BASH_DQ_ESCAPES)


# Get the folder structure with a breadth-first walk starting at the parent
# folder. Each queue item pairs a Drive folder id with the local directory
# path (ending in "/") that its children are placed under.
file_dict = dict()
pending_folders = deque([(parent_folder_id, parent_folder_dir)])
cnt = 0
while pending_folders:
    current_folder_id, current_parent = pending_folders.popleft()
    file_list = drive.ListFile(
        {"q": "'{}' in parents and trashed=false".format(current_folder_id)}
    ).GetList()
    print(current_parent, current_folder_id)
    for file1 in file_list:
        entry = {
            "id": file1["id"],
            "title": file1["title"],
            "dir": current_parent + _safe_path_component(file1["title"]),
        }
        if file1["mimeType"] == "application/vnd.google-apps.folder":
            entry["type"] = "folder"
            entry["dir"] += "/"
            # Visit this folder's children later, rooted at its own path.
            pending_folders.append((file1["id"], entry["dir"]))
        else:
            entry["type"] = "file"
        # Entries are keyed by a running integer, in discovery order.
        file_dict[cnt] = entry
        cnt += 1
# Write the bash script "<file_name>.sh".
#
# Layout of the generated script:
#   1. shebang + creation of the root download directory,
#   2. call_completed(): prints a one-line progress indicator,
#   3. download_tree(): one mkdir per folder / one wget per file, each
#      followed by a call_completed call,
#   4. a final call to download_tree.
all_files = len(file_dict)

# The stored template is wrapped in a pair of double quotes; strip them once.
wget_command = wget_text[1:-1]

script_prologue = (
    "function call_completed () {\n"
    'echo -ne "$(( 100*$1/$2))% | Downloaded $1 of $2 ... \\r"\n'
    "}\n"
    "function download_tree {\n"
)
script_epilogue = (
    "echo -ne '\\r\\n'\n"
    'echo -ne "Downloaded!\\n"\n'
    "}\n"
)

# "with" guarantees the file is closed even if a write fails; UTF-8 is set
# explicitly so non-ASCII titles do not depend on the platform default.
with open(file_name + ".sh", "w", encoding="utf-8") as f:
    f.write(f'#!/bin/bash\nmkdir -p "{parent_folder_dir}"\n')
    f.write(script_prologue)
    # file_dict preserves insertion order, i.e. the order of discovery.
    for current_file, entry in enumerate(file_dict.values(), start=1):
        if entry["type"] == "folder":
            # -p keeps re-runs (and same-named sibling folders) from erroring
            # on an already existing directory.
            f.write('mkdir -p "' + entry["dir"] + '"\n')
        else:
            f.write(
                wget_command.replace("FILE_ID", entry["id"]).replace(
                    "FILE_NAME", entry["dir"]
                )
                + "\n"
            )
        f.write(f"call_completed {current_file} {all_files}\n")
    f.write(script_epilogue)
    f.write("download_tree\n")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment