Skip to content

Instantly share code, notes, and snippets.

@immuntasir
Created July 7, 2020 14:30
Show Gist options
  • Save immuntasir/73b8e8eef7e6c9066aaf2432bebf7db0 to your computer and use it in GitHub Desktop.
Save immuntasir/73b8e8eef7e6c9066aaf2432bebf7db0 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# Import the Libraries\n",
"from pydrive.auth import GoogleAuth\n",
"import pandas as pd\n",
"from pydrive.drive import GoogleDrive"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# A browser window will open. login using the appropriate account.\n",
"gauth = GoogleAuth()\n",
"gauth.LocalWebserverAuth() #"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"drive = GoogleDrive(gauth)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Set the id of the Google Drive folder. You can find it in the URL of the google drive folder.\n",
"parent_folder_id = '1NIGvjHBuUQHWnMqzboyg-zLI1q_bOuCH'\n",
"# Set the parent folder, where you want to store the contents of the google drive folder\n",
"parent_folder_dir = '~/Downloads/drive_script/'\n",
"\n",
"if parent_folder_dir[-1] != '/':\n",
" parent_folder_dir = parent_folder_dir + '/'\n",
"\n",
"# This is the base wget command that we will use. This might change in the future due to changes in Google drive\n",
"wget_text = '\"wget --load-cookies /tmp/cookies.txt \"https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate \\'https://docs.google.com/uc?export=download&id=FILE_ID\\' -O- | sed -rn \\'s/.*confirm=([0-9A-Za-z_]+).*/\\\\1\\\\n/p\\')&id=FILE_ID\" -O FILE_NAME && rm -rf /tmp/cookies.txt\"'.replace('&', '&')"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"~/Downloads/drive_script/ 1NIGvjHBuUQHWnMqzboyg-zLI1q_bOuCH\n",
"~/Downloads/drive_script/subfolder_3/ 1ZhB_KvXYU9YNQMH90X8BIYKQyQXbCKot\n",
"~/Downloads/drive_script/subfolder_2/ 1wfHBBQN5l5FpvcMY0eFPibaV88Ilu0OY\n",
"~/Downloads/drive_script/subfolder_1/ 1DsRpzk0dlwVcvY6KNjOKcVUNDmqlXQ-U\n",
"~/Downloads/drive_script/subfolder_3/subfolder_3_1/ 1ZlF77CS3W8gvVZ9hhcNyLFobO8Kfi0MS\n",
"~/Downloads/drive_script/subfolder_1/subfolder_1_1/ 1UpBN2L27gl5-ZfHzezP7saOg8-2Z-0Ar\n",
"~/Downloads/drive_script/subfolder_1/subfolder_1_1/subfolder_1_1_1/ 1iiro7MmSqHokDhKzugmhQt0g1VZKZ4uY\n"
]
}
],
"source": [
"# Get the folder structure\n",
"\n",
"file_dict = dict()\n",
"folder_queue = [parent_folder_id]\n",
"dir_queue = [parent_folder_dir]\n",
"cnt = 0\n",
"\n",
"while len(folder_queue) != 0:\n",
" current_folder_id = folder_queue.pop(0)\n",
" file_list = drive.ListFile({'q': \"'{}' in parents and trashed=false\".format(current_folder_id)}).GetList()\n",
" \n",
" current_parent = dir_queue.pop(0)\n",
" print(current_parent, current_folder_id)\n",
" for file1 in file_list:\n",
" file_dict[cnt] = dict()\n",
" file_dict[cnt]['id'] = file1['id']\n",
" file_dict[cnt]['title'] = file1['title']\n",
" file_dict[cnt]['dir'] = current_parent + file1['title']\n",
"\n",
" if file1['mimeType'] == 'application/vnd.google-apps.folder':\n",
" file_dict[cnt]['type'] = 'folder'\n",
" file_dict[cnt]['dir'] += '/'\n",
" folder_queue.append(file1['id'])\n",
" dir_queue.append(file_dict[cnt]['dir'])\n",
" else:\n",
" file_dict[cnt]['type'] = 'file'\n",
" \n",
" cnt += 1"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>title</th>\n",
" <th>dir</th>\n",
" <th>type</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1ZhB_KvXYU9YNQMH90X8BIYKQyQXbCKot</td>\n",
" <td>subfolder_3</td>\n",
" <td>~/Downloads/drive_script/subfolder_3/</td>\n",
" <td>folder</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1wfHBBQN5l5FpvcMY0eFPibaV88Ilu0OY</td>\n",
" <td>subfolder_2</td>\n",
" <td>~/Downloads/drive_script/subfolder_2/</td>\n",
" <td>folder</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1DsRpzk0dlwVcvY6KNjOKcVUNDmqlXQ-U</td>\n",
" <td>subfolder_1</td>\n",
" <td>~/Downloads/drive_script/subfolder_1/</td>\n",
" <td>folder</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1XryHOYweDeD_4JnxKOQa7tugPvKd0dZr</td>\n",
" <td>test_file_7.txt</td>\n",
" <td>~/Downloads/drive_script/test_file_7.txt</td>\n",
" <td>file</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1o6Au2Rfty4oOyDJI7t_RaAKQHzUUJR7g</td>\n",
" <td>test_file_5.txt</td>\n",
" <td>~/Downloads/drive_script/test_file_5.txt</td>\n",
" <td>file</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>12N5LTGbo5BEWAtX6jG4B8Bg8hdrQHt2G</td>\n",
" <td>test_file_10.txt</td>\n",
" <td>~/Downloads/drive_script/test_file_10.txt</td>\n",
" <td>file</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>1irAuKK0bySnY6f5y7aUc682jJ66WkeeF</td>\n",
" <td>test_file_6.txt</td>\n",
" <td>~/Downloads/drive_script/test_file_6.txt</td>\n",
" <td>file</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>1ZlF77CS3W8gvVZ9hhcNyLFobO8Kfi0MS</td>\n",
" <td>subfolder_3_1</td>\n",
" <td>~/Downloads/drive_script/subfolder_3/subfolder...</td>\n",
" <td>folder</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>19XDerGt9bBDzwOKjaAv6LamOvCB3LVHh</td>\n",
" <td>test_file_11.txt</td>\n",
" <td>~/Downloads/drive_script/subfolder_3/test_file...</td>\n",
" <td>file</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>1llWzqj9ZXPG3GZsr0LqdVQfZMm3tkd8U</td>\n",
" <td>test_file_9.txt</td>\n",
" <td>~/Downloads/drive_script/subfolder_3/test_file...</td>\n",
" <td>file</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id title \\\n",
"0 1ZhB_KvXYU9YNQMH90X8BIYKQyQXbCKot subfolder_3 \n",
"1 1wfHBBQN5l5FpvcMY0eFPibaV88Ilu0OY subfolder_2 \n",
"2 1DsRpzk0dlwVcvY6KNjOKcVUNDmqlXQ-U subfolder_1 \n",
"3 1XryHOYweDeD_4JnxKOQa7tugPvKd0dZr test_file_7.txt \n",
"4 1o6Au2Rfty4oOyDJI7t_RaAKQHzUUJR7g test_file_5.txt \n",
"5 12N5LTGbo5BEWAtX6jG4B8Bg8hdrQHt2G test_file_10.txt \n",
"6 1irAuKK0bySnY6f5y7aUc682jJ66WkeeF test_file_6.txt \n",
"7 1ZlF77CS3W8gvVZ9hhcNyLFobO8Kfi0MS subfolder_3_1 \n",
"8 19XDerGt9bBDzwOKjaAv6LamOvCB3LVHh test_file_11.txt \n",
"9 1llWzqj9ZXPG3GZsr0LqdVQfZMm3tkd8U test_file_9.txt \n",
"\n",
" dir type \n",
"0 ~/Downloads/drive_script/subfolder_3/ folder \n",
"1 ~/Downloads/drive_script/subfolder_2/ folder \n",
"2 ~/Downloads/drive_script/subfolder_1/ folder \n",
"3 ~/Downloads/drive_script/test_file_7.txt file \n",
"4 ~/Downloads/drive_script/test_file_5.txt file \n",
"5 ~/Downloads/drive_script/test_file_10.txt file \n",
"6 ~/Downloads/drive_script/test_file_6.txt file \n",
"7 ~/Downloads/drive_script/subfolder_3/subfolder... folder \n",
"8 ~/Downloads/drive_script/subfolder_3/test_file... file \n",
"9 ~/Downloads/drive_script/subfolder_3/test_file... file "
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.DataFrame(file_dict).transpose().head(10)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# Write the bash script\n",
"f = open('script.sh', 'w')\n",
"file_dict.keys()\n",
"for file_iter in file_dict.keys():\n",
" if file_dict[file_iter]['type'] == 'folder':\n",
" f.write('mkdir ' + file_dict[file_iter]['dir'] + '\\n')\n",
" else:\n",
" f.write(wget_text[1:-1].replace('FILE_ID', file_dict[file_iter]['id']).replace('FILE_NAME', file_dict[file_iter]['dir']) + '\\n')\n",
"f.close()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.2"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
@mka142
Copy link

mka142 commented Mar 19, 2023

It seems that this currently works only for small folders. Google blocks your requests after too many automated queries, so only some of your files will be downloaded.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment