Created
July 14, 2016 23:43
-
-
Save jvperrin/4ea345868976b2f2d2fab5f7180b230b to your computer and use it in GitHub Desktop.
Box.com recursive backup script
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import argparse
import getpass
import json
import os
import subprocess
import sys
import threading
import time
from io import BytesIO
from shutil import get_terminal_size
from urllib.parse import quote
# Try to import pycurl, error with mediocre installation instructions | |
try: | |
import pycurl | |
except ImportError: | |
print('Error: Pycurl does not appear to be installed') | |
print(' Please install it from a package (usually named python3-pycurl)') | |
print(' or by using pip (pip install pycurl)') | |
sys.exit(1) | |
# Base URL of the Box.com WebDAV endpoint; remote folder and file paths are
# appended directly to it, so it must keep its trailing slash
BOX_DAV = 'https://dav.box.com/dav/'
# Number of reusable curl handles created up front, which caps how many
# uploads can be in flight at the same time
MAX_CONCURR_UPLOADS = 10
class BoxUpload:
    """Track one file upload to Box.com and render its progress bar.

    An instance stores the name of the file, the credentials string that is
    handed to curl as USERPWD, and the latest progress figures reported
    through the curl progress callback.
    """

    def __init__(self, filename, curl_auth):
        """Remember the file name and credentials; progress starts at zero."""
        self.filename = filename
        self.auth = curl_auth
        self.percentage = 0.0
        self.bytes_uploaded = 0

    def start(self, folder, upload_folder, conn):
        """Set curl params for the file to upload.

        Args:
            folder: Local directory that contains ``self.filename``.
            upload_folder: Destination folder, relative to ``BOX_DAV``.
            conn: curl handle to configure. The upload object, the open file
                and the file size are stored on it as ``conn.upload``,
                ``conn.fp`` and ``conn.file_bytes``; the caller is
                responsible for closing ``conn.fp``.

        Raises:
            OSError: If the local file cannot be inspected or opened.
        """
        file_path = folder + '/' + self.filename

        # Percent-encode the remote path: names containing spaces, '#', '?',
        # '%' or non-ASCII characters would otherwise produce a malformed or
        # misinterpreted URL. '/' is kept as the path separator.
        remote_path = quote('{}/{}'.format(upload_folder, self.filename), safe='/')

        # Touch the filesystem before the handle so that a failure here
        # leaves the handle unmodified
        file_bytes = os.path.getsize(file_path)
        fp = open(file_path, 'rb')

        conn.upload = self
        conn.file_bytes = file_bytes
        conn.fp = fp

        conn.setopt(conn.PROGRESSFUNCTION, self.progress)
        conn.setopt(conn.NOPROGRESS, False)
        conn.setopt(conn.INFILESIZE, file_bytes)
        conn.setopt(conn.READFUNCTION, fp.read)
        conn.setopt(conn.UPLOAD, True)
        conn.setopt(conn.URL, BOX_DAV + remote_path)
        conn.setopt(conn.USERPWD, self.auth)

    def progress(self, download_t, download_d, upload_t, upload_d):
        """Store the upload progress percentage for periodic printing.

        Passed to pycurl as a progress function for each upload. Only the
        upload figures are used; the percentage is left untouched while the
        expected upload total is still reported as zero.
        """
        self.bytes_uploaded = upload_d
        if upload_t > 0:
            self.percentage = 100 * (upload_d / upload_t)

    def get_progress(self, terminal_width):
        """Get a nice progress bar including the file and percent complete.

        The progress bar has a dynamic width, so it must be constructed by
        figuring out the correct width based on other data shown.

        Args:
            terminal_width: Number of columns the whole line should occupy.

        Returns:
            A string of the form ``name [####    ] 12.34%``. When the name
            and percentage alone already exceed ``terminal_width`` the bar
            is empty and the line is longer than requested.
        """
        first_half = '{} ['.format(self.filename)
        second_half = '] {:.2f}%'.format(self.percentage)

        # Whatever is left after the label and the percentage is the bar;
        # a negative width simply yields empty strings below
        width = terminal_width - len(first_half) - len(second_half)
        filled = int(width * self.percentage / 100)

        return '{}{}{}{}'.format(
            first_half,
            filled * '#',
            (width - filled) * ' ',
            second_half,
        )
def make_box_folder(folder, auth):
    """Make a folder on Box.com using the MKCOL HTTP request method.

    Args:
        folder: Folder path to create, relative to ``BOX_DAV``.
        auth: Credentials string passed to curl as USERPWD.

    HTTP 201 is treated as success and HTTP 405 as "folder already exists";
    both return normally. HTTP 401 exits the process with status 2 and any
    other status exits with status 3.
    """
    print('Creating folder "' + folder + '"')

    conn = pycurl.Curl()
    try:
        conn.setopt(conn.CUSTOMREQUEST, 'MKCOL')
        conn.setopt(conn.USERPWD, auth)
        # Percent-encode the path so folder names containing spaces or other
        # URL-special characters form a valid request URL
        conn.setopt(conn.URL, BOX_DAV + quote(folder, safe='/'))
        conn.perform()
        http_status = conn.getinfo(pycurl.HTTP_CODE)
    finally:
        # Release the handle even when the request itself raises
        conn.close()

    if http_status == 405:
        print('Could not create a folder, it already exists, continuing...')
    elif http_status == 401:
        print('401 error trying to create a folder, check your authentication!')
        sys.exit(2)
    elif http_status != 201:
        print('Error creating folder, HTTP ' + str(http_status))
        print('Folder name was ' + folder)
        sys.exit(3)
def print_progress(multi, stats, prev_total):
    """Show an updating progress bar for all ongoing uploads.

    Redraws one bar per active upload plus two summary lines, then schedules
    itself to run again one second later on a daemon timer thread.

    Args:
        multi: Object whose ``handles`` list holds every curl handle and
            whose ``free`` list holds the currently idle ones.
        stats: Dict with the keys ``bytes_uploaded``, ``bytes_total``,
            ``num_finished``, ``num_total`` and ``start_time``.
        prev_total: Total bytes uploaded as computed by the previous call,
            used to derive the current speed.
    """
    width = get_terminal_size((80, 20)).columns  # Default size as a fallback

    # Move cursor to the top left using an ANSI escape sequence so that the
    # previous frame is overwritten
    sys.stdout.write('\033[1;1H')

    bytes_partial = 0
    for conn in multi.handles:
        if conn.upload:
            print(conn.upload.get_progress(width))
            bytes_partial += conn.upload.bytes_uploaded

    # Finished files plus the partial progress of the in-flight ones
    bytes_total = stats['bytes_uploaded'] + bytes_partial

    # Print statistics on upload time and current speed. Calls are one second
    # apart, so the byte delta times 8 / 10^6 is megabits per second.
    time_stats = 'Time so far: {} Speed: {:.2f} Mb/s'.format(
        friendly_time(time.time() - stats['start_time']),
        (bytes_total - prev_total) * 0.000008
    )
    print(time_stats + (width - len(time_stats)) * ' ')

    # With nothing to upload (no files, or only empty files) the expected
    # total is zero; report 100% instead of dividing by zero
    expected_total = stats['bytes_total']
    percent_done = 100 * bytes_total / expected_total if expected_total else 100.0

    # Print more stats, this time on amount uploaded
    upload_stats = 'Uploaded {}/{} files and {}/{} ({:.2f}%) in total'.format(
        stats['num_finished'],
        stats['num_total'],
        friendly_file_size(bytes_total),
        friendly_file_size(expected_total),
        percent_done
    )
    print(upload_stats + (width - len(upload_stats)) * ' ')

    # If there are less progress bars now, add blank rows to overwrite the
    # bottom rows as necessary (to get rid of lines with outdated information)
    for _ in range(len(multi.free)):
        print(' ' * width)

    # Print the next update in a second; the timer is a daemon thread so it
    # does not keep the process alive on exit or interrupt
    thread = threading.Timer(1, print_progress, [multi, stats, bytes_total])
    thread.daemon = True
    thread.start()
def reset_connection(multi, conn):
    """Return a used curl handle to the pool of free handles.

    Closes the file attached to the handle, detaches the handle from
    ``multi``, clears the per-upload attributes stored on it and appends it
    to ``multi.free``.
    """
    open_file = conn.fp
    open_file.close()
    multi.remove_handle(conn)

    # Blank out everything that described the upload that just ended
    for attr, blank in (('upload', None), ('fp', None), ('file_bytes', 0)):
        setattr(conn, attr, blank)

    multi.free.append(conn)
def friendly_file_size(num_bytes):
    """Convert a byte count to a human-readable string.

    Adapted from http://stackoverflow.com/a/1094933/1979001

    Args:
        num_bytes: Size in bytes.

    Returns:
        The size with two decimals and a unit, using 1024 as the step
        between units, e.g. ``'1.50 KB'``. Sizes of 1024 PB or more are
        still expressed in PB rather than falling through without a result.
    """
    units = ('B', 'KB', 'MB', 'GB', 'TB', 'PB')
    size = float(num_bytes)

    for unit in units[:-1]:
        if size < 1024.0:
            return '{:3.2f} {}'.format(size, unit)
        size /= 1024.0

    # Largest known unit: always return a string, however big the value is
    return '{:3.2f} {}'.format(size, units[-1])
def friendly_time(seconds):
    """Show seconds in terms of larger units for easier reading.

    Args:
        seconds: Duration in seconds.

    Returns:
        The duration with two decimals in the largest fitting unit out of
        seconds, minutes, hours, days and years (365 days), e.g.
        ``'1.50 minutes'``.
    """
    # Each entry is (unit name, how many of it make up the next unit)
    times = [('seconds', 60), ('minutes', 60), ('hours', 24), ('days', 365)]

    for unit, factor in times:
        if seconds < factor:
            return '{:.2f} {}'.format(seconds, unit)
        seconds /= factor

    # 365 days or more: report years instead of returning nothing
    return '{:.2f} {}'.format(seconds, 'years')
def main(args):
    """Interactively back up a local directory tree to Box.com over WebDAV.

    Prompts for the local directory, the Box.com credentials and the
    destination folder, recreates the directory structure remotely with
    ``make_box_folder`` and uploads the files through a pool of
    ``MAX_CONCURR_UPLOADS`` reusable curl handles driven by a single
    ``pycurl.CurlMulti`` object. Uploads that fail with a curl error are
    queued again.

    Args:
        args: Parsed command-line arguments; only ``args.quiet`` is read.
            When it is set, the progress display and the summary printed by
            this function are skipped.

    Exits with status 1 when the local directory cannot be read and with
    status 4 when curl reports error 42 (callback aborted).
    """
    archive = input('Where do you want to upload files from? ')

    # os.walk() silently skips directories it cannot list instead of raising,
    # so the directory has to be checked explicitly
    if not os.path.isdir(archive) or not os.access(archive, os.R_OK | os.X_OK):
        print('Could not access files! Make sure the directory exists or try again as root!')
        sys.exit(1)

    # Get authentication for Box.com and destination folder
    email = input('Enter your Box.com email (Berkeley email): ')
    password = getpass.getpass('Enter your Box.com password: ')
    folder = input('What folder do you want to put the files in on Box.com (will be created)? ')
    auth = email + ':' + password

    # One (directory path, file names) pair per directory. os.walk() yields
    # parents before their children, so remote folders are created in order.
    files = [(dirpath, filenames) for dirpath, _, filenames in os.walk(archive)]
    file_paths = [
        dirpath + '/' + filename
        for dirpath, filenames in files
        for filename in filenames
    ]

    # Make the new root folder for the backup
    if not args.quiet:
        print('Creating folder ' + folder + ' on Box.com...')
    make_box_folder(folder, auth)

    # Start timer for upload speed and get total upload size
    file_bytes = sum(os.lstat(f).st_size for f in file_paths if os.path.exists(f))
    if not args.quiet:
        print('Uploading {}...'.format(friendly_file_size(file_bytes)))
        time.sleep(3)

    start_time = time.time()
    multi = pycurl.CurlMulti()
    multi.handles = []
    multi.free = []

    stats = {
        'num_finished': 0,
        'num_total': len(file_paths),
        'bytes_uploaded': 0,
        'bytes_total': file_bytes,
        'start_time': start_time,
    }

    # Create free connection handlers that can be reused
    for _ in range(MAX_CONCURR_UPLOADS):
        conn = pycurl.Curl()
        conn.fp = None
        conn.upload = None
        conn.file_bytes = 0
        conn.parent_folder = ''
        multi.handles.append(conn)
        multi.free.append(conn)

    # Queue of directories whose remote folder still has to be created
    queue = list(files)

    # Files that are safe to upload without having to create a new folder for
    # them, stored as (folder relative to the archive, file name)
    safe_queue = []

    # Clear the screen and start printing the progress bar every second
    if not args.quiet:
        subprocess.call(('clear',))
        print_progress(multi, stats, 0)

    while stats['num_finished'] < len(file_paths):
        # Create the next remote folder once all queued files are in flight
        if not safe_queue and queue:
            dirpath, filenames = queue.pop(0)

            # Path of this directory relative to the archive root; relpath
            # stays correct when the user typed a trailing slash
            relative = os.path.relpath(dirpath, archive)
            folder_name = '' if relative == os.curdir else relative

            if folder_name:
                make_box_folder(folder + '/' + folder_name, auth)

            # Add all files to the safe queue for uploading
            safe_queue.extend((folder_name, filename) for filename in filenames)

        # Add uploads if there are connections free to use
        while safe_queue and multi.free:
            parent_folder, filename = safe_queue.pop(0)
            local_folder = archive + '/' + parent_folder

            # Files that disappeared since the scan count as done
            if not os.path.exists(local_folder + '/' + filename):
                stats['num_finished'] += 1
                continue

            # Files in the archive root go straight into the backup folder
            remote_folder = folder + '/' + parent_folder if parent_folder else folder

            conn = multi.free.pop()
            # Remember where the file lives so a failed upload can be retried
            conn.parent_folder = parent_folder
            upload = BoxUpload(filename, auth)
            upload.start(local_folder, remote_folder, conn)
            multi.add_handle(conn)

        # Run curl's internal state machine
        while True:
            ret, num_handles = multi.perform()
            if ret != pycurl.E_CALL_MULTI_PERFORM:
                break

        # Check for successful or failed curl connections and free them again
        while True:
            num_queued, ok_list, err_list = multi.info_read()

            # Free up connections from files that have finished
            for conn in ok_list:
                stats['bytes_uploaded'] += conn.file_bytes
                reset_connection(multi, conn)

            # Retry files that have errored in some way
            for conn, errno, errmsg in err_list:
                failed_name = conn.upload.filename
                print('Error uploading {}: Curl error #{} {}'.format(
                    failed_name,
                    errno,
                    errmsg
                ))

                # Allow exits by SIGINT
                if errno == 42:  # Callback aborted exception code
                    sys.exit(4)

                # Retry this file: it goes back to the front of the upload
                # queue together with its folder, which already exists
                print('Retrying {}'.format(failed_name))
                safe_queue.insert(0, (conn.parent_folder, failed_name))
                reset_connection(multi, conn)

            stats['num_finished'] += len(ok_list)

            # Move on if no more connections need freeing
            if num_queued == 0:
                break

        # Wait for more data to be available
        multi.select(1.0)

    # Clean up any open files and connections
    for conn in multi.handles:
        if conn.fp:
            conn.fp.close()
        conn.close()
    multi.close()

    # Print statistics about the backup time, size, and speed
    if not args.quiet:
        subprocess.call(('clear',))
        total_time = time.time() - start_time
        # Megabytes per second; guard against a zero elapsed time
        transfer_rate = file_bytes / (1000000 * total_time) if total_time else 0.0

        print('Box.com upload complete!')
        print('Took {} to transfer {} files with combined size {}'.format(
            friendly_time(total_time),
            stats['num_total'],
            friendly_file_size(file_bytes)
        ))
        print('Average rate of {:.2f} Mb/s ({:.2f} MB/s)'.format(
            transfer_rate * 8,
            transfer_rate
        ))
if __name__ == '__main__':
    # Script entry point: parse the command line and hand off to main()
    parser = argparse.ArgumentParser(description='Back up files to Box.com')
    parser.add_argument('-q', '--quiet', action='store_true',
                        help='do not output progress to stdout')
    args = parser.parse_args()

    # sys.exit() instead of the exit() helper, which is injected by the site
    # module for interactive use and is not guaranteed to exist in scripts
    sys.exit(main(args))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
also fix spaces edge case