Skip to content

Instantly share code, notes, and snippets.

@pgcd
Last active August 29, 2015 14:18
Show Gist options
  • Save pgcd/9871ed0aa39f2bff4906 to your computer and use it in GitHub Desktop.
Save pgcd/9871ed0aa39f2bff4906 to your computer and use it in GitHub Desktop.
Skipping uploader for Google Drive
__author__ = 'pgcd'
import sys
import os
import subprocess
import hashlib
def hashfile(fname, blocksize=65536):
    """Return the hexadecimal MD5 digest of the file at ``fname``.

    The file is opened in binary mode and consumed ``blocksize`` bytes at a
    time, so the whole content is never held in memory at once.
    """
    digest = hashlib.md5()
    with open(fname, 'rb') as stream:
        # Two-argument iter() keeps calling read() until it yields b'' (EOF).
        for chunk in iter(lambda: stream.read(blocksize), b''):
            digest.update(chunk)
    return digest.hexdigest()
def splitData(d):
    """Cut one fixed-width text line ``d`` into its four columns.

    Returns a dict with the keys ``id``, ``title``, ``size`` and ``date``.
    ``title`` and ``size`` have surrounding whitespace removed; ``id`` and
    ``date`` are returned exactly as sliced (``date`` keeps any trailing
    newline).  The column offsets presumably match the ``drive list``
    output this file feeds in -- verify against the gdrive version in use.
    """
    file_id = d[:28]
    title = d[31:74].strip()
    size = d[74:85].strip()
    date = d[85:]
    return dict(id=file_id, title=title, size=size, date=date)
def getId(folder, parent):
    """Return the Drive id of the entry named like ``folder`` under ``parent``.

    Runs ``drive list -n -q <query>`` where the query matches on the
    basename of ``folder`` (double quotes escaped with a backslash) and on
    ``parent`` being one of the entry's parents.  The command line is
    echoed to stdout before it runs.

    Returns the ``id`` column of the first output line, or ``''`` when the
    command prints nothing.
    """
    title = os.path.basename(folder).replace('"', '\\"')
    args = ['drive', 'list', '-n', '-q',
            'title="%s" and \'%s\' in parents' % (title, parent)]
    print(" ".join(args))
    # Text mode keeps the id a str on Python 3 (a bytes id would be
    # formatted as "b'...'" in later queries); communicate() reads the
    # pipe to EOF, closes it and reaps the child.
    proc = subprocess.Popen(args, stdout=subprocess.PIPE,
                            universal_newlines=True)
    output, _ = proc.communicate()
    lines = output.splitlines(True)
    if not lines:
        return ''
    return splitData(lines[0])['id']
def createFolder(folder, parent=None):
    """Create a Drive folder named after the basename of ``folder``.

    Runs ``drive folder -t <name>`` and adds ``-p <parent>`` when ``parent``
    is truthy.  The command line is echoed to stdout before it runs.

    Returns the new folder's id, taken from a first output line of the form
    ``Id: <value>``.  Returns ``''`` when the command prints nothing or the
    first line has a different label, so callers can treat an empty string
    as "creation failed".
    """
    args = ['drive', 'folder', '-t', os.path.basename(folder)]
    if parent:
        args += ['-p', parent]
    print(" ".join(args))
    # Text mode yields str on Python 3; communicate() drains and closes the
    # pipe and waits for the child instead of leaving it unreaped.
    proc = subprocess.Popen(args, stdout=subprocess.PIPE,
                            universal_newlines=True)
    output, _ = proc.communicate()
    lines = output.splitlines()
    if not lines:
        # Previously an IndexError; '' is the failure value callers expect.
        return ''
    label, _sep, new_id = lines[0].partition(': ')
    if label != 'Id':
        return ''
    return new_id.strip()
def getMD5(_id):
    """Return the MD5 checksum that ``drive info -i <_id>`` reports.

    The output is scanned for the first line of the form
    ``Md5sum: <value>``; the value is returned with surrounding whitespace
    removed.  Returns ``''`` when no such line is printed.
    """
    args = ['drive', 'info', '-i', _id]
    # Text mode yields str on Python 3; communicate() drains and closes the
    # pipe and waits for the child instead of leaving it unreaped.
    proc = subprocess.Popen(args, stdout=subprocess.PIPE,
                            universal_newlines=True)
    output, _ = proc.communicate()
    # Match on the label rather than a fixed line number so a different
    # field order in the tool's output does not hide the checksum.
    for line in output.splitlines():
        label, _sep, value = line.partition(': ')
        if label == 'Md5sum':
            return value.strip()
    return ''
def down(folder_name, parent_id):
    """Upload the local tree rooted at ``folder_name`` into Drive folder ``parent_id``.

    Walks ``folder_name`` top-down.  For every local directory the matching
    remote folder is looked up with ``getId`` and created with
    ``createFolder`` when missing.  For every file:

    * if its full path is already listed in ``processed.log`` (in the
      current working directory) it is skipped;
    * if a remote entry with the same name exists and its MD5 equals the
      local MD5, the file is logged as processed and skipped;
    * if the remote MD5 is known but differs, the remote file is deleted;
    * otherwise (and after a delete) the file is uploaded with
      ``drive upload`` and logged as processed.

    Raises ``Exception`` when a remote folder can neither be found nor
    created.
    """
    # Local directory path -> Drive folder id.  Seeded so that the walk root
    # resolves its parent to the id supplied by the caller.
    folder_ids = {
        os.path.dirname(folder_name): parent_id
    }
    current_id = False
    try:
        with open('processed.log') as file_io:
            previous_process = file_io.readlines()
    except IOError:
        previous_process = []
    # Log line -> index of its first occurrence: O(1) membership test while
    # still being able to report the line number, as list.index() did.
    processed_lines = {}
    for index, line in enumerate(previous_process):
        processed_lines.setdefault(line, index)

    # Text append mode: the entries written below are str, which a binary
    # handle rejects on Python 3.
    with open('processed.log', 'a') as processed_log:
        for dirName, _subdirs, fileList in os.walk(folder_name):
            parent_id = folder_ids.get(os.path.dirname(dirName))
            if parent_id:
                current_id = getId(dirName, parent_id)
            if not current_id:
                current_id = createFolder(dirName, parent_id)
            if not current_id:
                raise Exception("Unexplicable error creating folder %s under %s" % (dirName, parent_id))
            print('Found local directory: %s, parentId: %s, currentId: %s' % (dirName, parent_id, current_id))
            folder_ids[dirName] = current_id

            for fname in fileList:
                fullname = os.path.join(dirName, fname)
                entry = fullname + '\n'
                if entry in processed_lines:
                    print("File %s (line %s) marked as processed, skipping" % (
                        fname, processed_lines[entry]))
                    continue

                fileid = getId(fname, current_id)
                if fileid:
                    remote_hash = getMD5(fileid)
                    local_hash = hashfile(fullname)
                    print("Local file %s; id %s; remote md5 %s; local md5 %s" % (fname, fileid, remote_hash,
                                                                                 local_hash))
                    if remote_hash == local_hash:  # Ideally we should resume...
                        print("Same MD5, skipping")
                        processed_log.write(entry)
                        # Flush so an interrupted run keeps its progress.
                        processed_log.flush()
                        continue
                    elif remote_hash:
                        args = ['drive', 'delete', '-i', fileid]
                        print("Deleting non-matching file")
                        subprocess.call(args)

                # In this case we should upload
                args = ['drive', 'upload', '-f', fullname, '-p', current_id]
                print("Uploading local %s" % fullname)
                if subprocess.call(args) != 0:
                    # Do not record a failed upload, otherwise the next run
                    # would skip the file for good.  NOTE(review): assumes
                    # the drive CLI exits non-zero on failure -- confirm.
                    print("Upload of %s failed, not marking as processed" % fullname)
                    continue
                processed_log.write(entry)
                processed_log.flush()
# Script entry point: ``<script> <local_folder> [parent_folder_id]``.
# Without a parent id the tree is uploaded under the Drive folder 'root'.
if __name__ == '__main__':
    if len(sys.argv) < 2:
        # A missing argument used to surface as a bare IndexError traceback.
        sys.stderr.write('Usage: %s <local_folder> [parent_folder_id]\n' % sys.argv[0])
        sys.exit(1)
    # Drop trailing slashes so dirname/basename see the folder itself.
    foldername = sys.argv[1].rstrip('/')
    if len(sys.argv) == 3:  # Upload to parent
        down(foldername, sys.argv[2])
    else:
        down(foldername, 'root')
@pgcd
Copy link
Author

pgcd commented Apr 8, 2015

I'm using this with @prasmussen's gdrive (https://github.com/prasmussen/gdrive) to avoid re-uploading the same files over and over again when my connection drops. It's extremely rough and untested beyond what I'm doing right now, so YMMV.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment