Skip to content

Instantly share code, notes, and snippets.

@larssono
Created June 1, 2017 15:27
Show Gist options
  • Save larssono/ddc769837f0544f56581a192a2bef855 to your computer and use it in GitHub Desktop.
Save larssono/ddc769837f0544f56581a192a2bef855 to your computer and use it in GitHub Desktop.
Get MD5 for a large number of files in Synapse
import synapseclient
import pandas as pd
from multiprocessing.dummy import Pool
def with_progress_bar(func, totalCalls, prefix='', postfix='', isBytes=False):
    """Wrap ``func`` so that every finished call advances a progress bar.

    The returned wrapper forwards its arguments to ``func`` unchanged and
    returns whatever ``func`` returns.  After each call that returns
    normally, a shared counter is incremented and the progress bar is redrawn
    through ``synapseclient.utils.printTransferProgress``.  A call that
    raises propagates its exception and is not counted.

    :param func: Function being wrapped with the progress bar
    :param totalCalls: Total number of items/bytes when completed
    :param prefix: String printed before the progress bar
    :param postfix: String printed after the progress bar
    :param isBytes: A boolean indicating whether to convert bytes to
                    kB, MB, GB etc.
    :returns: A callable with the same call signature as ``func``
    """
    import functools
    from multiprocessing import Value, Lock

    completed = Value('d', 0)
    lock = Lock()

    @functools.wraps(func)
    def progress(*args, **kwargs):
        # Run the wrapped call first, outside the lock, so that concurrent
        # workers are not serialized and the bar only reflects finished work.
        result = func(*args, **kwargs)
        # The lock keeps the counter update and the redraw together so
        # concurrent callers cannot interleave them.
        with lock:
            completed.value += 1
            synapseclient.utils.printTransferProgress(
                completed.value, totalCalls, prefix, postfix, isBytes)
        return result

    return progress
# Log in to Synapse using whatever credentials syn.login() picks up.
syn = synapseclient.Synapse()
syn.login()

# Collect the Synapse id of every file returned by the project query.
ids = [f['file.id'] for f in syn.chunkedQuery("select id from file where projectId=='syn2351328'")]

# downloadFile=False is passed so syn.get is asked not to download the file
# content; only the returned entity objects are used below.
func = with_progress_bar(lambda x: syn.get(x, downloadFile=False), len(ids))

# multiprocessing.dummy.Pool is a thread pool; always close and join it so
# the worker threads are released even when a lookup raises.
mp = Pool(15)
try:
    files = mp.map(func, ids)
finally:
    mp.close()
    mp.join()

# One tab-separated row per file: id, MD5 from the file handle, external URL
# (written as the string 'None' when the handle has no 'externalURL' key).
with open('files_and_md5sum.txt', 'w') as fp:
    for f in files:
        fp.write('%s\t%s\t%s\n' % (f.id, f._file_handle['contentMd5'], f._file_handle.get('externalURL')))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment