Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save binarybottle/44f356a2ec8fa46fe3a3 to your computer and use it in GitHub Desktop.
Save binarybottle/44f356a2ec8fa46fe3a3 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
"""
Recursively upload a directory structure to a Synapse project.

synapse_upload_directory_tree <PROJECT ID> --user <EMAIL> --pass <PASS> --top <TOP DIRECTORY>
synapse_upload_directory_tree syn3207152 --user <EMAIL> --pass XXXXXX --top brains
synapse_upload_directory_tree syn3207152 --user <EMAIL> --pass XXXXXX --top brains --file-db synapse_file_upload_directory_tree.sqlite

After Larsson's 2014/10/24 GitHub Gist.

The SQLite 3 database file specified in the --file-db acts as a local cache of
files that have already been uploaded. To bypass use of the cache, specify a
different filename or delete the database file. The file-db defaults to:
synapse_file_upload_directory_tree.sqlite

Copyright 2015, Sage Bionetworks, Apache v2.0 License
"""
import os
import sqlite3
import synapseclient
from synapseclient import File, Folder
import argparse
# Any file whose name contains one of these characters is skipped:
ignore_strings = list('~!@#$%^&*()+`=')

# File-name suffixes paired with the file type label each one implies.
# `types` and `type_names` are kept as parallel lists (same order, same
# length) because the rest of the script indexes them together.
_suffix_labels = [
    ('.nii', 'nifti'),
    ('.nii.gz', 'nifti'),
    ('.vtk', 'vtk'),
    ('.mgz', 'mgz'),
    ('.dcm', 'dicom'),
    ('.tif', 'tiff'),
    ('.tiff', 'tiff'),
    ('.jpeg', 'jpeg'),
    ('.jpg', 'jpeg'),
]
types = [suffix for suffix, _ in _suffix_labels]
type_names = [label for _, label in _suffix_labels]
# Command-line arguments:
parser = argparse.ArgumentParser(
    description="Recursively upload a directory structure to a Synapse project.",
    # Widen the option column so the long option names fit on one help line.
    formatter_class=lambda prog:
        argparse.HelpFormatter(prog, max_help_position=40))
# Positional: the Synapse project that receives the uploaded tree.
parser.add_argument("project_id",
                    help='Synapse Project ID, such as "syn32071528"')
parser.add_argument("-u", "--user", help="Synapse User Name", default=None)
parser.add_argument("-p", "--password", help="Synapse Password", default=None)
parser.add_argument("--file-db", help="SQLite database of tracking last modified dates of files to upload", default="synapse_file_upload_directory_tree.sqlite")
# Root of the local directory tree to upload (defaults to the current directory).
parser.add_argument("--top",
                    help='Topmost directory',
                    default='.', type=str, metavar='STR')
args = parser.parse_args()

project_id = args.project_id
user = args.user
password = args.password
start_path = args.top

# Log in to Synapse and open (or create) the local upload cache.
syn = synapseclient.login(user, password)  #(silent=True)
conn = sqlite3.connect(args.file_db)
# Get cached file and directory info from SQLite.
c = conn.cursor()
# `path` is declared PRIMARY KEY so that the INSERT OR REPLACE statements used
# by this script overwrite the existing row for a path instead of appending a
# duplicate row on every run. Databases created before this constraint was
# added keep their old schema (CREATE TABLE IF NOT EXISTS leaves them as-is).
c.execute('CREATE TABLE IF NOT EXISTS files (path TEXT PRIMARY KEY, mtime REAL);')
c.execute('CREATE TABLE IF NOT EXISTS folders (path TEXT PRIMARY KEY, synapse_id TEXT);')

# Map each previously uploaded file path to the modification time recorded
# for it. Reading in rowid order makes the most recently inserted row win if
# an older database contains several rows for the same path.
print('Reading file table...')
previous_uploads = {}
result = c.execute('SELECT path, mtime from files ORDER BY rowid;')
for row in result:
    previous_uploads[row[0]] = row[1]

# Map each known directory path to the Synapse ID of its container; the
# starting directory itself maps to the project.
print('Reading folder table...')
parents = {start_path: project_id}
result = c.execute('SELECT path, synapse_id from folders ORDER BY rowid;')
for row in result:
    parents[row[0]] = row[1]
# Start walking through the source directory tree from start_path.
# os.walk visits directories top-down, so every directory's Synapse parent is
# registered in `parents` before the directory itself is visited.
for dirpath, dirnames, filenames in os.walk(start_path):

    # Make each subdirectory (and store its path):
    for dirname in dirnames:
        path = os.path.join(dirpath, dirname)
        if path not in parents:
            print('Creating {0}...'.format(dirname))
            f = syn.store(Folder(dirname, parent=parents[dirpath]))
            parents[path] = f.id
            c = conn.cursor()
            # Bound parameters (not string formatting) so that paths
            # containing quote characters cannot break the statement.
            c.execute('INSERT OR REPLACE INTO folders (path, synapse_id) VALUES (?, ?)',
                      (path, f.id))
            # Commit right away so the cache survives an interrupted run.
            conn.commit()

    # Loop through the file names:
    for filename in filenames:

        # Make sure each file name does not contain any ignore_strings:
        if any(str0 in filename for str0 in ignore_strings):
            continue

        path = os.path.join(dirpath, filename)
        stat = os.stat(path)
        # Empty files are not uploaded.
        if stat.st_size <= 0:
            continue

        # Skip files whose modification time is not newer than the one
        # recorded at the last upload. A path that was never uploaded has no
        # cached mtime (None) and must not be compared numerically.
        mtime = stat.st_mtime
        previous_mtime = previous_uploads.get(path, None)
        if previous_mtime is not None and mtime <= previous_mtime:
            continue

        # Upload the file to the correct path:
        print('Uploading {0}...'.format(path))
        f = File(path, parent=parents[dirpath], name=filename)

        # Annotate the file on Synapse with a type inferred from its suffix:
        for str2, type_name in zip(types, type_names):
            if filename.endswith(str2):
                f.fileType = type_name

        # Optionally add ", used='http://..'" to the store() call
        # to specify the source location.
        f = syn.store(f)

        # Record the upload so the file is skipped until it changes again.
        c = conn.cursor()
        c.execute('INSERT OR REPLACE INTO files (path, mtime) VALUES (?, ?)',
                  (path, mtime))
        conn.commit()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment