Skip to content

Instantly share code, notes, and snippets.

@astraw
Created October 23, 2011 07:25
Show Gist options
  • Save astraw/1306990 to your computer and use it in GitHub Desktop.
Save astraw/1306990 to your computer and use it in GitHub Desktop.
Query and download data from the Strawlab tethered-data server.
import json
import os
import tempfile

import requests
# Name of the experiment script whose datasets are requested; main() passes it
# to get_ids_for_script(), which matches it against 'experiment_script'.
SCRIPT_NAME = 'hausen4.py'
# Local directory the .h5 files are saved to; the leading '~' is expanded
# with os.path.expanduser() in main().
DIRNAME = '~/hausen4_datasets_tmp'
def get_auth():
    """Return the ``(user, password)`` pair taken from the environment.

    The values are read from the ``STRAWLAB_USER`` and ``STRAWLAB_PASSWORD``
    environment variables, in that order.  A variable that is not set
    yields ``None`` in its slot of the returned tuple.
    """
    variable_names = ('STRAWLAB_USER', 'STRAWLAB_PASSWORD')
    return tuple(os.environ.get(name) for name in variable_names)
# Credentials are read once, at import time, and passed as `auth=` to every
# HTTP request made by this module.
AUTH = get_auth()
def get_ids_for_script(script_name, created_after='2011-10-12T00:00:00+00:00'):
    """Return the ids of tethered-data documents recorded by *script_name*.

    Posts a JSON query to the datastore's ``query`` endpoint selecting
    documents whose ``experiment_script`` equals *script_name* and whose
    ``created_on`` is greater than *created_after*.

    Args:
        script_name: value matched against the ``experiment_script`` field.
        created_after: exclusive lower bound for ``created_on``, formatted
            as ``"%Y-%m-%dT%H:%M:%S+00:00"``.  Defaults to the cutoff that
            was previously hard-coded.

    Returns:
        The ``ids`` entry of the JSON document sent back by the server.

    Raises:
        RuntimeError: if the server does not answer with HTTP 200.  The
            response body is saved to a temporary file whose path is given
            in the exception message.
    """
    query = {
        'experiment_script': script_name,
        'created_on': {'$gt': created_after},
    }
    r = requests.post(
        'http://strawlab.imp.ac.at/datastore/tethered-data/query',
        data=json.dumps(query),
        # Real HTTP header field names ('contentType' / 'accepts' are jQuery
        # option names, not header fields), so the declared media types are
        # actually visible to the server.
        headers={
            'Content-Type': 'application/json',
            'Accept': 'application/json',
        },
        auth=AUTH,
    )
    if r.status_code != 200:
        # Keep the server's reply for inspection.  A uniquely named temporary
        # file avoids a predictable, POSIX-only path, and binary mode accepts
        # the raw response body on both Python 2 and Python 3.
        with tempfile.NamedTemporaryFile(mode='wb', prefix='query-error-',
                                         suffix='.html', delete=False) as fd:
            fd.write(r.content)
            fname = fd.name
        raise RuntimeError('Could not execute query. Web server returned document in %s' % (fname,))
    result = json.loads(r.content)
    return result['ids']
def _get_ok(url):
    """GET *url* with the module credentials; raise RuntimeError unless HTTP 200."""
    r = requests.get(url, auth=AUTH)
    if r.status_code != 200:
        raise RuntimeError('GET %s failed with HTTP status %s' % (url, r.status_code))
    return r


def main():
    """Download every dataset recorded by ``SCRIPT_NAME`` into ``DIRNAME``.

    For each document id returned by ``get_ids_for_script`` the document's
    detail JSON is fetched to learn its ``gridfs_file`` name; the local file
    name is that base name with its extension replaced by ``.h5``.  Files
    that already exist locally are skipped, all others are downloaded.

    Raises:
        RuntimeError: if the id query or any download request does not
            return HTTP 200.
    """
    local_dirname = os.path.expanduser(DIRNAME)
    # Single-argument print(...) produces the same output on Python 2 and 3.
    print('saving files to %s' % (local_dirname,))
    if not os.path.exists(local_dirname):
        os.makedirs(local_dirname)

    base_url = 'http://rothko.imp.univie.ac.at:8000/datastore/tethered-data/'
    for docid in get_ids_for_script(SCRIPT_NAME):
        doc = json.loads(_get_ok(base_url + 'detail/' + docid + '.json').content)
        new_fname = os.path.split(doc['gridfs_file'])[-1]
        new_fname = os.path.splitext(new_fname)[0] + '.h5'  # make sure this is saved as .h5 file
        new_fname_full = os.path.join(local_dirname, new_fname)
        if os.path.exists(new_fname_full):
            print('skipping %s' % (new_fname,))
            continue
        print('downloading %s' % (new_fname,))
        r = _get_ok(base_url + docid + '.h5')
        # Write to a side file and rename only once the data is fully on
        # disk: an interrupted run must not leave a truncated .h5 behind
        # that the exists() check above would skip on the next run.
        partial_fname = new_fname_full + '.part'
        with open(partial_fname, mode='wb') as fd:
            fd.write(r.content)
        os.rename(partial_fname, new_fname_full)
# Script entry point: start the download only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment