Downloading files from MyTardis using its RESTful API
#!/usr/bin/env python
"""
The following Python script demonstrates how to download files within a
private MyTardis dataset using API key authentication.
WARNING: This script does not verify MD5 checksums or file sizes, and it
only retrieves the first page of datafile records
(see datafiles['meta']['limit'] and datafiles['meta']['next']).
Sketches addressing both limitations follow the script.
Replace "https://mytardis.example.com" with a real MyTardis URL.
Replace "username:api_key" below with valid credentials for your
MyTardis server, otherwise you will get an HTTP 401 (Unauthorized) error.
Replace 1234 below with the ID of a dataset you want to download. When
viewing a dataset in your web browser, its ID appears in the address bar,
e.g. https://mytardis.example.com/dataset/1234
"""
import os

import requests

mytardis_url = "https://mytardis.example.com"
dataset_id = 1234

# List the datafiles in the dataset (first page only):
url = "%s/api/v1/dataset/%d/files?format=json" \
    % (mytardis_url, dataset_id)
headers = {'Accept': 'application/json',
           'Authorization': 'ApiKey username:api_key'}
response = requests.get(url, headers=headers)
if response.status_code != 200:
    raise Exception("HTTP %d" % response.status_code, response.text)
datafiles = response.json()
# datafiles['meta']['next'] will tell us if there are
# additional pages of datafiles to be retrieved:
print(str(datafiles['meta']))

dataset_dir = "dataset-%d" % dataset_id
if not os.path.exists(dataset_dir):
    os.makedirs(dataset_dir)
for datafile in datafiles['objects']:
    # Recreate the datafile's subdirectory structure locally:
    if datafile['directory'] != "":
        if not os.path.exists(os.path.join(dataset_dir, datafile['directory'])):
            os.makedirs(os.path.join(dataset_dir, datafile['directory']))
    file_path = os.path.join(dataset_dir, datafile['directory'],
                             datafile['filename'])
    print("Downloading %s..." % file_path)
    url = "%s/%sdownload/" % (mytardis_url, datafile['resource_uri'])
    # stream=True avoids loading the whole file into memory:
    response = requests.get(url, headers=headers, stream=True)
    with open(file_path, 'wb') as f:
        for chunk in response.iter_content(chunk_size=512 * 1024):
            if chunk:  # filter out keep-alive chunks
                f.write(chunk)
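
The script above stops after the first page of datafile records. Below is a
minimal sketch of walking the remaining pages, assuming the usual
Tastypie-style pagination where datafiles['meta']['next'] holds a
server-relative URL for the next page (or null on the last page):

import requests

mytardis_url = "https://mytardis.example.com"
dataset_id = 1234
headers = {'Accept': 'application/json',
           'Authorization': 'ApiKey username:api_key'}

datafile_records = []
url = "%s/api/v1/dataset/%d/files?format=json" % (mytardis_url, dataset_id)
while url:
    response = requests.get(url, headers=headers)
    response.raise_for_status()  # raise on HTTP 4xx/5xx
    page = response.json()
    datafile_records.extend(page['objects'])
    # 'next' is a server-relative path, so prepend the server URL
    # before following it:
    next_path = page['meta']['next']
    url = (mytardis_url + next_path) if next_path else None

print("Retrieved %d datafile records" % len(datafile_records))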
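
The download loop also writes files without verifying them. A basic
integrity check could look like the sketch below; it assumes each datafile
record carries 'md5sum' and 'size' fields (field names may differ between
MyTardis versions, so check your server's API schema before relying on
them):

import hashlib
import os

def verify_datafile(path, expected_md5, expected_size):
    """Return True if the file at `path` matches the size and MD5
    checksum reported by the MyTardis API."""
    if os.path.getsize(path) != int(expected_size):
        return False
    md5 = hashlib.md5()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(512 * 1024), b''):
            md5.update(chunk)
    return md5.hexdigest() == expected_md5

# Hypothetical use at the end of the download loop above:
# if not verify_datafile(file_path, datafile['md5sum'], datafile['size']):
#     print("Verification failed for %s" % file_path)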