Last active
September 7, 2015 01:25
-
-
Save wettenhj/faf0cdbfbdfa133355f0 to your computer and use it in GitHub Desktop.
Downloading files from MyTardis using its RESTful API
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
"""
Download all files in a private MyTardis dataset via its RESTful API.

The following Python script demonstrates how to download files within a
private MyTardis dataset using API key authentication.

WARNING: This script does not check MD5 sums or verify file sizes etc. and
it only retrieves the first page of datafile records
(see datafiles['meta']['limit'] and datafiles['meta']['next']).

Replace "https://mytardis.example.com" with a real MyTardis URL.
Replace "username:api_key" below with valid credentials for your
MyTardis server, otherwise you will get an HTTP 401 (Unauthorized) error.
Replace 1234 below with a dataset ID you want to download. When viewing
a dataset in your web browser, you can see its ID in your browser's address
bar, e.g. http://mytardis.example.com/dataset/1234
"""
import os

import requests

mytardis_url = "https://mytardis.example.com"
dataset_id = 1234

# List the datafile records belonging to the dataset (first page only).
url = "%s/api/v1/dataset/%d/files?format=json" \
    % (mytardis_url, dataset_id)
headers = {'Accept': 'application/json',
           'Authorization': 'ApiKey username:api_key'}
response = requests.get(url, headers=headers)
if response.status_code != 200:
    raise Exception("HTTP %d" % response.status_code, response.text)
datafiles = response.json()

# datafiles['meta']['next'] will tell us if there are
# additional pages of datafiles to be retrieved:
print(str(datafiles['meta']))

dataset_dir = "dataset-%d" % dataset_id
if not os.path.exists(dataset_dir):
    os.makedirs(dataset_dir)

for datafile in datafiles['objects']:
    # Recreate the datafile's server-side subdirectory structure locally.
    if datafile['directory'] != "":
        target_dir = os.path.join(dataset_dir, datafile['directory'])
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)
    else:
        target_dir = dataset_dir
    target_path = os.path.join(target_dir, datafile['filename'])
    print("Downloading %s..." % target_path)
    # resource_uri already starts with a slash (e.g. "/api/v1/dataset_file/1/"),
    # so it is appended directly to avoid a double slash in the URL.
    url = "%s%sdownload/" % (mytardis_url, datafile['resource_uri'])
    # stream=True keeps the body unbuffered so iter_content() below really
    # streams the file to disk instead of holding it all in memory first.
    response = requests.get(url, headers=headers, stream=True)
    if response.status_code != 200:
        raise Exception("HTTP %d" % response.status_code, response.text)
    # "with" guarantees the file handle is closed even if a write raises.
    with open(target_path, 'wb') as f:
        for chunk in response.iter_content(chunk_size=512 * 1024):
            if chunk:  # filter out keep-alive new chunks
                f.write(chunk)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment