Downloading files from MyTardis using its RESTful API
#!/usr/bin/env python
"""
The following Python script demonstrates how to download files within a
private MyTardis dataset using API key authentication.
WARNING: This script does not verify MD5 checksums or file sizes, and it
only retrieves the first page of datafile records
(see datafiles['meta']['limit'] and datafiles['meta']['next']).
Sketches addressing both limitations follow the script.
Replace "https://mytardis.example.com" with a real MyTardis URL.
Replace "username:api_key" below with valid credentials for your
MyTardis server, otherwise you will get an HTTP 401 (Unauthorized) error.
Replace 1234 below with the ID of a dataset you want to download. When
viewing a dataset in your web browser, its ID appears in the address bar,
e.g. https://mytardis.example.com/dataset/1234
"""
import os

import requests

mytardis_url = "https://mytardis.example.com"
dataset_id = 1234

# List the datafiles in the dataset (first page only):
url = "%s/api/v1/dataset/%d/files?format=json" \
    % (mytardis_url, dataset_id)
headers = {'Accept': 'application/json',
           'Authorization': 'ApiKey username:api_key'}
response = requests.get(url, headers=headers)
if response.status_code != 200:
    raise Exception("HTTP %d" % response.status_code, response.text)
datafiles = response.json()
# datafiles['meta']['next'] will tell us if there are
# additional pages of datafiles to be retrieved:
print(str(datafiles['meta']))

dataset_dir = "dataset-%d" % dataset_id
if not os.path.exists(dataset_dir):
    os.makedirs(dataset_dir)
for datafile in datafiles['objects']:
    # Recreate the datafile's subdirectory structure locally:
    if datafile['directory'] != "":
        if not os.path.exists(os.path.join(dataset_dir, datafile['directory'])):
            os.makedirs(os.path.join(dataset_dir, datafile['directory']))
    file_path = os.path.join(dataset_dir, datafile['directory'],
                             datafile['filename'])
    print("Downloading %s..." % file_path)
    url = "%s/%sdownload/" % (mytardis_url, datafile['resource_uri'])
    # stream=True avoids loading the whole file into memory:
    response = requests.get(url, headers=headers, stream=True)
    with open(file_path, 'wb') as f:
        for chunk in response.iter_content(chunk_size=512 * 1024):
            if chunk:  # filter out keep-alive chunks
                f.write(chunk)
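
The script above stops after the first page of datafile records. Below is a
minimal sketch of walking the remaining pages, assuming the usual
Tastypie-style pagination where datafiles['meta']['next'] holds a
server-relative URL for the next page (or null on the last page):

import requests

mytardis_url = "https://mytardis.example.com"
dataset_id = 1234
headers = {'Accept': 'application/json',
           'Authorization': 'ApiKey username:api_key'}

datafile_records = []
url = "%s/api/v1/dataset/%d/files?format=json" % (mytardis_url, dataset_id)
while url:
    response = requests.get(url, headers=headers)
    response.raise_for_status()  # raise on HTTP 4xx/5xx
    page = response.json()
    datafile_records.extend(page['objects'])
    # 'next' is a server-relative path, so prepend the server URL
    # before following it:
    next_path = page['meta']['next']
    url = (mytardis_url + next_path) if next_path else None

print("Retrieved %d datafile records" % len(datafile_records))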
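
The download loop also writes files without verifying them. A basic
integrity check could look like the sketch below; it assumes each datafile
record carries 'md5sum' and 'size' fields (field names may differ between
MyTardis versions, so check your server's API schema before relying on
them):

import hashlib
import os

def verify_datafile(path, expected_md5, expected_size):
    """Return True if the file at `path` matches the size and MD5
    checksum reported by the MyTardis API."""
    if os.path.getsize(path) != int(expected_size):
        return False
    md5 = hashlib.md5()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(512 * 1024), b''):
            md5.update(chunk)
    return md5.hexdigest() == expected_md5

# Hypothetical use at the end of the download loop above:
# if not verify_datafile(file_path, datafile['md5sum'], datafile['size']):
#     print("Verification failed for %s" % file_path)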