# asitang/labcas-download.py

## labcas-download.py
# encoding: utf-8
#
# Sample LabCAS Download script
#
# To run this, you'll need Python 3 with the `requests` package. The easiest
# way to do this is with a "virtual environment" by running:
#
# $ python3 -m venv venv
# $ cd venv
# $ bin/pip install --quiet --upgrade pip requests
#
# Then, set environment variables as follows:
#
# - LABCAS_ID: the name of the collection or dataset you want to download; if
#   unset, it defaults to `Automated_Quantitative_Measures_of_Breast_Density_Data/C0250/MASK`
# - TARGET_DIR: the local directory in which to save the data; if unset, it
#   defaults to the current directory
# - LABCAS_USERNAME: the username of the LabCAS account to use to fetch the data
# - LABCAS_PASSWORD: the credential that authenticates `LABCAS_USERNAME`
# - CONSORTIUM: 'mcl' or 'edrn'. Defaults to 'edrn'
# Created by Asitang Mishra, Asitang.Mishra@jpl.nasa.gov
# Refined by https://github.com/nutjob4life

import requests
import os
import urllib.parse

# Use environment variables
DEFAULT_LABCAS_ID = "Automated_Quantitative_Measures_of_Breast_Density_Data/C0250/MASK"
# Number of bytes pulled from the response and written to disk per iteration.
DOWNLOAD_CHUNK_SIZE = 64 * 1024


def read_settings(environ):
    """Read the script settings from a mapping of environment variables.

    Returns a tuple ``(labcas_id, target_dir, username, password, consortium)``.
    Unset variables fall back to the defaults described in the header of this
    file. The consortium comes from ``CONSORTIUM`` and is lower-cased because
    it becomes part of the server host name.
    """
    return (
        environ.get('LABCAS_ID', DEFAULT_LABCAS_ID),
        environ.get('TARGET_DIR', '.'),
        environ.get('LABCAS_USERNAME', ''),
        environ.get('LABCAS_PASSWORD', ''),
        environ.get('CONSORTIUM', 'edrn').lower(),
    )


def make_auth(username, password):
    """Return a ``(username, password)`` tuple, or None if either is empty."""
    if username == '' or password == '':
        return None
    return (username, password)


def build_list_url(consortium, labcas_id):
    """Return the URL that lists every file under `labcas_id`.

    An id without a ``/`` is queried as a collection; anything else is
    queried as a dataset.
    """
    url = "https://" + consortium + "-labcas.jpl.nasa.gov/data-access-api/"
    request_type = 'collections' if '/' not in labcas_id else 'datasets'
    return url + request_type + '/list?rows=20000000&q=id:' + labcas_id


def parse_file_list(text):
    """Split a listing response body into one download URL per entry.

    Blank lines are dropped and surrounding whitespace (including a stray
    carriage return) is removed so every entry is directly usable as a URL.
    """
    return [line.strip() for line in text.splitlines() if line.strip()]


def relative_path_from_url(file_url):
    """Return the decoded relative file path carried by a download URL.

    The path is everything after the first ``id`` marker and the single
    separator character that follows it. Only the first occurrence is used,
    so paths that themselves contain the letters "id" are kept whole.

    Raises ValueError if the URL has no ``id`` marker.
    """
    _, marker, tail = file_url.partition('id')
    if not marker or not tail:
        raise ValueError('No file id found in download URL: ' + file_url)
    return urllib.parse.unquote(tail[1:])


def list_files(request_url, auth):
    """Fetch the listing at `request_url` and return the download URLs.

    Raises requests.HTTPError when the server answers with an error status,
    so an error page is never mistaken for a list of files.
    """
    response = requests.get(request_url, timeout=10, auth=auth)
    response.raise_for_status()
    return parse_file_list(response.text)


def download_file(file_url, target_dir, auth):
    """Stream one file into `target_dir`, recreating its relative path.

    Returns True when the file was written, or False when the server
    answered with an error status and the file was skipped.
    """
    file_rel_path = relative_path_from_url(file_url)
    print('Downloading:', file_rel_path)
    # The context managers release the connection and close the file even if
    # the transfer fails part-way through.
    with requests.get(file_url, stream=True, auth=auth) as response:
        if not response.ok:
            # Do not save an error page under the data file's name.
            print('Skipping', file_rel_path, '- server answered HTTP', response.status_code)
            return False
        os.makedirs(os.path.join(target_dir, os.path.dirname(file_rel_path)), exist_ok=True)
        with open(os.path.join(target_dir, file_rel_path), "wb") as handle:
            for chunk in response.iter_content(chunk_size=DOWNLOAD_CHUNK_SIZE):
                if chunk:
                    handle.write(chunk)
    return True


labcas_id, target_dir, labcas_username, labcas_password, consortium = read_settings(os.environ)


def main():
    """List the files for the configured LabCAS id and download each one."""
    # ============= list all files
    auth = make_auth(labcas_username, labcas_password)
    files_list = list_files(build_list_url(consortium, labcas_id), auth)
    if files_list:
        print('Preparing', len(files_list), 'files to be downloaded....')
    else:
        print('No files present or accessible in LabCAS for:', labcas_id)
        print('Please visit https://'+consortium+'-labcas.jpl.nasa.gov/ to find the correct id or check your access to the dataset.')

    # ============= download files
    for file_url in files_list:
        download_file(file_url, target_dir, auth)


if __name__ == '__main__':
    main()