Skip to content

Instantly share code, notes, and snippets.

@asitang
Created October 13, 2021 18:17
Show Gist options
  • Save asitang/3f5ca00f44953b28adcd9c8a97f372a5 to your computer and use it in GitHub Desktop.
Save asitang/3f5ca00f44953b28adcd9c8a97f372a5 to your computer and use it in GitHub Desktop.
LabCAS Download Script
# encoding: utf-8
#
# Sample LabCAS Download script
#
# To run this, you'll need Python 3 with the `requests` package. The easiest
# way to do this is with a "virtual environment" by running:
#
# $ python3 -m venv venv
# $ cd venv
# $ bin/pip install --quiet --upgrade pip requests
#
# Then, set environment variables as follows:
#
# - LABCAS_ID: the name of the collection or dataset you want to download; if
# unset, it defaults to `Automated_Quantitative_Measures_of_Breast_Density_Data/C0250/MASK`
# - TARGET_DIR: the local directory in which to save the data; if unset, it
# defaults to the current directory
# - LABCAS_USERNAME: the username of the LabCAS account to use to fetch the data
# - LABCAS_PASSWORD: the credential that authenticates `LABCAS_USERNAME`
# - CONSORTIUM: 'mcl' or 'edrn'. Defaults to 'edrn'
# Created by Asitang Mishra, Asitang.Mishra@jpl.nasa.gov
# Refined by https://github.com/nutjob4life
import requests
import os
import urllib.parse
# Use environment variables (see the header comment for what each one means).
# Every setting has a default, so the script also runs with nothing set.
labcas_id = os.getenv('LABCAS_ID', "Automated_Quantitative_Measures_of_Breast_Density_Data/C0250/MASK")
target_dir = os.getenv('TARGET_DIR', '.')
# Empty username/password means "no credentials supplied".
labcas_username = os.getenv('LABCAS_USERNAME', '')
labcas_password = os.getenv('LABCAS_PASSWORD', '')
# Read from CONSORTIUM (not LABCAS_USERNAME): this value becomes the
# hostname prefix of the LabCAS server, so it is lower-cased.
consortium = os.getenv('CONSORTIUM', 'edrn').lower()
# ============= list all files
# Send HTTP basic-auth credentials only when both a username and a password
# were supplied; otherwise make the requests without authentication.
if labcas_username == '' or labcas_password == '':
    auth = None
else:
    auth = (labcas_username, labcas_password)
url = "https://" + consortium + "-labcas.jpl.nasa.gov/data-access-api/"
# An ID without any '/' is queried as a collection; anything deeper is
# queried as a dataset.
request_type = 'collections' if '/' not in labcas_id else 'datasets'
request_url = url + request_type + '/list?rows=20000000&q=id:' + labcas_id
response = requests.get(request_url, timeout=10, auth=auth)
if response.ok:
    # The body is read as one download URL per line; blank lines are dropped
    # and stray whitespace (e.g. a trailing '\r') is trimmed from each URL.
    files_list = [line.strip() for line in response.text.splitlines() if line.strip()]
else:
    # An error response body is not a list of URLs, so it must not be
    # handed to the download step.
    print('LabCAS list request failed with HTTP status', response.status_code)
    files_list = []
if files_list:
    print('Preparing', len(files_list), 'files to be downloaded....')
else:
    print('No files present or accessible in LabCAS for:', labcas_id)
    print('Please visit https://'+consortium+'-labcas.jpl.nasa.gov/ to find the correct id or check your access to the dataset.')
# ============= download files
# Each entry of files_list is a download URL; the part after "id=" is the
# (URL-encoded) path of the file, which is reused as the path under target_dir.
for file_url in files_list:
    # maxsplit=1 keeps everything after the first "id", so a path that itself
    # contains "id" is not truncated; [1:] drops the character right after
    # "id" (the "=" of the query parameter).
    file_rel_path = urllib.parse.unquote(file_url.split('id', 1)[1][1:])
    print('Downloading:', file_rel_path)
    # NOTE(review): file_rel_path comes from the server response and is joined
    # to target_dir unchecked; confirm the server never returns absolute or
    # '..' paths.
    # The context manager releases the streamed connection even on errors.
    with requests.get(file_url, stream=True, auth=auth) as response:
        if not response.ok:
            # Skip instead of saving an HTTP error page as if it were data.
            print('Failed to download', file_rel_path, '- HTTP status', response.status_code)
            continue
        os.makedirs(os.path.join(target_dir, os.path.dirname(file_rel_path)), exist_ok=True)
        # "with" guarantees the file is flushed and closed after each download.
        with open(os.path.join(target_dir, file_rel_path), "wb") as handle:
            # 64 KiB chunks avoid the overhead of many tiny writes.
            for chunk in response.iter_content(chunk_size=64 * 1024):
                if chunk:
                    handle.write(chunk)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment