Created
October 13, 2021 18:17
-
-
Save asitang/3f5ca00f44953b28adcd9c8a97f372a5 to your computer and use it in GitHub Desktop.
LabCAS Download Script
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# encoding: utf-8 | |
# | |
# Sample LabCAS Download script | |
# | |
# To run this, you'll need Python 3 with the `requests` package. The easiest | |
# way to do this is with a "virtual environment" by running: | |
# | |
# $ python3 -m venv venv | |
# $ cd venv | |
# $ bin/pip install --quiet --upgrade pip requests | |
# | |
# Then, set environment variables as follows: | |
# | |
# - LABCAS_ID: the name of the collection or dataset you want to download; if | |
# unset, it defaults to `Automated_Quantitative_Measures_of_Breast_Density_Data/C0250/MASK` | |
# - TARGET_DIR: the local directory in which to save the data; if unset, it | |
# defaults to the current directory | |
# - LABCAS_USERNAME: the username of the LabCAS account to use to fetch the data | |
# - LABCAS_PASSWORD: the credential that authenticates `LABCAS_USERNAME` | |
# - CONSORTIUM: 'mcl' or 'edrn'. Defaults to 'edrn' | |
# Created by Asitang Mishra, Asitang.Mishra@jpl.nasa.gov | |
# Refined by https://github.com/nutjob4life | |
import requests | |
import os | |
import urllib.parse | |
# Configuration is taken from environment variables; every setting has a
# default so the script also runs with nothing set.
# LABCAS_ID: collection or dataset to download.
labcas_id = os.getenv('LABCAS_ID', "Automated_Quantitative_Measures_of_Breast_Density_Data/C0250/MASK")
# TARGET_DIR: local directory the files are saved under.
target_dir = os.getenv('TARGET_DIR', '.')
# LABCAS_USERNAME / LABCAS_PASSWORD: account credentials; empty when unset.
labcas_username = os.getenv('LABCAS_USERNAME', '')
labcas_password = os.getenv('LABCAS_PASSWORD', '')
# CONSORTIUM: 'mcl' or 'edrn'; lower-cased because it becomes part of the
# server host name. This must read CONSORTIUM, not LABCAS_USERNAME, otherwise
# the account name would be used as the host prefix.
consortium = os.getenv('CONSORTIUM', 'edrn').lower()
# ============= list all files
# Send credentials only when both a username and a password were supplied;
# otherwise the request is made without authentication.
if labcas_username and labcas_password:
    auth = (labcas_username, labcas_password)
else:
    auth = None
url = "https://" + consortium + "-labcas.jpl.nasa.gov/data-access-api/"
# An ID without a "/" is queried as a collection, anything deeper as a dataset.
request_type = 'datasets' if '/' in labcas_id else 'collections'
request_url = url + request_type + '/list?rows=20000000&q=id:' + labcas_id
response = requests.get(request_url, timeout=10, auth=auth)
if response.ok:
    # The body is one download URL per line; drop blank lines and stray
    # whitespace (including "\r" from CRLF line endings).
    files_list = [line.strip() for line in response.text.splitlines() if line.strip()]
else:
    # Do not treat the body of an error response as a list of download URLs.
    print('LabCAS listing request failed with HTTP status', response.status_code)
    files_list = []
if files_list:
    print('Preparing', len(files_list), 'files to be downloaded....')
else:
    print('No files present or accessible in LabCAS for:', labcas_id)
    print('Please visit https://'+consortium+'-labcas.jpl.nasa.gov/ to find the correct id or check your access to the dataset.')
# ============= download files
for file_url in files_list:
    # The file's relative path is the percent-encoded text after "id=" in the
    # listing entry. Partitioning on the first "id=" (rather than splitting on
    # every "id") keeps paths that themselves contain the letters "id" intact.
    _, marker, encoded_path = file_url.partition('id=')
    if not marker:
        print('Skipping unrecognized listing entry:', file_url)
        continue
    file_rel_path = urllib.parse.unquote(encoded_path)
    print('Downloading:', file_rel_path)
    # Timeout is (connect, read-between-bytes) so a stalled server cannot hang
    # the script forever; the context manager releases the connection.
    with requests.get(file_url, stream=True, auth=auth, timeout=(10, 60)) as response:
        if not response.ok:
            # Do not save an error page under the data file's name.
            print('Failed to download', file_rel_path, '- HTTP status', response.status_code)
            continue
        os.makedirs(os.path.join(target_dir, os.path.dirname(file_rel_path)), exist_ok=True)
        # `with` guarantees the file is flushed and closed, even on error.
        with open(os.path.join(target_dir, file_rel_path), "wb") as handle:
            for chunk in response.iter_content(chunk_size=65536):
                if chunk:  # skip empty keep-alive chunks
                    handle.write(chunk)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment