Skip to content

Instantly share code, notes, and snippets.

@mkyt
Created March 30, 2021 05:24
Show Gist options
  • Save mkyt/503b4209efefbf67ce21b23288d77240 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""HCP Downloader

This script fetches the preprocessed BOLD time series in grayordinate space
for all subjects from the Human Connectome Project (HCP) S3 repository.
"""
import sys
import os
import os.path
import itertools
import boto3
from botocore.exceptions import ClientError
# Bucket and key prefix under which the per-subject folders are listed.
BUCKET_NAME = 'hcp-openaccess'
PREFIX = 'HCP_1200/'

# Module-wide S3 client shared by the listing and download functions.
# The two credential strings are placeholders: replace them with real
# AWS credentials before running the script.
s3 = boto3.client(
    's3',
    aws_access_key_id='YOUR_ACCESS_KEY_ID',
    aws_secret_access_key='YOUR_SECRET_ACCESS_KEY',
)
def list_subjects():
    """Return the IDs of all subjects found directly under ``PREFIX``.

    The bucket is listed with ``Delimiter='/'`` so that every immediate
    "sub-directory" of ``PREFIX`` is reported as a common prefix.  A subject
    ID is that common prefix with the leading ``PREFIX`` and the trailing
    slash removed.

    Returns:
        list[str]: Subject IDs in the order the listing returns them; an
        empty list when nothing is found.
    """
    paginator = s3.get_paginator('list_objects_v2')
    pages = paginator.paginate(Bucket=BUCKET_NAME, Prefix=PREFIX, Delimiter='/')
    return [
        entry['Prefix'][len(PREFIX):-1]
        for page in pages
        # S3 leaves 'CommonPrefixes' out of a page that has none, so a plain
        # page['CommonPrefixes'] lookup would raise KeyError on such pages.
        for entry in page.get('CommonPrefixes', [])
    ]
def list_files(subj_id):
    """Return the keys of every object stored for one subject.

    Args:
        subj_id: Subject ID, i.e. the folder name directly under ``PREFIX``.

    Returns:
        list[str]: Full object keys (still including ``PREFIX`` and the
        subject ID) in listing order; an empty list when the subject has
        no objects.
    """
    paginator = s3.get_paginator('list_objects_v2')
    keys = []
    for page in paginator.paginate(Bucket=BUCKET_NAME, Prefix=PREFIX + subj_id + '/'):
        # 'Contents' is absent from a page that holds no objects, so fall
        # back to an empty list instead of raising KeyError.
        keys.extend(obj['Key'] for obj in page.get('Contents', []))
    return keys
def download_file(subj_id, key_path, out_path):
    """Download one object belonging to a subject into a local file.

    The object key is ``PREFIX + subj_id + '/' + key_path``.  Nothing is
    downloaded when ``out_path`` already exists.  Missing parent directories
    of ``out_path`` are created.

    The data is first written to ``out_path + '.part'`` and only renamed to
    ``out_path`` once the transfer has finished, so an interrupted download
    can never be mistaken for a complete file by the "already exists" check
    on a later run.

    Args:
        subj_id: Subject ID (folder name directly under ``PREFIX``).
        key_path: Object key relative to the subject's folder.
        out_path: Local destination path.

    Raises:
        Any exception other than ``ClientError`` raised during the transfer
        is propagated after the partial file has been removed.  A
        ``ClientError`` is reported on stdout and the file is skipped.
    """
    print(f'obtaining {key_path} for subject "{subj_id}"...')
    if os.path.exists(out_path):
        print('file already exists. skipping...')
        return

    # dirname() is empty for a bare file name, in which case there is
    # nothing to create.
    out_dir = os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    part_path = out_path + '.part'
    try:
        with open(part_path, 'wb') as fp:
            s3.download_fileobj(BUCKET_NAME, PREFIX + subj_id + '/' + key_path, fp)
        # Atomic on the same filesystem: out_path appears only when complete.
        os.replace(part_path, out_path)
    except ClientError as err:
        code = err.response.get('Error', {}).get('Code', '')
        if code in ('404', 'NoSuchKey'):
            print('file not found. skipping...')
        else:
            # Do not mislabel e.g. permission or throttling errors.
            print(f'download failed ({code}). skipping...')
    finally:
        # After a successful rename the partial file no longer exists; on
        # any failure (including Ctrl-C) it is discarded here.
        if os.path.exists(part_path):
            os.remove(part_path)
def download_grayordinate_bold(subj_id, base_path):
    """Download the four resting-state grayordinate BOLD files of a subject.

    One file is requested for every combination of session (``1``, ``2``)
    and run label (``LR``, ``RL``).  Each is stored as
    ``<base_path>/<subj_id>/rfMRI_<session>_<label>.dtseries.nii``.

    Args:
        subj_id: Subject ID whose files are fetched.
        base_path: Local directory under which the subject folder is placed.
    """
    for session in ('1', '2'):
        for label in ('LR', 'RL'):
            remote_key = (
                f'MNINonLinear/Results/rfMRI_REST{session}_{label}/'
                f'rfMRI_REST{session}_{label}_Atlas_MSMAll_hp2000_clean.dtseries.nii'
            )
            local_name = f'rfMRI_{session}_{label}.dtseries.nii'
            local_path = os.sep.join((base_path, subj_id, local_name))
            download_file(subj_id, remote_key, local_path)
if __name__ == '__main__':
    # The optional first command-line argument is the output directory;
    # without it, files are written below the current directory.
    base_path = sys.argv[1] if len(sys.argv) > 1 else '.'

    # Fetch the BOLD files subject by subject, in listing order.
    for subj in list_subjects():
        download_grayordinate_bold(subj, base_path)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment