@oesteban · Last active April 6, 2024
Submitting many FMRIPREP/MRIQC tasks in Sherlock

Four example sbatch scripts for running fMRIPrep and MRIQC as SLURM job arrays on Stanford's Sherlock cluster: (1) fMRIPrep over a list of subjects, (2) a generic job array driven by a tasks_list.sh file, (3) MRIQC over whole datasets pulled with DataLad, and (4) MRIQC over the subjects of a single dataset.
#!/bin/bash
#
#SBATCH -J fmriprep
#SBATCH --array=1-257%7
#SBATCH --time=24:00:00
#SBATCH -n 1
#SBATCH --cpus-per-task=16
#SBATCH --mem-per-cpu=4G
#SBATCH -p russpold,owners,normal
# Outputs ----------------------------------
#SBATCH -o log/%x-%A-%a.out
#SBATCH -e log/%x-%A-%a.err
#SBATCH --mail-user=<username>@stanford.edu # Fill in your SUNet ID; %u is not expanded in --mail-user
#SBATCH --mail-type=ALL
# ------------------------------------------
module load system singularity

# One subject label per line; SLURM_ARRAY_TASK_ID indexes into this file
SUBJECT_LIST=sub_list.txt
BIDS_DIR=$OAK/data/ds000030/1.0.3
OUT_DIR=${BIDS_DIR}/derivatives/fmriprep_1.2.5
WORK_DIR=${L_SCRATCH}/work/
FMRIPREP_OPTS="--omp-nthreads 8 --nthreads 12 --mem_mb 30000 --ignore-aroma-denoising-errors --output-space T1w template fsaverage6 fsaverage5 fsnative --template-resampling-grid 2mm --medial-surface-nan --use-syn-sdc --cifti-output --use-aroma"

# Avoid conflicts with Python packages installed on the host
unset PYTHONPATH
export FS_LICENSE=$HOME/.freesurfer.txt

# Pick the subject corresponding to this array task
subject=$( sed "${SLURM_ARRAY_TASK_ID}q;d" ${SUBJECT_LIST} )
cmd="singularity run $SINGULARITY_BIN/poldracklab_fmriprep_1.2.5-2018-12-04-2ef6b23ede2a.img ${BIDS_DIR} ${OUT_DIR} participant --participant-label $subject -w ${WORK_DIR} ${FMRIPREP_OPTS}"
echo "Running task ${SLURM_ARRAY_TASK_ID}"
echo "Commandline: $cmd"
eval $cmd
exitcode=$?

# Keep track of failures so they can be resubmitted later
if [ "$exitcode" -ne "0" ]; then
    echo "$subject" >> failed_subjects.${SLURM_ARRAY_JOB_ID}
    echo "${SLURM_ARRAY_TASK_ID}" >> failed_taskids.${SLURM_ARRAY_JOB_ID}
fi
echo "Finished task ${SLURM_ARRAY_TASK_ID} with exit code $exitcode"
#!/bin/bash
##############################################
# An example jobarray sbatch file for Sherlock
##############################################
#SBATCH -J mriqc # Give me a job name
#SBATCH --array=1-233%10 # One task per line of tasks_list.sh, with at most
# 10 tasks running in parallel at a time
#SBATCH -p russpold,owners,normal # Queues you can submit to
#SBATCH --time=48:00:00 # Wallclock time limit
#SBATCH -n 1 # Each line of tasks_list.sh is an independent task
#SBATCH --cpus-per-task=10 # Say you want each line to use 10 CPUs
#SBATCH --ntasks-per-node=1 # Necessary for the job array to allocate resources correctly
#SBATCH --mem-per-cpu=6400M # 10 CPUs x 6400M takes over ~all the RAM of a node
# SBATCH --exclusive # Remove the space after '#' to enable, keeping other users
# off the node (useful if your tasks take all available memory)
# Outputs ----------------------------------
#SBATCH -o %A-%a.out
#SBATCH -e %A-%a.err
#SBATCH --mail-user=<username>@stanford.edu
#SBATCH --mail-type=ALL
# ------------------------------------------
module load system # Only needed on Sherlock 2
module load singularity # Load Singularity (both Sherlock 1 and 2)
unset PYTHONPATH
export FS_LICENSE=$PWD/.freesurfer.txt # Necessary for FMRIPREP only
# The heavy lifting happens here.
# Make sure you have a tasks_list.sh file ready in the same working directory,
# with one task (a full command line) per line.
eval $( sed "${SLURM_ARRAY_TASK_ID}q;d" tasks_list.sh )
# Example of one possible line in the tasks_list.sh file:
# singularity run /share/PI/russpold/singularity_images/poldracklab_fmriprep_1.0.7-2018-02-14-521e873ab8db.img $OAK/data/openfmri/ds000116 $HOME/derivatives/ds000116 participant -w work/ds000116 --participant_label sub-17 --mem-mb 50000 --nthreads 10 --omp-nthreads 8 --force-syn -vv
#!/bin/bash
#
#SBATCH -J mriqc
#SBATCH --array=9,27,33,39
#SBATCH --time=24:00:00
#SBATCH -n 1
#SBATCH --cpus-per-task=16
#SBATCH --mem-per-cpu=4G
#SBATCH -p russpold,owners,normal
# Outputs ----------------------------------
#SBATCH -o log/%A-%a.out
#SBATCH -e log/%A-%a.err
#SBATCH --mail-user=oesteban@stanford.edu
#SBATCH --mail-type=ALL
# ------------------------------------------
module load system
module load singularity
module load git-annex anaconda
# ds_list.sh contains one dataset name per line
DATASET=$( sed "${SLURM_ARRAY_TASK_ID}q;d" ds_list.sh )
echo "Pulling ${DATASET} from OpenNeuro"
pushd $OAK/data/openfmri/${DATASET}
# Fetch only the T1w files, with 8 parallel downloads
find sub-*/ -name "*_T1w*" -exec datalad get -J 8 {} \+
popd
echo "Done!"
module unload anaconda
echo Processing dataset ${DATASET}.
unset PYTHONPATH
singularity run /share/PI/russpold/singularity_images/poldracklab_mriqc_0.14.2-2018-08-21-070e53b20a43.img /oak/stanford/groups/russpold/data/openfmri/${DATASET} /oak/stanford/groups/russpold/data/openfmri/derivatives/${DATASET}/mriqc_0.14.1 participant -m T1w --n_procs 16 --ants-nthreads 8 -f --mem_gb 30 -vv -w work/${DATASET} --dsname ${DATASET} --email "oesteban@stanford.edu"
mriqc_exit=$?
echo MRIQC finished ${DATASET}.
if [[ "${mriqc_exit}" == "0" ]]; then
module load anaconda
echo Uploading to s3
pushd /oak/stanford/groups/russpold/data/openfmri/derivatives/${DATASET}/mriqc_0.14.1
find . -maxdepth 1 -name "*.html" -exec aws s3 cp {} s3://mriqc/openfmri/${DATASET}/ \;
popd
echo Done!
echo Cleaning up!
rm -rf work/${DATASET}
echo Removed working directory.
else
echo "MRIQC ${DATASET} exited with code ${mriqc_exit}. TaskID=${SLURM_ARRAY_TASK_ID}"
exit ${mriqc_exit}
fi
#!/bin/bash
#
#SBATCH -J mriqc
##SBATCH --array=1-265 # Disabled: the array range is given on the sbatch command line
#SBATCH --time=24:00:00
#SBATCH -n 1
#SBATCH --cpus-per-task=16
#SBATCH --mem-per-cpu=4G
#SBATCH -p russpold,owners,normal
# Outputs ----------------------------------
#SBATCH -o log/%A-%a.out
#SBATCH -e log/%A-%a.err
#SBATCH --mail-user=oesteban@stanford.edu
#SBATCH --mail-type=ALL
# ------------------------------------------
module load system
module load singularity
# Extract the subject ID for this task; ${DATASET}.txt contains one subject ID per line
# (DATASET is exported at submission time, see below)
SUBJECT=$( sed "${SLURM_ARRAY_TASK_ID}q;d" ${DATASET}.txt )
echo "Processing subject ${SUBJECT} of dataset ${DATASET}."
unset PYTHONPATH
singularity run /share/PI/russpold/singularity_images/poldracklab_mriqc_0.14.2-2018-08-21-070e53b20a43.img /oak/stanford/groups/russpold/data/openfmri/${DATASET} /oak/stanford/groups/russpold/data/openfmri/derivatives/${DATASET}/mriqc_0.14.1 participant --participant-label ${SUBJECT} -m T1w --n_procs 16 --ants-nthreads 8 -f --mem_gb 30 -vv -w work/${DATASET} --dsname ${DATASET} --email "oesteban@stanford.edu"
mriqc_exit=$?
echo "MRIQC finished subject ${SUBJECT} of ${DATASET} with exit code ${mriqc_exit}."