Created
August 29, 2023 10:09
-
-
Save ericvdtoorn/2b45a9651bc1d45d1ba3be9880d23b72 to your computer and use it in GitHub Desktop.
Optimizing KMCP runs (SLURM batch file)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| file | format | type | num_seqs | sum_len | min_len | avg_len | max_len |
| --- | --- | --- | ---: | ---: | ---: | ---: | ---: |
| DRR171473_1.fastp.depleted.fq.gz | FASTQ | DNA | 20,312,778 | 2,754,850,269 | 28 | 135.6 | 150 |
| DRR171473_2.fastp.depleted.fq.gz | FASTQ | DNA | 20,312,778 | 2,745,109,847 | 26 | 135.1 | 150 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#SBATCH --partition=general
#SBATCH --qos=medium
#SBATCH --time=24:00:00
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=32
#SBATCH --mem=100GB
#SBATCH --gres=jobspernode:one:1
#SBATCH --mail-type=END
#
# SLURM batch job: search one sample's reads against every KMCP database
# listed in DB_INDEX (one `kmcp search` per database, fanned out with GNU
# parallel), merge the per-database results, then build a taxonomic profile.
#
# Requires bash (for `set -o pipefail`), GNU parallel and kmcp on PATH.
set -xeuo pipefail

# Sample accession; input files are expected at
#   $source_folder/${accession}_{1,2}.fastp.depleted.fq.gz
accession='DRR171473'
source_folder="/path/to/source/folder"

# Text file read line by line; each line is passed to `kmcp search --db-dir`.
DB_INDEX="/path/to/humgut.index/database.index"

# Total thread budget. Follow the CPUs SLURM actually allocated instead of a
# hard-coded number, so the job does not oversubscribe its allocation.
THREADS="${SLURM_CPUS_PER_TASK:-32}"
# Threads given to each individual `kmcp search` process.
THREADS_PER_JOB=12
# Number of concurrent searches. Clamp to 1 because `parallel -j 0` means
# "run as many jobs as possible", which is the opposite of what is wanted.
PARALLEL_JOBS=$((THREADS / THREADS_PER_JOB))
if [ "$PARALLEL_JOBS" -lt 1 ]; then
  PARALLEL_JOBS=1
fi

# Scratch space in shared memory for fast I/O.
# NOTE(review): all outputs below are written here and nothing in this script
# copies them elsewhere or cleans up - confirm that is handled separately.
WORK_DIR="/dev/shm/$USER/kmcp"
LOG_DIR="/path/to/source/folder"
TAXID_MAP=/path/to/humgut_db/taxid-gtdb.map
TAXDUMP=/path/to/humgut_db/taxdump
DB_NAME="kmcp_humgut"

mkdir -p "$LOG_DIR"
mkdir -p "$WORK_DIR"
cd "$WORK_DIR"

echo "copying source files"
cp "$source_folder/${accession}_1.fastp.depleted.fq.gz" .
cp "$source_folder/${accession}_2.fastp.depleted.fq.gz" .

echo "executing parallel search"
# One search per database directory; {} is the database path and {#} is the
# parallel job sequence number, used to keep per-database outputs distinct.
# Both mates (_1 and _2) are searched.
# NOTE(review): the read files are passed positionally, as in the original;
# kmcp also offers dedicated paired-end options - confirm which is intended.
parallel -j "$PARALLEL_JOBS" kmcp search \
  --db-dir {} \
  --threads "$THREADS_PER_JOB" \
  --load-whole-db \
  "${accession}_1.fastp.depleted.fq.gz" \
  "${accession}_2.fastp.depleted.fq.gz" \
  -o "${accession}.${DB_NAME}@{#}.tsv.gz" \
  --log "${LOG_DIR}/${accession}.${DB_NAME}@{#}.log" \
  < "$DB_INDEX"

# Combine the per-database search results into a single file. Only the
# variable part is quoted so the `@*` glob still expands.
kmcp merge "${accession}.${DB_NAME}"@*.tsv.gz \
  -o "${accession}.${DB_NAME}.tsv.gz" \
  --log "${LOG_DIR}/${accession}.${DB_NAME}.log"

# Generate the taxonomic profile (plus a MetaPhlAn-format report) from the
# merged search result.
kmcp profile --taxid-map "$TAXID_MAP" --taxdump "$TAXDUMP" \
  "${accession}.${DB_NAME}.tsv.gz" \
  -m 3 -j "$THREADS" \
  -o "${accession}.${DB_NAME}.profile" \
  --metaphlan-report "${accession}.${DB_NAME}.profile.metaphlan" \
  --log "${LOG_DIR}/${accession}.${DB_NAME}.profile.log"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment