Skip to content

Instantly share code, notes, and snippets.

@jerowe
Forked from twaddlac/metagenomics-pipeline.sh
Last active January 12, 2017 05:01
Show Gist options
  • Save jerowe/f2b35b59c71c31fd2b7cbdf9557bac3d to your computer and use it in GitHub Desktop.
Save jerowe/f2b35b59c71c31fd2b7cbdf9557bac3d to your computer and use it in GitHub Desktop.
#!/usr/bin/env bash
#SBATCH -p serial
#SBATCH --job-name=metagenomics
#SBATCH --time=00:10:00
# Output and error files
#SBATCH -o job.%J.out
#SBATCH -e job.%J.err
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=6
#SBATCH --mem=10GB
#Time for the whole workflow
#real 4m15.647s
#user 4m8.090s
#sys 0m53.599s
###PBS -l walltime=3:00:00,mem=10gb,nodes=1:ppn=4
#To run this in interactive mode
#srun --mem 10GB --cpus-per-task 4 --pty /bin/bash
# Echo each command as it runs (-x) and stop at the first failing command (-e).
set -xe

## NYUAD cluster: clear any loaded modules, then load the gencore modules.
module purge all
module load gencore/1 gencore_metagenomics

## NYUNY cluster: load each tool's module individually instead.
#module load kraken
#module load krona
#module load humann2
#module load kneaddata

# Copy the demo dataset archive into the current directory, unpack it, and
# run the rest of the workflow from inside the extracted directory.
cp -rf /scratch/gencore/datasets/metagenomics.tar.gz ./
tar -xvf metagenomics.tar.gz
cd metagenomics
#JOB KneadData
#INPUTS demo.fastq
#INPUTS Homo_sapiens_Bowtie2_v0.1
#OUTPUTS demo_kneaddata_$DATABASE_bowtie2_contam.fastq: reads identified as contaminants from the reference database (named $DATABASE).
#OUTPUTS demo_kneaddata.fastq: reads that were not found in the reference database.
#OUTPUTS demo_kneaddata.trimmed.fastq: the trimmed reads.
#OUTPUTS demo_kneaddata.log

# Unpack the Bowtie2 reference database archive used by --reference-db.
tar -xvf Homo_sapiens_Bowtie2_v0.1.tgz

# Collect the arguments in an array so each option sits on its own line and
# the quoted Bowtie2 option string is passed through as a single argument.
kneaddata_args=(
  --input demo.fastq
  --bowtie2-options "--very-sensitive -p 4"
  --trimmomatic "${EBROOTGENCORE_METAGENOMICS}/share/trimmomatic-0.36-3"
  --reference-db Homo_sapiens_Bowtie2_v0.1
  --output kneaddata_output
)
kneaddata "${kneaddata_args[@]}"
#JOB Kraken
#INPUTS kneaddata_output/demo_kneaddata.trimmed.fastq
#INPUTS minikraken
#OUTPUTS kraken.out.txt

# Unpack the MiniKraken database archive (presumably into the
# minikraken_20141208 directory passed to --db below; confirm against the
# archive contents).
tar -xvf minikraken.tgz

# Classify the trimmed reads and write per-read results to kraken.out.txt.
# Every line of this command except the last must end in a backslash. The
# --db line previously had none, so kraken was invoked without an input file
# and the shell then tried to execute the FASTQ path as a separate command.
kraken --threads 4 \
  --preload \
  --fastq-input \
  --output kraken.out.txt \
  --db minikraken_20141208 \
  kneaddata_output/demo_kneaddata.trimmed.fastq
#JOB krona
#INPUT kraken.out.txt
#OUTPUT kraken.krona.html

# Krona needs a taxonomy database; one has already been downloaded for you.
# To refresh it yourself, run:
#${EBROOTGENCORE_METAGENOMICS}/opt/krona/updateTaxonomy.sh

# Build the interactive HTML chart from the Kraken output.
# NOTE(review): -t and -s both point at column 4. Kraken's documented output
# puts the taxonomy ID in column 3 and the sequence length in column 4, so
# these column choices look suspect -- confirm against the Kraken/Krona docs
# before relying on the chart.
ktImportTaxonomy \
  -o kraken.krona.html \
  -t 4 \
  -s 4 \
  kraken.out.txt
#JOB HUMAnN2
#INPUT kneaddata_output/demo_kneaddata.trimmed.fastq
#OUTPUTS humann2/genefamilies.tsv
#OUTPUTS humann2/pathabundance.tsv
#OUTPUTS humann2/pathcoverage.tsv
#OUTPUTS humann2/metaphlan_bug_list.tsv
# NOTE(review): the output names above are as listed by the original author;
# the actual file names written by humann2 may carry the input's basename as
# a prefix -- verify against a real run.

# Run HUMAnN2 on the trimmed reads, writing all results under ./humann2.
humann2 \
  --verbose \
  --threads 4 \
  --input kneaddata_output/demo_kneaddata.trimmed.fastq \
  --output humann2
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment