Skip to content

Instantly share code, notes, and snippets.

@chrisamiller
Created October 8, 2019 16:22
Show Gist options
  • Save chrisamiller/55f90f72382e68775541d3c8d48f3f79 to your computer and use it in GitHub Desktop.
Save chrisamiller/55f90f72382e68775541d3c8d48f3f79 to your computer and use it in GitHub Desktop.
# WGBS: bisulfite-sequencing pipeline for one unaligned BAM.
#
# Steps, in dependency order:
#   1. bam2fastq               - unaligned BAM -> paired FASTQ
#   2. biscuit_align_and_sort  - align the FASTQ pair, sort, index
#   3. markdup                 - biscuit markdup on the aligned BAM
#   4. bam_coord_sort          - sambamba sort + index of the deduped BAM
#   5. flagstat / bamutil      - statistics on the final BAM
#   6. biscuit_pileup          - methylation VCF + stats
#   7. biscuit_vcf2bed         - VCF -> bgzipped, tabix-indexed BED
#   8. gather_result           - move the deliverables into OutputDir
# The remove_* calls delete intermediates; their order_by input is only there
# to make each deletion wait for the step that consumes the file.
workflow WGBS {
  # Reference FASTA handed to biscuit align and biscuit pileup.
  String Reference = "/gscmnt/gc2708/info/medseq/gc6118/sketkar/refdata/mouse/all_sequences.fa"
  # Forwarded to every task as its job_group runtime attribute.
  String JobGroup
  # Declared but not referenced by any call below; kept so existing input
  # JSON files that set it remain valid.
  String? TMPDIR
  # Directory that gather_result moves the final files into.
  String OutputDir
  # Prefix used for the names of the deduped/sorted BAM, VCF and BED outputs.
  String FinalLabel
  # Path of the unaligned input BAM.
  String uBam

  call bam2fastq {
    input: uBam=uBam,
           jobGroup=JobGroup
  }

  call biscuit_align_and_sort {
    input: refFasta=Reference,
           Fastqs=bam2fastq.fastq_files,
           jobGroup=JobGroup
  }

  # FASTQs are no longer needed once the aligned BAM exists.
  call remove_files as remove_fastqs {
    input: files=bam2fastq.fastq_files,
           order_by=biscuit_align_and_sort.bam_file,
           jobGroup=JobGroup
  }

  call markdup {
    input: Bam=biscuit_align_and_sort.bam_file,
           Name=FinalLabel,
           jobGroup=JobGroup
  }

  call bam_coord_sort {
    input: Bam=markdup.bam_file,
           Name=FinalLabel,
           jobGroup=JobGroup
  }

  call flagstat {
    input: in=bam_coord_sort.bam_file,
           jobGroup=JobGroup
  }

  call bamutil {
    input: in=bam_coord_sort.bam_file,
           jobGroup=JobGroup
  }

  call biscuit_pileup {
    input: Bam=bam_coord_sort.bam_file,
           Name=FinalLabel,
           refFasta=Reference,
           jobGroup=JobGroup
  }

  call biscuit_vcf2bed {
    input: Vcf=biscuit_pileup.vcf_file,
           VcfIndex=biscuit_pileup.vcf_index,
           Name=FinalLabel,
           jobGroup=JobGroup
  }

  # The deduped BAM is an intermediate once the sorted BAM exists.
  call remove_file as rm_markdup {
    input: file=markdup.bam_file,
           order_by=bam_coord_sort.bam_file,
           jobGroup=JobGroup
  }

  # remove_files.files is Array[String], so the single aligned BAM has to be
  # wrapped in an array literal rather than passed as a bare File.
  call remove_files as rm_aligned {
    input: files=[biscuit_align_and_sort.bam_file],
           order_by=markdup.bam_file,
           jobGroup=JobGroup
  }

  # Referencing the flagstat/bamutil/pileup/vcf2bed outputs here also makes
  # the move wait until every reader of the sorted BAM has finished.
  call gather_result as gather_the_rest {
    input: files=[bam_coord_sort.bam_file,
                  bam_coord_sort.bam_index,
                  flagstat.fsOut,
                  bamutil.bamutilOut,
                  biscuit_pileup.vcf_file,
                  biscuit_pileup.vcf_index,
                  biscuit_pileup.meth_stats,
                  biscuit_vcf2bed.bed_file,
                  biscuit_vcf2bed.bed_index],
           dir=OutputDir,
           jobGroup=JobGroup
  }
}
# bam2fastq: convert a BAM into a pair of gzipped FASTQ files.
#
# Picard SortSam (SO=queryname) streams its output through /dev/stdout into
# Picard SamToFastq, which writes 1.fastq.gz and 2.fastq.gz into the task's
# working directory. Each of the two JVMs is started with -Xmx16g.
#
# Inputs:
#   uBam     - path given to SortSam as I=
#   jobGroup - value of the job_group runtime attribute
# Output:
#   fastq_files - the two FASTQ files, first-of-pair then second-of-pair
task bam2fastq {
  String jobGroup
  String uBam

  command {
    (set -eo pipefail && \
    /usr/bin/java -Xmx16g -jar /usr/picard/picard.jar SortSam I=${uBam} O=/dev/stdout SO=queryname VALIDATION_STRINGENCY=SILENT | \
    /usr/bin/java -Xmx16g -jar /usr/picard/picard.jar SamToFastq I=/dev/stdin F="1.fastq.gz" F2="2.fastq.gz" NON_PF=true VALIDATION_STRINGENCY=SILENT)
  }

  runtime {
    docker_image: "registry.gsc.wustl.edu/genome/picard-2.4.1-r:2"
    queue: "research-hpc"
    job_group: jobGroup
    cpu: "2"
    memory_gb: "32"
    resource: "rusage[gtmp=10, mem=32000]"
  }

  output {
    Array[File] fastq_files = [
      "1.fastq.gz",
      "2.fastq.gz"
    ]
  }
}
# biscuit_align_and_sort: align a FASTQ pair and produce an indexed BAM.
#
# `biscuit align -t 12 -M` output is piped into `samtools sort`, which writes
# aligned.bam; `samtools index` then runs only if the pipeline succeeded.
#
# Inputs:
#   refFasta - reference path passed to biscuit align
#   Fastqs   - only elements 0 and 1 are used, as the two read files
#   jobGroup - value of the job_group runtime attribute
# Outputs:
#   bam_file  - aligned.bam
#   bam_index - aligned.bam.bai
task biscuit_align_and_sort {
  String refFasta
  Array[String] Fastqs
  String jobGroup
  # A read-group input is sketched here but disabled:
  # String readGroup

  command {
    (set -eo pipefail && /usr/bin/biscuit align -t 12 -M \
    ${refFasta} ${Fastqs[0]} ${Fastqs[1]} | \
    /usr/bin/samtools sort -T "tmpsort" -O bam -o aligned.bam) && \
    /usr/bin/samtools index aligned.bam
  }

  runtime {
    docker_image: "zackramjan/biscuit"
    queue: "research-hpc"
    job_group: jobGroup
    # cpu matches the -t 12 given to biscuit align above.
    cpu: "12"
    memory_gb: "64"
    resource: "rusage[gtmp=10, mem=64000]"
  }

  output {
    File bam_index = "aligned.bam.bai"
    File bam_file = "aligned.bam"
  }
}
# merge: combine several BAMs into AlignedMerged.bam with `samtools merge`.
# The WGBS workflow in this file does not call this task.
#
# Inputs:
#   files    - BAM paths, joined with single spaces on the command line
#   jobGroup - value of the job_group runtime attribute
# Output:
#   bam_file - AlignedMerged.bam
task merge {
  String jobGroup
  Array[String] files

  command {
    /usr/local/bin/samtools merge "AlignedMerged.bam" ${sep=" " files}
  }

  runtime {
    docker_image: "registry.gsc.wustl.edu/genome/samtools-1.3.1-2:2"
    queue: "research-hpc"
    job_group: jobGroup
    cpu: "1"
    memory_gb: "20"
    resource: "rusage[gtmp=10, mem=20000]"
  }

  output {
    File bam_file = "AlignedMerged.bam"
  }
}
# bam_coord_sort: sort a BAM with sambamba and index the result.
#
# `sambamba sort` runs with 8 threads, -m 18G and the working directory as
# its tmpdir; `sambamba index` runs only if the sort succeeded.
#
# Inputs:
#   Bam      - path of the BAM to sort
#   Name     - prefix of the output file names
#   jobGroup - value of the job_group runtime attribute
# Outputs:
#   bam_file  - <Name>.sorted.bam
#   bam_index - <Name>.sorted.bam.bai
task bam_coord_sort {
  String Name
  String Bam
  String jobGroup

  command {
    /usr/local/bin/sambamba sort -t 8 -m 18G --tmpdir="." -o "${Name}.sorted.bam" ${Bam} && \
    /usr/local/bin/sambamba index -t 8 "${Name}.sorted.bam"
  }

  runtime {
    docker_image: "registry.gsc.wustl.edu/genome/sambamba-0.6.4:1"
    queue: "research-hpc"
    job_group: jobGroup
    # cpu matches the -t 8 thread count used by both sambamba commands.
    cpu: "8"
    memory_gb: "20"
    resource: "rusage[gtmp=10, mem=20000]"
  }

  output {
    File bam_index = "${Name}.sorted.bam.bai"
    File bam_file = "${Name}.sorted.bam"
  }
}
# markdup: run `biscuit markdup` on a BAM and index the output with samtools.
#
# Inputs:
#   Bam      - path of the input BAM
#   Name     - prefix of the output file names
#   jobGroup - value of the job_group runtime attribute
# Outputs:
#   bam_file  - <Name>.aligned_deduped.bam
#   bam_index - <Name>.aligned_deduped.bam.bai
task markdup {
  String Name
  String Bam
  String jobGroup

  command {
    /usr/bin/biscuit markdup ${Bam} "${Name}.aligned_deduped.bam" && \
    /usr/bin/samtools index "${Name}.aligned_deduped.bam"
  }

  runtime {
    docker_image: "zackramjan/biscuit"
    queue: "research-hpc"
    job_group: jobGroup
    cpu: "1"
    memory_gb: "32"
    resource: "rusage[gtmp=10, mem=32000]"
  }

  output {
    File bam_index = "${Name}.aligned_deduped.bam.bai"
    File bam_file = "${Name}.aligned_deduped.bam"
  }
}
# biscuit_pileup: run `biscuit pileup` on a BAM, then bgzip and tabix-index
# the VCF it writes. Each step runs only if the previous one succeeded.
#
# NOTE(review): -q 20 is passed straight to biscuit pileup; what it controls
# depends on the biscuit version inside the image - confirm before changing.
#
# Inputs:
#   Bam      - path of the BAM given as -i
#   refFasta - reference path given as -r
#   Name     - prefix of the output file names
#   jobGroup - value of the job_group runtime attribute
# Outputs:
#   vcf_file   - <Name>.meth.vcf.gz
#   vcf_index  - <Name>.meth.vcf.gz.tbi
#   meth_stats - <Name>.meth.stats.txt (the file named by -w)
task biscuit_pileup {
  String Name
  String refFasta
  String Bam
  String jobGroup

  command {
    /usr/bin/biscuit pileup -r ${refFasta} -i ${Bam} -w "${Name}.meth.stats.txt" -o "${Name}.meth.vcf" -q 20 && \
    bgzip "${Name}.meth.vcf" && \
    tabix -p vcf "${Name}.meth.vcf.gz"
  }

  runtime {
    docker_image: "zackramjan/biscuit"
    queue: "research-hpc"
    job_group: jobGroup
    cpu: "1"
    memory_gb: "32"
    resource: "rusage[gtmp=10, mem=32000]"
  }

  output {
    File meth_stats = "${Name}.meth.stats.txt"
    File vcf_file = "${Name}.meth.vcf.gz"
    File vcf_index = "${Name}.meth.vcf.gz.tbi"
  }
}
# biscuit_vcf2bed: convert a biscuit VCF to BED with `biscuit vcf2bed`, then
# bgzip and tabix-index the BED. Each step runs only if the previous one
# succeeded.
#
# Inputs:
#   Vcf      - path of the VCF to convert
#   VcfIndex - not used in the command; presumably taken so the task waits
#              for the index to exist - confirm with the caller
#   MinCov   - optional value for -k; 1 when unset
#   Context  - optional value for -t; "cg" when unset
#   Name     - prefix of the output file names
#   jobGroup - value of the job_group runtime attribute
# Outputs:
#   bed_file  - <Name>.meth.bed.gz
#   bed_index - <Name>.meth.bed.gz.tbi
task biscuit_vcf2bed {
  String Name
  String Vcf
  String VcfIndex
  String? Context
  Int? MinCov
  String jobGroup

  command {
    /usr/bin/biscuit vcf2bed -k ${default=1 MinCov} -c -t ${default="cg" Context} ${Vcf} > "${Name}.meth.bed" && \
    bgzip "${Name}.meth.bed" && \
    tabix -p bed "${Name}.meth.bed.gz"
  }

  runtime {
    docker_image: "zackramjan/biscuit"
    queue: "research-hpc"
    job_group: jobGroup
    cpu: "1"
    memory_gb: "32"
    resource: "rusage[gtmp=10, mem=32000]"
  }

  output {
    File bed_index = "${Name}.meth.bed.gz.tbi"
    File bed_file = "${Name}.meth.bed.gz"
  }
}
# flagstat: capture the stdout of `samtools flagstat` for a BAM.
#
# Inputs:
#   in       - path of the BAM to summarise
#   jobGroup - value of the job_group runtime attribute
# Output:
#   fsOut - flagstat.out
task flagstat {
  String jobGroup
  String in

  command {
    /usr/local/bin/samtools flagstat ${in} > "flagstat.out"
  }

  runtime {
    docker_image: "registry.gsc.wustl.edu/genome/tagged-alignment:2"
    queue: "research-hpc"
    job_group: jobGroup
    cpu: "1"
    memory_gb: "10"
    resource: "rusage[gtmp=10, mem=10000]"
  }

  output {
    File fsOut = "flagstat.out"
  }
}
# bamutil: run bamUtil `bam stats --phred` on a BAM.
#
# The report is captured from stderr (2>), not stdout. --excludeFlags 3844
# is 4+256+512+1024+2048, i.e. the SAM flag bits for unmapped, secondary,
# QC-fail, duplicate and supplementary records.
#
# Inputs:
#   in       - path of the BAM passed as --in
#   jobGroup - value of the job_group runtime attribute
# Output:
#   bamutilOut - bamutil_stats.txt
task bamutil {
  String jobGroup
  String in

  command {
    /usr/local/bin/bam stats --noPhoneHome --in ${in} --phred --excludeFlags 3844 2> bamutil_stats.txt
  }

  runtime {
    docker_image: "registry.gsc.wustl.edu/genome/bamutil:2"
    queue: "research-hpc"
    job_group: jobGroup
    cpu: "1"
    memory_gb: "10"
    resource: "rusage[gtmp=10, mem=10000]"
  }

  output {
    File bamutilOut = "bamutil_stats.txt"
  }
}
# gather_result: move a list of files into one directory with `mv -t`.
# The task does not create the directory itself.
#
# Inputs:
#   dir      - target directory given to mv -t
#   files    - paths to move, joined with single spaces on the command line
#   jobGroup - value of the job_group runtime attribute
# Output:
#   out - the task's stdout() value
task gather_result {
  Array[String] files
  String dir
  String jobGroup

  command {
    /bin/mv -t ${dir} ${sep=" " files}
  }

  runtime {
    queue: "research-hpc"
    job_group: jobGroup
    docker_image: "ubuntu:xenial"
  }

  output {
    String out = stdout()
  }
}
# remove_files: delete a list of files with `rm -f`.
#
# Inputs:
#   files    - paths to delete, joined with single spaces on the command line
#   order_by - never used in the command; presumably exists so a caller can
#              bind it to another task's output and delay the deletion until
#              that task has finished
#   jobGroup - value of the job_group runtime attribute
# Output:
#   out - the task's stdout() value
task remove_files {
  String order_by
  Array[String] files
  String jobGroup

  command {
    /bin/rm -f ${sep=" " files}
  }

  runtime {
    queue: "research-hpc"
    job_group: jobGroup
    docker_image: "ubuntu:xenial"
  }

  output {
    String out = stdout()
  }
}
# remove_file: delete a single file with `rm -f`.
#
# Inputs:
#   file     - path to delete
#   order_by - never used in the command; presumably exists so a caller can
#              bind it to another task's output and delay the deletion until
#              that task has finished
#   jobGroup - value of the job_group runtime attribute
# Output:
#   out - the task's stdout() value
task remove_file {
  String file
  String order_by
  String jobGroup

  command {
    /bin/rm -f "${file}"
  }

  runtime {
    docker_image: "ubuntu:xenial"
    queue: "research-hpc"
    job_group: jobGroup
  }

  output {
    String out = stdout()
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment