Created
May 23, 2019 10:11
-
-
Save lindenb/f5311887f5a5df0abfaf487df4ae2858 to your computer and use it in GitHub Desktop.
calling bams in gvcf mode with bcftools nextflow
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
reference=file("${params.reference}") | |
bams=file("${params.bams}") | |
process getChromosomes { | |
tag "get chromosomes from ${ref}" | |
executor 'local' | |
input: | |
file ref from reference | |
output: | |
file("contigs.txt") into contigs | |
script: | |
""" | |
cut -f 1 "${ref.toRealPath()}.fai" > contigs.txt | |
""" | |
} | |
process sampleAndBams { | |
tag "get bams from ${bams}" | |
executor 'local' | |
input: | |
file bams from bams | |
output: | |
file("sample_bams.csv") into sample_bams | |
script: | |
""" | |
cat "${bams}" | sort | while read B | |
do | |
samtools view -H "\${B}" | grep '^@RG' | tr '\\t' '\\n' | grep ^SM -m1 | cut -d ':' -f 2- | tr '\\n' ',' >> sample_bams.csv | |
echo "\${B}" >> sample_bams.csv | |
done | |
""" | |
} | |
sample_bams.splitCsv(header: false,sep:',',strip:true). | |
combine(contigs.splitCsv(header: false,sep:',',strip:true)). | |
into{sample_bam_contig} | |
process call_gvcf { | |
tag "call ${sample} ${bam} ${contig}" | |
input: | |
file ref from reference | |
set sample,bam,contig from sample_bam_contig | |
output: | |
set contig,file("${sample}.csv") into sample_contig_bcf | |
file("${sample}.${contig}.bcf") | |
file("${sample}.${contig}.bcf.csi") | |
script: | |
""" | |
bcftools mpileup --output-type u --fasta-ref "${ref.toRealPath()}" --regions "${contig}" -a 'INFO/AD' -a 'FORMAT/AD' -a 'FORMAT/DP' --gvcf 5,10,20,30 "${bam}" |\ | |
bcftools call --gvcf 5,10,20,30 --multiallelic-caller --output-type b -o "${sample}.${contig}.bcf" | |
bcftools index "${sample}.${contig}.bcf" | |
echo "${sample},\${PWD}/${sample}.${contig}.bcf" > "${sample}.csv" | |
""" | |
} | |
process combine_contig { | |
tag "combine ${contig} N=${vcfs.size()}" | |
input: | |
file ref from reference | |
set contig,vcfs from sample_contig_bcf.groupTuple() | |
output: | |
file("${contig}.bcf") into contig_merged | |
file("${contig}.bcf.csi") | |
script: | |
""" | |
# make sure we always use the samples in the same order | |
cat ${vcfs.join(" ")} | sort -T . -t, -k1,1 | cut -d, -f 2 > vcf.list | |
bcftools merge --merge all --gvcf "${ref.toRealPath()}" --file-list "vcf.list" --output-type 'u' |\ | |
bcftools convert --gvcf2vcf --fasta-ref "${ref.toRealPath()}" --output-type 'u' |\ | |
bcftools view -m 2 --output-type 'u' |\ | |
bcftools norm --fasta-ref "${ref.toRealPath()}" --output-type 'b' -o "${contig}.bcf" | |
bcftools index "${contig}.bcf" | |
rm vcf.list | |
""" | |
} | |
process combine_all{ | |
tag "combine N=${vcfs.size()}" | |
input: | |
file ref from reference | |
val vcfs from contig_merged.collect() | |
output: | |
file("${params.prefix}vcf.gz") | |
file("${params.prefix}vcf.gz.tbi") | |
script: | |
""" | |
cat << __EOF__ | sort > vcf.list | |
${vcfs.join("\n")} | |
__EOF__ | |
bcftools concat --file-list "vcf.list" --output-type 'z' -o "${params.prefix}vcf.gz" | |
bcftools index --tbi "${params.prefix}vcf.gz" | |
""" | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment