Skip to content

Instantly share code, notes, and snippets.

@lindenb
Created May 23, 2019 10:11
Show Gist options
  • Save lindenb/f5311887f5a5df0abfaf487df4ae2858 to your computer and use it in GitHub Desktop.
Save lindenb/f5311887f5a5df0abfaf487df4ae2858 to your computer and use it in GitHub Desktop.
calling bams in gvcf mode with bcftools nextflow
reference=file("${params.reference}")
bams=file("${params.bams}")
process getChromosomes {
tag "get chromosomes from ${ref}"
executor 'local'
input:
file ref from reference
output:
file("contigs.txt") into contigs
script:
"""
cut -f 1 "${ref.toRealPath()}.fai" > contigs.txt
"""
}
process sampleAndBams {
tag "get bams from ${bams}"
executor 'local'
input:
file bams from bams
output:
file("sample_bams.csv") into sample_bams
script:
"""
cat "${bams}" | sort | while read B
do
samtools view -H "\${B}" | grep '^@RG' | tr '\\t' '\\n' | grep ^SM -m1 | cut -d ':' -f 2- | tr '\\n' ',' >> sample_bams.csv
echo "\${B}" >> sample_bams.csv
done
"""
}
sample_bams.splitCsv(header: false,sep:',',strip:true).
combine(contigs.splitCsv(header: false,sep:',',strip:true)).
into{sample_bam_contig}
process call_gvcf {
tag "call ${sample} ${bam} ${contig}"
input:
file ref from reference
set sample,bam,contig from sample_bam_contig
output:
set contig,file("${sample}.csv") into sample_contig_bcf
file("${sample}.${contig}.bcf")
file("${sample}.${contig}.bcf.csi")
script:
"""
bcftools mpileup --output-type u --fasta-ref "${ref.toRealPath()}" --regions "${contig}" -a 'INFO/AD' -a 'FORMAT/AD' -a 'FORMAT/DP' --gvcf 5,10,20,30 "${bam}" |\
bcftools call --gvcf 5,10,20,30 --multiallelic-caller --output-type b -o "${sample}.${contig}.bcf"
bcftools index "${sample}.${contig}.bcf"
echo "${sample},\${PWD}/${sample}.${contig}.bcf" > "${sample}.csv"
"""
}
process combine_contig {
tag "combine ${contig} N=${vcfs.size()}"
input:
file ref from reference
set contig,vcfs from sample_contig_bcf.groupTuple()
output:
file("${contig}.bcf") into contig_merged
file("${contig}.bcf.csi")
script:
"""
# make sure we always use the samples in the same order
cat ${vcfs.join(" ")} | sort -T . -t, -k1,1 | cut -d, -f 2 > vcf.list
bcftools merge --merge all --gvcf "${ref.toRealPath()}" --file-list "vcf.list" --output-type 'u' |\
bcftools convert --gvcf2vcf --fasta-ref "${ref.toRealPath()}" --output-type 'u' |\
bcftools view -m 2 --output-type 'u' |\
bcftools norm --fasta-ref "${ref.toRealPath()}" --output-type 'b' -o "${contig}.bcf"
bcftools index "${contig}.bcf"
rm vcf.list
"""
}
process combine_all{
tag "combine N=${vcfs.size()}"
input:
file ref from reference
val vcfs from contig_merged.collect()
output:
file("${params.prefix}vcf.gz")
file("${params.prefix}vcf.gz.tbi")
script:
"""
cat << __EOF__ | sort > vcf.list
${vcfs.join("\n")}
__EOF__
bcftools concat --file-list "vcf.list" --output-type 'z' -o "${params.prefix}vcf.gz"
bcftools index --tbi "${params.prefix}vcf.gz"
"""
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment