Skip to content

Instantly share code, notes, and snippets.

@dridk
Last active January 25, 2021 00:20
Show Gist options
  • Save dridk/c2a7c9c8a6232407bf0c45f4442e6fb6 to your computer and use it in GitHub Desktop.
Save dridk/c2a7c9c8a6232407bf0c45f4442e6fb6 to your computer and use it in GitHub Desktop.
Pipeline d'analyse de données NGS Sars-CoV-2
from glob import glob
# rule root:
# input:
# [i.replace("PRJNA673096/","") for i in glob("PRJNA673096/*")]
SAMPLES = [i.replace("PRJNA673096/","") for i in glob("PRJNA673096/*")]
GENOM="genome/whuan.fasta"
rule align:
input:
R1="PRJNA673096/{sample}/{sample}_1.fastq.gz",
R2="PRJNA673096/{sample}/{sample}_1.fastq.gz"
output:
temporary("{sample}.sam")
log:
"{sample}.align.log"
shell:
"bwa mem {GENOM} {input.R1} {input.R2} > {output} 2> {log}"
rule sam2bam:
input:
"{sample}.sam"
output:
"{sample}.bam"
shell:
"samtools sort -O BAM {input} > {output}"
rule samIndex:
input:
"{sample}.bam"
output:
"{sample}.bam.bai"
shell:
"samtools index {input}"
rule vcf:
input:
"{sample}.bam", "{sample}.bam.bai"
output:
"{sample}.vcf"
shell:
"freebayes -f {GENOM} -p 1 -C10 {input[0]} > {output} "
rule vcf_rename_sample:
""" All sample is called unknown """
input:
"{sample}.vcf"
output:
"{sample}.name.vcf"
shell:
"cat {input}|sed 's/unknown/{wildcards.sample}/g' > {output}"
rule bgzip:
input:
"{sample}.vcf"
output:
"{sample}.vcf.gz"
shell:
"bgzip {input} ; tabix -p vcf {output}"
rule merge_vcf:
input:
[f'{sample}.name.vcf.gz' for sample in SAMPLES]
output:
"final.vcf.gz"
shell:
"bcftools merge {input} -Oz -o {output}"
rule annotation:
input:
"final.vcf.gz"
output:
"final.ann.vcf"
shell:
"snpEff -Xmx10G -v NC_045512.2 {input}> {output}"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment