Pipeline d'analyse de données NGS Sars-CoV-2
from glob import glob | |
# rule root: | |
# input: | |
# [i.replace("PRJNA673096/","") for i in glob("PRJNA673096/*")] | |
SAMPLES = [i.replace("PRJNA673096/","") for i in glob("PRJNA673096/*")] | |
GENOM="genome/whuan.fasta" | |
rule align: | |
input: | |
R1="PRJNA673096/{sample}/{sample}_1.fastq.gz", | |
R2="PRJNA673096/{sample}/{sample}_1.fastq.gz" | |
output: | |
temporary("{sample}.sam") | |
log: | |
"{sample}.align.log" | |
shell: | |
"bwa mem {GENOM} {input.R1} {input.R2} > {output} 2> {log}" | |
rule sam2bam: | |
input: | |
"{sample}.sam" | |
output: | |
"{sample}.bam" | |
shell: | |
"samtools sort -O BAM {input} > {output}" | |
rule samIndex: | |
input: | |
"{sample}.bam" | |
output: | |
"{sample}.bam.bai" | |
shell: | |
"samtools index {input}" | |
rule vcf: | |
input: | |
"{sample}.bam", "{sample}.bam.bai" | |
output: | |
"{sample}.vcf" | |
shell: | |
"freebayes -f {GENOM} -p 1 -C10 {input[0]} > {output} " | |
rule vcf_rename_sample: | |
""" All sample is called unknown """ | |
input: | |
"{sample}.vcf" | |
output: | |
"{sample}.name.vcf" | |
shell: | |
"cat {input}|sed 's/unknown/{wildcards.sample}/g' > {output}" | |
rule bgzip: | |
input: | |
"{sample}.vcf" | |
output: | |
"{sample}.vcf.gz" | |
shell: | |
"bgzip {input} ; tabix -p vcf {output}" | |
rule merge_vcf: | |
input: | |
[f'{sample}.name.vcf.gz' for sample in SAMPLES] | |
output: | |
"final.vcf.gz" | |
shell: | |
"bcftools merge {input} -Oz -o {output}" | |
rule annotation: | |
input: | |
"final.vcf.gz" | |
output: | |
"final.ann.vcf" | |
shell: | |
"snpEff -Xmx10G -v NC_045512.2 {input}> {output}" | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment