Created
June 22, 2023 14:10
-
-
Save PoisonAlien/082b2a1b123423ae6942744e2a7bdc13 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env nextflow | |
VERSION="2.0.0" | |
System.err.println "------------------------------------" | |
System.err.println " Annotate VCF files" | |
System.err.println " ${VERSION}" | |
System.err.println "------------------------------------" | |
params.vcf = "my.vcf.gz" | |
//wget -O ${pipeline_dir}/resources/Homo_sapiens.GRCh38.107.chr.gff3.gz https://ftp.ensembl.org/pub/release-107/gff3/homo_sapiens/Homo_sapiens.GRCh38.107.chr.gff3.gz | |
params.gffhg38 = "Homo_sapiens.GRCh38.107.chr.gff3.gz" | |
//wget -O ${pipeline_dir}/resources/ref_fa/hg38/hg38.fa.gz https://hgdownload.soe.ucsc.edu/goldenPath/hg38/bigZips/latest/hg38.fa.gz | |
//gunzip ${pipeline_dir}/resources/ref_fa/hg38/hg38.fa.gz | |
//samtools faidx ${pipeline_dir}/resources/ref_fa/hg38/hg38.fa | |
params.refhg38 = "hg38.fa" | |
params.anno = "bcsq" | |
params.outdir = "./" | |
process normalizeVCF{ | |
//conda '${params.condapath}' | |
input: | |
path vcf from vcf_files | |
output: | |
file "${vcf.getSimpleName()}.norm.vcf.gz" into normVCF | |
file "${vcf.getSimpleName()}.norm.vcf.gz.csi" into normVcfidx | |
script: | |
""" | |
bcftools norm -f ${params.refhg38} -m -both -O z -o ${vcf.getSimpleName()}.norm.vcf.gz \ | |
-r chr1,chr2,chr3,chr4,chr5,chr6,chr7,chr8,chr9,chr10,chr11,chr12,chr13,chr14,chr15,chr16,chr17,chr18,chr19,chr20,chr21,chr22,chrX,chrY,chrM ${vcf.toRealPath()} | |
bcftools index ${vcf.getSimpleName()}.norm.vcf.gz | |
""" | |
} | |
process annotate { | |
publishDir "$params.outdir", mode: 'copy' | |
input: | |
path vcf from normVCF | |
path vcfIdx from normVcfidx | |
output: | |
path "${vcf.getSimpleName()}.anno.vcf.gz" into maVcf | |
path "${vcf.getSimpleName()}.anno.vcf.gz.csi" into maVcfIdx | |
script: | |
if (params.anno == 'bcsq') | |
""" | |
bcftools csq -O z -l -p a -s - --threads 2 -f ${params.refhg38} -g ${params.gffhg38} -c CSQ ${vcf.toRealPath()} | \ | |
bcftools +split-vep /dev/stdin -O z -o ${vcf.getSimpleName()}.anno.vcf.gz -c - -s worst | |
bcftools index ${vcf.getSimpleName()}.anno.vcf.gz | |
""" | |
else if(params.anno == 'vep') | |
""" | |
vep --format vcf --vcf --input_file ${vcf.toRealPath()} --fasta ${params.vepRef} \ | |
--offline --cache --dir ${params.vepCacheDir} --buffer_size 5000 \ | |
--pick --sift b --polyphen b --ccds --hgvs --symbol --canonical --mane --variant_class \ | |
--vcf --compress_output bgzip --output_file ${vcf.getSimpleName()}.anno.vcf.gz --stats_file ${vcf.getSimpleName()}.html | |
bcftools index ${vcf.getSimpleName()}.anno.vcf.gz | |
""" | |
else | |
error "Invalid annotation mode: ${params.anno}" | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment