Skip to content

Instantly share code, notes, and snippets.

@nilesh-tawari
Last active December 5, 2021 17:56
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nilesh-tawari/8e14d9a0093d75139c6c62baebcf69c8 to your computer and use it in GitHub Desktop.
Save nilesh-tawari/8e14d9a0093d75139c6c62baebcf69c8 to your computer and use it in GitHub Desktop.
VEP installation
https://asia.ensembl.org/info/docs/tools/vep/script/vep_options.html
# Instructions for installing and running new VEP
## 1. Install new version of VEP:
# Clone the ensembl-vep repository and run its installer from inside the
# checkout. The steps are chained with && so that INSTALL.pl is never run from
# the wrong directory when the clone or the cd fails.
git clone https://github.com/Ensembl/ensembl-vep.git &&
  cd ensembl-vep &&
  perl INSTALL.pl
## 2. Install merged cache
# Download the release-87 GRCh37 "merged" VEP cache tarball, then unpack it
# into the current directory.
wget http://ftp.ensembl.org/pub/grch37/release-87/VEP/homo_sapiens_merged_vep_87_GRCh37.tar.gz
tar -zxvf homo_sapiens_merged_vep_87_GRCh37.tar.gz
or, to use the release-91 merged cache instead:
wget ftp://ftp.ensembl.org/pub/release-91/variation/VEP/homo_sapiens_merged_vep_91_GRCh37.tar.gz
RefSeq cache (release 91):
wget http://ftp.ensembl.org/pub/grch37/release-91/variation/VEP/homo_sapiens_refseq_vep_91_GRCh37.tar.gz
## 3. Install plugins
git clone https://github.com/Ensembl/VEP_plugins.git
## 4. Export path of installation, cache and plugin directory:
# Directory of the ensembl-vep checkout; the run script invokes $vep/vep.pl.
vep=/mnt/projects/polarisbioit/RESEARCH/nilesh/rare_diseases/new_vep/ensembl-vep
# Append the cache directory and the plugin directory to PERL5LIB in one step,
# then export the result.
PERL5LIB="${PERL5LIB}:/mnt/projects/polarisbioit/RESEARCH/nilesh/rare_diseases/new_vep/cache:/mnt/projects/polarisbioit/RESEARCH/nilesh/rare_diseases/new_vep/VEP_plugins"
export PERL5LIB
## 5. Script to run VEP: $1 is the input VCF and $2 is the output prefix (VEP writes $2.vcf)
# Decompose and normalize the input VCF with vt, then annotate it with VEP.
# Arguments:
#   $1 - input VCF; intermediates are written next to it as
#        $1.decomposed.vcf and $1.decomposed.normalized.vcf
#   $2 - output prefix; VEP writes $2.vcf
# Variables this script reads but does not set (the caller must define them):
#   vep, ref_fasta, cache, Phenotype_file
# Each one is checked up front so a missing value stops the run with a clear
# message instead of silently producing an empty command-line argument.
: "${1:?usage: pass the input VCF as \$1 and the output prefix as \$2}"
: "${2:?usage: pass the input VCF as \$1 and the output prefix as \$2}"
: "${vep:?set vep to the ensembl-vep directory that contains vep.pl}"
: "${ref_fasta:?set ref_fasta to the reference FASTA}"
: "${cache:?set cache to the VEP cache directory}"
: "${Phenotype_file:?set Phenotype_file to the file for the Phenotypes plugin}"

export PERL5LIB=${PERL5LIB}:/mnt/projects/polarisbioit/RESEARCH/nilesh/rare_diseases/new_vep/cache
export PERL5LIB=${PERL5LIB}:/mnt/projects/polarisbioit/RESEARCH/nilesh/rare_diseases/new_vep/VEP_plugins

# Data files handed to the dbNSFP plugin, the LCR custom annotation and the
# dbscSNV plugin, plus the directory holding the vt binary.
dbSNFP_file=/mnt/projects/polarisbioit/RESEARCH/nilesh/SOFTWARE/dbNSFP/dbNSFP3.3a.gz
LCR_file=/mnt/projects/polarisbioit/RESEARCH/nilesh/clinical_framework/LCR-hs37d5.bed.gz
dbscSNV_file=/mnt/projects/polarisbioit/RESEARCH/nilesh/SOFTWARE/dbscSNV/dbscSNV1.1.txt.gz
vt=/mnt/projects/polarisbioit/RESEARCH/nilesh/SOFTWARE/vt

# Stop if a vt step fails so VEP never runs on a missing or stale intermediate.
"$vt/vt" decompose "$1" -o "$1.decomposed.vcf" || exit 1
"$vt/vt" normalize "$1.decomposed.vcf" -r "$ref_fasta" -o "$1.decomposed.normalized.vcf" || exit 1

# The '&' separators in include_sources are protected by the double quotes.
perl "$vep/vep.pl" --fork 8 \
  -i "$1.decomposed.normalized.vcf" --vcf -o "$2.vcf" --force_overwrite \
  --cache --merged --offline --dir "$cache" --species homo_sapiens \
  --fasta "$ref_fasta" --everything --check_existing \
  --plugin "Phenotypes,file=${Phenotype_file},include_sources=MIM_disease&OMIM&ClinVar&dbGaP&DDG2P&HGMD-PUBLIC&NHGRI-EBI_GWAS_catalog&Orphanet&Uniprot&AMDGC&GEFOS&GIANT&GOA&IIBDGC&MAGIC&COSMIC" \
  --plugin "dbNSFP,${dbSNFP_file},LRT_score,LRT_pred,MutationTaster_score,MutationTaster_pred,MutationAssessor_score,MutationAssessor_pred,FATHMM_score,FATHMM_pred,PROVEAN_score,PROVEAN_pred,VEST3_score,MetaSVM_score,MetaSVM_pred,MetaLR_score,MetaLR_pred,Reliability_index,M-CAP_score,M-CAP_pred,fathmm-MKL_coding_score,fathmm-MKL_coding_pred,fathmm-MKL_coding_group,Eigen_coding_or_noncoding,Eigen-raw,Eigen-phred,Eigen-PC-raw,Eigen-PC-phred,GenoCanyon_score,integrated_fitCons_score,integrated_confidence_value,GM12878_fitCons_score,GM12878_confidence_value,H1-hESC_fitCons_score,H1-hESC_confidence_value,HUVEC_fitCons_score,HUVEC_confidence_value,GERP++_NR,GERP++_RS,phyloP100way_vertebrate,phyloP20way_mammalian,phastCons100way_vertebrate,phastCons20way_mammalian,SiPhy_29way_pi,SiPhy_29way_logOdds" \
  --plugin LoFtool --plugin CSN --plugin SameCodon \
  -custom "${LCR_file},LCR_region,bed,overlap,1" \
  --plugin "dbscSNV,${dbscSNV_file}"
FOR RUNNING NEW VEP (91):
# Root directory that holds the VEP 91 tooling; every path below lives under it.
bin=/mnt/projects/polarisbioit/RESEARCH/nilesh/20_ACMG_implementation/CalcPath/bin
# ensembl-vep checkout inside that directory.
vep=${bin}/ensembl-vep
# Append the cache, plugin and loftee directories to PERL5LIB in one export.
export PERL5LIB="${PERL5LIB}:${bin}/vep_cache:${bin}/VEP_plugins:${bin}/loftee"
# Activate the "snakemake-tutorial" environment for the vt steps below
# (presumably this environment is what provides vt -- confirm).
source activate snakemake-tutorial
# Stream the compressed VCF, rewrite the AD header declaration from
# "ID=AD,Number=<one char>" to "ID=AD,Number=R" (the unescaped '.' in the sed
# pattern matches any single character), then run "vt decompose -s" on stdin
# and write wCDS020.dec.vcf.gz.
zless wCDS020.vcf.gz | sed 's/ID=AD,Number=./ID=AD,Number=R/' | vt decompose -s - -o wCDS020.dec.vcf.gz
# Normalize the decomposed VCF against the GRCh37 primary-assembly FASTA and
# write wCDS020.dec.nor.vcf.gz.
vt normalize wCDS020.dec.vcf.gz -r /mnt/projects/polarisbioit/RESEARCH/perumal-testing/Tools/VEP88/.vep/homo_sapiens/88_GRCh37/Homo_sapiens.GRCh37.75.dna.primary_assembly.fa -o wCDS020.dec.nor.vcf.gz
# Leave the environment again.
source deactivate
FOR GRCh37:
# Annotation databases for the dbNSFP and dbscSNV plugins (GRCh37 builds).
dbSNFP_file=/mnt/projects/polarisbioit/RESEARCH/nilesh/20_ACMG_implementation/CalcPath/bin/dbNSFPv2.9.3/dbNSFP2.9.3.gz
dbscSNV_file=/mnt/projects/polarisbioit/RESEARCH/nilesh/20_ACMG_implementation/CalcPath/bin/dbscSNV1.1/dbscSNV1.1_GRCh37.txt.gz

# Offline VEP run against the RefSeq cache with the LoF, LoFtool, CSN, dbNSFP
# and dbscSNV plugins plus two custom tracks (repeat-masker BED and gnomAD
# genomes VCF). Reads $vep and $bin, which are not set in this step.
# NOTE(review): INPUT.dec.nor.vcf.gz and OUTPUT.dec.nor.vep91.vcf are literal
# file names here -- presumably placeholders to be replaced before running.
"$vep/vep" --fork 16 --vcf \
  -i INPUT.dec.nor.vcf.gz \
  -o OUTPUT.dec.nor.vep91.vcf \
  --force_overwrite --cache --refseq --flag_pick --offline \
  --dir "$bin/vep_cache/" \
  --species homo_sapiens \
  --fasta /mnt/projects/polarisbioit/RESEARCH/george-testing/SOMATIC_TARGET_PANEL.dev/00.resources/hs37d5.fa \
  --everything --check_existing \
  --plugin "LoF,human_ancestor_fa:$bin/LoF_files/human_ancestor.fa.rz,loftee_path:$bin/loftee/" \
  --plugin LoFtool \
  --plugin CSN \
  -custom /mnt/projects/polarisbioit/RESEARCH/nilesh/20_ACMG_implementation/CalcPath/bin/rmsk_hg19_sort.bed.gz,rmsk_region,bed,overlap,1 \
  --plugin "dbNSFP,$dbSNFP_file,LRT_score,LRT_pred,MutationTaster_score,MutationTaster_pred,MutationAssessor_score,MutationAssessor_pred,FATHMM_score,FATHMM_pred,PROVEAN_score,PROVEAN_pred,MetaSVM_score,MetaSVM_pred,MetaLR_score,MetaLR_pred,Reliability_index,M-CAP_score,M-CAP_pred,Eigen_coding_or_noncoding,Eigen-raw,Eigen-phred,Eigen-PC-raw,Eigen-PC-phred,GERP++_NR,GERP++_RS,phyloP100way_vertebrate,phastCons100way_vertebrate,SiPhy_29way_pi,SiPhy_29way_logOdds" \
  --dir_plugins /mnt/projects/polarisbioit/RESEARCH/nilesh/20_ACMG_implementation/CalcPath/bin/VEP_plugins \
  --plugin "dbscSNV,$dbscSNV_file" \
  --assembly GRCh37 \
  -custom /mnt/projects/polarisbioit/RESEARCH/nilesh/20_ACMG_implementation/CalcPath/bin/gnomad.genomes.r2.0.1.sites.noVEP.vcf.gz,gnomADg,vcf,exact,0,AF_AFR,AF_AMR,AF_ASJ,AF_EAS,AF_FIN,AF_NFE,AF_OTH
# Run filter_vep on the annotated VCF with the filter "IMPACT is not MODIFIER"
# and write the result to a new VCF.
# NOTE(review): the input name (…vep91_sift.vcf) is not produced by any command
# shown in these notes -- confirm which VEP output this is meant to read.
/mnt/projects/polarisbioit/RESEARCH/nilesh/20_ACMG_implementation/CalcPath/bin/ensembl-vep/filter_vep -i wCDS020.dec.nor.vep91_sift.vcf -o wCDS020.dec.nor.vep91_sift_filter.vcf --filter "IMPACT is not MODIFIER"
=======================================================================================================================================
TO DO:
--plugin Phenotypes,file=$Phenotype_file,include_sources=MIM_disease'&'OMIM'&'ClinVar'&'dbGaP'&'DDG2P'&'HGMD-PUBLIC'&'NHGRI-EBI_GWAS_catalog'&'Orphanet'&'Uniprot'&'AMDGC'&'GEFOS'&'GIANT'&'GOA'&'IIBDGC'&'MAGIC'&'COSMIC
FOR GRCh38:
# Annotation databases for the dbscSNV and dbNSFP plugins.
dbscSNV_file=/mnt/projects/polarisbioit/RESEARCH/nilesh/20_ACMG_implementation/CalcPath/bin/dbscSNV1.1.txt.gz
dbSNFP_file=/mnt/projects/polarisbioit/RESEARCH/nilesh/20_ACMG_implementation/CalcPath/bin/dbNSFP.gz

# Offline VEP run against the merged cache with the LoF, LoFtool, CSN,
# SameCodon, dbNSFP and dbscSNV plugins and the repeat-masker custom track.
# Cache, LoF and loftee paths are relative (../bin/...), so the command depends
# on the current working directory. Reads $vep, which is not set in this step.
# NOTE(review): --fasta points at a GRCh37 FASTA and the custom BED is named
# rmsk_hg19 -- confirm these are the intended files for this run.
"$vep/vep" --fork 16 --vcf \
  -i wCDS020.dec.nor.vcf.gz \
  -o wCDS020.dec.nor.vep91.vcf \
  --force_overwrite --cache --merged --offline \
  --dir ../bin/vep_cache/ \
  --species homo_sapiens \
  --fasta /mnt/projects/polarisbioit/RESEARCH/perumal-testing/Tools/VEP88/.vep/homo_sapiens/88_GRCh37/Homo_sapiens.GRCh37.75.dna.primary_assembly.fa \
  --everything --check_existing \
  --plugin LoF,human_ancestor_fa:../bin/LoF_files/human_ancestor.fa.rz,loftee_path:../bin/loftee/ \
  --plugin LoFtool \
  --plugin CSN \
  --plugin SameCodon \
  -custom /mnt/projects/polarisbioit/RESEARCH/nilesh/20_ACMG_implementation/CalcPath/bin/rmsk_hg19_sort.bed.gz,rmsk_region,bed,overlap,1 \
  --plugin "dbNSFP,$dbSNFP_file,LRT_score,LRT_pred,MutationTaster_score,MutationTaster_pred,MutationAssessor_score,MutationAssessor_pred,FATHMM_score,FATHMM_pred,PROVEAN_score,PROVEAN_pred,MetaSVM_score,MetaSVM_pred,MetaLR_score,MetaLR_pred,Reliability_index,M-CAP_score,M-CAP_pred,fathmm-MKL_coding_score,fathmm-MKL_coding_pred,fathmm-MKL_coding_group,Eigen_coding_or_noncoding,Eigen-raw,Eigen-phred,Eigen-PC-raw,Eigen-PC-phred,GenoCanyon_score,integrated_fitCons_score,integrated_confidence_value,GM12878_fitCons_score,GM12878_confidence_value,H1-hESC_fitCons_score,H1-hESC_confidence_value,HUVEC_fitCons_score,HUVEC_confidence_value,GERP++_NR,GERP++_RS,phyloP100way_vertebrate,phyloP20way_mammalian,phastCons100way_vertebrate,phastCons20way_mammalian,SiPhy_29way_pi,SiPhy_29way_logOdds" \
  --dir_plugins /mnt/projects/polarisbioit/RESEARCH/nilesh/20_ACMG_implementation/CalcPath/bin/VEP_plugins \
  --plugin "dbscSNV,$dbscSNV_file"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment