Last active
November 30, 2020 16:51
-
-
Save alanhoyle/58f69223150bf4fdfbf1ade797cc19f0 to your computer and use it in GitHub Desktop.
Build VEP cache for vcf2maf
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build VEP cache for vcf2maf.
# based on https://gist.github.com/ckandoth/5390e3ae4ecf182fa92f6318cfa9fa97
#
# Steps:
#   1. Install the VEP API/plugins and the GRCh38 human cache into VEP_CACHE.
#   2. Fetch the LOFTEE splice module into the VEP Plugins directory.
#   3. Download the ExAC non-TCGA sites VCF, trim its annotations, drop the
#      vcf2maf known somatic sites, and index the result with tabix.
#   4. Fetch the Ensembl GRCh38 cache tarball and convert the cache to the
#      compressed/indexed layout.
#
# Run with bash (execute it, do not source it: strict mode and the EXIT trap
# below would otherwise leak into the calling shell).
# Needs on PATH: vep_install, vep_convert_cache, bcftools, tabix, pv, wget,
# curl, rsync.

# Abort on the first failing command, on unset variables, and when ANY stage
# of a pipeline fails (otherwise a failed `bcftools annotate` would go
# unnoticed and its truncated output would replace the downloaded VCF).
set -euo pipefail

VEP_VER=97
VEP_CACHE=/opt/vep-cache

# The ExAC VCF is filtered into a temporary ".fixed" file and then moved back
# over the downloaded file, so both share one final name.
EXAC_VCF="${VEP_CACHE}/ExAC_nonTCGA.r0.3.1.sites.vep.vcf.gz"
EXAC_FIXED_VCF="${VEP_CACHE}/ExAC_nonTCGA.r0.3.1.sites.fixed.vcf.gz"
HEADER_TMP="${VEP_CACHE}/header_line.tmp"

# Remove the scratch header file on every exit path, not only on success.
trap 'rm -f -- "${HEADER_TMP}"' EXIT

mkdir -p -- "${VEP_CACHE}"

vep_install -a ap --NO_HTSLIB --NO_TEST --NO_UPDATE \
  -s homo_sapiens -y GRCh38 \
  -c "${VEP_CACHE}" --convert --cache_version "${VEP_VER}" \
  --PLUGINS LoF

wget https://raw.githubusercontent.com/konradjk/loftee/v0.3-beta/splice_module.pl \
  -O "${VEP_CACHE}/Plugins/splice_module.pl"

# Relative paths below (known_somatic_sites.bed) resolve against VEP_CACHE.
cd -- "${VEP_CACHE}"

# -O pins the output name so a rerun overwrites the file instead of letting
# wget save a fresh copy as "*.vcf.gz.1" next to the stale one.
wget -O "${EXAC_VCF}" \
  ftp://ftp.broadinstitute.org:/pub/ExAC_release/release0.3.1/subsets/ExAC_nonTCGA.r0.3.1.sites.vep.vcf.gz

# FILTER definition injected into the VCF header by `bcftools annotate`
# (presumably because the ExAC header lacks it -- TODO confirm).
printf '%s\n' \
  '##FILTER=<ID=AC_Adj0_Filter,Description="Only low quality genotype calls containing alternate alleles are present">' \
  > "${HEADER_TMP}"

# --fail: exit non-zero on an HTTP error instead of saving the error page
# as the BED file.
curl -fLO https://raw.githubusercontent.com/mskcc/vcf2maf/v1.6.16/data/known_somatic_sites.bed

echo "filtering and annotating the ExAC VCF"
# annotate: drop FORMAT fields and all INFO tags except the listed allele
#           count/number/frequency tags (leading "^" = keep only these).
# pv:       line-based progress meter.
# filter:   "^" before the targets file excludes the listed sites.
bcftools annotate \
    --header-lines "${HEADER_TMP}" \
    --remove FMT,^INF/AF,INF/AC,INF/AN,INF/AC_Adj,INF/AN_Adj,INF/AC_AFR,INF/AC_AMR,INF/AC_EAS,INF/AC_FIN,INF/AC_NFE,INF/AC_OTH,INF/AC_SAS,INF/AN_AFR,INF/AN_AMR,INF/AN_EAS,INF/AN_FIN,INF/AN_NFE,INF/AN_OTH,INF/AN_SAS \
    "${EXAC_VCF}" \
  | pv -f -l \
  | bcftools filter \
    --targets-file ^known_somatic_sites.bed \
    --output-type z \
    --output "${EXAC_FIXED_VCF}"

# Only reached when every pipeline stage succeeded (pipefail + errexit).
mv -f -- "${EXAC_FIXED_VCF}" "${EXAC_VCF}"
tabix -p vcf "${EXAC_VCF}"
rm -f -- "${HEADER_TMP}"

# Additional caches that could be fetched the same way:
# rsync -vh "rsync://ftp.ensembl.org/ensembl/pub/release-${VEP_VER}/variation/vep/homo_sapiens_vep_${VEP_VER}_GRCh37.tar.gz" "${VEP_CACHE}"
# rsync -vh "rsync://ftp.ensembl.org/ensembl/pub/release-${VEP_VER}/variation/vep/mus_musculus_vep_${VEP_VER}_GRCm38.tar.gz" "${VEP_CACHE}"
# OR:
# wget -P "${VEP_CACHE}" "ftp://ftp.ensembl.org/ensembl/pub/release-${VEP_VER}/variation/vep/homo_sapiens_vep_${VEP_VER}_GRCh37.tar.gz"
# wget -P "${VEP_CACHE}" "ftp://ftp.ensembl.org/ensembl/pub/release-${VEP_VER}/variation/vep/mus_musculus_vep_${VEP_VER}_GRCm38.tar.gz"

rsync -vh --progress \
  "rsync://ftp.ensembl.org/ensembl/pub/release-${VEP_VER}/variation/vep/homo_sapiens_vep_${VEP_VER}_GRCh38.tar.gz" \
  "${VEP_CACHE}"

# Manual unpacking of the downloaded tarballs (currently disabled):
# echo "expanding VEP cache"
# bash -c "cat ${VEP_CACHE}/*_vep_${VEP_VER}_GRC*.tar.gz | pv -f | tar -izxf - -C ${VEP_CACHE} "
# echo "... Removing source files ..."
# rm -v ${VEP_CACHE}/*_vep_${VEP_VER}_GRC*.tar.gz

echo "... converting VEP cache to compressed/index ..."
vep_convert_cache --species all --version all --dir "${VEP_CACHE}"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment