Skip to content

Instantly share code, notes, and snippets.

@BrunoGrandePhD
Created January 12, 2022 21:06
Show Gist options
  • Save BrunoGrandePhD/690604da0428f0ee5832ec4a1857a5c4 to your computer and use it in GitHub Desktop.
Save BrunoGrandePhD/690604da0428f0ee5832ec4a1857a5c4 to your computer and use it in GitHub Desktop.
Installing and running Ensembl VEP via vcf2maf

Instructions

These instructions are intended for Linux.

  1. Install Miniconda 3.
  2. Create conda environment using the attached environment.yml file:
    conda env create -n vep -f environment.yml
  3. Download the VEP cache and extract the TAR archive into ~/vep_cache:
    mkdir -p ~/vep_cache
    rsync -avh --progress rsync://ftp.ensembl.org/ensembl/pub/release-103/variation/indexed_vep_cache/homo_sapiens_vep_103_GRCh38.tar.gz ~/vep_cache/homo_sapiens_vep_103_GRCh38.tar.gz
    tar vxzf ~/vep_cache/homo_sapiens_vep_103_GRCh38.tar.gz -C ~/vep_cache
  4. Pre-load the VEP cache into shared memory (the attached script reads it from /dev/shm/vep_cache/):
    rsync -avhP ~/vep_cache/ /dev/shm/vep_cache/
  5. Activate the VEP conda environment and run the attached Bash script:
    conda activate vep
    bash ./run-vcf2maf.sh /path/to/sample.vcf.gz
# Conda environment specification for the `vep` environment
# (consumed by `conda env create -n vep -f environment.yml`).
name: vep
# Channels listed for resolving the packages below.
channels:
- conda-forge
- bioconda
- defaults
# Packages, each pinned as name=version.
dependencies:
- _libgcc_mutex=0.1
- _openmp_mutex=4.5
- atk-1.0=2.36.0
- bzip2=1.0.8
- c-ares=1.17.1
- ca-certificates=2020.12.5
- cairo=1.16.0
- certifi=2020.12.5
- clustalw=2.1
- curl=7.76.1
- ensembl-vep=103.1
- expat=2.3.0
- font-ttf-dejavu-sans-mono=2.37
- font-ttf-inconsolata=3.000
- font-ttf-source-code-pro=2.038
- font-ttf-ubuntu=0.83
- fontconfig=2.13.1
- fonts-conda-ecosystem=1
- fonts-conda-forge=1
- freetype=2.10.4
- fribidi=1.0.10
- gdk-pixbuf=2.42.6
- gettext=0.19.8.1
- giflib=5.2.1
- graphite2=1.3.13
- graphviz=2.47.1
- gtk2=2.24.33
- gts=0.7.6
- harfbuzz=2.8.1
- htslib=1.10.2
- icu=68.1
- jpeg=9d
- krb5=1.17.2
- ld_impl_linux-64=2.35.1
- libcurl=7.76.1
- libdb=6.2.32
- libdeflate=1.6
- libedit=3.1.20191231
- libev=4.33
- libffi=3.3
- libgcc-ng=9.3.0
- libgd=2.3.2
- libglib=2.68.1
- libgomp=9.3.0
- libiconv=1.16
- libnghttp2=1.43.0
- libpng=1.6.37
- librsvg=2.50.3
- libssh2=1.9.0
- libstdcxx-ng=9.3.0
- libtiff=4.2.0
- libtool=2.4.6
- libuuid=2.32.1
- libwebp=1.2.0
- libwebp-base=1.2.0
- libxcb=1.13
- libxml2=2.9.10
- libxslt=1.1.33
- lz4-c=1.9.3
- mysql-connector-c=6.1.11
- ncurses=6.2
- openssl=1.1.1k
- paml=4.9
- pango=1.42.4
- pcre=8.44
- perl=5.26.2
- perl-aceperl=1.92
- perl-algorithm-diff=1.1903
- perl-algorithm-munkres=0.08
- perl-apache-test=1.40
- perl-app-cpanminus=1.7044
- perl-appconfig=1.71
- perl-array-compare=3.0.1
- perl-autoloader=5.74
- perl-base=2.23
- perl-bio-asn1-entrezgene=1.73
- perl-bio-coordinate=1.007001
- perl-bio-db-hts=3.01
- perl-bio-featureio=1.6.905
- perl-bio-phylo=0.58
- perl-bio-samtools=1.43
- perl-bio-tools-phylo-paml=1.7.3
- perl-bio-tools-run-alignment-clustalw=1.7.4
- perl-bio-tools-run-alignment-tcoffee=1.7.4
- perl-bioperl=1.7.2
- perl-bioperl-core=1.007002
- perl-bioperl-run=1.007002
- perl-business-isbn=3.004
- perl-business-isbn-data=20140910.003
- perl-cache-cache=1.08
- perl-capture-tiny=0.48
- perl-carp=1.38
- perl-cgi=4.44
- perl-class-data-inheritable=0.08
- perl-class-inspector=1.34
- perl-class-load=0.25
- perl-class-load-xs=0.10
- perl-class-method-modifiers=2.12
- perl-clone=0.42
- perl-common-sense=3.74
- perl-compress-raw-bzip2=2.087
- perl-compress-raw-zlib=2.087
- perl-constant=1.33
- perl-convert-binary-c=0.78
- perl-convert-binhex=1.125
- perl-crypt-rc4=2.02
- perl-data-dumper=2.173
- perl-data-optlist=0.110
- perl-data-stag=0.14
- perl-date-format=2.30
- perl-db-file=1.855
- perl-dbd-mysql=4.046
- perl-dbd-sqlite=1.64
- perl-dbi=1.642
- perl-devel-globaldestruction=0.14
- perl-devel-overloadinfo=0.005
- perl-devel-stacktrace=2.04
- perl-digest-hmac=1.03
- perl-digest-md5=2.55
- perl-digest-perl-md5=1.9
- perl-digest-sha1=2.13
- perl-dist-checkconflicts=0.11
- perl-dynaloader=1.25
- perl-email-date-format=1.005
- perl-encode=2.88
- perl-encode-locale=1.05
- perl-error=0.17027
- perl-eval-closure=0.14
- perl-exception-class=1.44
- perl-exporter=5.72
- perl-exporter-tiny=1.002001
- perl-extutils-makemaker=7.36
- perl-file-listing=6.04
- perl-file-path=2.16
- perl-file-slurp-tiny=0.004
- perl-file-sort=1.01
- perl-file-temp=0.2304
- perl-file-which=1.23
- perl-font-afm=1.20
- perl-font-ttf=1.06
- perl-gd=2.68
- perl-getopt-long=2.50
- perl-graph=0.9704
- perl-graphviz=2.24
- perl-html-element-extended=1.18
- perl-html-entities-numbered=0.04
- perl-html-formatter=2.16
- perl-html-parser=3.72
- perl-html-tableextract=2.13
- perl-html-tagset=3.20
- perl-html-tidy=1.60
- perl-html-tree=5.07
- perl-html-treebuilder-xpath=0.14
- perl-http-cookies=6.04
- perl-http-daemon=6.01
- perl-http-date=6.02
- perl-http-message=6.18
- perl-http-negotiate=6.01
- perl-image-info=1.38
- perl-image-size=3.300
- perl-io-compress=2.087
- perl-io-html=1.001
- perl-io-sessiondata=1.03
- perl-io-socket-ssl=2.066
- perl-io-string=1.08
- perl-io-stringy=2.111
- perl-io-tty=1.12
- perl-ipc-run=20180523.0
- perl-ipc-sharelite=0.17
- perl-jcode=2.07
- perl-json=4.02
- perl-json-xs=2.34
- perl-lib=0.63
- perl-libwww-perl=6.39
- perl-libxml-perl=0.08
- perl-list-moreutils=0.428
- perl-list-moreutils-xs=0.428
- perl-lwp-mediatypes=6.04
- perl-lwp-protocol-https=6.07
- perl-lwp-simple=6.15
- perl-mailtools=2.21
- perl-math-cdf=0.1
- perl-math-derivative=1.01
- perl-math-random=0.72
- perl-math-spline=0.02
- perl-mime-base64=3.15
- perl-mime-lite=3.030
- perl-mime-tools=5.508
- perl-mime-types=2.17
- perl-mldbm=2.05
- perl-module-implementation=0.09
- perl-module-runtime=0.016
- perl-module-runtime-conflicts=0.003
- perl-moo=2.003004
- perl-moose=2.2011
- perl-mozilla-ca=20180117
- perl-mro-compat=0.13
- perl-net-http=6.19
- perl-net-ssleay=1.88
- perl-ntlm=1.09
- perl-ole-storage_lite=0.19
- perl-package-deprecationmanager=0.17
- perl-package-stash=0.38
- perl-package-stash-xs=0.28
- perl-params-util=1.07
- perl-parent=0.236
- perl-parse-recdescent=1.967015
- perl-pathtools=3.75
- perl-pdf-api2=2.035
- perl-perlio-gzip=0.20
- perl-pod-escapes=1.07
- perl-pod-usage=1.69
- perl-postscript=0.06
- perl-role-tiny=2.000008
- perl-scalar-list-utils=1.52
- perl-sereal=4.007
- perl-sereal-decoder=4.007
- perl-sereal-encoder=4.007
- perl-set-intervaltree=0.12
- perl-set-scalar=1.29
- perl-soap-lite=1.19
- perl-socket=2.027
- perl-sort-naturally=1.03
- perl-spreadsheet-parseexcel=0.65
- perl-spreadsheet-writeexcel=2.40
- perl-statistics-descriptive=3.0702
- perl-storable=3.15
- perl-sub-exporter=0.987
- perl-sub-exporter-progressive=0.001013
- perl-sub-identify=0.14
- perl-sub-install=0.928
- perl-sub-name=0.21
- perl-sub-quote=2.006003
- perl-sub-uplevel=0.2800
- perl-svg=2.84
- perl-svg-graph=0.02
- perl-task-weaken=1.06
- perl-template-toolkit=2.26
- perl-test=1.26
- perl-test-deep=1.128
- perl-test-differences=0.67
- perl-test-exception=0.43
- perl-test-harness=3.42
- perl-test-leaktrace=0.16
- perl-test-most=0.35
- perl-test-requiresinternet=0.05
- perl-test-warn=0.36
- perl-text-csv=2.00
- perl-text-diff=1.45
- perl-tie-ixhash=1.23
- perl-time-hires=1.9760
- perl-time-local=1.28
- perl-timedate=2.30
- perl-tree-dag_node=1.31
- perl-try-tiny=0.30
- perl-type-tiny=1.004004
- perl-types-serialiser=1.0
- perl-unicode-map=0.112
- perl-uri=1.76
- perl-www-robotrules=6.02
- perl-xml-dom=1.46
- perl-xml-dom-xpath=0.14
- perl-xml-filter-buffertext=1.01
- perl-xml-libxml=2.0132
- perl-xml-libxslt=1.94
- perl-xml-namespacesupport=1.12
- perl-xml-parser=2.44_01
- perl-xml-regexp=0.04
- perl-xml-sax=1.02
- perl-xml-sax-base=1.09
- perl-xml-sax-expat=0.51
- perl-xml-sax-writer=0.57
- perl-xml-simple=2.25
- perl-xml-twig=3.52
- perl-xml-writer=0.625
- perl-xml-xpath=1.44
- perl-xml-xpathengine=0.14
- perl-xsloader=0.24
- perl-yaml=1.29
- pip=21.1.1
- pixman=0.40.0
- pthread-stubs=0.4
- python=3.7.10
- python_abi=3.7
- readline=8.1
- samtools=1.10
- setuptools=49.6.0
- sqlite=3.35.5
- t_coffee=11.0.8
- tidyp=1.04
- tk=8.6.10
- unzip=6.0
- vcf2maf=1.6.20
- wheel=0.36.2
- xorg-kbproto=1.0.7
- xorg-libice=1.0.10
- xorg-libsm=1.2.3
- xorg-libx11=1.7.0
- xorg-libxau=1.0.9
- xorg-libxdmcp=1.1.3
- xorg-libxext=1.3.4
- xorg-libxrender=0.9.10
- xorg-renderproto=0.11.1
- xorg-xextproto=7.3.0
- xorg-xproto=7.0.31
- xz=5.2.5
- zlib=1.2.11
- zstd=1.4.9
#!/bin/bash
#
# Convert a VCF file to MAF with vcf2maf.pl (which annotates through VEP).
#
# Usage:
#   run-vcf2maf.sh /path/to/sample.vcf[.gz]
#
# Files written next to the input:
#   <sample>.vcf        decompressed copy (only when the input is .vcf.gz)
#   <sample>.pass.vcf   header lines plus records whose 7th column is
#                       "PASS" or "."
#   <sample>.pass.maf   MAF written by vcf2maf.pl
#   logs/<sample>.out   stdout of vcf2maf.pl
#   logs/<sample>.err   stderr of vcf2maf.pl
#
# Optional environment overrides (defaults are the original fixed values):
#   VEP_PATH   directory passed to --vep-path
#   VEP_DATA   directory passed to --vep-data
#   VEP_FORKS  value passed to --vep-forks
#
# -e: stop on errors, -u: reject unset variables, -f: no pathname expansion,
# pipefail: a pipeline fails if any stage fails.
set -euf -o pipefail

if [[ $# -lt 1 ]]; then
  >&2 echo "Usage: $0 /path/to/sample.vcf[.gz]"
  exit 1
fi

INPUT_VCF="${1}"

# Every derived path below is built by rewriting the .vcf/.vcf.gz suffix.
# With any other suffix the rewrite is a no-op, the "output" path equals the
# input path, and the redirection would truncate the input file. Refuse early.
case "$INPUT_VCF" in
  *.vcf | *.vcf.gz) ;;
  *)
    >&2 echo "Error: input file name must end in .vcf or .vcf.gz: $INPUT_VCF"
    exit 1
    ;;
esac

if [[ ! -r "$INPUT_VCF" ]]; then
  >&2 echo "Error: cannot read input file: $INPUT_VCF"
  exit 1
fi

# Decompress VCF file, if applicable
if [[ "$INPUT_VCF" == *.vcf.gz ]]; then
  INPUT_VCF_DECOMP="${INPUT_VCF%.gz}"
  >&2 echo "Decompressing VCF file: $INPUT_VCF > $INPUT_VCF_DECOMP"
  gzip -dc -- "$INPUT_VCF" > "$INPUT_VCF_DECOMP"
  INPUT_VCF="$INPUT_VCF_DECOMP"
fi

# Derived paths; INPUT_VCF is guaranteed to end in .vcf at this point.
INTER_VCF="${INPUT_VCF%.vcf}.pass.vcf"
INTER_VEP_VCF="${INTER_VCF%.vcf}.vep.vcf"
OUTPUT_MAF="${INPUT_VCF%.vcf}.pass.maf"
LOGS_DIR="$(dirname -- "$INPUT_VCF")/logs"
LOGS_PREFIX="$LOGS_DIR/$(basename -- "$INPUT_VCF" .vcf)"
mkdir -p -- "$LOGS_DIR"

# VEP settings: honour the environment, otherwise use the original defaults.
# The tilde is left unquoted in a plain assignment so the shell expands it.
if [[ -z "${VEP_PATH:-}" ]]; then
  VEP_PATH=~/miniconda3/envs/vep/share/ensembl-vep-103.1-2/
fi
VEP_DATA="${VEP_DATA:-/dev/shm/vep_cache/}"
VEP_FORKS="${VEP_FORKS:-6}"

>&2 echo "Filtering for passed variants: $INPUT_VCF > $INTER_VCF"
# Input is fed on stdin so the file name is never parsed as an awk operand.
awk 'BEGIN {FS=OFS="\t"} $0 ~ /^#/ || $7 == "PASS" || $7 == "."' \
  < "$INPUT_VCF" > "$INTER_VCF"

>&2 echo "Converting VCF to MAF: $INTER_VCF > $OUTPUT_MAF"
# Announce the log locations before the run: under `set -e` the script stops
# as soon as vcf2maf.pl fails, which is exactly when the logs are needed.
>&2 echo "Logs will be here:"
>&2 echo "- stdout: $LOGS_PREFIX.out"
>&2 echo "- stderr: $LOGS_PREFIX.err"

# Remove outputs left over from a previous run before regenerating them.
rm -f -- "$INTER_VEP_VCF" "$OUTPUT_MAF"
vcf2maf.pl \
  --input-vcf "$INTER_VCF" \
  --output-maf "$OUTPUT_MAF" \
  --vep-path "$VEP_PATH" \
  --vep-data "$VEP_DATA" \
  --vep-forks "$VEP_FORKS" \
  --species homo_sapiens \
  --ncbi-build GRCh38 \
  --cache-version 103 \
  --maf-center "Sage Bionetworks" \
  > "$LOGS_PREFIX.out" \
  2> "$LOGS_PREFIX.err"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment