Created
September 10, 2019 15:00
-
-
Save DrYak/dfcdb0e15fb5c40c9608253aeb5dcb33 to your computer and use it in GitHub Desktop.
testing recent shorah directly from github
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Set up a scratch directory and install Miniconda with the bioconda channels.
mkdir -p ~/shorah-test
cd ~/shorah-test || exit 1
#
# install bioconda
#
wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
# -b for batch (no question asked)
bash Miniconda3-latest-Linux-x86_64.sh -b -p ~/shorah-test/miniconda3
# Activate the base environment of the fresh install.
. ~/shorah-test/miniconda3/bin/activate
# Channel priority: conda-forge > bioconda > defaults (last added = highest).
conda config --add channels defaults
conda config --add channels bioconda
conda config --add channels conda-forge
conda update -n base conda
#
# create conda environment with all of shorah's dependencies
#
# automake is pinned to 1.15.0 to match the project's autotools setup.
conda create -p ~/shorah-test/shorah_env python=3.7 htslib biopython numpy gsl zlib bzip2 boost readline perl automake=1.15.0 autoconf pkg-config
conda activate ~/shorah-test/shorah_env
#
# install shorah
#
# (allocaholic is the latest branch that will eventually be made into version 2.0)
git clone https://github.com/cbg-ethz/shorah.git -b allocaholic
cd ~/shorah-test/shorah || exit 1
# Regenerate the autotools build system.
aclocal -I m4
autoconf
automake -a -c
# Out-of-tree build: point pkg-config, the compiler and the linker at the conda env.
mkdir -p build
cd build || exit 1
../configure PKG_CONFIG_PATH="$HOME/shorah-test/shorah_env/lib/pkgconfig/" CFLAGS="-O3 -ffast-math -Wall -pedantic -g" CXXFLAGS="-O3 -ffast-math -Wall -pedantic -g -std=gnu++11" LDFLAGS="-Wl,-O1 -Wl,--as-needed -Wl,-rpath=$HOME/shorah-test/shorah_env/lib/" --with-boost="$HOME/shorah-test/shorah_env/" --prefix="$HOME/shorah-test/"
make clean
make all
make install
#
# use shorah
#
## input: cohort_consensus.fasta -- reference genome against which samples are aligned
## input: REF_aln.bam / .bam.bai -- aligned and indexed samples
#
## param: -x limit samples per windows (increase it if you use more coverage than 100'000)
## param: -S sigma value to use when calling SNVs
## param: -w size of window (needs to be smaller than samples !)
#
## other useful:
##   -r chrm:start-stop, --region chrm:start-stop
##                         region in format 'chr:start-stop', e.g.
##                         'chrm:1000-3000'
##   -R {nnn} : fixed seed for reproducible runs.
##   -c {nnn} : minimal coverage (ignore windows with less than nnn coverage)
##   -s {nnn} : shift between windows
#
cd data || exit 1
# Run the installed shorah with PATH/PYTHONPATH pointing at the local prefix.
PATH="$HOME/shorah-test/bin:$PATH" PYTHONPATH="$HOME/shorah-test/lib/python3.7/site-packages" ~/shorah-test/bin/shorah shotgun --bam REF_aln.bam --fasta cohort_consensus.fasta -x 100000 -S 42 -w 102
#
# for visualisation
#
# support/
#
#  contains, per window, the found haplotypes
#  posterior: bayesian posterior probability (i.e.: after the run)
#  ave_read: number of reads that support bases of this haplotype in average (average per base, because some samples only fill the window partially, hence the non-int number)
#
# for further downstream analysis
#
# .cor.fas
#
#  the alignment sample from the BAM file, but corrected based on the corrected output (correct/) from each overlapping window
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment