Skip to content

Instantly share code, notes, and snippets.

@max-mapper
Last active May 4, 2018 23:22
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save max-mapper/7ad5c0e81ee003fde843f6a133d94b86 to your computer and use it in GitHub Desktop.
Save max-mapper/7ad5c0e81ee003fde843f6a133d94b86 to your computer and use it in GitHub Desktop.
CALeDNA container (built with mkcontainer; install it with `npm i mkcontainer -g`)
#! /bin/bash
# Grid Engine batch job "anacapa-co1-mock-blca": runs anacapa_bowtie2_blca.sh
# inside the anacapa-1.0.0 Singularity image, with /act bind-mounted into the
# container. The "#$" lines below are scheduler directives and are unchanged.
#$ -S /bin/bash
#$ -q std.q
#$ -cwd
#$ -N anacapa-co1-mock-blca
#$ -j y
#$ -o anacapa-co1-mock-blca-out.qlog
#$ -e anacapa-co1-mock-blca-err.qlog
#$ -l mem_free=48G
#$ -pe smp 1
#$ -V

# Fixed locations used by this run.
singularity_bin=/opt/singularity-2.4.2/bin/singularity
container_image=/home/mogden2/anacapa-1.0.0.img
anacapa_db=/home/mogden2/Anacapa-git/Anacapa_db

# Command line executed (and timed) by bash inside the container.
blca_cmd="time ${anacapa_db}/anacapa_bowtie2_blca.sh"
blca_cmd+=" -o /home/mogden2/results-mock"
blca_cmd+=" -d ${anacapa_db}"
blca_cmd+=" -k ${anacapa_db}/scripts/merced-hpc-headers.sh"
blca_cmd+=" -u mogden2"

"$singularity_bin" exec -B /act "$container_image" /bin/bash -c "$blca_cmd"
#! /bin/bash
# Grid Engine batch job "anacapa-co1-mock": runs anacapa_QC_dada2.sh on the
# CO1 mock fastq directory inside the anacapa-1.1.0 Singularity image, with
# /act bind-mounted into the container. The "#$" lines below are scheduler
# directives and are unchanged.
#$ -S /bin/bash
#$ -q std.q
#$ -cwd
#$ -N anacapa-co1-mock
#$ -j y
#$ -o anacapa-co1-mock-out.qlog
#$ -e anacapa-co1-mock-err.qlog
#$ -l mem_free=48G
#$ -pe smp 1
#$ -V

# Fixed locations used by this run.
singularity_bin=/opt/singularity-2.4.2/bin/singularity
container_image=/home/mogden2/anacapa-1.1.0.img
anacapa_db=/home/mogden2/Anacapa-git/Anacapa_db
mock_data=/home/mogden2/Mock_CO1_Leray_Knowlton

# Command line executed (and timed) by bash inside the container.
qc_cmd="time ${anacapa_db}/anacapa_QC_dada2.sh"
qc_cmd+=" -i ${mock_data}/CO1_Leray_and_Knowlton_fastq-2x300"
qc_cmd+=" -o /home/mogden2/results-mock"
qc_cmd+=" -d ${anacapa_db}"
qc_cmd+=" -k ${anacapa_db}/scripts/merced-hpc-headers.sh"
qc_cmd+=" -f ${mock_data}/forward_CO1_p.txt"
qc_cmd+=" -r ${mock_data}/reverse_CO1_p.txt"
qc_cmd+=" -a nextera -t MiSeq"

# The outer "time" measures the whole singularity invocation as well.
time "$singularity_bin" exec -B /act "$container_image" /bin/bash -c "$qc_cmd"
# Anacapa configuration values (shell variable assignments).
# Absolute paths to the cutadapt and muscle executables. These match the
# locations the container recipe in this gist installs them to (Anaconda
# prefix /usr/local/anacapa/anaconda, muscle copied to /usr/local/bin).
CUTADAPT="/usr/local/anacapa/anaconda/bin/cutadapt"
MUSCLE="/usr/local/bin/muscle"
# The following settings are deliberately left empty.
# NOTE(review): presumably these hold per-cluster module-load commands that
# are unnecessary when everything is already on PATH inside the container;
# confirm against the Anacapa scripts that read this file.
MODULE_SOURCE=""
FASTX_TOOLKIT=""
ANACONDA_PYTHON=""
BOWTIE2=""
ATS=""
R=""
PYTHONWNUMPY=""
GCC=""
# NOTE(review): presumably switches Anacapa to run steps locally instead of
# submitting them to a scheduler; confirm in the consuming scripts.
LOCALMODE=TRUE
#!/bin/bash
# Build the container image with mkcontainer, then convert its first
# partition into a writable Singularity image (singularity.img).
#
# Steps: generate the Makefile, build container.img, attach it to a free
# loop device, mount partition 1 on ./mnt, run `singularity build` from the
# mounted tree, then unmount and detach.
set -e

# -p keeps re-runs from failing when the directories already exist.
mkdir -p mnt cache

mkcontainer-generate
make CACHE="$PWD/cache"

# --show prints the loop device that was actually allocated, so we neither
# assume /dev/loop0 nor have to detach every loop device on the host later.
loop_device=$(sudo losetup --partscan --find --show container.img)

sudo mount "${loop_device}p1" mnt
sudo singularity build --writable singularity.img mnt
sudo umount mnt

# Detach only the device this script attached.
sudo losetup -d "$loop_device"
# Sets NSPAWN_BOOTSTRAP_IMAGE_SIZE to 10GB.
# NOTE(review): presumably read by mkcontainer to size the generated disk
# image; confirm in the mkcontainer documentation.
# NOTE(review): stock `docker build` only accepts ARG before FROM, so this
# ordering appears to rely on mkcontainer's own parser; confirm before
# reusing this file with Docker.
ENV NSPAWN_BOOTSTRAP_IMAGE_SIZE=10GB
# Base image: Ubuntu 16.04 (xenial).
FROM ubuntu:xenial
# Bootstrap the image:
#  - create /usr/local/anacapa and give its .bashrc an unlimited bash history
#  - nspawn needs resolv.conf to be set up for internet to work; entries must
#    use the "nameserver <ip>" form, a bare address is not a valid directive
#  - the root password is changed to a known value so we can log in later
# The chain must end without a trailing "&& \": otherwise the next comment
# and RUN line are spliced into this shell command.
RUN mkdir /usr/local/anacapa && \
  cd /usr/local/anacapa && \
  echo "export HISTFILESIZE=" >> .bashrc && \
  echo "export HISTSIZE=" >> .bashrc && \
  rm -f /etc/resolv.conf && echo 'nameserver 8.8.8.8' > /etc/resolv.conf && \
  echo "root:root" | chpasswd
# install apt + npm dependencies:
#  - base tooling and development headers from the Ubuntu archive
#  - r-base from the CRAN xenial repository (key + repo added first)
#  - Anaconda2 5.0.1 into /usr/local/anacapa/anaconda, added to PATH via .bashrc
#  - Node.js 6.x from NodeSource, then the dat CLI via npm
# `apt-get update` runs before the first install so a package index exists.
# The NodeSource setup script is piped straight to bash: sudo is never
# installed here, and the other commands in this chain already run without it.
RUN apt-get update && \
  apt-get install software-properties-common apt-transport-https curl wget git libssl-dev libcurl4-openssl-dev libxml2-dev -y && \
  apt-key adv --keyserver keyserver.ubuntu.com --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9 && \
  add-apt-repository 'deb [arch=amd64,i386] https://cran.rstudio.com/bin/linux/ubuntu xenial/' && \
  add-apt-repository universe && \
  apt-get update && \
  apt-get install r-base -y && \
  wget -P /tmp/ "http://repo.continuum.io/archive/Anaconda2-5.0.1-Linux-x86_64.sh" && \
  bash "/tmp/Anaconda2-5.0.1-Linux-x86_64.sh" -b -p /usr/local/anacapa/anaconda && \
  echo "export PATH=/usr/local/anacapa/anaconda/bin:\$PATH" >> /usr/local/anacapa/.bashrc && \
  curl -sL https://deb.nodesource.com/setup_6.x | bash - && \
  apt-get install -y nodejs && \
  npm i dat -g
# Fetch this gist into /usr/local/anacapa/gist, promote its run.sh to an
# executable in /usr/local/anacapa, install the R packages listed in
# install-deps.R, and make the R site-library writable by other users.
# .bashrc is sourced first so the PATH additions made earlier are in effect.
RUN cd /usr/local/anacapa && \
    . /usr/local/anacapa/.bashrc && \
    git clone https://gist.github.com/maxogden/7ad5c0e81ee003fde843f6a133d94b86 gist && \
    mv gist/run.sh run.sh && \
    chmod +x run.sh && \
    Rscript --vanilla gist/install-deps.R && \
    chmod o+w /usr/local/lib/R/site-library
# install python modules:
#  - biopython and cutadapt via pip
#  - phyloseq, ecopcr, obitools, blast and bowtie2 via conda, after adding
#    the r, defaults, conda-forge and bioconda channels
# .bashrc is sourced so the Anaconda bin directory is on PATH.
# The cd is chained with "&&": a single "&" would run it in the background
# and leave the working directory of the following commands unchanged.
RUN cd /usr/local/anacapa && \
  . /usr/local/anacapa/.bashrc && \
  pip install biopython cutadapt && \
  conda config --add channels r && \
  conda config --add channels defaults && \
  conda config --add channels conda-forge && \
  conda config --add channels bioconda && \
  conda install -yqc bioconda phyloseq ecopcr obitools blast bowtie2
# Install the "hoffman" software bundle: clone it with dat, unpack
# fastx_toolkit, libgtextutils and bowtie2 2.2.9 under /usr/local/anacapa,
# expose /usr/local/anacapa/apps as /u/local/apps, register the
# libgtextutils library directory with the dynamic linker, extend PATH via
# .bashrc, and install the muscle binary as /usr/local/bin/muscle.
# NOTE(review): $HOFFMANDEPS is not defined anywhere in this file; it
# presumably has to be supplied by the build environment. Confirm.
RUN cd /usr/local/anacapa && \
    dat clone $HOFFMANDEPS hoffman-deps && \
    tar xzvf hoffman-deps/fastx_toolkit.tar.gz && \
    mkdir -p /u/local && \
    ln -s /usr/local/anacapa/apps /u/local/apps && \
    echo "export PATH=/usr/local/anacapa/apps/fastx_toolkit/0.0.13.2/gcc-4.4.6/bin/:\$PATH" >> .bashrc && \
    tar xzvf hoffman-deps/libgtextutils.tar.gz && \
    echo "/usr/local/anacapa/apps/libgtextutils/0.6.1/gcc-4.4.6/lib/" > /etc/ld.so.conf.d/libgtextutils.conf && \
    ldconfig && \
    tar xzvf hoffman-deps/bowtie2-2.2.9.tar.gz && \
    echo "export PATH=/usr/local/anacapa/apps/bowtie2/2.2.9:\$PATH" >> .bashrc && \
    cp hoffman-deps/muscle3.8.31_i86linux64 /usr/local/bin/muscle && \
    chmod +x /usr/local/bin/muscle
# Manage packages -----
# Install whichever required CRAN and Bioconductor packages are missing.
# Membership is tested against rownames(installed.packages()), i.e. the
# package names only. installed.packages() returns a character matrix, and
# matching against the whole matrix can also hit other fields (for example a
# package that merely appears as another package's sole "Suggests" entry).

# 1. Download packages from CRAN
.cran_packages <- c("ggplot2", "plyr", "dplyr","seqRFLP", "reshape2", "tibble", "devtools", "Matrix", "mgcv", "readr", "stringr")
.inst <- .cran_packages %in% rownames(installed.packages())
if (any(!.inst)) {
  install.packages(.cran_packages[!.inst], repos = "http://cran.rstudio.com/")
}

# 2. Download packages from biocLite
.bioc_packages <- c("phyloseq", "genefilter", "impute", "Biostrings")
.inst <- .bioc_packages %in% rownames(installed.packages())
if (any(!.inst)) {
  # biocLite() is defined by sourcing the Bioconductor bootstrap script.
  source("http://bioconductor.org/biocLite.R")
  biocLite(.bioc_packages[!.inst])
}
# Pin dada2 to a specific release.
# .dada_version is the version string the installed package must report;
# .dada_version_gh is the GitHub ref passed to install_github().
.dada_version = "1.6.0"
.dada_version_gh = "v1.6"

# Install from GitHub when dada2 is missing or reports a different version.
# The name lookup uses rownames() so only package names are matched; && short
# circuits, so packageVersion() is only called when dada2 is installed.
.dada_ok <- "dada2" %in% rownames(installed.packages()) &&
  packageVersion("dada2") == .dada_version
if (.dada_ok) {
  cat("congrats, right version of dada2")
} else {
  devtools::install_github("benjjneb/dada2", ref=.dada_version_gh)
}

# Load the package and fail loudly if the pinned version is still not the
# one in use. The check uses .dada_version so the pin lives in one place.
library("dada2")
cat(paste("dada2 package version:", packageVersion("dada2")))
if(packageVersion("dada2") != .dada_version) {
  stop("Please make sure you have dada version ", .dada_version, " installed")
}
# Grid Engine job-script header templates (works on ucmerced).
# Each variable holds a shebang line followed by "#$" scheduler directives:
# queue std.q, run in the current directory, mem_free=48G, one smp slot, a
# job name, "-M ${UN}", and -o/-e log paths under ${OUT}/Run_info/run_logs.
# Because the values are double-quoted, ${UN}, ${OUT} and ${MB} are expanded
# when this file is evaluated, so they must already be set by then; the "\n"
# sequences stay as a literal backslash + n in the variable.
# NOTE(review): presumably the consumer renders them with printf or echo -e
# to turn "\n" into real newlines; confirm in the calling scripts.

# Header for the paired-read dada2 job.
DADA2_PAIRED_HEADER="#!/bin/bash\n#$ -q std.q\n#$ -cwd\n#$ -l mem_free=48G\n#$ -pe smp 1\n#$ -N paired_dada2\n#$ -M ${UN}\n#$ -o ${OUT}/Run_info/run_logs/paired.out\n#$ -e ${OUT}/Run_info/run_logs/paired.err"
# Header for the unpaired forward-read dada2 job.
DADA2_UNPAIRED_F_HEADER="#!/bin/bash\n#$ -q std.q\n#$ -cwd\n#$ -l mem_free=48G\n#$ -pe smp 1\n#$ -N unpaired_F_dada2\n#$ -M ${UN}\n#$ -o ${OUT}/Run_info/run_logs/unpaired_F.out\n#$ -e ${OUT}/Run_info/run_logs/unpaired_F.err"
# Header for the unpaired reverse-read dada2 job.
DADA2_UNPAIRED_R_HEADER="#!/bin/bash\n#$ -q std.q\n#$ -cwd\n#$ -l mem_free=48G\n#$ -pe smp 1\n#$ -N unpaired_R_dada2\n#$ -M ${UN}\n#$ -o ${OUT}/Run_info/run_logs/unpaired_R.out\n#$ -e ${OUT}/Run_info/run_logs/unpaired_R.err"
# Header for the bowtie2 job; its -e stream goes to a
# bowtie2_blca${MB}.alignment_stats file rather than a .err file.
B2_HEADER="#!/bin/bash\n#$ -q std.q\n#$ -cwd\n#$ -l mem_free=48G\n#$ -pe smp 1\n#$ -N bowtie2_blca \n#$ -M ${UN}\n#$ -o ${OUT}/Run_info/run_logs/bowtie2_blca${MB}.out \n#$ -e ${OUT}/Run_info/run_logs/bowtie2_blca${MB}.alignment_stats"
# Header for the BLCA job; the job name and log file names include ${MB}.
BLCA_HEADER="#!/bin/bash\n#$ -q std.q\n#$ -cwd\n#$ -l mem_free=48G\n#$ -pe smp 1\n#$ -N BLCA_${MB} \n#$ -M ${UN}\n#$ -o ${OUT}/Run_info/run_logs/blca${MB}.out\n#$ -e ${OUT}/Run_info/run_logs/blca${MB}.err"
#!/bin/bash
# Fetch the taxonomy reference and CO1 mock data with dat, make the Anacapa
# shell scripts executable, unpack the filtered reference databases inside
# anacapa-git/Anacapa_db under short marker names, and install the gist's
# anacapa_config.sh into Anacapa_db/scripts.
#
# Requires the environment variables TAXON and DATA (passed to `dat clone`).

# Fail early with a clear message when a required variable is missing,
# instead of silently calling `dat clone` with the wrong arguments.
dat clone "${TAXON:?TAXON must be set to the dat link of the taxonomy reference}" taxonref20171118
dat clone "${DATA:?DATA must be set to the dat link of the CO1 mock data}" CO1_mock_data

# -exec ... + handles file names with spaces, unlike a plain `| xargs`.
find anacapa-git -name "*.sh" -exec chmod +x {} +

# Stop here if the directory is missing; otherwise the tar/mv/cp steps below
# would run in the wrong place.
cd anacapa-git/Anacapa_db || exit 1

# NOTE(review): the taxonomy archive is cloned above into ./taxonref20171118,
# but the archives are read from ../taxonref (i.e. anacapa-git/taxonref).
# Confirm which location is intended.
for archive in ../taxonref/*_filtered*; do
  [ -e "$archive" ] || continue   # glob matched nothing
  tar xzvf "$archive"
done

# Rename each unpacked database directory to its short marker name.
for marker in 12S 16S 18S FITS PITS CO1; do
  mv "${marker}_db_filtered_to_remove_ambigous_taxonomy" "$marker"
done

cp ../../gist/anacapa_config.sh scripts/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment