Skip to content

Instantly share code, notes, and snippets.

@mschatz
mschatz / AnVILTerminal.sh
Last active May 1, 2021 04:56
AnVIL Notes
## Configure a new ANVIL instance with my favorite unix tools
##
## Note: the conda installation requires manual interaction; everything after that is automated
#####################################################################################
## set up conda
## Download the Miniconda installer into the home directory and run it.
## The installer is interactive. The steps are chained so that the installer
## is not started when the download fails, and it is run with bash, which is
## how the Miniconda documentation invokes it.
cd ~ \
  && wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh \
  && bash Miniconda3-latest-Linux-x86_64.sh
## re-read the shell startup file (presumably to pick up the conda
## initialization the installer adds -- confirm); the explicit path makes
## this independent of the current directory
. ~/.bashrc
@mschatz
mschatz / bucketsize.sh
Last active March 10, 2021 06:56
Calculate a GCP Bucket size using gsutil in parallel
## Calculate the total storage in a bucket by computing the size of each subdirectory in parallel
##################################################################################################
## Samantha devel
## Work inside a scratch directory; -p keeps a re-run from failing when the
## directory already exists, and the cd only happens if the mkdir succeeded.
mkdir -p samantha && cd samantha
bucket=fc-61692b1d-7909-439a-9ae6-3fb7cb9069a7
## Top-level listing of the bucket, saved to a file named "ls"
gsutil ls "gs://${bucket}" > ls
## Entries ending in '/' are the subdirectories
grep '/$' ls > dirs
mkdir -p sizes
## Field 4 of gs://<bucket>/<dir>/ is the directory name. Run one
## `gsutil du -c` per directory in parallel (-t echoes each command) and
## store each result in sizes/<dir>.sizes
cut -f4 -d'/' dirs | parallel -t "gsutil du -c gs://${bucket}/{}/ > sizes/{}.sizes"
## Set up and test the aspera client in an ANVIL terminal
###########################################################
## Stream the Aspera Connect 3.9.8 tarball from the download site straight
## into tar, which extracts it into the current directory
aspera_installer=ibm-aspera-connect-3.9.8.176272-linux-g2.12-64
wget -qO- "https://download.asperasoft.com/download/sw/connect/3.9.8/${aspera_installer}.tar.gz" \
  | tar -xvz
## Make the extracted installer script executable, then run it
chmod +x "${aspera_installer}.sh"
"./${aspera_installer}.sh"
T2T Analysis Scripts
bucketstats.R : Plot histogram of file sizes in a bucket by filetype (from gsutil ls)
hcstatus.sh : Look through buckets to see how many raw vcf files are available
### local install of gcloud/gsutil
## -p creates ~/build when it does not exist yet, so the cd works on a
## fresh instance
mkdir -p ~/build && cd ~/build
## -f makes curl fail on an HTTP error instead of saving the error page as
## the tarball; -L follows redirects. The steps are chained so that
## install.sh only runs from a successfully unpacked SDK directory.
curl -fLO https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-sdk-332.0.0-linux-x86_64.tar.gz \
  && tar xzvf google-cloud-sdk-332.0.0-linux-x86_64.tar.gz \
  && cd google-cloud-sdk \
  && ./install.sh
## re-read the shell startup file (presumably so the PATH changes made by
## install.sh take effect -- confirm)
. ~/.bashrc
## initialize and authenticate without opening a browser on this machine
gcloud init --console-only
gcloud auth application-default login --no-launch-browser
# Remove every member that holds the "User" role from the
# "deeppilots-bioconductor" billing project.
library(AnVIL)
terra <- Terra()
response <- terra$listBillingProjectMembers("deeppilots-bioconductor")
# Keep only the e-mail addresses of members whose role is "User"
emails <- response |>
    flatten() |>
    filter(role == "User") |>
    pull(email)
# Issue one removal request per address
for (email in emails) {
    terra$removeUserFromBillingProject("deeppilots-bioconductor", "User", email)
}
## download and unpack Globus Connect Personal
wget https://downloads.globus.org/globus-connect-personal/linux/stable/globusconnectpersonal-latest.tgz
tar xzvf globusconnectpersonal-latest.tgz
## The "latest" tarball unpacks into a versioned directory; read that name
## from the archive's first entry instead of hard-coding a single release
gcp_dir=$(tar tzf globusconnectpersonal-latest.tgz \
  | awk -F/ 'NR == 1 { print ($1 == "." ? $2 : $1) }')
export PYTHONHOME=~/miniconda3
## setup and name your endpoint
"${gcp_dir}/globusconnectpersonal" -setup
## start the endpoint in the background
"${gcp_dir}/globusconnectpersonal" -start &
## install the command line client and log in
pip install globus-cli
globus login
## Print a timestamp every 3 seconds until interrupted; tee shows the
## output on the terminal and also writes it to true.log
while :; do
  date
  sleep 3
done | tee true.log
##fileformat=VCFv4.2
##FILTER=<ID=PASS,Description="All filters passed">
##ALT=<ID=NON_REF,Description="Represents any possible alternative allele not already represented at this location by REF and ALT">
##FILTER=<ID=LowQual,Description="Low quality">
##FILTER=<ID=VQSRTrancheINDEL95.00to100.00+,Description="Truth sensitivity tranche level for INDEL model at VQS Lod < -7145.7725">
##FILTER=<ID=VQSRTrancheINDEL95.00to100.00,Description="Truth sensitivity tranche level for INDEL model at VQS Lod: -7145.7725 <= x < -0.0761">
##FILTER=<ID=VQSRTrancheSNP99.80to100.00+,Description="Truth sensitivity tranche level for SNP model at VQS Lod < -66643.7516">
##FILTER=<ID=VQSRTrancheSNP99.80to100.00,Description="Truth sensitivity tranche level for SNP model at VQS Lod: -66643.7516 <= x < -116.7261">
##GATKCommandLine=<ID=ApplyVQSR,CommandLine="ApplyVQSR --recal-file /cromwell_root/1kgp.chrY.recalibrate_indel.recal --tranches-file /cromwell_root/fc-51aefb1c-4e8e-4dcb-a59c-62e318ea351a/221c5800-22d5-4843-945c-1e5d4a5da0b6/
#include <stdio.h>
#include <string.h>
int main(int argc, char ** argv)
{
char letters [] = "ABCDEFGHIJKLMNOPQRSTUVabcdefgtxxzz";
for (int i = 0; i < strlen(letters); i++)
{
char bp = letters[i];