Skip to content

Instantly share code, notes, and snippets.

@mschatz
mschatz / tally.sh
Last active November 9, 2023 05:54
tally space by file extension
# tally.sh -- tally object count and total bytes per file extension in a
# Google Cloud Storage bucket.
#
# Output: up to 10 lines of "<extension> <object-count> <total-bytes>",
# largest total first. The raw `gsutil du` listing is kept in t2t.list.txt.

#######################################
# Aggregate a `gsutil du` listing by file extension.
# Arguments: none
# Inputs:    stdin - lines of "<bytes>  <object URL>"
# Outputs:   stdout - one "<ext> <count> <bytes>" line per extension,
#            in no particular order
#######################################
tally_by_extension() {
  awk '
    # Lines whose URL ends in "/" are directory roll-ups, not objects.
    /\/$/ { next }
    {
      # Strip the size column instead of using $2 so that object names
      # containing spaces stay intact.
      path = $0
      sub(/^[ \t]*[0-9]+[ \t]+/, "", path)

      # Take the extension from the last path component only, so a dot in
      # a directory name is never mistaken for an extension separator.
      n = split(path, parts, "/")
      m = split(parts[n], pieces, ".")
      if (m < 2 || pieces[m] == "") next   # no extension: not tallied

      count[pieces[m]]++
      bytes[pieces[m]] += $1
    }
    END {
      # Explicit formats keep large byte totals as plain integers; default
      # number output may switch to exponent notation on some awks, which
      # "sort -n" cannot order.
      for (ext in count) printf "%s %d %.0f\n", ext, count[ext], bytes[ext]
    }
  '
}

#######################################
# List the bucket with gsutil (billed to the given project), save the
# listing, and print the ten extensions using the most space.
# Arguments: none
# Outputs:   writes t2t.list.txt; prints the top-10 table to stdout
# Returns:   1 if the gsutil listing fails, otherwise the pipeline status
#######################################
tally_bucket() {
  local project=anvil-tool-development
  local bucket=gs://fc-47de7dae-e8e6-429c-b760-b4ba49136eee
  local listing=t2t.list.txt

  if ! gsutil -u "$project" du "$bucket" > "$listing"; then
    printf 'tally: gsutil du failed for %s; not tallying a partial listing\n' \
      "$bucket" >&2
    return 1
  fi

  tally_by_extension < "$listing" | sort -nrk3 | head
}

tally_bucket
@mschatz
mschatz / pdfmerge.sh
Created October 28, 2022 03:42 — forked from BeatHubmann/pdfmerge.sh
Batch-merge PDF files from the macOS command line
#######################################
# Merge every *.pdf in the current directory into _all-merged.pdf using
# Ghostscript's pdfwrite device, in glob (name) order.
# Arguments: none
# Outputs:   writes _all-merged.pdf in the current directory
# Returns:   1 if there is nothing to merge, otherwise the status of gs
#######################################
merge_pdfs() {
  local out=_all-merged.pdf
  local -a inputs=()
  local f

  # The "./" prefix keeps a file name that starts with "-" from being read
  # as a gs switch.
  for f in ./*.pdf; do
    # With no match the pattern is left as the literal "./*.pdf".
    [[ -e "$f" ]] || continue
    # A result left over from an earlier run must not be used as input:
    # it is the very file gs is about to overwrite.
    [[ "$f" == "./$out" ]] && continue
    inputs+=("$f")
  done

  if (( ${#inputs[@]} == 0 )); then
    printf 'pdfmerge: no PDF files to merge in %s\n' "$PWD" >&2
    return 1
  fi

  gs -dNOPAUSE -dBATCH -sDEVICE=pdfwrite "-sOutputFile=$out" "${inputs[@]}"
}

merge_pdfs
version 1.0
task untar {
input {
File archive
}
command <<<
mkdir untar
tar xf ~{archive} -C untar
find untar -type f > filelist.txt
#include <stdio.h>
#include <string.h>
int main(int argc, char ** argv)
{
char letters [] = "ABCDEFGHIJKLMNOPQRSTUVabcdefgtxxzz";
for (int i = 0; i < strlen(letters); i++)
{
char bp = letters[i];
##fileformat=VCFv4.2
##FILTER=<ID=PASS,Description="All filters passed">
##ALT=<ID=NON_REF,Description="Represents any possible alternative allele not already represented at this location by REF and ALT">
##FILTER=<ID=LowQual,Description="Low quality">
##FILTER=<ID=VQSRTrancheINDEL95.00to100.00+,Description="Truth sensitivity tranche level for INDEL model at VQS Lod < -7145.7725">
##FILTER=<ID=VQSRTrancheINDEL95.00to100.00,Description="Truth sensitivity tranche level for INDEL model at VQS Lod: -7145.7725 <= x < -0.0761">
##FILTER=<ID=VQSRTrancheSNP99.80to100.00+,Description="Truth sensitivity tranche level for SNP model at VQS Lod < -66643.7516">
##FILTER=<ID=VQSRTrancheSNP99.80to100.00,Description="Truth sensitivity tranche level for SNP model at VQS Lod: -66643.7516 <= x < -116.7261">
##GATKCommandLine=<ID=ApplyVQSR,CommandLine="ApplyVQSR --recal-file /cromwell_root/1kgp.chrY.recalibrate_indel.recal --tranches-file /cromwell_root/fc-51aefb1c-4e8e-4dcb-a59c-62e318ea351a/221c5800-22d5-4843-945c-1e5d4a5da0b6/
# Print the current date every 3 seconds, forever. tee echoes each line to
# the terminal and also writes it to true.log. Stop with Ctrl-C.
until false; do
  date
  sleep 3
done | tee true.log
## Install Globus Connect Personal from the vendor tarball, start it, then
## install the Globus CLI and log in.
## Lines starting with "$ " appear to be prompt-prefixed commands from a
## terminal session -- presumably the "$ " is dropped when pasting.

## download and unpack the "latest" Globus Connect Personal tarball
$ wget https://downloads.globus.org/globus-connect-personal/linux/stable/globusconnectpersonal-latest.tgz
$ tar xzvf globusconnectpersonal-latest.tgz
## point Python at the miniconda install in the home directory
## NOTE(review): presumably needed by the globusconnectpersonal launcher -- confirm
export PYTHONHOME=~/miniconda3
## setup and name your endpoint
## NOTE(review): the tarball is "latest" but the directory below is pinned
## to 3.1.1 -- confirm the extracted directory name matches
$ globusconnectpersonal-3.1.1/globusconnectpersonal -setup
## run the client in the background ("&") so the shell stays usable
$ globusconnectpersonal-3.1.1/globusconnectpersonal -start &
## install the globus-cli package with pip, then log in with it
$ pip install globus-cli
$ globus login
# Remove every member that holds the "User" role from the Terra billing
# project "deeppilots-bioconductor".
library(AnVIL)

terra <- Terra()

# Fetch the membership list and reduce it to the e-mail addresses of
# members whose role is "User".
# NOTE(review): flatten(), filter() and pull() are not attached explicitly
# here (only AnVIL is) -- confirm they resolve to the intended functions.
members <- terra$listBillingProjectMembers("deeppilots-bioconductor")
emails <- pull(filter(flatten(members), role == "User"), email)

# One removal request per address.
for (email in emails) {
    terra$removeUserFromBillingProject("deeppilots-bioconductor", "User", email)
}
### local install of gcloud/gsutil
# Downloads the pinned Cloud SDK tarball into ~/build, unpacks it, runs its
# installer, then initialises gcloud without opening a browser.
#
# The steps are chained with && so that a failure (missing ~/build, failed
# download, bad archive) stops the sequence instead of running the remaining
# commands in the wrong directory. curl -f makes an HTTP error fail the
# download rather than saving the error page as the tarball.
cd ~/build \
  && curl -f -O https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-sdk-332.0.0-linux-x86_64.tar.gz \
  && tar xzvf google-cloud-sdk-332.0.0-linux-x86_64.tar.gz \
  && cd google-cloud-sdk \
  && ./install.sh \
  && {
    # Re-read the shell startup file -- presumably to pick up changes made
    # by install.sh; confirm.
    . ~/.bashrc
    # NOTE(review): --console-only matches the pinned 332.0.0 release;
    # re-check the flag name if the SDK version is bumped.
    gcloud init --console-only
    gcloud auth application-default login --no-launch-browser
  }
T2T Analysis Scripts
bucketstats.R : Plot histogram of file sizes in a bucket by filetype (from gsutil ls)
hcstatus.sh : Look through buckets to see how many raw VCF files are available