Skip to content

Instantly share code, notes, and snippets.

View jungbluth's full-sized avatar

Sean Jungbluth jungbluth

View GitHub Profile
@jungbluth
jungbluth / venneuler.R
Last active September 20, 2023 03:11
venneuler.R
#!/usr/bin/env Rscript
# Fit and plot a venneuler diagram for three sets (A, B, C).
# The named vector gives a value per set and per set combination; here the
# only non-zero combination is "B&C".

# venneuler() lives in the venneuler package, which must be attached first;
# without this line Rscript stops with 'could not find function "venneuler"'.
library(venneuler)

vd <- venneuler(c(A = 5, B = 3, C = 5, "A&B" = 0, "A&C" = 0, "B&C" = 13, "A&B&C" = 0))
plot(vd)
@jungbluth
jungbluth / sourmash-workflow.sh
Last active September 20, 2023 03:12
sourmash-workflow.sh
#!/usr/bin/env bash
###PREP
##download databases
##located at: http://sourmash.readthedocs.io/en/latest/databases.html
#sbt-database? renameme
@jungbluth
jungbluth / run_sourmash.sh
Last active September 20, 2023 03:12
run_sourmash.sh
#!/usr/bin/env bash
# For every ID listed (one per line) in ./list, sketch a sourmash DNA
# signature from the matching <ID>.a.fna file, if that file exists.
# The signature is written to <ID>.fna.sig in the current directory.

# The list is read on file descriptor 3 so that the commands inside the loop
# keep the script's own stdin. `read` (instead of `for ... $(cat list)`)
# avoids word-splitting and glob expansion of the IDs; the `|| [ -n ... ]`
# keeps a final line that lacks a trailing newline.
while read -r line <&3 || [ -n "$line" ]; do
  # Blank lines carry no ID.
  [ -n "$line" ] || continue
  FILE="/global/cfs/cdirs/img/web-data/sandbox.blast.data/${line}/${line}.a.fna"
  if [ -f "$FILE" ]; then
    # NOTE(review): the environment is re-activated for every existing file,
    # exactly as before; it could probably be activated once - confirm.
    mamba activate sourmash
    sourmash sketch dna -p k=31,scaled=1000 -o "${line}.fna.sig" "$FILE"
  fi
done 3< list
@jungbluth
jungbluth / JGI_prospero_script.sh
Last active September 20, 2023 03:12
JGI_prospero_script.sh
# Query the prospero lineage-lookup web service for one ID and pass the
# response through `jq .`.
# Usage: JGI_prospero_script.sh [ID]
#   ID - identifier appended to the lookup URL; defaults to 3300011415, the
#        value that used to be hard-coded.
ID="${1:-3300011415}"
curl -s "https://prospero.jgi.doe.gov/ws/lineage/lookup/${ID}" | jq .
@jungbluth
jungbluth / IMG-calculate-bin-scaffold-coverage.R
Last active September 20, 2023 03:13
IMG-calculate-bin-scaffold-coverage.R
# Read the tab-separated scaffold table (has a header row) and the
# tab-separated, headerless scaffold-to-bin mapping.
dat1 <- read.table(file = "scaffoldCart22861_18-jan-2017.xls", sep = "\t", header = TRUE)
dat2 <- read.table(file = "scaffolds-to-bins.txt", sep = "\t", header = FALSE)

# Join on the scaffold identifier: column Scaffold.ID of the scaffold table
# against the first column (V1) of the mapping.
dat3 <- merge(x = dat1, y = dat2, by.x = "Scaffold.ID", by.y = "V1")

# Per-row accumulator, initialised to zero.
dat3$genomesum <- 0

# One row per distinct value of the mapping's second column (V2; presumably
# the bin name - confirm), with a zero-initialised coverage column.
dat4 <- as.data.frame(unique(dat3$V2))
dat4$coverage <- 0
for (i in 1:length(levels(dat3$V2))){
dat3[which(dat3$V2==(unique(dat3$V2))[i]),]$genomesum<-sum(dat3[which(dat3$V2==(unique(dat3$V2))[i]),]$Sequence.Length..bp.)
dat4[i,2]<-sum((dat3[which(dat3$V2==(unique(dat3$V2))[i]),]$Sequence.Length..bp./dat3[which(dat3$V2==(unique(dat3$V2))[i]),]$genomesum)*dat3[which(dat3$V2==(unique(dat3$V2))[i]),]$Read.Depth)
@jungbluth
jungbluth / gather_img_fna.sh
Last active September 20, 2023 03:13
gather_img_fna.sh
#!/usr/bin/env bash
# Positional arguments:
#   $1 -> imgid     identifier used to build the data directory path below
#   $2 -> listfile  not referenced in the visible part of this script
imgid="$1"
listfile="$2"
#######################################
# Dump all rows of the single table stored in an SQLite database.
# The column list and table name are parsed out of the `.schema` text and
# used to build a `SELECT <columns> FROM <table>` query.
# Globals:
#   TABLECOLUMNS (written) - comma-separated column names
#   TABLE        (written) - table name
#   Both stay global, as before, because code outside this view may read
#   them - TODO confirm and make them local if nothing does.
# Arguments:
#   $1 - path to the SQLite database file
# Outputs:
#   Query result on stdout. If the schema holds more than one CREATE TABLE,
#   a warning is printed on stdout instead (unchanged behaviour).
# Returns:
#   0 after the warning; 1 if no table definition could be parsed;
#   otherwise the exit status of the final sqlite3 call.
#######################################
sqlsample () {
  local db=$1 schema ntables

  # Fetch the schema once and flatten it to a single, space-separated line.
  # tr is used instead of an unquoted `echo $(...)`, which would also
  # glob-expand any `*` that appears in the schema text.
  schema=$(sqlite3 "${db}" ".schema" | tr -s '[:space:]' ' ')
  schema=${schema# }
  schema=${schema% }

  # Statements are ';'-terminated: count those that create a table.
  ntables=$(printf '%s\n' "${schema}" | tr ';' '\n' | grep -c "CREATE TABLE")
  if [ "${ntables}" -gt 1 ]; then
    echo "Warning: ${db} contains multiple tables - no export here because function written to export single table."
    return 0
  fi

  # Column names: drop everything up to the first '(', split on commas, keep
  # the first word of each piece, strip a trailing ')' / ';' left on an
  # untyped last column, drop empty pieces, and join with commas.
  TABLECOLUMNS=$(printf '%s\n' "${schema}" \
    | sed 's/^[^(]*(//' \
    | tr ',' '\n' \
    | sed -e 's/^ *//' -e 's/ .*//' -e 's/[);]*$//' -e '/^$/d' \
    | paste -sd, -)

  # Table name: the word after "CREATE TABLE", cut at the first space or '('.
  TABLE=$(printf '%s\n' "${schema}" \
    | sed -e 's/^CREATE TABLE //' -e 's/ .*//' -e 's/(.*//')

  # An empty or unparsable schema would otherwise yield "SELECT  FROM ".
  if [ -z "${TABLE}" ] || [ -z "${TABLECOLUMNS}" ]; then
    printf 'sqlsample: no table definition found in %s\n' "${db}" >&2
    return 1
  fi

  sqlite3 "${db}" "SELECT ${TABLECOLUMNS} FROM ${TABLE}"
}
# Work from the assembled/fna directory of the requested imgid. Stop here if
# it cannot be entered, so that nothing below runs in the wrong directory.
cd "/global/cfs/cdirs/img_web/img_web_data/mer.fs/${imgid}/assembled/fna" || {
  printf 'error: cannot cd to the assembled/fna directory for imgid "%s"\n' "${imgid}" >&2
  exit 1
}
# convert sqlite3 database to fasta flat file
@jungbluth
jungbluth / anvio_minimal_run.sh
Last active September 20, 2023 03:11
anvio_minimal_run.sh
# Minimal anvi'o run for one genome assembly: reformat the FASTA, build a
# contigs database from it, then run the HMM, NCBI COG and Pfam steps
# against that database.

# Step 1: write a reformatted copy of the input FASTA.
anvi-script-reformat-fasta \
  GCA_001899445.1_ASM189944v1_genomic.fna \
  -o GCA_001899445.1_ASM189944v1_genomic_reformatted.fna \
  -l 100 \
  --simplify-names

# Step 2: generate contigs.db from the reformatted FASTA.
anvi-gen-contigs-database \
  -f GCA_001899445.1_ASM189944v1_genomic_reformatted.fna \
  -o contigs.db \
  --split-length 10000 \
  --kmer-size 4 \
  -T 2 \
  --prodigal-translation-table 11 \
  -n 'GCA_001899445.1_ASM189944v1_genomic contig database'

# Step 3: run the three annotation commands on contigs.db, two threads each.
anvi-run-hmms \
  -c contigs.db \
  --num-threads 2
anvi-run-ncbi-cogs \
  -c contigs.db \
  -T 2 \
  --sensitive \
  --cog-data-dir /kb/module/work/anviodb/COG
anvi-run-pfams \
  -c contigs.db \
  -T 2 \
  --pfam-data-dir /kb/module/work/anviodb/Pfam
#!/usr/bin/env python3
# this script connects to IMG web backend and pulls together smart, pfam, faa, and gene copy tables
import os
import sys
import pandas
import sqlite3
from glob import glob
from natsort import natsorted
# Download the *_genomic.fna.gz file of every genome ID listed (one per line)
# in the Pseudomonas GTDB v207 list from the NCBI genomes FTP tree.
# Requires bash ([[ =~ ]] and BASH_REMATCH).

#######################################
# Split one ID of the form <prefix>_<A>_<9 digits>.<version> into its parts.
# Globals (written):
#   genomeID - everything after the first underscore
#   splitA   - second underscore-separated field
#   splitB, splitC, splitD - digits 1-3, 4-6 and 7-9 of the third field
# Arguments:
#   $1 - the ID, e.g. RS_GCF_000012345.1
# Returns:
#   0 on success, 1 if the ID does not have the expected shape.
#######################################
_parse_gtdb_id() {
  # The version suffix may have any number of digits; the earlier sed-based
  # slicing only handled a single-digit version.
  local re='^[^_]*_([^_]+)_([0-9]{3})([0-9]{3})([0-9]{3})\.[0-9]+(_.*)?$'
  [[ $1 =~ $re ]] || return 1
  genomeID=${1#*_}
  splitA=${BASH_REMATCH[1]}
  splitB=${BASH_REMATCH[2]}
  splitC=${BASH_REMATCH[3]}
  splitD=${BASH_REMATCH[4]}
}

# The list is read on file descriptor 3 so wget keeps the script's stdin;
# `read` avoids the word-splitting and globbing of `for ... $(cat ...)`.
while read -r line <&3 || [[ -n "$line" ]]; do
  [[ -n "$line" ]] || continue
  if ! _parse_gtdb_id "$line"; then
    printf 'warning: skipping unrecognised genome ID: %s\n' "$line" >&2
    continue
  fi
  echo "$splitA $splitB $splitC $splitD"
  wget --recursive -e robots=off --reject "index.html" --timestamping -A "_genomic.fna.gz" "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/${splitA}/${splitB}/${splitC}/${splitD}/" -P ./
done 3< /global/cfs/cdirs/kbase/ke_prototype/GTDB/Pseudomonas-genus_forManasa/Pseudomonas-IDs_GTDB_v207.list
#!/usr/bin/env python
import sys
import os
#for x in (os.getenv("PYTHONPATH") if os.getenv("PYTHONPATH") else "").split(":"):
# if x not in sys.path:
# sys.path.append(x)
#import qcutils
sys.path.insert(0,"/global/homes/b/bfoster/git/jgi-mt/lib")