Skip to content

Instantly share code, notes, and snippets.

View vjcitn's full-sized avatar

Vince Carey vjcitn

  • Boston
View GitHub Profile
@vjcitn
vjcitn / inst.R
Created March 25, 2024 10:51
"instrumented" do_SingleR
do_SingleRi = function(sce=NULL, path="/home/vincent/tenx3k.h5ad",
ref=celldex::HumanPrimaryCellAtlasData(),
ref.type = "label.main",
min.common = 1000, assay.type.test=1L, instrument=TRUE,
clprocid=NULL, ...) {
if (instrument == TRUE && is.null(clprocid)) stop("clprocid not set")
cl_timestamp(clprocid, "init")
stopifnot(ref.type %in% c("label.main", "label.fine"))
if (is.null(sce)) {
is_h5ad = length(grep("h5ad$", path)==1)
@vjcitn
vjcitn / testi
Last active March 25, 2024 10:51
use 'instrumented' do_SingleR(i)
# Evaluate inst.R, echoing each expression as it runs (echo=TRUE).
source("inst.R", echo=TRUE)
library(Rcollectl)
library(AnVILBestPractices)
library(SingleR)
library(BiocParallel)
# Keep the value returned by cl_start(); it is passed to each cl_timestamp() call below.
# NOTE(review): presumably this starts an Rcollectl resource-usage collection -- confirm.
clid = cl_start()
Rcollectl::cl_timestamp(clid, "pre-data")
# Fetch the "pbmc3k" dataset, then record a timestamp labelled "3k loaded".
p3k = TENxPBMCData::TENxPBMCData("pbmc3k")
Rcollectl::cl_timestamp(clid, "3k loaded")
# Replace the row names with the Symbol column of rowData(p3k), passed through
# make.names() so they are syntactically valid and unique.
rownames(p3k) = make.names(rowData(p3k)$Symbol, unique=TRUE)
@vjcitn
vjcitn / probe_lake.R
Created March 15, 2024 13:26
Defines a function probe_lake() that produces a Shiny app to explore the BiocBuildDB data lake
# setup
library(aws.s3)
library(DBI)
library(dplyr)
library(duckdb)
library(shiny)
# get bucket content metadata into a data.frame "bb"
@vjcitn
vjcitn / dohist.R
Created March 11, 2024 11:56
histogram of commit times from an info.csv
# Open a DuckDB connection.
# NOTE(review): dbConnect()/dbExecute() are called unqualified, so DBI must
# already be attached before this point -- confirm.
con <- dbConnect(duckdb::duckdb(), read_only=TRUE)
# Install and load DuckDB's httpfs extension; the path built below is an s3:// URL.
dbExecute(con, "install 'httpfs'")
dbExecute(con, "load 'httpfs'")
library(ggplot2)
# Key of one info.csv.gz file under buildResults/.
devinf = "buildResults/f9785dba87426695825cc6524dcb82c6-info.csv.gz"
# Full s3:// URL of that file in the bioc-builddb-mirror bucket.
pa = sprintf('s3://bioc-builddb-mirror/%s', devinf)
# SQL text that reads the CSV at `pa` through read_csv().
sqlstring = sprintf("FROM read_csv('%s')", pa)
@vjcitn
vjcitn / pull_info.R
Created March 11, 2024 11:08
code to explore info.csv collected by BiocBuildDB process
allinf = c(
"buildResults/08150976a8cce9f7bf2d08a6ff86160c-info.csv.gz",
"buildResults/19dfe90f431098a035502d632404e0f2-info.csv.gz",
"buildResults/27dc71f7942ea33a660ca2495d2701d0-info.csv.gz",
"buildResults/2a0158be35acdff034889a8b072b823d-info.csv.gz",
"buildResults/2cc2a659a35d607f71655f3c9c9e4283-info.csv.gz",
"buildResults/3ebd0a185863d3d036726be357bedc60-info.csv.gz",
"buildResults/4104e02fcdf0685bf51735c7abb2c06e-info.csv.gz",
"buildResults/519f35883478df30fd90189f094770d7-info.csv.gz",
"buildResults/64744d6015f8f931cae0c13d1e50b092-info.csv.gz",
#
# https://priyanshwarke2015-ndcs.medium.com/image-classification-with-cnn-model-in-cifar100-dataset-8d4122b75bad
# with user-level miniconda3 installed, use
# miniconda3/bin/pip install matplotlib
# miniconda3/bin/pip install torchvision
import os
import torch
import torchvision
@vjcitn
vjcitn / gist:da11ea5228124fd06c5e59239a54cb9d
Created December 9, 2023 14:52
using shiny with readVcf, predictCoding, ...
library(shiny) # works with the chr22.vcf.gz in the VariantAnnotation extdata
library(dplyr)
library(DT)
library(VariantAnnotation)
library(BSgenome.Hsapiens.UCSC.hg19)
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
ui <- fluidPage(
titlePanel("VCF XPLORR"),
numericInput("numvar", "num2chk", value = 50, min = 50, max = 500, step = 10),
@vjcitn
vjcitn / newlib.R
Created November 11, 2023 11:32
a redefinition of library() to be quieter
# a replacement for library() that does not print tons of messages
# as each package is loaded. place in .Rprofile
libstats = function(inisess, newsess) {
inibase = inisess$basePkgs # unchanging?
inioth = names(inisess$otherPkgs)
newbase = newsess$basePkgs
newoth = names(newsess$otherPkgs)
iniatt = length(unique(c(inibase,inioth)))
newatt = length(unique(c(newbase,newoth)))
@vjcitn
vjcitn / igandemo.R
Created October 6, 2023 10:14
neo4r and biocypher
# assumes https://github.com/biocypher/igan.git README instructions
# followed, docker compose up -d succeeded and localhost:7474 gives
# a neo4j browser interface (at localhost:7687/browser on my machine)
#
# BiocManager::install("neo4j-rstats/neo4r")
# want 0.1.3
library(neo4r)
library(magrittr)
# Build a neo4r API connection object for the server at localhost:7474,
# using an empty user name and password.
con <- neo4j_api$new(
url = "http://localhost:7474", user="", password="")
@vjcitn
vjcitn / check_discrepant_pkgs.R
Created October 1, 2023 00:08
code in this gist attempts to prepare R to identify causes of CHECK_SUGGESTS_ONLY errors
stopifnot(Sys.getenv("_R_CHECK_SUGGESTS_ONLY_") == "true")
# these error in check on nebbiolo2 but not lconway
suggposs = c("AffyRNADegradation", "alabaster.spatial", "ANF", "animalcules",
"BDMMAcorrect", "bnbc", "CancerSubtypes", "ccrepe", "CDI", "cellscape",
"ClassifyR", "CNVgears", "CoGAPS", "compartmap", "CoRegNet",
"crisprseekplus", "DEWSeq", "DEXSeq", "DMCFB", "dpeak", "DSS",
"EasyCellType", "eegc", "EGSEA", "EpiCompare", "exomePeak2",
"farms", "FCBF", "flowMap", "FoldGO", "FScanR", "GCSscore", "GenomicFeatures",
"GEOmetadb", "GEOquery", "gscreend", "HelloRanges", "ILoReg",