Skip to content

Instantly share code, notes, and snippets.

@mikelove
Last active February 19, 2019 14:13
Show Gist options
  • Save mikelove/7b1537b6e35f031668d460552bb35d5a to your computer and use it in GitHub Desktop.
Save mikelove/7b1537b6e35f031668d460552bb35d5a to your computer and use it in GitHub Desktop.
alevin bootstrap importer
boot.cell.names <- read.table("quants_boot_rows.txt", strings=FALSE)[,1]
gene.names <- read.table("quants_mat_cols.txt", strings=FALSE)[,1]
cell.names <- read.table("quants_mat_rows.txt", strings=FALSE)[,1]
library(jsonlite)
jsonPath <- file.path("../cmd_info.json")
cmd_info <- jsonlite::fromJSON(jsonPath)
num.cells <- length(cell.names)
num.genes <- length(gene.names)
num.bootstraps <- as.numeric(cmd_info$numCellBootstraps)
mat.file <- "quants_mat.gz"
mean.mat.file <- "quants_mean_mat.gz"
boot.mat.file <- "quants_boot_mat.gz"
mat <- matrix(nrow=num.genes, ncol=num.cells, dimnames=list(gene.names, cell.names))
con <- gzcon(file(mat.file, "rb"))
for (j in seq_len(num.cells)) {
mat[,j] <- readBin(con, double(), endian = "little", n=num.genes)
}
close(con)
library(Matrix)
mat <- Matrix(mat)
num.non.zero <- matrix(nrow=num.cells, ncol=num.bootstraps)
con <- gzcon(file(boot.mat.file, "rb"))
for (j in seq_len(num.cells)) {
if (j %% 50 == 0) cat(j,"")
for (k in seq_len(num.bootstraps)) {
vec <- readBin(con, double(), endian = "little", n=num.genes)
num.non.zero[j,k] <- sum(vec > 0)
}
}
close(con)
is <- lapply(seq_len(num.bootstraps), function(k) numeric(sum(num.non.zero[,k])))
xs <- is
# easy to build this one
js <- lapply(seq_len(num.bootstraps), function(k) rep(seq_len(num.cells), num.non.zero[,k]))
con <- gzcon(file(boot.mat.file, "rb"))
ptr <- rep(1, num.bootstraps)
for (j in seq_len(num.cells)) {
if (j %% 50 == 0) cat(j,"")
for (k in seq_len(num.bootstraps)) {
vec <- readBin(con, double(), endian = "little", n=num.genes)
n <- num.non.zero[j,k]
stopifnot(n == sum(vec > 0))
is[[k]][ptr[k]:(ptr[k]+n-1)] <- which(vec > 0)
xs[[k]][ptr[k]:(ptr[k]+n-1)] <- vec[vec > 0]
ptr[k] <- ptr[k] + n
}
}
close(con)
bmat.list <- list()
for (k in seq_len(num.bootstraps)) {
bmat <- sparseMatrix(i=is[[k]], j=js[[k]], x=xs[[k]])
idx <- match(cell.names, boot.cell.names)
bmat <- bmat[,idx]
stopifnot(all(abs(colSums(mat) - colSums(bmat)) < 1))
bmat.list[[k]] <- bmat
}
@vjcitn
Copy link

vjcitn commented Dec 22, 2018

line 60: nbootstraps should be num.bootstraps

@mikelove
Copy link
Author

Thank you

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment