Single-cell Search Engine for Tabula Muris
# Install the keras R package from GitHub and set up its Python backend
devtools::install_github("rstudio/keras")
library(keras)
install_keras()
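# (Optional sanity check, a minimal sketch not in the original gist: confirm the
# Keras Python backend is reachable before building the embedding model below.)
if (!keras::is_keras_available()) stop("Keras backend not available; re-run install_keras()")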
# Install the remaining packages used below (note: the CRAN package is "KernelKnn")
list.of.packages = c("class", "ggsci", "ggpubr", "tidyverse", "KernelKnn", "doParallel")
new.packages = list.of.packages[!(list.of.packages %in% installed.packages()[, "Package"])]
if(length(new.packages)) install.packages(new.packages)
library(tidyverse)  # provides the %>% pipe and the dplyr verbs used throughout
# Download and unpack the mapcell repository from GitHub
mapcell_zip_url = "https://github.com/lianchye/mapcell/archive/master.zip"
download.file(url = mapcell_zip_url, destfile = "mapcell-master.zip")
unzip(zipfile = "mapcell-master.zip")
setwd(dir = "./mapcell-master/")
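# (Optional check, a sketch not in the original gist: the paths below assume the
# zip unpacked into ./mapcell-master with these files in place.)
stopifnot(file.exists("./annotation_functions/function_lib_annotation.R"),
          dir.exists("./models_reference/human_cell_landscape/peri-blood"))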
# Load the trained embedding model and the reference cells to search against
source("./annotation_functions/function_lib_annotation.R")
gene_names = "./models_reference/human_cell_landscape/peri-blood/gene_names.rds" %>% readRDS()
model_sel = "./models_reference/human_cell_landscape/peri-blood/weights.h5" %>% build_embedding(rna_length = length(gene_names),weights_h5 = .)
refCells_sel = "./models_reference/human_cell_landscape/peri-blood/embed_ref.rds" %>% readRDS()
metaCells_sel = "./models_reference/human_cell_landscape/peri-blood/meta_ref.rds" %>% readRDS()
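# (Optional inspection, a sketch assuming refCells_sel is a matrix of embedded
# reference cells and metaCells_sel a data frame with a Celltype_clean column,
# as the annotation loop below expects.)
length(gene_names)                 # number of genes the model takes as input
dim(refCells_sel)                  # reference cells x embedding dimensions
head(table(metaCells_sel$Celltype_clean))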
# Install the Tabula Muris data package and its Bioconductor dependencies
if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager")
BiocManager::install("SingleCellExperiment")
BiocManager::install("ExperimentHub")
BiocManager::install("TabulaMurisData")
suppressPackageStartupMessages({
  library(ExperimentHub)
  library(SingleCellExperiment)
  library(TabulaMurisData)
})
# Fetch the Tabula Muris droplet data (record EH1617); it is cached locally,
# so the first download may take some time
eh = ExperimentHub()
query(eh, "TabulaMurisData")
droplet = eh[["EH1617"]]
# Upper-case the mouse gene symbols so they can be matched against the model's gene names
rownames(droplet) = rownames(droplet) %>% toupper()
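# (Optional inspection, not part of the original gist: droplet is a
# SingleCellExperiment, so dim() gives genes x cells and colData() the per-cell
# metadata, including the cell_ontology_class used for the comparison at the end.)
dim(droplet)
head(colData(droplet))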
# Annotate cells in chunks to keep memory use manageable
cell_batch_size = 2500
chunks = split(1:ncol(droplet), ceiling(seq_along(1:ncol(droplet)) / cell_batch_size))
# Genes shared between the model's gene list and the Tabula Muris data
common_genes = intersect(gene_names %>% toupper(), rownames(droplet))
neighbors = 20
out.list = list()
for(chunk_ind in 1:length(chunks)){
  cat(chunk_ind)
  # Build a zero-padded count matrix over the model's full gene list for this chunk of cells
  ext_data.ct = matrix(0, nrow = length(gene_names), ncol = length(chunks[[chunk_ind]]))
  rownames(ext_data.ct) = gene_names
  colnames(ext_data.ct) = colnames(droplet)[chunks[[chunk_ind]]]
  ext_data.ct[common_genes, ] = assay(droplet[common_genes, chunks[[chunk_ind]]], "counts") %>% as.matrix()
  # Scale each cell to its maximum count and project it into the embedding space
  ext_data.embed = model_sel %>% predict(ext_data.ct %>% apply(., 2, function(x){x/max(x)}) %>% as.matrix() %>% t(), verbose = 1)
  # Find the k nearest reference cells for each embedded query cell
  knn_index_out = KernelKnn::knn.index.dist(refCells_sel, ext_data.embed, k = neighbors, method = "pearson_correlation", threads = 14)
  # One row per (cell, neighbour): the neighbour's cell-type label and its distance
  snn_annot.df =
    data.frame(cell_bc = rep(colnames(ext_data.ct), each = neighbors),
               knn_annot = knn_index_out$test_knn_idx %>% t() %>% as.vector() %>% metaCells_sel$Celltype_clean[.],
               knn_dist = knn_index_out$test_knn_dist %>% t() %>% as.vector(),
               stringsAsFactors = FALSE)
  # Majority vote among the k neighbours gives each cell's annotation call
  snn_annot.df =
    snn_annot.df %>%
    left_join(
      snn_annot.df %>%
        group_by(cell_bc, knn_annot) %>% tally() %>%
        group_by(cell_bc) %>% slice(which.max(n)) %>% ungroup() %>%
        rename(annot_call = knn_annot, annot_cts = n))
  out.list[[chunk_ind]] = snn_annot.df
}
# Combine all chunks, attach the Tabula Muris cell metadata, drop cells without an
# ontology class, and keep each cell's single nearest reference neighbour
out.df = out.list %>% do.call("rbind", .) %>%
  left_join(colData(droplet) %>% as.data.frame(), by = c("cell_bc" = "cell")) %>%
  filter(!is.na(cell_ontology_class), cell_ontology_class != "NA") %>%
  group_by(cell_bc) %>% top_n(n = 1, wt = -knn_dist)
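# (Follow-up sketch, not part of the original gist: cross-tabulate the mapcell
# annotation call against the published Tabula Muris cell_ontology_class to see
# how the peri-blood reference labels map onto the droplet cells.)
annot_vs_ontology = out.df %>%
  ungroup() %>%
  distinct(cell_bc, annot_call, cell_ontology_class) %>%
  count(annot_call, cell_ontology_class) %>%
  arrange(desc(n))
head(annot_vs_ontology, 20)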