Created
December 20, 2019 07:02
-
-
Save lianchye/906792a4a29f6ff18941a2b4d7aa5a5a to your computer and use it in GitHub Desktop.
Single cell Search Engine of Tabula Muris
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Install Keras | |
devtools::install_github("rstudio/keras") | |
library(keras) | |
install_keras() | |
#Install other packages for analysis etc.. | |
list.of.packages = c("class", "ggsci","ggpubr","tidyverse","kernelKnn","doParallel") | |
new.packages = list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])] | |
if(length(new.packages)) install.packages(new.packages) | |
# Grab the github package | |
mapcell_zip_url = "https://github.com/lianchye/mapcell/archive/master.zip" | |
download.file(url = mapcell_zip_url, destfile = "mapcell-master.zip") | |
unzip(zipfile = "mapcell-master.zip") | |
setwd(dir = "./mapcell-master/") | |
# Load Trained Model for embedding and reference point to search | |
source("./annotation_functions/function_lib_annotation.R") | |
gene_names = "./models_reference/human_cell_landscape/peri-blood/gene_names.rds" %>% readRDS() | |
model_sel = "./models_reference/human_cell_landscape/peri-blood/weights.h5" %>% build_embedding(rna_length = length(gene_names),weights_h5 = .) | |
refCells_sel = "./models_reference/human_cell_landscape/peri-blood/embed_ref.rds" %>% readRDS() | |
metaCells_sel = "./models_reference/human_cell_landscape/peri-blood/meta_ref.rds" %>% readRDS() | |
#Install Tabular Muris Bioconductor Version | |
BiocManager::install("SingleCellExperiment") | |
BiocManager::install("ExperimentHub") | |
BiocManager::install("TabulaMurisData") | |
suppressPackageStartupMessages({ | |
library(ExperimentHub) | |
library(SingleCellExperiment) | |
library(TabulaMurisData) | |
}) | |
# Get tabularmuris data... Data will be cached, first time will take sometime | |
eh = ExperimentHub() | |
query(eh, "TabulaMurisData") | |
droplet = eh[["EH1617"]] | |
rownames(droplet) = rownames(droplet) %>% toupper() | |
cell_batch_size = 2500 | |
chunks = split(1:ncol(droplet), ceiling(seq_along(1:ncol(droplet))/cell_batch_size)) | |
common_genes = intersect(gene_names %>% toupper(),rownames(droplet)) | |
neighbors = 20 | |
out.list=list() | |
for(chunk_ind in 1:length(chunks)){ | |
cat(chunk_ind) | |
ext_data.ct = matrix(0,nrow = length(gene_names), ncol = length(chunks[[chunk_ind]])) | |
rownames(ext_data.ct) = gene_names | |
colnames(ext_data.ct) = colnames(droplet)[chunks[[chunk_ind]]] | |
ext_data.ct[common_genes,]=droplet[common_genes,chunks[[chunk_ind]]]@assays[["counts"]] %>% as.matrix() | |
ext_data.embed = model_sel %>% predict(ext_data.ct %>% apply(., 2, function(x){x/max(x)}) %>% as.matrix() %>% t(),verbose=1) | |
knn_index_out = KernelKnn::knn.index.dist(refCells_sel,ext_data.embed,k=neighbors,method = "pearson_correlation",threads = 14) | |
snn_annot.df = | |
data.frame(cell_bc = rep(colnames(ext_data.ct),each=neighbors), | |
knn_annot = knn_index_out$test_knn_idx %>% t() %>% as.vector() %>% metaCells_sel$Celltype_clean[.], | |
knn_dist = knn_index_out$test_knn_dist%>% t() %>% as.vector(), | |
stringsAsFactors = FALSE) | |
snn_annot.df = | |
snn_annot.df %>% | |
left_join( | |
snn_annot.df %>% | |
group_by(cell_bc,knn_annot) %>% tally() %>% | |
group_by(cell_bc) %>% slice(which.max(n)) %>% ungroup() %>% | |
rename(annot_call=knn_annot,annot_cts=n)) | |
out.list[[chunk_ind]]=snn_annot.df | |
} | |
out.df=out.list %>% do.call("rbind",.) %>% | |
left_join(droplet@colData %>% data.frame(),by=c("cell_bc"="cell")) %>% | |
filter(cell_ontology_class!="NA") %>% | |
group_by(cell_bc) %>% top_n(n = 1,wt=-knn_dist) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment