Last active
February 27, 2020 16:59
-
-
Save dosorio/969e613808352f96eed1254736d9e60a to your computer and use it in GitHub Desktop.
SC-02-27
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Review | |
# https://genomebiology.biomedcentral.com/articles/10.1186/s13059-019-1850-9 | |
# Install Harmony | |
library(devtools) | |
install_github("immunogenomics/harmony") | |
# Install SVA | |
BiocManager::install('sva') | |
# Moving to downloads | |
setwd("../Downloads/") | |
# Downloading files | |
# 1K Cells | |
download.file("http://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_1k_v3/pbmc_1k_v3_filtered_feature_bc_matrix.tar.gz", destfile = "PBMC1K.tar.gz") | |
untar("PBMC1K.tar.gz", exdir = "1K") | |
# 10K Cells | |
download.file("http://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_10k_v3/pbmc_10k_v3_filtered_feature_bc_matrix.tar.gz", destfile = "PBMC10K.tar.gz") | |
untar("PBMC10K.tar.gz", exdir = "10K") | |
# Loading libraries | |
library(Seurat) | |
library(sva) | |
library(Matrix) | |
library(harmony) | |
# Reading the cells | |
PBMC_1K <- Read10X("1K/filtered_feature_bc_matrix/") | |
PBMC_10K <- Read10X("10K/filtered_feature_bc_matrix/") | |
# Selecting 3000 random cells | |
PBMC_10K <- PBMC_10K[,sample(colnames(PBMC_10K), 3000)] | |
# Simulating a batch effect | |
PBMC_1K <- t(t((PBMC_1K))/colSums(PBMC_1K)) * 1e6 | |
PBMC_10K <- t(t((PBMC_10K))/colSums(PBMC_10K)) * 0.1e6 | |
# Renaming the cells | |
colnames(PBMC_1K) <- paste0("1K_",colnames(PBMC_1K)) | |
colnames(PBMC_10K) <- paste0("10K_",colnames(PBMC_10K)) | |
# QC | |
QC <- function(X){ | |
mtCounts <- colSums(X[grepl("MT-", rownames(X)),]) | |
allCounts <- colSums(X) | |
mtRate <- mtCounts/allCounts | |
X <- X[rowSums(X) > 0, mtRate < 0.1] | |
return(X) | |
} | |
PBMC_1K <- QC(PBMC_1K) | |
PBMC_10K <- QC(PBMC_10K) | |
# One gene batch effect | |
plot(c(PBMC_1K["MALAT1",],PBMC_10K["MALAT1",]), main = "BEFORE CORRECTION") | |
# Batch correction using SVA | |
B <- unlist(lapply(strsplit(c(colnames(PBMC_1K), colnames(PBMC_10K)), "_"), function(X){X[1]})) | |
sharedGenes <- intersect(rownames(PBMC_1K), rownames(PBMC_10K)) | |
sharedGenes <- cbind(PBMC_1K[sharedGenes,], PBMC_10K[sharedGenes,]) | |
sharedGenes <- sharedGenes[rowVars(sharedGenes) > 0,] | |
sharedGenes <- ComBat(dat = sharedGenes, batch = B) | |
sharedGenes <- round(sharedGenes) | |
sharedGenes[sharedGenes < 0] <- 0 | |
plot(sharedGenes["MALAT1",], main = "AFTER CORRECTION") | |
# PreProcessing | |
PP <- function(X){ | |
X <- CreateSeuratObject(X) | |
X <- ScaleData(X) | |
X <- FindVariableFeatures(X) | |
return(X) | |
} | |
PBMC_1K <- PP(sharedGenes[,B == "1K"]) | |
PBMC_10K <- PP(sharedGenes[,B == "10K"]) | |
# List of HVG | |
HVG <- intersect(VariableFeatures(PBMC_1K), VariableFeatures(PBMC_10K)) | |
# CCA | |
outCCA <- RunCCA(PBMC_1K, PBMC_10K, features = HVG) | |
DimPlot(outCCA, reduction = "cca") | |
outCCA <- ScaleData(outCCA) | |
outCCA <- FindVariableFeatures(outCCA) | |
outCCA <- RunPCA(outCCA) | |
outCCA <- RunUMAP(outCCA, dims = 1:20, reduction = 'cca') | |
UMAPPlot(outCCA) | |
# CCA | |
outCCA <- RunCCA(PBMC_1K, PBMC_10K, features = HVG) | |
DimPlot(outCCA, reduction = "cca") | |
outCCA <- ScaleData(outCCA) | |
outCCA <- FindVariableFeatures(outCCA) | |
outCCA <- RunPCA(outCCA) | |
outCCA <- RunUMAP(outCCA, dims = 1:20, reduction = 'cca') | |
UMAPPlot(outCCA) | |
# The Seurat way | |
allDataS <- list('1K' = PBMC_1K, '10K' = PBMC_10K) | |
allDataS <- FindIntegrationAnchors(object.list = allDataS) | |
allDataS <- IntegrateData(allDataS) | |
DefaultAssay(allDataS) <- "integrated" | |
allDataS <- ScaleData(allDataS) | |
allDataS <- RunPCA(allDataS) | |
allDataS <- RunUMAP(allDataS, dims = 1:20) | |
UMAPPlot(allDataS) | |
# The Harmony way | |
allDataH <- merge(PBMC_1K, PBMC_10K) | |
allDataH <- ScaleData(allDataH) | |
allDataH <- FindVariableFeatures(allDataH) | |
allDataH <- RunPCA(allDataH) | |
allDataH <- RunHarmony(allDataH, group.by.vars = 'orig.ident') | |
allDataH <- RunUMAP(allDataH, dims = 1:20, reduction = 'harmony') | |
UMAPPlot(allDataH) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment