Skip to content

Instantly share code, notes, and snippets.

@dosorio
Last active February 27, 2020 16:59
Show Gist options
  • Save dosorio/969e613808352f96eed1254736d9e60a to your computer and use it in GitHub Desktop.
Save dosorio/969e613808352f96eed1254736d9e60a to your computer and use it in GitHub Desktop.
SC-02-27
# Review
# https://genomebiology.biomedcentral.com/articles/10.1186/s13059-019-1850-9
# Install Harmony
library(devtools)
install_github("immunogenomics/harmony")
# Install SVA
BiocManager::install('sva')
# Moving to downloads
setwd("../Downloads/")
# Downloading files
# 1K Cells
download.file("http://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_1k_v3/pbmc_1k_v3_filtered_feature_bc_matrix.tar.gz", destfile = "PBMC1K.tar.gz")
untar("PBMC1K.tar.gz", exdir = "1K")
# 10K Cells
download.file("http://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_10k_v3/pbmc_10k_v3_filtered_feature_bc_matrix.tar.gz", destfile = "PBMC10K.tar.gz")
untar("PBMC10K.tar.gz", exdir = "10K")
# Loading libraries
library(Seurat)
library(sva)
library(Matrix)
library(harmony)
# Reading the cells
PBMC_1K <- Read10X("1K/filtered_feature_bc_matrix/")
PBMC_10K <- Read10X("10K/filtered_feature_bc_matrix/")
# Selecting 3000 random cells
PBMC_10K <- PBMC_10K[,sample(colnames(PBMC_10K), 3000)]
# Simulating a batch effect
PBMC_1K <- t(t((PBMC_1K))/colSums(PBMC_1K)) * 1e6
PBMC_10K <- t(t((PBMC_10K))/colSums(PBMC_10K)) * 0.1e6
# Renaming the cells
colnames(PBMC_1K) <- paste0("1K_",colnames(PBMC_1K))
colnames(PBMC_10K) <- paste0("10K_",colnames(PBMC_10K))
# QC
QC <- function(X){
mtCounts <- colSums(X[grepl("MT-", rownames(X)),])
allCounts <- colSums(X)
mtRate <- mtCounts/allCounts
X <- X[rowSums(X) > 0, mtRate < 0.1]
return(X)
}
PBMC_1K <- QC(PBMC_1K)
PBMC_10K <- QC(PBMC_10K)
# One gene batch effect
plot(c(PBMC_1K["MALAT1",],PBMC_10K["MALAT1",]), main = "BEFORE CORRECTION")
# Batch correction using SVA
B <- unlist(lapply(strsplit(c(colnames(PBMC_1K), colnames(PBMC_10K)), "_"), function(X){X[1]}))
sharedGenes <- intersect(rownames(PBMC_1K), rownames(PBMC_10K))
sharedGenes <- cbind(PBMC_1K[sharedGenes,], PBMC_10K[sharedGenes,])
sharedGenes <- sharedGenes[rowVars(sharedGenes) > 0,]
sharedGenes <- ComBat(dat = sharedGenes, batch = B)
sharedGenes <- round(sharedGenes)
sharedGenes[sharedGenes < 0] <- 0
plot(sharedGenes["MALAT1",], main = "AFTER CORRECTION")
# PreProcessing
PP <- function(X){
X <- CreateSeuratObject(X)
X <- ScaleData(X)
X <- FindVariableFeatures(X)
return(X)
}
PBMC_1K <- PP(sharedGenes[,B == "1K"])
PBMC_10K <- PP(sharedGenes[,B == "10K"])
# List of HVG
HVG <- intersect(VariableFeatures(PBMC_1K), VariableFeatures(PBMC_10K))
# CCA
outCCA <- RunCCA(PBMC_1K, PBMC_10K, features = HVG)
DimPlot(outCCA, reduction = "cca")
outCCA <- ScaleData(outCCA)
outCCA <- FindVariableFeatures(outCCA)
outCCA <- RunPCA(outCCA)
outCCA <- RunUMAP(outCCA, dims = 1:20, reduction = 'cca')
UMAPPlot(outCCA)
# CCA
outCCA <- RunCCA(PBMC_1K, PBMC_10K, features = HVG)
DimPlot(outCCA, reduction = "cca")
outCCA <- ScaleData(outCCA)
outCCA <- FindVariableFeatures(outCCA)
outCCA <- RunPCA(outCCA)
outCCA <- RunUMAP(outCCA, dims = 1:20, reduction = 'cca')
UMAPPlot(outCCA)
# The Seurat way
allDataS <- list('1K' = PBMC_1K, '10K' = PBMC_10K)
allDataS <- FindIntegrationAnchors(object.list = allDataS)
allDataS <- IntegrateData(allDataS)
DefaultAssay(allDataS) <- "integrated"
allDataS <- ScaleData(allDataS)
allDataS <- RunPCA(allDataS)
allDataS <- RunUMAP(allDataS, dims = 1:20)
UMAPPlot(allDataS)
# The Harmony way
allDataH <- merge(PBMC_1K, PBMC_10K)
allDataH <- ScaleData(allDataH)
allDataH <- FindVariableFeatures(allDataH)
allDataH <- RunPCA(allDataH)
allDataH <- RunHarmony(allDataH, group.by.vars = 'orig.ident')
allDataH <- RunUMAP(allDataH, dims = 1:20, reduction = 'harmony')
UMAPPlot(allDataH)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment