dosorio/SC-02-27.R

## SC-02-27.R
# Review
# https://genomebiology.biomedcentral.com/articles/10.1186/s13059-019-1850-9

# Install Harmony
library(devtools)
install_github("immunogenomics/harmony")

# Install SVA
BiocManager::install('sva')

# Moving to downloads
setwd("../Downloads/")

# Downloading files
# 1K Cells
download.file("http://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_1k_v3/pbmc_1k_v3_filtered_feature_bc_matrix.tar.gz", destfile = "PBMC1K.tar.gz")
untar("PBMC1K.tar.gz", exdir = "1K")

# 10K Cells
download.file("http://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_10k_v3/pbmc_10k_v3_filtered_feature_bc_matrix.tar.gz", destfile = "PBMC10K.tar.gz")
untar("PBMC10K.tar.gz", exdir = "10K")

# Loading libraries
library(Seurat)
library(sva)
library(Matrix)
library(harmony)

# Reading the cells
PBMC_1K <- Read10X("1K/filtered_feature_bc_matrix/")
PBMC_10K <- Read10X("10K/filtered_feature_bc_matrix/")

# Selecting 3000 random cells
PBMC_10K <- PBMC_10K[,sample(colnames(PBMC_10K), 3000)]

# Simulating a batch effect
PBMC_1K <- t(t((PBMC_1K))/colSums(PBMC_1K)) * 1e6
PBMC_10K <- t(t((PBMC_10K))/colSums(PBMC_10K)) * 0.1e6

# Renaming the cells
colnames(PBMC_1K) <- paste0("1K_",colnames(PBMC_1K))
colnames(PBMC_10K) <- paste0("10K_",colnames(PBMC_10K))

# QC
QC <- function(X){
  mtCounts <- colSums(X[grepl("MT-", rownames(X)),])
  allCounts <- colSums(X)
  mtRate <- mtCounts/allCounts
  X <- X[rowSums(X) > 0, mtRate < 0.1]
  return(X)
}
PBMC_1K <- QC(PBMC_1K)
PBMC_10K <- QC(PBMC_10K)

# One gene batch effect
plot(c(PBMC_1K["MALAT1",],PBMC_10K["MALAT1",]), main = "BEFORE CORRECTION")

# Batch correction using SVA
B <- unlist(lapply(strsplit(c(colnames(PBMC_1K), colnames(PBMC_10K)), "_"), function(X){X[1]}))
sharedGenes <- intersect(rownames(PBMC_1K), rownames(PBMC_10K))
sharedGenes <- cbind(PBMC_1K[sharedGenes,], PBMC_10K[sharedGenes,])
sharedGenes <- sharedGenes[rowVars(sharedGenes) > 0,]
sharedGenes <- ComBat(dat = sharedGenes, batch = B)
sharedGenes <- round(sharedGenes)
sharedGenes[sharedGenes < 0] <- 0
plot(sharedGenes["MALAT1",], main = "AFTER CORRECTION")

# PreProcessing
PP <- function(X){
  X <- CreateSeuratObject(X)
  X <- ScaleData(X)
  X <- FindVariableFeatures(X)
  return(X)
}
PBMC_1K <- PP(sharedGenes[,B == "1K"])
PBMC_10K <- PP(sharedGenes[,B == "10K"])

# List of HVG
HVG <- intersect(VariableFeatures(PBMC_1K), VariableFeatures(PBMC_10K))

# CCA
outCCA <- RunCCA(PBMC_1K, PBMC_10K, features = HVG)
DimPlot(outCCA, reduction = "cca")
outCCA <- ScaleData(outCCA)
outCCA <- FindVariableFeatures(outCCA)
outCCA <- RunPCA(outCCA)
outCCA <- RunUMAP(outCCA, dims = 1:20, reduction = 'cca')
UMAPPlot(outCCA)

# CCA
outCCA <- RunCCA(PBMC_1K, PBMC_10K, features = HVG)
DimPlot(outCCA, reduction = "cca")
outCCA <- ScaleData(outCCA)
outCCA <- FindVariableFeatures(outCCA)
outCCA <- RunPCA(outCCA)
outCCA <- RunUMAP(outCCA, dims = 1:20, reduction = 'cca')
UMAPPlot(outCCA)


# The Seurat way
allDataS <- list('1K' = PBMC_1K, '10K' = PBMC_10K)
allDataS <- FindIntegrationAnchors(object.list = allDataS)
allDataS <- IntegrateData(allDataS)
DefaultAssay(allDataS) <- "integrated"

allDataS <- ScaleData(allDataS)
allDataS <- RunPCA(allDataS)
allDataS <- RunUMAP(allDataS, dims = 1:20)
UMAPPlot(allDataS)


# The Harmony way
allDataH <- merge(PBMC_1K, PBMC_10K)
allDataH <- ScaleData(allDataH)
allDataH <- FindVariableFeatures(allDataH)
allDataH <- RunPCA(allDataH)
allDataH <- RunHarmony(allDataH, group.by.vars = 'orig.ident')
allDataH <- RunUMAP(allDataH, dims = 1:20, reduction = 'harmony')
UMAPPlot(allDataH)
	# Review
	# https://genomebiology.biomedcentral.com/articles/10.1186/s13059-019-1850-9

	# Install Harmony
	library(devtools)
	install_github("immunogenomics/harmony")

	# Install SVA
	BiocManager::install('sva')

	# Moving to downloads
	setwd("../Downloads/")

	# Downloading files
	# 1K Cells
	download.file("http://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_1k_v3/pbmc_1k_v3_filtered_feature_bc_matrix.tar.gz", destfile = "PBMC1K.tar.gz")
	untar("PBMC1K.tar.gz", exdir = "1K")

	# 10K Cells
	download.file("http://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_10k_v3/pbmc_10k_v3_filtered_feature_bc_matrix.tar.gz", destfile = "PBMC10K.tar.gz")
	untar("PBMC10K.tar.gz", exdir = "10K")

	# Loading libraries
	library(Seurat)
	library(sva)
	library(Matrix)
	library(harmony)

	# Reading the cells
	PBMC_1K <- Read10X("1K/filtered_feature_bc_matrix/")
	PBMC_10K <- Read10X("10K/filtered_feature_bc_matrix/")

	# Selecting 3000 random cells
	PBMC_10K <- PBMC_10K[,sample(colnames(PBMC_10K), 3000)]

	# Simulating a batch effect
	PBMC_1K <- t(t((PBMC_1K))/colSums(PBMC_1K)) * 1e6
	PBMC_10K <- t(t((PBMC_10K))/colSums(PBMC_10K)) * 0.1e6

	# Renaming the cells
	colnames(PBMC_1K) <- paste0("1K_",colnames(PBMC_1K))
	colnames(PBMC_10K) <- paste0("10K_",colnames(PBMC_10K))

	# QC
	QC <- function(X){
	mtCounts <- colSums(X[grepl("MT-", rownames(X)),])
	allCounts <- colSums(X)
	mtRate <- mtCounts/allCounts
	X <- X[rowSums(X) > 0, mtRate < 0.1]
	return(X)
	}
	PBMC_1K <- QC(PBMC_1K)
	PBMC_10K <- QC(PBMC_10K)

	# One gene batch effect
	plot(c(PBMC_1K["MALAT1",],PBMC_10K["MALAT1",]), main = "BEFORE CORRECTION")

	# Batch correction using SVA
	B <- unlist(lapply(strsplit(c(colnames(PBMC_1K), colnames(PBMC_10K)), "_"), function(X){X[1]}))
	sharedGenes <- intersect(rownames(PBMC_1K), rownames(PBMC_10K))
	sharedGenes <- cbind(PBMC_1K[sharedGenes,], PBMC_10K[sharedGenes,])
	sharedGenes <- sharedGenes[rowVars(sharedGenes) > 0,]
	sharedGenes <- ComBat(dat = sharedGenes, batch = B)
	sharedGenes <- round(sharedGenes)
	sharedGenes[sharedGenes < 0] <- 0
	plot(sharedGenes["MALAT1",], main = "AFTER CORRECTION")

	# PreProcessing
	PP <- function(X){
	X <- CreateSeuratObject(X)
	X <- ScaleData(X)
	X <- FindVariableFeatures(X)
	return(X)
	}
	PBMC_1K <- PP(sharedGenes[,B == "1K"])
	PBMC_10K <- PP(sharedGenes[,B == "10K"])

	# List of HVG
	HVG <- intersect(VariableFeatures(PBMC_1K), VariableFeatures(PBMC_10K))

	# CCA
	outCCA <- RunCCA(PBMC_1K, PBMC_10K, features = HVG)
	DimPlot(outCCA, reduction = "cca")
	outCCA <- ScaleData(outCCA)
	outCCA <- FindVariableFeatures(outCCA)
	outCCA <- RunPCA(outCCA)
	outCCA <- RunUMAP(outCCA, dims = 1:20, reduction = 'cca')
	UMAPPlot(outCCA)

	# CCA
	outCCA <- RunCCA(PBMC_1K, PBMC_10K, features = HVG)
	DimPlot(outCCA, reduction = "cca")
	outCCA <- ScaleData(outCCA)
	outCCA <- FindVariableFeatures(outCCA)
	outCCA <- RunPCA(outCCA)
	outCCA <- RunUMAP(outCCA, dims = 1:20, reduction = 'cca')
	UMAPPlot(outCCA)


	# The Seurat way
	allDataS <- list('1K' = PBMC_1K, '10K' = PBMC_10K)
	allDataS <- FindIntegrationAnchors(object.list = allDataS)
	allDataS <- IntegrateData(allDataS)
	DefaultAssay(allDataS) <- "integrated"

	allDataS <- ScaleData(allDataS)
	allDataS <- RunPCA(allDataS)
	allDataS <- RunUMAP(allDataS, dims = 1:20)
	UMAPPlot(allDataS)


	# The Harmony way
	allDataH <- merge(PBMC_1K, PBMC_10K)
	allDataH <- ScaleData(allDataH)
	allDataH <- FindVariableFeatures(allDataH)
	allDataH <- RunPCA(allDataH)
	allDataH <- RunHarmony(allDataH, group.by.vars = 'orig.ident')
	allDataH <- RunUMAP(allDataH, dims = 1:20, reduction = 'harmony')
	UMAPPlot(allDataH)