Created
July 31, 2019 18:10
-
-
Save dosorio/0e5be7790deeafb192ed13cc8b0825b1 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
setwd("../Downloads/") | |
# BiocManager::install(c('TFBSTools', 'ggbio', 'motifmatchr', | |
# 'BSgenome','BSgenome.Hsapiens.UCSC.hg19', | |
# 'EnsDb.Hsapiens.v75','hdf5r')) | |
# devtools::install_github("timoast/signac") | |
library(Signac) | |
library(Seurat) | |
library(GenomeInfoDb) | |
library(EnsDb.Hsapiens.v75) | |
library(ggplot2) | |
set.seed(1234) | |
# Files downloaded from https://support.10xgenomics.com/single-cell-atac/datasets/1.1.0/atac_v1_pbmc_10k | |
download.file("http://cf.10xgenomics.com/samples/cell-atac/1.1.0/atac_v1_pbmc_10k/atac_v1_pbmc_10k_filtered_peak_bc_matrix.h5", "atacCounts.h5", method = 'curl') | |
download.file("http://cf.10xgenomics.com/samples/cell-atac/1.1.0/atac_v1_pbmc_10k/atac_v1_pbmc_10k_singlecell.csv", "atacMetadata.csv", method = 'curl') | |
download.file("http://cf.10xgenomics.com/samples/cell-atac/1.1.0/atac_v1_pbmc_10k/atac_v1_pbmc_10k_fragments.tsv.gz", "atacFragments.tsv.gz", method = 'curl') | |
download.file("http://cf.10xgenomics.com/samples/cell-atac/1.1.0/atac_v1_pbmc_10k/atac_v1_pbmc_10k_fragments.tsv.gz.tbi", "atacFragments.tsv.gz.tbi", method = 'curl') | |
# Loading data | |
counts <- Read10X_h5("atacCounts.h5") | |
metadata <- read.csv("atacMetadata.csv", header = TRUE, row.names = 1) | |
# Converting into Seurat Objects | |
PBMC <- CreateSeuratObject(counts = counts, | |
assay = "peaks", | |
project = "ATAC", | |
meta.data = metadata) | |
PBMC <- SetFragments(object = PBMC,file = "atacFragments.tsv.gz") | |
# QC | |
PBMC <- NucleosomeSignal(PBMC) | |
PBMC$pctReadsPeaks <- PBMC$peak_region_fragments/PBMC$total | |
PBMC$blackListRatio <- PBMC$blacklist_region_fragments/PBMC$peak_region_fragments | |
PBMC$nucleosome_signal[is.infinite(PBMC$nucleosome_signal)] <- max(PBMC$nucleosome_signal[is.finite(PBMC$nucleosome_signal)]) | |
P1 <- VlnPlot(object = PBMC, features = c("pctReadsPeaks", "blackListRatio", "nucleosome_signal"), pt.size = 0.1) | |
P2A <- VlnPlot(object = PBMC,features = 'peak_region_fragments',pt.size = 0.1, log = TRUE) + NoLegend() | |
P2B <- FeatureScatter(PBMC,"peak_region_fragments",'nucleosome_signal', pt.size = 0.1) + NoLegend() | |
P2C <- FeatureScatter(PBMC,"peak_region_fragments",'blackListRatio', pt.size = 0.1) + NoLegend() | |
P2 <- CombinePlots(plots = list(P2A, P2B, P2C), ncol = 3) | |
CombinePlots(list(P1,P2),ncol = 1) | |
PBMC$nucGroup <- ifelse(PBMC$nucleosome_signal > 5, "NS > 5", "NS < 5") | |
PeriodPlot(PBMC, group.by = "nucGroup") | |
# Filtering | |
PBMC <- subset(x = PBMC, subset= peak_region_fragments > 1000 & | |
peak_region_fragments < 20000 & | |
pctReadsPeaks > 0.2 & | |
blackListRatio < 0.05 & | |
nucleosome_signal < 10) | |
# Normalization | |
PBMC <- RunTFIDF(PBMC) | |
PBMC <- FindTopFeatures(PBMC, min.cutoff = "q0") | |
# Dimentional Reduction | |
PBMC <- RunSVD(object = PBMC, assay = "peaks", | |
reduction.key = "LSI_", | |
reduction.name = "lsi") | |
PBMC <- RunUMAP(PBMC, reduction = "lsi", dims = 1:30) | |
PBMC <- FindNeighbors(PBMC, reduction = "lsi", dims = 1:30) | |
PBMC <- FindClusters(PBMC, resolution = 0.1) | |
UMAPPlot(PBMC, label = TRUE) + NoLegend() | |
# Extract gene coordinates from Ensembl, and ensure name formatting is consistent with Seurat object | |
gene.coords <- genes(EnsDb.Hsapiens.v75, filter = ~ gene_biotype == "protein_coding") | |
seqlevelsStyle(gene.coords) <- 'UCSC' | |
genebody.coords <- keepStandardChromosomes(gene.coords, pruning.mode = 'coarse') | |
genebodyandpromoter.coords <- Extend(x = gene.coords, upstream = 2000, downstream = 0) | |
# Create a gene by cell matrix | |
gene.activities <- FeatureMatrix( | |
fragments = "atacFragments.tsv.gz", | |
features = genebodyandpromoter.coords, | |
cells = colnames(PBMC), | |
chunk = 10 | |
) | |
# convert rownames from chromsomal coordinates into gene names | |
gene.key <- genebodyandpromoter.coords$gene_name | |
names(gene.key) <- GRangesToString(grange = genebodyandpromoter.coords) | |
rownames(gene.activities) <- gene.key[rownames(gene.activities)] | |
# add the gene activity matrix to the Seurat object as a new assay, and normalize it | |
PBMC[['RNA']] <- CreateAssayObject(counts = gene.activities) | |
PBMC <- NormalizeData( | |
object = PBMC, | |
assay = 'RNA', | |
normalization.method = 'LogNormalize', | |
scale.factor = median(PBMC$nCount_RNA) | |
) | |
# Setting the default | |
DefaultAssay(PBMC) <- 'RNA' | |
# Plotting marker genes | |
FeaturePlot( | |
object = PBMC, | |
features = c('MS4A1', 'CD3D', 'LEF1', 'NKG7', 'TREM1', 'LYZ'), | |
pt.size = 0.1, | |
max.cutoff = 'q95', | |
ncol = 3 | |
) | |
# RNA-seq | |
# Download the file from https://www.dropbox.com/s/zn6khirjafoyyxl/pbmc_10k_v3.rds?dl=0 | |
PBMC_rna <- readRDS("pbmc_10k_v3.rds") | |
# Applying CCA | |
transfer.anchors <- FindTransferAnchors( | |
reference = PBMC_rna, | |
query = PBMC, | |
reduction = 'cca' | |
) | |
# Transfering labels | |
predicted.labels <- TransferData( | |
anchorset = transfer.anchors, | |
refdata = PBMC_rna$celltype, | |
weight.reduction = PBMC[['lsi']] | |
) | |
PBMC <- AddMetaData(object = PBMC, metadata = predicted.labels) | |
#Plotting | |
P1 <- DimPlot( | |
object = PBMC_rna, | |
group.by = 'celltype', | |
label = TRUE, | |
repel = TRUE) + NoLegend() + ggtitle('scRNA-seq') | |
P2 <- DimPlot( | |
object = PBMC, | |
group.by = 'predicted.id', | |
label = TRUE, | |
repel = TRUE) + NoLegend() + ggtitle('scATAC-seq') | |
CombinePlots(list(P1,P2), ncol = 2) | |
# Differences in peaks | |
DefaultAssay(PBMC) <- 'peaks' | |
Idents(PBMC) <- PBMC$predicted.id | |
dePeaks <- FindMarkers( | |
object = PBMC, | |
ident.1 = "CD14+ Monocytes", | |
ident.2 = "CD16+ Monocytes", | |
min.pct = 0.2, | |
test.use = 'LR', | |
latent.vars = 'peak_region_fragments' | |
) | |
# Plotting the differences | |
plot1 <- VlnPlot( | |
object = PBMC, | |
features = rownames(dePeaks)[1], | |
ncol = 3, | |
pt.size = 0.1 | |
) + NoLegend() | |
plot2 <- FeaturePlot( | |
object = PBMC, | |
features = rownames(dePeaks)[1], | |
ncol = 3, | |
pt.size = 0.1 | |
) | |
CombinePlots(list(plot1,plot2)) | |
# Plotting the peaks | |
CoveragePlot( | |
object = PBMC, | |
region = rownames(dePeaks)[1], | |
sep = c(":", "-"), | |
annotation = EnsDb.Hsapiens.v75, | |
extend.upstream = 20000, | |
extend.downstream = 20000, | |
ncol = 1 | |
) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment