Skip to content

Instantly share code, notes, and snippets.

@dosorio
Created July 31, 2019 18:10
Show Gist options
  • Save dosorio/0e5be7790deeafb192ed13cc8b0825b1 to your computer and use it in GitHub Desktop.
Save dosorio/0e5be7790deeafb192ed13cc8b0825b1 to your computer and use it in GitHub Desktop.
setwd("../Downloads/")
# BiocManager::install(c('TFBSTools', 'ggbio', 'motifmatchr',
# 'BSgenome','BSgenome.Hsapiens.UCSC.hg19',
# 'EnsDb.Hsapiens.v75','hdf5r'))
# devtools::install_github("timoast/signac")
library(Signac)
library(Seurat)
library(GenomeInfoDb)
library(EnsDb.Hsapiens.v75)
library(ggplot2)
set.seed(1234)
# Files downloaded from https://support.10xgenomics.com/single-cell-atac/datasets/1.1.0/atac_v1_pbmc_10k
download.file("http://cf.10xgenomics.com/samples/cell-atac/1.1.0/atac_v1_pbmc_10k/atac_v1_pbmc_10k_filtered_peak_bc_matrix.h5", "atacCounts.h5", method = 'curl')
download.file("http://cf.10xgenomics.com/samples/cell-atac/1.1.0/atac_v1_pbmc_10k/atac_v1_pbmc_10k_singlecell.csv", "atacMetadata.csv", method = 'curl')
download.file("http://cf.10xgenomics.com/samples/cell-atac/1.1.0/atac_v1_pbmc_10k/atac_v1_pbmc_10k_fragments.tsv.gz", "atacFragments.tsv.gz", method = 'curl')
download.file("http://cf.10xgenomics.com/samples/cell-atac/1.1.0/atac_v1_pbmc_10k/atac_v1_pbmc_10k_fragments.tsv.gz.tbi", "atacFragments.tsv.gz.tbi", method = 'curl')
# Loading data
counts <- Read10X_h5("atacCounts.h5")
metadata <- read.csv("atacMetadata.csv", header = TRUE, row.names = 1)
# Converting into Seurat Objects
PBMC <- CreateSeuratObject(counts = counts,
assay = "peaks",
project = "ATAC",
meta.data = metadata)
PBMC <- SetFragments(object = PBMC,file = "atacFragments.tsv.gz")
# QC
PBMC <- NucleosomeSignal(PBMC)
PBMC$pctReadsPeaks <- PBMC$peak_region_fragments/PBMC$total
PBMC$blackListRatio <- PBMC$blacklist_region_fragments/PBMC$peak_region_fragments
PBMC$nucleosome_signal[is.infinite(PBMC$nucleosome_signal)] <- max(PBMC$nucleosome_signal[is.finite(PBMC$nucleosome_signal)])
P1 <- VlnPlot(object = PBMC, features = c("pctReadsPeaks", "blackListRatio", "nucleosome_signal"), pt.size = 0.1)
P2A <- VlnPlot(object = PBMC,features = 'peak_region_fragments',pt.size = 0.1, log = TRUE) + NoLegend()
P2B <- FeatureScatter(PBMC,"peak_region_fragments",'nucleosome_signal', pt.size = 0.1) + NoLegend()
P2C <- FeatureScatter(PBMC,"peak_region_fragments",'blackListRatio', pt.size = 0.1) + NoLegend()
P2 <- CombinePlots(plots = list(P2A, P2B, P2C), ncol = 3)
CombinePlots(list(P1,P2),ncol = 1)
PBMC$nucGroup <- ifelse(PBMC$nucleosome_signal > 5, "NS > 5", "NS < 5")
PeriodPlot(PBMC, group.by = "nucGroup")
# Filtering
PBMC <- subset(x = PBMC, subset= peak_region_fragments > 1000 &
peak_region_fragments < 20000 &
pctReadsPeaks > 0.2 &
blackListRatio < 0.05 &
nucleosome_signal < 10)
# Normalization
PBMC <- RunTFIDF(PBMC)
PBMC <- FindTopFeatures(PBMC, min.cutoff = "q0")
# Dimentional Reduction
PBMC <- RunSVD(object = PBMC, assay = "peaks",
reduction.key = "LSI_",
reduction.name = "lsi")
PBMC <- RunUMAP(PBMC, reduction = "lsi", dims = 1:30)
PBMC <- FindNeighbors(PBMC, reduction = "lsi", dims = 1:30)
PBMC <- FindClusters(PBMC, resolution = 0.1)
UMAPPlot(PBMC, label = TRUE) + NoLegend()
# Extract gene coordinates from Ensembl, and ensure name formatting is consistent with Seurat object
gene.coords <- genes(EnsDb.Hsapiens.v75, filter = ~ gene_biotype == "protein_coding")
seqlevelsStyle(gene.coords) <- 'UCSC'
genebody.coords <- keepStandardChromosomes(gene.coords, pruning.mode = 'coarse')
genebodyandpromoter.coords <- Extend(x = gene.coords, upstream = 2000, downstream = 0)
# Create a gene by cell matrix
gene.activities <- FeatureMatrix(
fragments = "atacFragments.tsv.gz",
features = genebodyandpromoter.coords,
cells = colnames(PBMC),
chunk = 10
)
# convert rownames from chromsomal coordinates into gene names
gene.key <- genebodyandpromoter.coords$gene_name
names(gene.key) <- GRangesToString(grange = genebodyandpromoter.coords)
rownames(gene.activities) <- gene.key[rownames(gene.activities)]
# add the gene activity matrix to the Seurat object as a new assay, and normalize it
PBMC[['RNA']] <- CreateAssayObject(counts = gene.activities)
PBMC <- NormalizeData(
object = PBMC,
assay = 'RNA',
normalization.method = 'LogNormalize',
scale.factor = median(PBMC$nCount_RNA)
)
# Setting the default
DefaultAssay(PBMC) <- 'RNA'
# Plotting marker genes
FeaturePlot(
object = PBMC,
features = c('MS4A1', 'CD3D', 'LEF1', 'NKG7', 'TREM1', 'LYZ'),
pt.size = 0.1,
max.cutoff = 'q95',
ncol = 3
)
# RNA-seq
# Download the file from https://www.dropbox.com/s/zn6khirjafoyyxl/pbmc_10k_v3.rds?dl=0
PBMC_rna <- readRDS("pbmc_10k_v3.rds")
# Applying CCA
transfer.anchors <- FindTransferAnchors(
reference = PBMC_rna,
query = PBMC,
reduction = 'cca'
)
# Transfering labels
predicted.labels <- TransferData(
anchorset = transfer.anchors,
refdata = PBMC_rna$celltype,
weight.reduction = PBMC[['lsi']]
)
PBMC <- AddMetaData(object = PBMC, metadata = predicted.labels)
#Plotting
P1 <- DimPlot(
object = PBMC_rna,
group.by = 'celltype',
label = TRUE,
repel = TRUE) + NoLegend() + ggtitle('scRNA-seq')
P2 <- DimPlot(
object = PBMC,
group.by = 'predicted.id',
label = TRUE,
repel = TRUE) + NoLegend() + ggtitle('scATAC-seq')
CombinePlots(list(P1,P2), ncol = 2)
# Differences in peaks
DefaultAssay(PBMC) <- 'peaks'
Idents(PBMC) <- PBMC$predicted.id
dePeaks <- FindMarkers(
object = PBMC,
ident.1 = "CD14+ Monocytes",
ident.2 = "CD16+ Monocytes",
min.pct = 0.2,
test.use = 'LR',
latent.vars = 'peak_region_fragments'
)
# Plotting the differences
plot1 <- VlnPlot(
object = PBMC,
features = rownames(dePeaks)[1],
ncol = 3,
pt.size = 0.1
) + NoLegend()
plot2 <- FeaturePlot(
object = PBMC,
features = rownames(dePeaks)[1],
ncol = 3,
pt.size = 0.1
)
CombinePlots(list(plot1,plot2))
# Plotting the peaks
CoveragePlot(
object = PBMC,
region = rownames(dePeaks)[1],
sep = c(":", "-"),
annotation = EnsDb.Hsapiens.v75,
extend.upstream = 20000,
extend.downstream = 20000,
ncol = 1
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment