Skip to content

Instantly share code, notes, and snippets.

@slavailn
Created December 11, 2018 23:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save slavailn/9bf9925d8f48eb9e706f007a35bfb019 to your computer and use it in GitHub Desktop.
Save slavailn/9bf9925d8f48eb9e706f007a35bfb019 to your computer and use it in GitHub Desktop.
Going through code examples in ggbio
library(GenomicRanges)
library(ggbio)
library(IRanges)
set.seed(1)
N <- 1000
gr <- GRanges(seqnames =
sample(c("chr1", "chr2", "chr3"),
size = N, replace = TRUE),
IRanges(
start = sample(1:300, size = N, replace = TRUE),
width = sample(70:75, size = N,replace = TRUE)),
strand = sample(c("+", "-", "*"), size = N,
replace = TRUE),
value = rnorm(N, 10, 3), score = rnorm(N, 100, 30),
sample = sample(c("Normal", "Tumor"),
size = N, replace = TRUE),
pair = sample(letters, size = N,
replace = TRUE))
gr
# GRanges object with 1000 ranges and 4 metadata columns:
# seqnames ranges strand | value score sample pair
# <Rle> <IRanges> <Rle> | <numeric> <numeric> <character> <character>
# [1] chr1 232-305 * | 8.120638567773 134.048952659746 Normal a
# [2] chr1 208-279 * | 10.5509299726662 133.357955351108 Tumor j
# [3] chr3 196-268 + | 7.49311416276986 73.8766709918397 Normal g
idx <- sample(1:length(gr), size = 50)
idx
# [1] 65 676 734 111 47 131 876 834 862 34 107 544 109 714 715 778 608 495
# 448 35 206 336 534 752 419 670 509 776 878 420 322 889
# [33] 203 786 291 553 373 270 251 922 609 79 888 853 872 687 652 530 383 822
# Default granges, geom bar
autoplot(gr[idx])
###################################################
### code chunk number 4: bar-default-pre
###################################################
set.seed(123)
gr.b <- GRanges(seqnames = "chr1", IRanges(start = seq(1, 100, by = 10),
width = sample(4:9, size = 10, replace = TRUE)),
score = rnorm(10, 10, 3), value = runif(10, 1, 100))
gr.b2 <- GRanges(seqnames = "chr2", IRanges(start = seq(1, 100, by = 10),
width = sample(4:9, size = 10, replace = TRUE)),
score = rnorm(10, 10, 3), value = runif(10, 1, 100))
gr.b <- c(gr.b, gr.b2)
head(gr.b)
# GRanges object with 6 ranges and 2 metadata columns:
# seqnames ranges strand | score value
# <Rle> <IRanges> <Rle> | <numeric> <numeric>
# [1] chr1 1-9 * | 8.31857306034336 89.0643922903109
# [2] chr1 11-19 * | 9.30946753155016 69.5875372095034
# [3] chr1 21-28 * | 14.6761249424474 64.4101745630614
p1 <- autoplot(gr.b, geom = "bar")
## use value to fill the bar
p2 <- autoplot(gr.b, geom = "bar", aes(fill = value))
tracks(default = p1, fill = p2)
# Plot gr object
gr
# GRanges object with 1000 ranges and 4 metadata columns:
# seqnames ranges strand | value score sample pair
# <Rle> <IRanges> <Rle> | <numeric> <numeric> <character> <character>
# [1] chr1 232-305 * | 8.120638567773 134.048952659746 Normal a
# [2] chr1 208-279 * | 10.5509299726662 133.357955351108 Tumor j
# [3] chr3 196-268 + | 7.49311416276986 73.8766709918397 Normal g
# [4] chr2 23-95 + | 14.7858424064134 106.321947552185 Normal w
autoplot(gr[idx], geom = "arch", aes(color = value), facets = sample ~ seqnames)
gra <- GRanges("chr1", IRanges(c(1,7,20), end = c(4,9,30)), group = c("a", "a", "b"))
gra
# GRanges object with 3 ranges and 1 metadata column:
# seqnames ranges strand | group
# <Rle> <IRanges> <Rle> | <character>
# [1] chr1 1-4 * | a
# [2] chr1 7-9 * | a
# [3] chr1 20-30 * | b
## if you desn't specify group, then group based on stepping levels, and gaps are computed without
## considering extra group method
p1 <- autoplot(gra, aes(fill = group), geom = "alignment")
## when use group method, gaps only computed for grouped intervals.
## default is group.selfish = TRUE, each group keep one row.
## in this way, group labels could be shown as y axis.
p2 <- autoplot(gra, aes(fill = group, group = group), geom = "alignment")
## group.selfish = FALSE, save space
p3 <- autoplot(gra, aes(fill = group, group = group), geom = "alignment", group.selfish = FALSE)
tracks( 'non-group' = p1, 'group.selfish = TRUE' = p2 , 'group.selfish = FALSE' = p3)
## Autoplot circle
###################################################
### code chunk number 9: gr-autoplot-circle
###################################################
gr[idx]
autoplot(gr[idx], layout = 'circle')
seqlengths(gr) <- c(400, 500, 700)
values(gr)$to.gr <- gr[sample(1:length(gr), size = length(gr))]
idx <- sample(1:length(gr), size = 50)
gr <- gr[idx]
ggplot() + layout_circle(gr, geom = "ideo", fill = "gray70", radius = 7, trackWidth = 3) +
layout_circle(gr, geom = "bar", radius = 10, trackWidth = 4,
aes(fill = score, y = score)) +
layout_circle(gr, geom = "point", color = "red", radius = 14,
trackWidth = 3, grid = TRUE, aes(y = score)) +
layout_circle(gr, geom = "link", linked.to = "to.gr", radius = 6, trackWidth = 1)
# Circular plot
ggplot() + layout_circle(gr, geom = "ideo", fill = "gray70", radius = 7, trackWidth = 3) +
layout_circle(gr, geom = "bar", radius = 10, trackWidth = 4, aes(fill = score, y = score)) +
layout_circle(gr, geom = "point", color = "red", radius = 14, trackWidth = 3, grid = TRUE, aes(y = score)) +
layout_circle(gr, geom = "link", linked.to = "to.gr", radius = 6, trackWidth = 1)
data(hg19Ideogram, package = "biovizBase")
sq <- seqinfo(hg19Ideogram)
sq
# Seqinfo object with 93 sequences from hg19 genome:
# seqnames seqlengths isCircular genome
# chr1 249250621 <NA> hg19
# chr1_gl000191_random 106433 <NA> hg19
# chr1_gl000192_random 547496 <NA> hg19
# chr2 243199373 <NA> hg19
autoplot(sq[paste0("chr", c(1:22, "X"))])
# Code chunk ir-load
set.seed(1)
N <- 100
ir <- IRanges(start = sample(1:300, size = N, replace = TRUE),
width = sample(70:75, size = N,replace = TRUE))
# IRanges object with 100 ranges and 0 metadata columns:
# start end width
# <integer> <integer> <integer>
# [1] 80 152 73
# [2] 112 183 72
# [3] 172 242 71
## add meta data
df <- DataFrame(value = rnorm(N, 10, 3), score = rnorm(N, 100, 30),
sample = sample(c("Normal", "Tumor"),
size = N, replace = TRUE),
pair = sample(letters, size = N,
replace = TRUE))
df
# DataFrame with 100 rows and 4 columns
# value score sample pair
# <numeric> <numeric> <character> <character>
# 1 8.13889996832763 112.282055189528 Tumor y
# 2 10.1263476194327 150.666198586121 Tumor x
# 3 7.26723505434266 147.597653003259 Normal t
# 4 10.4740863172122 90.072765979517 Tumor r
# 5 8.03624606824355 31.442933941226 Tumor q
# Use this data frame as metadata
values(ir) <- df
ir
# IRanges object with 100 ranges and 4 metadata columns:
# start end width | value score sample pair
# <integer> <integer> <integer> | <numeric> <numeric> <character> <character>
# [1] 80 152 73 | 8.13889996832763 112.282055189528 Tumor y
# [2] 112 183 72 | 10.1263476194327 150.666198586121 Tumor x
# [3] 172 242 71 | 7.26723505434266 147.597653003259 Normal t
p1 <- autoplot(ir)
p2 <- autoplot(ir, aes(fill = pair)) + theme(legend.position = "none")
p3 <- autoplot(ir, stat = "coverage", geom = "line", facets = sample ~. )
p4 <- autoplot(ir, stat = "reduce")
tracks(p1, p2, p3, p4)
# GRanges list simulation
set.seed(1)
N <- 100
## ======================================================================
## Simulated GRanges
## ======================================================================
gr <- GRanges(seqnames =
sample(c("chr1", "chr2", "chr3"),
size = N, replace = TRUE),
IRanges(
start = sample(1:300, size = N, replace = TRUE),
width = sample(30:40, size = N,replace = TRUE)),
strand = sample(c("+", "-", "*"), size = N,
replace = TRUE),
value = rnorm(N, 10, 3), score = rnorm(N, 100, 30),
sample = sample(c("Normal", "Tumor"),
size = N, replace = TRUE),
pair = sample(letters, size = N,
replace = TRUE))
# GRanges object with 100 ranges and 4 metadata columns:
# seqnames ranges strand | value score sample pair
# <Rle> <IRanges> <Rle> | <numeric> <numeric> <character> <character>
# [1] chr1 72-101 * | 13.2233228748337 102.319093682108 Normal n
# [2] chr1 195-233 * | 15.6869643225957 91.0939407353014 Tumor e
# [3] chr1 293-324 * | 8.19100808918472 64.5027327870199 Tumor f
# [4] chr3 114-148 + | 8.82739653783059 100.338780652717 Tumor z
grl <- split(gr, values(gr)$pair)
grl
p1 <- autoplot(grl, group.selfish = TRUE)
p2 <- autoplot(grl, group.selfish = TRUE, main.geom = "arrowrect", gap.geom = "segment")
tracks(p1, p2)
autoplot(grl, aes(fill = ..grl_name..))
## Rle simulation
set.seed(1)
lambda <- c(rep(0.001, 4500), seq(0.001, 10, length = 500),
seq(10, 0.001, length = 500))
lambda
## @knitr create
xVector <- rpois(1e4, lambda)
xRle <- Rle(xVector)
xRle
## This is useful to plot peaks, enrichemnt around TSS, etc.
p1 <- autoplot(xRle)
p2 <- autoplot(xRle, nbin = 80)
p3 <- autoplot(xRle, geom = "heatmap", nbin = 200)
tracks( 'nbin = 30'= p1, "nbin = 80" = p2, "nbin = 200(heatmap)" = p3)
## Same here enrichment around a point, different graphics
p1 <- autoplot(xRle, stat = "identity")
p2 <- autoplot(xRle, stat = "identity", geom = "point", color = "red")
tracks( 'line' = p1, "point" = p2)
# Overlayed bargraph and heatmap
p1 <- autoplot(xRle, type = "viewMaxs", stat = "slice", lower = 5)
p2 <- autoplot(xRle, type = "viewMaxs", stat = "slice", lower = 5, geom = "heatmap")
tracks( 'bar' = p1, "heatmap" = p2)
# Rle List based histograms with different bin width and heatmaps of the same overlayed
xRleList <- RleList(xRle, 2L * xRle)
xRleList
p1 <- autoplot(xRleList)
p2 <- autoplot(xRleList, nbin = 80)
p3 <- autoplot(xRleList, geom = "heatmap", nbin = 200)
tracks( 'nbin = 30' = p1, "nbin = 80" = p2, "nbin = 200(heatmap)" = p3)
# Rle List bar graphs and scatter plot overlayed
p1 <- autoplot(xRleList, stat = "identity")
p2 <- autoplot(xRleList, stat = "identity", geom = "point", color = "red")
tracks('line' = p1, "point" = p2)
# Zoomed in on the area? Bar graph and heatmap overlayed
p1 <- autoplot(xRleList, type = "viewMaxs", stat = "slice", lower = 5)
p2 <- autoplot(xRleList, type = "viewMaxs", stat = "slice", lower = 5, geom = "heatmap")
tracks('bar' = p1, "heatmap" = p2)
# Expanded transcript view and reduced view in separate panels
# Gene models obtained from
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
data(genesymbol, package = "biovizBase")
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
p1 <- autoplot(txdb, which = genesymbol["ALDOA"], names.expr = "tx_name:::gene_id")
p2 <- autoplot(txdb, which = genesymbol["ALDOA"], stat = "reduce", color = "brown",
fill = "brown")
tracks(full = p1, reduce = p2, heights = c(5, 1)) + ylab("")
library(EnsDb.Hsapiens.v75)
ensdb <- EnsDb.Hsapiens.v75
# We are using gene name filter to retrieve transcripts for the gene
p1 <- autoplot(ensdb, which = GenenameFilter("ALDOA"), names.expr = "gene_name")
p2 <- autoplot(ensdb, which = ~ genename == "ALDOA", stat = "reduce",
color = "brown", fill = "brown")
# Expanded and reduced view of ALDOA gene transcripts
tracks(full = p1, reduce = p2, heights = c(5, 1)) + ylab("")
## Alternatively we can specify GRanges filter and display all overlapping genes
gr <- GRanges(seqnames=16, IRanges(30768000, 30770000), strand="+")
autoplot(ensdb, GRangesFilter(gr, "any"), names.expr="gene_name")
## Just submitting the GRanges object also works.
autoplot(ensdb, gr, names.expr="gene_name")
## Or genes encoded on both strands
gr <- GRanges(seqnames = 16, IRanges(30768000, 30770000), strand = "*")
autoplot(ensdb, GRangesFilter(gr), names.expr="gene_name")
## Also, we can spefify directly the gene ids and plot all transcripts of these
## genes (not only those overlapping with the region)
autoplot(ensdb, GeneIdFilter(c("ENSG00000196118", "ENSG00000156873")))
######################## Loading and displaying alignments ##################################
library(GenomicAlignments)
data("genesymbol", package = "biovizBase")
# genesymbol is just GRanges object
bamfile <- system.file("extdata", "SRR027894subRBM17.bam",
package="biovizBase")
which <- keepStandardChromosomes(genesymbol["RBM17"])
## Need to set use.names = TRUE
ga <- readGAlignments(bamfile,
param = ScanBamParam(which = which),
use.names = TRUE)
ga
# Overlay of default, bar and coverage view.
p1 <- autoplot(ga)
p2 <- autoplot(ga, geom = "rect")
p3 <- autoplot(ga, geom = "line", stat = "coverage")
tracks(default = p1, rect = p2, coverage = p3)
# Plot bedGraph file
bedgraph <- "metilene_qval.0.05.bedgraph"
autoplot(bedgraph, aes(fill = score))
############################# Matrix ###################################
volcano <- volcano[20:70, 20:60] - 150
# Autoplot matrix
autoplot(volcano)
autoplot(volcano, xlab = "xlab", main = "main", ylab = "ylab")
## special scale theme for 0-centered value
autoplot(volcano, geom = "raster") + scale_fill_fold_change()
## when a matrix has colnames and rownames label them by default
colnames(volcano) <- sort(sample(1:300, size = ncol(volcano), replace = FALSE))
autoplot(volcano) + scale_fill_fold_change()
rownames(volcano) <- letters[sample(1:24, size = nrow(volcano), replace = TRUE)]
autoplot(volcano)
## even with row/col names, you could also disable it and just use numeric indel
autoplot(volcano, colnames.label = FALSE)
autoplot(volcano, rownames.label = FALSE, colnames.label = FALSE)
## don't want the axis has label??
autoplot(volcano, axis.text.x = FALSE)
autoplot(volcano, axis.text.y = FALSE)
# or totally remove axis
colnames(volcano) <- lapply(letters[sample(1:24, size = ncol(volcano),
replace = TRUE)],
function(x){
paste(rep(x, 7), collapse = "")
})
autoplot(volcano)
# Change angle to deal with overlapping labels
autoplot(volcano, axis.text.angle = -45, hjust = 0)
## when character is the value
x <- sample(c(letters[1:3], NA), size = 100, replace = TRUE)
mx <- matrix(x, nrow = 5)
autoplot(mx)
## tile gives you a white margin
rownames(mx) <- LETTERS[1:5]
autoplot(mx, color = "white")
colnames(mx) <- LETTERS[1:20]
autoplot(mx, color = "white")
autoplot(mx, color = "white", size = 2)
## weird in aes(), though works
## default tile is flexible
autoplot(mx, aes(width = 0.6, height = 0.6))
autoplot(mx, aes(width = 0.6, height = 0.6), na.value = "white")
autoplot(mx, aes(width = 0.6, height = 0.6)) + theme_clear()
lambda <- c(rep(0.001, 4500), seq(0.001, 10, length = 500),
seq(10, 0.001, length = 500))
xVector <- dnorm(1:5e3, mean = 1e3, sd = 200)
xRle <- Rle(xVector)
v1 <- Views(xRle, start = sample(.4e3:.6e3, size = 50, replace = FALSE), width =1000)
autoplot(v1)
names(v1) <- letters[sample(1:24, size = length(v1), replace = TRUE)]
autoplot(v1)
autoplot(v1, geom = "tile", aes(width = 0.5, height = 0.5))
autoplot(v1, geom = "line")
autoplot(v1, geom = "line", aes(color = row)) + theme(legend.position = "none")
autoplot(v1, geom = "line", facets = NULL)
autoplot(v1, geom = "line", facets = NULL, alpha = 0.1)
############################## Expressionset ############################################
library(Biobase)
data(sample.ExpressionSet)
sample.ExpressionSet
set.seed(1)
## select 50 features
idx <- sample(seq_len(dim(sample.ExpressionSet)[1]), size = 50)
eset <- sample.ExpressionSet[idx,]
eset
autoplot(as.matrix(pData(eset)))
## default heatmap
p1 <- autoplot(eset)
p1
p2 <- p1 + scale_fill_fold_change()
p2
autoplot(eset)
autoplot(eset, geom = "tile", color = "white", size = 2)
autoplot(eset, geom = "tile", aes(width = 0.6, height = 0.6))
autoplot(eset, pheno.plot = TRUE)
idx <- order(pData(eset)[,1])
eset2 <- eset[,idx]
autoplot(eset2, pheno.plot = TRUE)
## default heatmap
p1 <- autoplot(eset)
p2 <- p1 + scale_fill_fold_change()
p2
autoplot(eset)
autoplot(eset, geom = "tile", color = "white", size = 2)
autoplot(eset, geom = "tile", aes(width = 0.6, height = 0.6))
dev.off()
autoplot(eset, pheno.plot = TRUE)
idx <- order(pData(eset)[,1])
eset2 <- eset[,idx]
autoplot(eset2, pheno.plot = TRUE)
## parallel coordinate plot
autoplot(eset, type = "pcp")
library(SummarizedExperiment)
nrows <- 200; ncols <- 6
counts <- matrix(runif(nrows * ncols, 1, 1e4), nrows)
counts2 <- matrix(runif(nrows * ncols, 1, 1e4), nrows)
rowRanges <- GRanges(rep(c("chr1", "chr2"), c(50, 150)),
IRanges(floor(runif(200, 1e5, 1e6)), width=100),
strand=sample(c("+", "-"), 200, TRUE))
rowRanges
colData <- DataFrame(Treatment=rep(c("ChIP", "Input"), 3),
row.names=LETTERS[1:6])
colData
sset <- SummarizedExperiment(assays=SimpleList(counts=counts,
counts2 = counts2),
rowRanges=rowRanges, colData=colData)
sset
autoplot(sset) + scale_fill_fold_change()
dev.off()
autoplot(sset, pheno.plot = TRUE)
## pcp - parallel coordinates plot
autoplot(sset, type = "pcp")
## Boxplot
autoplot(sset, type = "boxplot")
#############################################################################
## Plotting BSgenome
library(BSgenome.Hsapiens.UCSC.hg19)
data(genesymbol, package = "biovizBase")
p1 <- autoplot(Hsapiens, which = resize(genesymbol["ALDOA"], width = 50))
p1
p2 <- autoplot(Hsapiens, which = resize(genesymbol["ALDOA"], width = 50), geom = "rect")
p2
tracks(text = p1, rect = p2)
############################################################################
## Plot ranges linked to data
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
library(ggbio)
data(genesymbol, package = "biovizBase")
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
txdb
model <- exonsBy(txdb, by = "tx")
model17 <- subsetByOverlaps(model, genesymbol["RBM17"])
exons <- exons(txdb)
exon17 <- subsetByOverlaps(exons, genesymbol["RBM17"])
exon.new <- reduce(exon17)
values(exon.new)$sample1 <- rnorm(length(exon.new), 10, 3)
values(exon.new)$sample2 <- rnorm(length(exon.new), 10, 10)
values(exon.new)$score <- rnorm(length(exon.new))
values(exon.new)$significant <- sample(c(TRUE,FALSE), size = length(exon.new), replace = TRUE)
exon.new
# This plot shows expression values for exons same in DEXseq paper
plotRangesLinkedToData(exon.new, stat.y = c("sample1", "sample2"))
plotRangesLinkedToData(exon.new, stat.y = 1:2)
plotRangesLinkedToData(exon.new, stat.y = 1:2, size = 3, linetype = 4)
plotRangesLinkedToData(exon.new, stat.y = 1:2, size = 3, linetype = 4,
sig = "significant")
plotRangesLinkedToData(exon.new, stat.y = 1:2, size = 3, linetype = 4,
sig = "significant", sig.col = c("gray90","red"))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment