Created
December 11, 2018 23:28
-
-
Save slavailn/9bf9925d8f48eb9e706f007a35bfb019 to your computer and use it in GitHub Desktop.
Going through code examples in ggbio
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(GenomicRanges) | |
library(ggbio) | |
library(IRanges) | |
set.seed(1) | |
N <- 1000 | |
gr <- GRanges(seqnames = | |
sample(c("chr1", "chr2", "chr3"), | |
size = N, replace = TRUE), | |
IRanges( | |
start = sample(1:300, size = N, replace = TRUE), | |
width = sample(70:75, size = N,replace = TRUE)), | |
strand = sample(c("+", "-", "*"), size = N, | |
replace = TRUE), | |
value = rnorm(N, 10, 3), score = rnorm(N, 100, 30), | |
sample = sample(c("Normal", "Tumor"), | |
size = N, replace = TRUE), | |
pair = sample(letters, size = N, | |
replace = TRUE)) | |
gr | |
# GRanges object with 1000 ranges and 4 metadata columns: | |
# seqnames ranges strand | value score sample pair | |
# <Rle> <IRanges> <Rle> | <numeric> <numeric> <character> <character> | |
# [1] chr1 232-305 * | 8.120638567773 134.048952659746 Normal a | |
# [2] chr1 208-279 * | 10.5509299726662 133.357955351108 Tumor j | |
# [3] chr3 196-268 + | 7.49311416276986 73.8766709918397 Normal g | |
idx <- sample(1:length(gr), size = 50) | |
idx | |
# [1] 65 676 734 111 47 131 876 834 862 34 107 544 109 714 715 778 608 495 | |
# 448 35 206 336 534 752 419 670 509 776 878 420 322 889 | |
# [33] 203 786 291 553 373 270 251 922 609 79 888 853 872 687 652 530 383 822 | |
# Default granges, geom bar | |
autoplot(gr[idx]) | |
################################################### | |
### code chunk number 4: bar-default-pre | |
################################################### | |
set.seed(123) | |
gr.b <- GRanges(seqnames = "chr1", IRanges(start = seq(1, 100, by = 10), | |
width = sample(4:9, size = 10, replace = TRUE)), | |
score = rnorm(10, 10, 3), value = runif(10, 1, 100)) | |
gr.b2 <- GRanges(seqnames = "chr2", IRanges(start = seq(1, 100, by = 10), | |
width = sample(4:9, size = 10, replace = TRUE)), | |
score = rnorm(10, 10, 3), value = runif(10, 1, 100)) | |
gr.b <- c(gr.b, gr.b2) | |
head(gr.b) | |
# GRanges object with 6 ranges and 2 metadata columns: | |
# seqnames ranges strand | score value | |
# <Rle> <IRanges> <Rle> | <numeric> <numeric> | |
# [1] chr1 1-9 * | 8.31857306034336 89.0643922903109 | |
# [2] chr1 11-19 * | 9.30946753155016 69.5875372095034 | |
# [3] chr1 21-28 * | 14.6761249424474 64.4101745630614 | |
p1 <- autoplot(gr.b, geom = "bar") | |
## use value to fill the bar | |
p2 <- autoplot(gr.b, geom = "bar", aes(fill = value)) | |
tracks(default = p1, fill = p2) | |
# Plot gr object | |
gr | |
# GRanges object with 1000 ranges and 4 metadata columns: | |
# seqnames ranges strand | value score sample pair | |
# <Rle> <IRanges> <Rle> | <numeric> <numeric> <character> <character> | |
# [1] chr1 232-305 * | 8.120638567773 134.048952659746 Normal a | |
# [2] chr1 208-279 * | 10.5509299726662 133.357955351108 Tumor j | |
# [3] chr3 196-268 + | 7.49311416276986 73.8766709918397 Normal g | |
# [4] chr2 23-95 + | 14.7858424064134 106.321947552185 Normal w | |
autoplot(gr[idx], geom = "arch", aes(color = value), facets = sample ~ seqnames) | |
gra <- GRanges("chr1", IRanges(c(1,7,20), end = c(4,9,30)), group = c("a", "a", "b")) | |
gra | |
# GRanges object with 3 ranges and 1 metadata column: | |
# seqnames ranges strand | group | |
# <Rle> <IRanges> <Rle> | <character> | |
# [1] chr1 1-4 * | a | |
# [2] chr1 7-9 * | a | |
# [3] chr1 20-30 * | b | |
## if you desn't specify group, then group based on stepping levels, and gaps are computed without | |
## considering extra group method | |
p1 <- autoplot(gra, aes(fill = group), geom = "alignment") | |
## when use group method, gaps only computed for grouped intervals. | |
## default is group.selfish = TRUE, each group keep one row. | |
## in this way, group labels could be shown as y axis. | |
p2 <- autoplot(gra, aes(fill = group, group = group), geom = "alignment") | |
## group.selfish = FALSE, save space | |
p3 <- autoplot(gra, aes(fill = group, group = group), geom = "alignment", group.selfish = FALSE) | |
tracks( 'non-group' = p1, 'group.selfish = TRUE' = p2 , 'group.selfish = FALSE' = p3) | |
## Autoplot circle | |
################################################### | |
### code chunk number 9: gr-autoplot-circle | |
################################################### | |
gr[idx] | |
autoplot(gr[idx], layout = 'circle') | |
seqlengths(gr) <- c(400, 500, 700) | |
values(gr)$to.gr <- gr[sample(1:length(gr), size = length(gr))] | |
idx <- sample(1:length(gr), size = 50) | |
gr <- gr[idx] | |
ggplot() + layout_circle(gr, geom = "ideo", fill = "gray70", radius = 7, trackWidth = 3) + | |
layout_circle(gr, geom = "bar", radius = 10, trackWidth = 4, | |
aes(fill = score, y = score)) + | |
layout_circle(gr, geom = "point", color = "red", radius = 14, | |
trackWidth = 3, grid = TRUE, aes(y = score)) + | |
layout_circle(gr, geom = "link", linked.to = "to.gr", radius = 6, trackWidth = 1) | |
# Circular plot | |
ggplot() + layout_circle(gr, geom = "ideo", fill = "gray70", radius = 7, trackWidth = 3) + | |
layout_circle(gr, geom = "bar", radius = 10, trackWidth = 4, aes(fill = score, y = score)) + | |
layout_circle(gr, geom = "point", color = "red", radius = 14, trackWidth = 3, grid = TRUE, aes(y = score)) + | |
layout_circle(gr, geom = "link", linked.to = "to.gr", radius = 6, trackWidth = 1) | |
data(hg19Ideogram, package = "biovizBase") | |
sq <- seqinfo(hg19Ideogram) | |
sq | |
# Seqinfo object with 93 sequences from hg19 genome: | |
# seqnames seqlengths isCircular genome | |
# chr1 249250621 <NA> hg19 | |
# chr1_gl000191_random 106433 <NA> hg19 | |
# chr1_gl000192_random 547496 <NA> hg19 | |
# chr2 243199373 <NA> hg19 | |
autoplot(sq[paste0("chr", c(1:22, "X"))]) | |
# Code chunk ir-load | |
set.seed(1) | |
N <- 100 | |
ir <- IRanges(start = sample(1:300, size = N, replace = TRUE), | |
width = sample(70:75, size = N,replace = TRUE)) | |
# IRanges object with 100 ranges and 0 metadata columns: | |
# start end width | |
# <integer> <integer> <integer> | |
# [1] 80 152 73 | |
# [2] 112 183 72 | |
# [3] 172 242 71 | |
## add meta data | |
df <- DataFrame(value = rnorm(N, 10, 3), score = rnorm(N, 100, 30), | |
sample = sample(c("Normal", "Tumor"), | |
size = N, replace = TRUE), | |
pair = sample(letters, size = N, | |
replace = TRUE)) | |
df | |
# DataFrame with 100 rows and 4 columns | |
# value score sample pair | |
# <numeric> <numeric> <character> <character> | |
# 1 8.13889996832763 112.282055189528 Tumor y | |
# 2 10.1263476194327 150.666198586121 Tumor x | |
# 3 7.26723505434266 147.597653003259 Normal t | |
# 4 10.4740863172122 90.072765979517 Tumor r | |
# 5 8.03624606824355 31.442933941226 Tumor q | |
# Use this data frame as metadata | |
values(ir) <- df | |
ir | |
# IRanges object with 100 ranges and 4 metadata columns: | |
# start end width | value score sample pair | |
# <integer> <integer> <integer> | <numeric> <numeric> <character> <character> | |
# [1] 80 152 73 | 8.13889996832763 112.282055189528 Tumor y | |
# [2] 112 183 72 | 10.1263476194327 150.666198586121 Tumor x | |
# [3] 172 242 71 | 7.26723505434266 147.597653003259 Normal t | |
p1 <- autoplot(ir) | |
p2 <- autoplot(ir, aes(fill = pair)) + theme(legend.position = "none") | |
p3 <- autoplot(ir, stat = "coverage", geom = "line", facets = sample ~. ) | |
p4 <- autoplot(ir, stat = "reduce") | |
tracks(p1, p2, p3, p4) | |
# GRanges list simulation | |
set.seed(1) | |
N <- 100 | |
## ====================================================================== | |
## Simulated GRanges | |
## ====================================================================== | |
gr <- GRanges(seqnames = | |
sample(c("chr1", "chr2", "chr3"), | |
size = N, replace = TRUE), | |
IRanges( | |
start = sample(1:300, size = N, replace = TRUE), | |
width = sample(30:40, size = N,replace = TRUE)), | |
strand = sample(c("+", "-", "*"), size = N, | |
replace = TRUE), | |
value = rnorm(N, 10, 3), score = rnorm(N, 100, 30), | |
sample = sample(c("Normal", "Tumor"), | |
size = N, replace = TRUE), | |
pair = sample(letters, size = N, | |
replace = TRUE)) | |
# GRanges object with 100 ranges and 4 metadata columns: | |
# seqnames ranges strand | value score sample pair | |
# <Rle> <IRanges> <Rle> | <numeric> <numeric> <character> <character> | |
# [1] chr1 72-101 * | 13.2233228748337 102.319093682108 Normal n | |
# [2] chr1 195-233 * | 15.6869643225957 91.0939407353014 Tumor e | |
# [3] chr1 293-324 * | 8.19100808918472 64.5027327870199 Tumor f | |
# [4] chr3 114-148 + | 8.82739653783059 100.338780652717 Tumor z | |
grl <- split(gr, values(gr)$pair) | |
grl | |
p1 <- autoplot(grl, group.selfish = TRUE) | |
p2 <- autoplot(grl, group.selfish = TRUE, main.geom = "arrowrect", gap.geom = "segment") | |
tracks(p1, p2) | |
autoplot(grl, aes(fill = ..grl_name..)) | |
## Rle simulation | |
set.seed(1) | |
lambda <- c(rep(0.001, 4500), seq(0.001, 10, length = 500), | |
seq(10, 0.001, length = 500)) | |
lambda | |
## @knitr create | |
xVector <- rpois(1e4, lambda) | |
xRle <- Rle(xVector) | |
xRle | |
## This is useful to plot peaks, enrichemnt around TSS, etc. | |
p1 <- autoplot(xRle) | |
p2 <- autoplot(xRle, nbin = 80) | |
p3 <- autoplot(xRle, geom = "heatmap", nbin = 200) | |
tracks( 'nbin = 30'= p1, "nbin = 80" = p2, "nbin = 200(heatmap)" = p3) | |
## Same here enrichment around a point, different graphics | |
p1 <- autoplot(xRle, stat = "identity") | |
p2 <- autoplot(xRle, stat = "identity", geom = "point", color = "red") | |
tracks( 'line' = p1, "point" = p2) | |
# Overlayed bargraph and heatmap | |
p1 <- autoplot(xRle, type = "viewMaxs", stat = "slice", lower = 5) | |
p2 <- autoplot(xRle, type = "viewMaxs", stat = "slice", lower = 5, geom = "heatmap") | |
tracks( 'bar' = p1, "heatmap" = p2) | |
# Rle List based histograms with different bin width and heatmaps of the same overlayed | |
xRleList <- RleList(xRle, 2L * xRle) | |
xRleList | |
p1 <- autoplot(xRleList) | |
p2 <- autoplot(xRleList, nbin = 80) | |
p3 <- autoplot(xRleList, geom = "heatmap", nbin = 200) | |
tracks( 'nbin = 30' = p1, "nbin = 80" = p2, "nbin = 200(heatmap)" = p3) | |
# Rle List bar graphs and scatter plot overlayed | |
p1 <- autoplot(xRleList, stat = "identity") | |
p2 <- autoplot(xRleList, stat = "identity", geom = "point", color = "red") | |
tracks('line' = p1, "point" = p2) | |
# Zoomed in on the area? Bar graph and heatmap overlayed | |
p1 <- autoplot(xRleList, type = "viewMaxs", stat = "slice", lower = 5) | |
p2 <- autoplot(xRleList, type = "viewMaxs", stat = "slice", lower = 5, geom = "heatmap") | |
tracks('bar' = p1, "heatmap" = p2) | |
# Expanded transcript view and reduced view in separate panels | |
# Gene models obtained from | |
library(TxDb.Hsapiens.UCSC.hg19.knownGene) | |
data(genesymbol, package = "biovizBase") | |
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene | |
p1 <- autoplot(txdb, which = genesymbol["ALDOA"], names.expr = "tx_name:::gene_id") | |
p2 <- autoplot(txdb, which = genesymbol["ALDOA"], stat = "reduce", color = "brown", | |
fill = "brown") | |
tracks(full = p1, reduce = p2, heights = c(5, 1)) + ylab("") | |
library(EnsDb.Hsapiens.v75) | |
ensdb <- EnsDb.Hsapiens.v75 | |
# We are using gene name filter to retrieve transcripts for the gene | |
p1 <- autoplot(ensdb, which = GenenameFilter("ALDOA"), names.expr = "gene_name") | |
p2 <- autoplot(ensdb, which = ~ genename == "ALDOA", stat = "reduce", | |
color = "brown", fill = "brown") | |
# Expanded and reduced view of ALDOA gene transcripts | |
tracks(full = p1, reduce = p2, heights = c(5, 1)) + ylab("") | |
## Alternatively we can specify GRanges filter and display all overlapping genes | |
gr <- GRanges(seqnames=16, IRanges(30768000, 30770000), strand="+") | |
autoplot(ensdb, GRangesFilter(gr, "any"), names.expr="gene_name") | |
## Just submitting the GRanges object also works. | |
autoplot(ensdb, gr, names.expr="gene_name") | |
## Or genes encoded on both strands | |
gr <- GRanges(seqnames = 16, IRanges(30768000, 30770000), strand = "*") | |
autoplot(ensdb, GRangesFilter(gr), names.expr="gene_name") | |
## Also, we can spefify directly the gene ids and plot all transcripts of these | |
## genes (not only those overlapping with the region) | |
autoplot(ensdb, GeneIdFilter(c("ENSG00000196118", "ENSG00000156873"))) | |
######################## Loading and displaying alignments ################################## | |
library(GenomicAlignments) | |
data("genesymbol", package = "biovizBase") | |
# genesymbol is just GRanges object | |
bamfile <- system.file("extdata", "SRR027894subRBM17.bam", | |
package="biovizBase") | |
which <- keepStandardChromosomes(genesymbol["RBM17"]) | |
## Need to set use.names = TRUE | |
ga <- readGAlignments(bamfile, | |
param = ScanBamParam(which = which), | |
use.names = TRUE) | |
ga | |
# Overlay of default, bar and coverage view. | |
p1 <- autoplot(ga) | |
p2 <- autoplot(ga, geom = "rect") | |
p3 <- autoplot(ga, geom = "line", stat = "coverage") | |
tracks(default = p1, rect = p2, coverage = p3) | |
# Plot bedGraph file | |
bedgraph <- "metilene_qval.0.05.bedgraph" | |
autoplot(bedgraph, aes(fill = score)) | |
############################# Matrix ################################### | |
volcano <- volcano[20:70, 20:60] - 150 | |
# Autoplot matrix | |
autoplot(volcano) | |
autoplot(volcano, xlab = "xlab", main = "main", ylab = "ylab") | |
## special scale theme for 0-centered value | |
autoplot(volcano, geom = "raster") + scale_fill_fold_change() | |
## when a matrix has colnames and rownames label them by default | |
colnames(volcano) <- sort(sample(1:300, size = ncol(volcano), replace = FALSE)) | |
autoplot(volcano) + scale_fill_fold_change() | |
rownames(volcano) <- letters[sample(1:24, size = nrow(volcano), replace = TRUE)] | |
autoplot(volcano) | |
## even with row/col names, you could also disable it and just use numeric indel | |
autoplot(volcano, colnames.label = FALSE) | |
autoplot(volcano, rownames.label = FALSE, colnames.label = FALSE) | |
## don't want the axis has label?? | |
autoplot(volcano, axis.text.x = FALSE) | |
autoplot(volcano, axis.text.y = FALSE) | |
# or totally remove axis | |
colnames(volcano) <- lapply(letters[sample(1:24, size = ncol(volcano), | |
replace = TRUE)], | |
function(x){ | |
paste(rep(x, 7), collapse = "") | |
}) | |
autoplot(volcano) | |
# Change angle to deal with overlapping labels | |
autoplot(volcano, axis.text.angle = -45, hjust = 0) | |
## when character is the value | |
x <- sample(c(letters[1:3], NA), size = 100, replace = TRUE) | |
mx <- matrix(x, nrow = 5) | |
autoplot(mx) | |
## tile gives you a white margin | |
rownames(mx) <- LETTERS[1:5] | |
autoplot(mx, color = "white") | |
colnames(mx) <- LETTERS[1:20] | |
autoplot(mx, color = "white") | |
autoplot(mx, color = "white", size = 2) | |
## weird in aes(), though works | |
## default tile is flexible | |
autoplot(mx, aes(width = 0.6, height = 0.6)) | |
autoplot(mx, aes(width = 0.6, height = 0.6), na.value = "white") | |
autoplot(mx, aes(width = 0.6, height = 0.6)) + theme_clear() | |
lambda <- c(rep(0.001, 4500), seq(0.001, 10, length = 500), | |
seq(10, 0.001, length = 500)) | |
xVector <- dnorm(1:5e3, mean = 1e3, sd = 200) | |
xRle <- Rle(xVector) | |
v1 <- Views(xRle, start = sample(.4e3:.6e3, size = 50, replace = FALSE), width =1000) | |
autoplot(v1) | |
names(v1) <- letters[sample(1:24, size = length(v1), replace = TRUE)] | |
autoplot(v1) | |
autoplot(v1, geom = "tile", aes(width = 0.5, height = 0.5)) | |
autoplot(v1, geom = "line") | |
autoplot(v1, geom = "line", aes(color = row)) + theme(legend.position = "none") | |
autoplot(v1, geom = "line", facets = NULL) | |
autoplot(v1, geom = "line", facets = NULL, alpha = 0.1) | |
############################## Expressionset ############################################ | |
library(Biobase) | |
data(sample.ExpressionSet) | |
sample.ExpressionSet | |
set.seed(1) | |
## select 50 features | |
idx <- sample(seq_len(dim(sample.ExpressionSet)[1]), size = 50) | |
eset <- sample.ExpressionSet[idx,] | |
eset | |
autoplot(as.matrix(pData(eset))) | |
## default heatmap | |
p1 <- autoplot(eset) | |
p1 | |
p2 <- p1 + scale_fill_fold_change() | |
p2 | |
autoplot(eset) | |
autoplot(eset, geom = "tile", color = "white", size = 2) | |
autoplot(eset, geom = "tile", aes(width = 0.6, height = 0.6)) | |
autoplot(eset, pheno.plot = TRUE) | |
idx <- order(pData(eset)[,1]) | |
eset2 <- eset[,idx] | |
autoplot(eset2, pheno.plot = TRUE) | |
## default heatmap | |
p1 <- autoplot(eset) | |
p2 <- p1 + scale_fill_fold_change() | |
p2 | |
autoplot(eset) | |
autoplot(eset, geom = "tile", color = "white", size = 2) | |
autoplot(eset, geom = "tile", aes(width = 0.6, height = 0.6)) | |
dev.off() | |
autoplot(eset, pheno.plot = TRUE) | |
idx <- order(pData(eset)[,1]) | |
eset2 <- eset[,idx] | |
autoplot(eset2, pheno.plot = TRUE) | |
## parallel coordinate plot | |
autoplot(eset, type = "pcp") | |
library(SummarizedExperiment) | |
nrows <- 200; ncols <- 6 | |
counts <- matrix(runif(nrows * ncols, 1, 1e4), nrows) | |
counts2 <- matrix(runif(nrows * ncols, 1, 1e4), nrows) | |
rowRanges <- GRanges(rep(c("chr1", "chr2"), c(50, 150)), | |
IRanges(floor(runif(200, 1e5, 1e6)), width=100), | |
strand=sample(c("+", "-"), 200, TRUE)) | |
rowRanges | |
colData <- DataFrame(Treatment=rep(c("ChIP", "Input"), 3), | |
row.names=LETTERS[1:6]) | |
colData | |
sset <- SummarizedExperiment(assays=SimpleList(counts=counts, | |
counts2 = counts2), | |
rowRanges=rowRanges, colData=colData) | |
sset | |
autoplot(sset) + scale_fill_fold_change() | |
dev.off() | |
autoplot(sset, pheno.plot = TRUE) | |
## pcp - parallel coordinates plot | |
autoplot(sset, type = "pcp") | |
## Boxplot | |
autoplot(sset, type = "boxplot") | |
############################################################################# | |
## Plotting BSgenome | |
library(BSgenome.Hsapiens.UCSC.hg19) | |
data(genesymbol, package = "biovizBase") | |
p1 <- autoplot(Hsapiens, which = resize(genesymbol["ALDOA"], width = 50)) | |
p1 | |
p2 <- autoplot(Hsapiens, which = resize(genesymbol["ALDOA"], width = 50), geom = "rect") | |
p2 | |
tracks(text = p1, rect = p2) | |
############################################################################ | |
## Plot ranges linked to data | |
library(TxDb.Hsapiens.UCSC.hg19.knownGene) | |
library(ggbio) | |
data(genesymbol, package = "biovizBase") | |
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene | |
txdb | |
model <- exonsBy(txdb, by = "tx") | |
model17 <- subsetByOverlaps(model, genesymbol["RBM17"]) | |
exons <- exons(txdb) | |
exon17 <- subsetByOverlaps(exons, genesymbol["RBM17"]) | |
exon.new <- reduce(exon17) | |
values(exon.new)$sample1 <- rnorm(length(exon.new), 10, 3) | |
values(exon.new)$sample2 <- rnorm(length(exon.new), 10, 10) | |
values(exon.new)$score <- rnorm(length(exon.new)) | |
values(exon.new)$significant <- sample(c(TRUE,FALSE), size = length(exon.new), replace = TRUE) | |
exon.new | |
# This plot shows expression values for exons same in DEXseq paper | |
plotRangesLinkedToData(exon.new, stat.y = c("sample1", "sample2")) | |
plotRangesLinkedToData(exon.new, stat.y = 1:2) | |
plotRangesLinkedToData(exon.new, stat.y = 1:2, size = 3, linetype = 4) | |
plotRangesLinkedToData(exon.new, stat.y = 1:2, size = 3, linetype = 4, | |
sig = "significant") | |
plotRangesLinkedToData(exon.new, stat.y = 1:2, size = 3, linetype = 4, | |
sig = "significant", sig.col = c("gray90","red")) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment