Skip to content

Instantly share code, notes, and snippets.

@PeteHaitch
Created August 27, 2014 00:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save PeteHaitch/fdb66d360446ff96ed4b to your computer and use it in GitHub Desktop.
Save PeteHaitch/fdb66d360446ff96ed4b to your computer and use it in GitHub Desktop.
Reproducible example for question on Bioc-Devel https://stat.ethz.ch/pipermail/bioc-devel/2014-August/006106.html
## Necessary packages (BioC-devel)
library(GenomicRanges)
library(S4Vectors)
## Class A using a DataFrameOrNULL in internalPos slot
setClassUnion(name = "DataFrameOrNULL", members = c("DataFrame", "NULL"))
setClass("A",
contains = "GRanges",
representation(
internalPos = "DataFrameOrNULL",
size = "integer"),
prototype(
internalPos = NULL,
size = NA_integer_)
)
setMethod(GenomicRanges:::extraColumnSlotNames, "A",
function(x) {
c("internalPos")
})
A <- function(seqnames = Rle(), tuples = matrix(),
strand = Rle("*", length(seqnames)), ...,
seqlengths = NULL, seqinfo = NULL) {
# Get size of tuples
if (all(is.na(tuples))) {
size <- NA_integer_
} else {
size <- ncol(tuples)
}
# Create IRanges
if (!is.na(size)) {
ranges <- IRanges(start = tuples[, 1], end = tuples[, size])
} else {
ranges <- IRanges()
}
# Create internalPos
if (is.na(size) || size < 3) {
internalPos <- NULL
} else {
internalPos <- DataFrame(tuples[, seq(from = 2L, to = size - 1, by = 1L),
drop = FALSE])
}
# Create GRanges
gr <- GRanges(seqnames = seqnames, ranges = ranges, strand = strand,
seqlengths = seqlengths, seqinfo = seqinfo, ...)
new("A", gr, internalPos = internalPos, size = size)
}
# Class B using matrixOrNULL in internalPos slot
setClassUnion(name = "matrixOrNULL", members = c("matrix", "NULL"))
setClass("B",
contains = "GRanges",
representation(
internalPos = "matrixOrNULL",
size = "integer"),
prototype(
internalPos = NULL,
size = NA_integer_)
)
setMethod(GenomicRanges:::extraColumnSlotNames, "B",
function(x) {
c("internalPos")
})
B <- function(seqnames = Rle(), tuples = matrix(),
strand = Rle("*", length(seqnames)), ...,
seqlengths = NULL, seqinfo = NULL) {
# Get size of tuples
if (all(is.na(tuples))) {
size <- NA_integer_
} else {
size <- ncol(tuples)
}
# Create IRanges
if (!is.na(size)) {
ranges <- IRanges(start = tuples[, 1], end = tuples[, size])
} else {
ranges <- IRanges()
}
# Create internalPos
if (is.na(size) || size < 3) {
internalPos <- NULL
} else {
internalPos <- tuples[, seq(from = 2L, to = size - 1, by = 1L),
drop = FALSE]
}
# Create GRanges
gr <- GRanges(seqnames = seqnames, ranges = ranges, strand = strand,
seqlengths = seqlengths, seqinfo = seqinfo, ...)
new("B", gr, internalPos = internalPos, size = size)
}
# Function to make two tuples object of class A or B of given size and replace
# an element of the first with an element of the second to create a third
# object. Essentially, make a call to S4Vectors::replaceROWS with each class.
# If size < 3 then internalPos is NULL otherwise it is a DataFrame
# (resp. matrix) with "size - 2" columns.
f <- function(size = 3, class = c("A", "B")) {
seqinfo <- Seqinfo(paste0("chr", 1:3), c(1000, 2000, 1500), NA, "mock1")
if (class == "A") {
# Create object of class A
x <- A(seqnames = Rle('chr1', 10),
tuples = matrix(seq(1, size * 10), ncol = size), seqinfo = seqinfo)
# Create another object of class A
value <- A(seqnames = Rle('chr3', 1),
tuples = matrix(seq(size * 10 + 1, size * 10 + size), ncol = size),
seqinfo = seqinfo)
# Try to replace the first element of a with aa
xx <- replaceROWS(x, 1, value)
} else if (class == "B") {
# Create object of class B
x <- B(seqnames = Rle('chr1', 1),
tuples = matrix(seq(1, size * 10), ncol = size), seqinfo = seqinfo)
# Create another object of class A
value <- B(seqnames = Rle('chr3', 1),
tuples = matrix(seq(size * 10 + 1, size * 10 + size), ncol = size),
seqinfo = seqinfo)
# Try to replace the first element of a with aa
xx <- replaceROWS(x, 1, value)
}
return(xx)
}
## Define a replaceROWS function with signature NULL
## Perhaps more generally useful than my package?
setMethod("replaceROWS",
"NULL",
function(x, i, value) {
NULL
}
)
## Class A - works regardless of size. By this I mean replaceROWS also updates
# the internalPos slot.
# NB: the show method (which is inherited from GenomicRanges) will fail when
# size < 3 but this isn't important here. Use str() to see that the object
# was indeed properly updated.
a1 <- f(1, "A")
a1@internalPos
a2 <- f(2, "A")
a2@internalPos
# Works if size > 2. By this I mean it also updates the internalPos slot.
a3 <- f(3, "A")
a3@internalPos
a4 <- f(4, "A")
a4@internalPos
## Class B - works if size < 3, fails otherwise.
# NB: the show method (which is inherited from GenomicRanges) will fail when
# size < 3 but this isn't important here. Use str() to see that the object
# was indeed properly updated.
b1 <- f(1, "B")
b1@internalPos
b2 <- f(2, "B")
b2@internalPos
# Fails if size = 3 due to "ans_ecs <- GenomicRanges:::extraColumnSlotsAsDF(x)"
# in the replaceROWS method for B (which is inherited from GenomicRanges).
# This means that the subsequent call to
# "update(x, seqnames = ans_seqnames, ranges = ans_ranges, strand = ans_strand,
# elementMetadata = ans_mcols, .slotList = as.list(ans_ecs))" in replaceROWS
# gets a DataFrame, rather than a matrix, for the internalPos slot and
# causes the error.
b3 <- f(3, "B")
# Fails with a slightly different error if size > 3, which is again due to
# "ans_ecs <- GenomicRanges:::extraColumnSlotsAsDF(x)" in the replaceROWS
# method for B (which is inherited from GenomicRanges). The difference being
# that it fails at the subsequent call to
# "ans_ecs <- replaceROWS(ans_ecs, i, value_ecs[seq_len(ans_necs)])" in
# replaceROWS
b4 <- f(4, "B")
## What I could do:
# (1): Just use class A, i.e. a DataFrame for internalPos, since this works
# out-of-the-box.
# (2) : I could define a replaceROWS method for class B but it would be nice
# for it to work via inheritance to GenomicRanges when the subclass has a
# matrix in a slot that is an extraColumnSlots (unless this is not allowed).
sessionInfo()
#
# R version 3.1.1 (2014-07-10)
# Platform: x86_64-apple-darwin13.1.0 (64-bit)
#
# locale:
# [1] en_AU.UTF-8/en_AU.UTF-8/en_AU.UTF-8/C/en_AU.UTF-8/en_AU.UTF-8
#
# attached base packages:
# [1] parallel stats graphics grDevices utils datasets methods
# [8] base
#
# other attached packages:
# [1] GenomicRanges_1.17.35 GenomeInfoDb_1.1.18 IRanges_1.99.24
# [4] S4Vectors_0.1.2 BiocGenerics_0.11.4
#
# loaded via a namespace (and not attached):
# [1] packrat_0.4.0.12 stats4_3.1.1 tools_3.1.1 XVector_0.5.7
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment