Skip to content

Instantly share code, notes, and snippets.

@mikelove
Last active May 11, 2024 11:05
Show Gist options
  • Save mikelove/d9cdc90dc4fae7c47112e939260cee4e to your computer and use it in GitHub Desktop.
Save mikelove/d9cdc90dc4fae7c47112e939260cee4e to your computer and use it in GitHub Desktop.
Example code for joining two sets of ranges where the second one is also attached to a SummarizedExperiment (SE)
# Join a set of query ranges (r1) to subject ranges (r2) that are the rows of a
# SummarizedExperiment, then reduce back to one SE row per query range by
# keeping, for each query range, the matched subject range whose assay values
# have the highest variance across samples.

library(SummarizedExperiment)
library(plyranges)
library(dplyr) # attached last, so its verbs mask same-named functions above

# Example data ----

set.seed(1) # make the simulated assay values reproducible

m <- matrix(rnorm(600), nrow = 100)

# query ranges, identified by id1
r1 <- data.frame(
  seqnames = 1,
  start = 1:50 * 100 + 2501,
  width = 5,
  id1 = sprintf("u%03d", 1:50)
) |>
  as_granges()

# subject ranges, identified by id2
r2 <- data.frame(
  seqnames = 1,
  start = 1:100 * 100 + 1,
  width = 5,
  id2 = sprintf("v%03d", 1:100)
) |>
  as_granges()

rownames(m) <- r2$id2
colnames(m) <- paste0("sample", 1:6)

# this matrix is associated with r2 (and regions id2)
se <- SummarizedExperiment(
  list(counts = m),
  rowRanges = r2,
  colData = DataFrame(z = 1:6)
)

# The function should do something like this ----

# join based on ranges: one row per (query, subject) pair within 100 bp
olap <- r1 |>
  join_overlap_inner(r2, maxgap = 100)

# expand the SE so that it has one row per pair; a subject range matched by
# several query ranges appears several times (duplicate rows / rownames)
olap_se <- se[olap$id2, ]
rowData(olap_se)$id1 <- olap$id1 # add IDs from the query ranges
# pair identifier used to locate the selected rows again below
rowData(olap_se)$combined <- with(
  rowData(olap_se),
  paste(id1, id2, sep = "-")
)

# compute something, e.g. pick the range with top variance in the assay data
idx <- olap_se |>
  rowData() |>
  as_tibble() |>
  mutate(variance = rowVars(assay(olap_se))) |>
  group_by(id1) |>
  # slice_max() instead of slice(which.max()): same "first maximum per group"
  # selection, without relying on which attached package's slice() is found
  slice_max(variance, n = 1, with_ties = FALSE) |>
  ungroup() |>
  arrange(id1)

# subset to an SE that now matches the size of query ranges
keep <- match(idx$combined, rowData(olap_se)$combined)
stopifnot(!anyNA(keep)) # every selected pair must exist in the expanded SE
final_se <- olap_se[keep, ]
# Join a set of query ranges (r1) to subject ranges (r2) that are the rows of a
# SummarizedExperiment, then summarise the assay per query range: for every
# query range, average each sample's values over all matched subject ranges.

library(SummarizedExperiment)
library(plyranges)
library(dplyr) # tidyverse packages attached last, as in the original order
library(tidyr)
library(purrr)

# Example data ----

set.seed(1) # make the simulated assay values reproducible

m <- matrix(rnorm(600), nrow = 100)

# query ranges, identified by id1
r1 <- data.frame(
  seqnames = 1,
  start = 1:50 * 100 + 2501,
  width = 5,
  id1 = sprintf("u%03d", 1:50)
) |>
  as_granges()

# subject ranges, identified by id2
r2 <- data.frame(
  seqnames = 1,
  start = 1:100 * 100 + 1,
  width = 5,
  id2 = sprintf("v%03d", 1:100)
) |>
  as_granges()

rownames(m) <- r2$id2
colnames(m) <- paste0("sample", 1:6)

# this matrix is associated with r2 (and regions id2)
se <- SummarizedExperiment(
  list(counts = m),
  rowRanges = r2,
  colData = DataFrame(z = 1:6)
)

# The function should do something like this ----

# join based on ranges: one row per (query, subject) pair within 100 bp
olap <- r1 |>
  join_overlap_inner(r2, maxgap = 100)

# expand the SE so that it has one row per pair; a subject range matched by
# several query ranges appears several times (duplicate rows / rownames)
olap_se <- se[olap$id2, ]
rowData(olap_se)$id1 <- olap$id1 # add IDs from the query ranges
rowData(olap_se)$combined <- with(
  rowData(olap_se),
  paste(id1, id2, sep = "-")
)

# Per-query-range sample means ----

# one nested tibble of assay rows per query range, plus its column means
by_query <- rowData(olap_se) |>
  as_tibble() |>
  select(id1) |>
  bind_cols(assay(olap_se)) |>
  nest(data = -id1) |>
  mutate(col_ave = map(data, colMeans))

# stack the per-group mean vectors into a (query range x sample) matrix
ave_mat <- by_query |>
  pull(col_ave) |>
  bind_rows() |>
  as.matrix()

# keep the query-range IDs so each row can be traced back to r1
rownames(ave_mat) <- by_query$id1

ave_mat
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment