Skip to content

Instantly share code, notes, and snippets.

@jokergoo
Last active September 12, 2022 18:27
Show Gist options
  • Save jokergoo/4161997c2a60ac90ae57017b19aea81e to your computer and use it in GitHub Desktop.
Save jokergoo/4161997c2a60ac90ae57017b19aea81e to your computer and use it in GitHub Desktop.
Visualize lift over between two assemblies
library(rtracklayer)
library(circlize)
library(EnrichedHeatmap)
viz_lift_over = function(from = "hg19", to = "hg38",
from_chromosomes = NULL, to_chromosomes = NULL,
window_size = 5000, links_to_sample = 10000) {
to2 = to
substr(to2, 1, 1) = toupper(substr(to2, 1, 1))
chain_file = qq("https://hgdownload.soe.ucsc.edu/gbdb/@{from}/liftOver/@{from}To@{to2}.over.chain.gz")
chain_file_basename = basename(chain_file)
tmp_dir = tempdir()
local_file = qq("@{tmp_dir}/@{chain_file_basename}")
download.file(chain_file, destfile = local_file)
system(qq("gzip -d -f '@{local_file}'"))
chain = import.chain(gsub("\\.gz$", "", local_file))
genome_df = read.chromInfo(species = from)$df
genome_gr = GRanges(seqnames = genome_df[, 1], ranges = IRanges(genome_df[, 2]+1, genome_df[, 3]))
genome_bins = makeWindows(genome_gr, w = window_size)
to_bins = liftOver(genome_bins, chain)
to_bins = reduce(to_bins, min = 500)
n = sapply(start(to_bins), length)
l = n > 0
to_bins = to_bins[l]
genome_bins = genome_bins[l]
# if map to multiple regions in `to`, use the first one
sq = seqnames(to_bins)
pa = PartitioningByEnd(sq)
df_to = data.frame(
chr = as.vector(unlist(sq))[start(pa)],
start = sapply(start(to_bins), function(x) x[1]),
end = sapply(end(to_bins), function(x) x[1])
)
df_from = data.frame(
chr = as.vector(seqnames(genome_bins)),
start = start(genome_bins),
end = end(genome_bins)
)
if(is.null(from_chromosomes)) {
from_chromosomes = circlize:::usable_chromosomes(from)
}
if(is.null(to_chromosomes)) {
to_chromosomes = circlize:::usable_chromosomes(to)
}
l = df_from[, 1] %in% from_chromosomes & df_to[, 1] %in% to_chromosomes
df_from = df_from[l, ]
df_to = df_to[l, ]
from_chromInfo = read.chromInfo(species = from)$df
from_chromInfo = from_chromInfo[from_chromInfo[, 1] %in% from_chromosomes, ]
to_chromInfo = read.chromInfo(species = to)$df
to_chromInfo = to_chromInfo[to_chromInfo[, 1] %in% to_chromosomes, ]
from_chromInfo[ ,1] = paste0("from_", from_chromInfo[, 1])
to_chromInfo[ ,1] = paste0("to_", to_chromInfo[, 1])
chromInfo = rbind(from_chromInfo, to_chromInfo)
chromosome.index = c(paste0("from_", from_chromosomes), paste0("to_", rev(to_chromosomes)))
chromInfo[, 1] = factor(chromInfo[ ,1], levels = chromosome.index)
df_from[, 1] = paste0("from_", df_from[, 1])
df_to[, 1] = paste0("to_", df_to[, 1])
# `to` genome is put on the top of the circle, where the x-axis is reversed
gsize = structure(chromInfo[, 3], names = as.vector(chromInfo[, 1]))
df_to[, 2] = gsize[df_to[, 1]] - df_to[, 2]
df_to[, 3] = gsize[df_to[, 1]] - df_to[, 3]
df_to[, 2:3] = df_to[, 3:2]
circos.clear()
circos.par(gap.after = c(rep(1, length(from_chromosomes) - 1), 5, rep(1, length(to_chromosomes) - 1), 5))
circos.genomicInitialize(chromInfo, plotType = NULL)
circos.track(ylim = c(0, 1), panel.fun = function(x, y) {
circos.text(CELL_META$xcenter, CELL_META$ylim[2] + mm_y(2),
gsub(".*chr", "", CELL_META$sector.index), cex = 0.6, niceFacing = TRUE)
}, track.height = mm_h(1), cell.padding = c(0, 0, 0, 0), bg.border = NA)
circos.track(ylim = c(0, 1), cell.padding = c(0, 0, 0, 0), bg.border = NA, bg.col = "grey", track.height = mm_h(1))
highlight.chromosome(paste0("from_", from_chromosomes),
col = "red", track.index = 1)
highlight.chromosome(paste0("to_", to_chromosomes),
col = "blue", track.index = 1)
if(nrow(df_from) > links_to_sample) {
ind = sample(nrow(df_from), links_to_sample)
} else {
ind = 1:nrow(df_from)
}
col = rand_color(length(to_chromosomes))
names(col) = paste0("to_", to_chromosomes)
circos.genomicLink(df_from[ind, ], df_to[ind, ], col = add_transparency(col[df_to[ind, 1]], 0.9))
set.current.cell(sector.index = paste0("from_", from_chromosomes[round(length(from_chromosomes)*1/5)]), track.index = 1)
circos.arrow(CELL_META$xlim[1], CELL_META$xlim[1] + mm_x(20),
y = 10, col = "grey")
set.current.cell(sector.index = paste0("to_", to_chromosomes[round(length(to_chromosomes)*1/5)]), track.index = 1)
circos.arrow(CELL_META$xlim[2], CELL_META$xlim[2]- mm_x(20),
y = 10, col = "grey")
circos.clear()
text(0.9, -0.9, from)
text(0.9, 0.9, to)
}
options(timeout = 60000)
pdf("lift_over_between_hg19_and_hg38.pdf", width = 10, height = 10)
viz_lift_over("hg19", "hg38")
dev.off()
pdf("lift_over_between_hg19_and_mm10.pdf", width = 10, height = 10)
viz_lift_over("hg19", "mm10")
dev.off()
pdf("lift_over_between_mm9_and_mm10.pdf", width = 10, height = 10)
viz_lift_over("mm9", "mm10")
dev.off()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment