Skip to content

Instantly share code, notes, and snippets.

@mdozmorov
Created November 27, 2020 01:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mdozmorov/edc359ea0ed6ec44d484899363a601fc to your computer and use it in GitHub Desktop.
Save mdozmorov/edc359ea0ed6ec44d484899363a601fc to your computer and use it in GitHub Desktop.
How to liftOver Paired BED data
# How to liftOver Paired BED data
library(dplyr)
library(tidyr)
library(readxl)
library(rtracklayer)
# Landscape of Cohesin-Mediated Chromatin Loops in the Human Genome
# https://www.nature.com/articles/s41586-020-2151-x
# Supplementary Table 4 | Pan-cell type cohesin-mediated chromatin loops, hg19 coordinates, paired BED data
url1 <- "https://static-content.springer.com/esm/art%3A10.1038%2Fs41586-020-2151-x/MediaObjects/41586_2020_2151_MOESM5_ESM.xlsx"
fileNameIn1 <- "loops.xlsx" # Download Excel data
download.file(url1, fileNameIn1)
mtx <- read_xlsx(fileNameIn1, skip = 4) # Read it in
mtx <- mtx[, 1:6] # First 6 columns are coordinates
colnames(mtx) <- c("chrom1", "start1", "end1", "chrom2", "start2", "end2")
# Save as BEDPE
mtx.bedpe <- mtx %>% mutate(unite(as.data.frame(mtx[, 1:6]), col = "name", sep = ":"),
score = ".", strand1 = ".", strand2 = ".")
fileNameOut1 <- "mtx.bedpe"
write.table(as.data.frame(mtx.bedpe), fileNameOut1, sep = "\t", quote = FALSE, col.names = FALSE, row.names = FALSE)
# hg19 to hg38 conversion chain
url2 <- "http://hgdownload.cse.ucsc.edu/goldenpath/hg19/liftOver/hg19ToHg38.over.chain.gz"
fileNameIn2 <- "hg19ToHg38.over.chain.gz"
download.file(url2, fileNameIn2)
system(paste0("gzip -d ", fileNameIn2))
fileNameIn2 <- "hg19ToHg38.over.chain"
ch <- import.chain(fileNameIn2)
# Attempt to liftOver Pairs object
# Import from file
read12 <- import(fileNameOut1, format = "bedpe")
mtx.bedpe.hg38 <- liftOver(read12, ch) # Fails
# Attempt to liftOver individual GRanges, then merge
# Select first read
read1 <- mtx[, 1:3]
colnames(read1) <- c("chr", "start", "end")
# Add metadata identifier column - combined coordinates of both reads
read1 <- read1 %>% mutate(unite(as.data.frame(mtx[, 1:6]), col = "name", sep = ":"))
# GRanges for the first read
GR.read1 <- makeGRangesFromDataFrame(read1)
GR.read1.hg38 <- liftOver(GR.read1, ch)
GR.read1.hg38 %>% unlist() # Metadata missing, can't merge
# Python2 version of liftOver individual reads, then merge
# https://github.com/dphansti/liftOverBedpe
# python liftOverBedpe.py --lift liftOver --chain hg19ToHg38.over.chain --i mtx.bedpe --o mtxhg38.bedpe
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment