Created
November 27, 2020 01:33
-
-
Save mdozmorov/edc359ea0ed6ec44d484899363a601fc to your computer and use it in GitHub Desktop.
How to liftOver Paired BED data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# How to liftOver Paired BED data | |
library(dplyr) | |
library(tidyr) | |
library(readxl) | |
library(rtracklayer) | |
# Landscape of Cohesin-Mediated Chromatin Loops in the Human Genome | |
# https://www.nature.com/articles/s41586-020-2151-x | |
# Supplementary Table 4 | Pan-cell type cohesin-mediated chromatin loops, hg19 coordinates, paired BED data | |
url1 <- "https://static-content.springer.com/esm/art%3A10.1038%2Fs41586-020-2151-x/MediaObjects/41586_2020_2151_MOESM5_ESM.xlsx" | |
fileNameIn1 <- "loops.xlsx" # Download Excel data | |
download.file(url1, fileNameIn1) | |
mtx <- read_xlsx(fileNameIn1, skip = 4) # Read it in | |
mtx <- mtx[, 1:6] # First 6 columns are coordinates | |
colnames(mtx) <- c("chrom1", "start1", "end1", "chrom2", "start2", "end2") | |
# Save as BEDPE | |
mtx.bedpe <- mtx %>% mutate(unite(as.data.frame(mtx[, 1:6]), col = "name", sep = ":"), | |
score = ".", strand1 = ".", strand2 = ".") | |
fileNameOut1 <- "mtx.bedpe" | |
write.table(as.data.frame(mtx.bedpe), fileNameOut1, sep = "\t", quote = FALSE, col.names = FALSE, row.names = FALSE) | |
# hg19 to hg38 conversion chain | |
url2 <- "http://hgdownload.cse.ucsc.edu/goldenpath/hg19/liftOver/hg19ToHg38.over.chain.gz" | |
fileNameIn2 <- "hg19ToHg38.over.chain.gz" | |
download.file(url2, fileNameIn2) | |
system(paste0("gzip -d ", fileNameIn2)) | |
fileNameIn2 <- "hg19ToHg38.over.chain" | |
ch <- import.chain(fileNameIn2) | |
# Attempt to liftOver Pairs object | |
# Import from file | |
read12 <- import(fileNameOut1, format = "bedpe") | |
mtx.bedpe.hg38 <- liftOver(read12, ch) # Fails | |
# Attempt to liftOver individual GRanges, then merge | |
# Select first read | |
read1 <- mtx[, 1:3] | |
colnames(read1) <- c("chr", "start", "end") | |
# Add metadata identifier column - combined coordinates of both reads | |
read1 <- read1 %>% mutate(unite(as.data.frame(mtx[, 1:6]), col = "name", sep = ":")) | |
# GRanges for the first read | |
GR.read1 <- makeGRangesFromDataFrame(read1) | |
GR.read1.hg38 <- liftOver(GR.read1, ch) | |
GR.read1.hg38 %>% unlist() # Metadata missing, can't merge | |
# Python2 version of liftOver individual reads, then merge | |
# https://github.com/dphansti/liftOverBedpe | |
# python liftOverBedpe.py --lift liftOver --chain hg19ToHg38.over.chain --i mtx.bedpe --o mtxhg38.bedpe |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment