Created
March 11, 2022 13:14
-
-
Save mikelove/6951dd3274ef54da8780a09d550703a5 to your computer and use it in GitHub Desktop.
Variant overlapping peaks example
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Variants and peaks coverage | |
# Michael Love | |
# March 11, 2022 | |
library(readr) | |
library(dplyr) | |
library(tidyr) | |
library(plyranges) | |
# read_narrowpeaks -> just works | |
p <- read_narrowpeaks("485_peaks_blacklistFilter.narrowPeak", genome="hg19") | |
# custom import code for the variants table | |
v0 <- read_tsv("all.var.tsv") | |
v <- v0 %>% | |
select(seqnames=chr, start=hg19_pos, | |
name=Orig_name, lead=Lead, | |
rsid, alt, ref, n_alt, lookup_code) %>% | |
mutate(width=1) %>% # granges needs a 'width' or 'end' | |
drop_na(start) %>% # variants that don't have hg19 pos | |
as_granges() # data.frame -> GRanges conversion | |
genome(v) <- "hg19" # helps avoid build inconsistency errors | |
# add peak data as additional columns, somewhat a hack | |
p_trim <- p %>% select(peak=name) %>% | |
mutate(peak_start=start, peak_width=width) | |
# filter lead only, remove lead column, overlap with peaks | |
o <- v %>% filter(lead==1) %>% | |
select(-lead) %>% | |
join_overlap_inner(p_trim) | |
# now we have the start and width of the overlapping peak, | |
# so we can compute relative position of the variant | |
o <- o %>% mutate(rel_pos = start - peak_start + 1, | |
rel_frac = rel_pos / peak_width) | |
# sanity check | |
all(o$rel_pos > 0) | |
all(o$rel_frac > 0) | |
all(o$rel_frac <= 1) | |
# histogram of rel_frac | |
library(ggplot2) | |
dat <- o %>% as.data.frame() %>% tibble() | |
ggplot(dat, aes(rel_frac)) + geom_histogram() | |
# histogram of rel_frac ~ peak_width | |
range(dat$peak_width) | |
dat <- dat %>% | |
mutate(width_bin = cut(peak_width, | |
breaks=c(200,300,400,500, | |
1000,2000,4000), | |
include.lowest=TRUE)) | |
dat %>% ggplot(aes(rel_frac)) + | |
geom_histogram(bins=15) + | |
facet_wrap(~width_bin) | |
dat %>% ggplot(aes(rel_frac)) + | |
geom_histogram(aes(y=..density..), bins=15) + | |
geom_density(color="red", size=1.5) + | |
facet_wrap(~width_bin) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment