Skip to content

Instantly share code, notes, and snippets.

@mikelove
Created March 11, 2022 13:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mikelove/6951dd3274ef54da8780a09d550703a5 to your computer and use it in GitHub Desktop.
Save mikelove/6951dd3274ef54da8780a09d550703a5 to your computer and use it in GitHub Desktop.
Variant overlapping peaks example
# Variants and peaks coverage
# Michael Love
# March 11, 2022
library(readr)
library(dplyr)
library(tidyr)
library(plyranges)
# read_narrowpeaks -> just works
p <- read_narrowpeaks("485_peaks_blacklistFilter.narrowPeak", genome="hg19")
# custom import code for the variants table
v0 <- read_tsv("all.var.tsv")
v <- v0 %>%
select(seqnames=chr, start=hg19_pos,
name=Orig_name, lead=Lead,
rsid, alt, ref, n_alt, lookup_code) %>%
mutate(width=1) %>% # granges needs a 'width' or 'end'
drop_na(start) %>% # variants that don't have hg19 pos
as_granges() # data.frame -> GRanges conversion
genome(v) <- "hg19" # helps avoid build inconsistency errors
# add peak data as additional columns, somewhat a hack
p_trim <- p %>% select(peak=name) %>%
mutate(peak_start=start, peak_width=width)
# filter lead only, remove lead column, overlap with peaks
o <- v %>% filter(lead==1) %>%
select(-lead) %>%
join_overlap_inner(p_trim)
# now we have the start and width of the overlapping peak,
# so we can compute relative position of the variant
o <- o %>% mutate(rel_pos = start - peak_start + 1,
rel_frac = rel_pos / peak_width)
# sanity check
all(o$rel_pos > 0)
all(o$rel_frac > 0)
all(o$rel_frac <= 1)
# histogram of rel_frac
library(ggplot2)
dat <- o %>% as.data.frame() %>% tibble()
ggplot(dat, aes(rel_frac)) + geom_histogram()
# histogram of rel_frac ~ peak_width
range(dat$peak_width)
dat <- dat %>%
mutate(width_bin = cut(peak_width,
breaks=c(200,300,400,500,
1000,2000,4000),
include.lowest=TRUE))
dat %>% ggplot(aes(rel_frac)) +
geom_histogram(bins=15) +
facet_wrap(~width_bin)
dat %>% ggplot(aes(rel_frac)) +
geom_histogram(aes(y=..density..), bins=15) +
geom_density(color="red", size=1.5) +
facet_wrap(~width_bin)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment