Created
December 22, 2020 04:46
-
-
Save SamBuckberry/e215b822c5f4694333b4eb723f06dc17 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(magrittr) | |
library(GenomicRanges) | |
library(BSgenome.Mmusculus.UCSC.mm10) | |
library(rtracklayer) | |
library(reshape2) | |
library(ggplot2) | |
library(dplyr) | |
library(JASPAR2016) | |
library(TFBSTools) | |
library(stringr) | |
source("~/polo_iPSC/R_functions/project_functions.R") | |
# Get the PWM data from the database and reformat | |
opts <- list() | |
opts[["all_versions"]] <- TRUE | |
opts[["collection"]] <- "CORE" | |
opts[["matrixtype"]] <- "PFM" | |
opts[["tax_group"]] <- "vertebrates" | |
# pwm_data <- getMatrixSet(JASPAR2014, opts) | |
# pwm_matrix_list_pfm <- Matrix(pwm_data) | |
# | |
# opts <- list() | |
# opts[["all_versions"]] <- TRUE | |
# opts[["collection"]] <- "PBM" | |
# opts[["matrixtype"]] <- "PFM" | |
# opts[["tax_group"]] <- "vertebrates" | |
# pwm_data <- getMatrixSet(JASPAR2014, opts) | |
# pwm_matrix_list_pbm <- Matrix(pwm_data) | |
#test <- getMatrixByName(JASPAR2016, name=c("T")) | |
pfmList <- getMatrixSet(JASPAR2016, opts) | |
pfmList <- Matrix(pfmList) | |
olig1 <- pfmList$MA0826.1 | |
# Combine databases | |
pwm_matrix_list <- c(pwm_matrix_list_pfm, pwm_matrix_list_pbm) | |
save(all_pwm, file = "~/polo_iPSC/resources/pwm_matrix_list.Rda") | |
load(file = "~/polo_iPSC/resources/pwm_matrix_list.Rda") | |
### Set the clusters into 4 groups | |
c1 <- read_bed("~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/cluster_1.bed") | |
c2 <- read_bed("~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/cluster_2.bed") | |
c3 <- read_bed("~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/cluster_3.bed") | |
c4 <- read_bed("~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/cluster_4.bed") | |
c5 <- read_bed("~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/cluster_5.bed") | |
c6 <- read_bed("~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/cluster_6.bed") | |
c7 <- read_bed("~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/cluster_7.bed") | |
c8 <- read_bed("~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/cluster_8.bed") | |
early <- c5 %>% gr_to_bed(out_path = "~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/early.bed") | |
late <- GRangesList(c3, c7) %>% unlist() %>% | |
gr_to_bed(out_path = "~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/late.bed") | |
trans <- GRangesList(c1, c6) %>% unlist() %>% | |
gr_to_bed(out_path = "~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/trans.bed") | |
loss <- GRangesList(c2, c4) %>% unlist() %>% | |
gr_to_bed(out_path = "~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/loss.bed") | |
stable <- c8 %>% gr_to_bed(out_path = "~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/stable.bed") | |
# Get mofif PWMs | |
#os <- pwm_matrix_list$MA0142.1 | |
#klf4 <- pwm_matrix_list$MA0039.2 | |
zfp691 <- pwm_matrix_list$PB0098.1 | |
olig1 <- pfmList$MA0826.1 | |
ctcf <- pfmList$MA0139.1 | |
klf4 <- pfmList$MA0039.2 | |
os <- pfmList$MA0142.1 | |
brachyury <- pfmList$MA0009.2 | |
eomes <- pfmList$MA0800.1 | |
meox1 <- pfmList$MA0661.1 | |
nrf1 <- pfmList$MA0506.1 | |
tead4 <- pfmList$MA0809.1 | |
klf5 <- pfmList$MA0599.1 | |
yy1 <- pfmList$MA0095.2 | |
sp1 <- pfmList$MA0079.2 | |
tcf4 <- pfmList$MA0830.1 | |
myc <- pfmList$MA0147.2 | |
ap1 <- pfmList$MA0099.1 | |
nfy <- pfmList$MA0060.1 | |
# Set bed file for footprinting | |
early_bed <- "~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/early.bed" | |
late_bed <- "~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/late.bed" | |
trans_bed <- "~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/trans.bed" | |
loss_bed <- "~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/loss.bed" | |
stable_bed <- "~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/stable.bed" | |
ips_occ <- "/Volumes/Datasets/atac_iPSC_combined_replicates.occ.bigwig" | |
d9_occ <- "/Volumes/Datasets/atac_d9_combined_replicates.occ.bigwig" | |
mef_occ <- "/Volumes/Datasets/atac_MEF_combined_replicates.occ.bigwig" | |
# PLot footprint series function | |
plot_footprint_series <- function(pwm, bed_file, occ_bigwig, min.score="75%", title=""){ | |
### ATAC-seq data import | |
ins_fls <- list.files("/Volumes/Datasets/", | |
pattern = "_combined_replicates.ins.bigwig", | |
full.names = TRUE)[c(6,2:4,1,5)] | |
norm_factors <- read.table("~/polo_iPSC/ATACseq/mapping_stats.txt")[c(6,2:4,1,5), 6] | |
atac_bias <- "/Volumes/Datasets/mm10_bias.Scores.bigwig" | |
# NucleoATAC | |
nuc_fls <- list.files("/Volumes/Datasets/", | |
pattern = ".nucleoatac_signal.bigwig", | |
full.names = TRUE)[c(6,2:4,1,5)] | |
#occ_bigwig <- "/Volumes/Datasets/atac_iPSC_combined_replicates.occ.bigwig" | |
### Get regions with PWM match | |
regions <- read_bed(bed_file) | |
regions_pwm <- motif_gr(regions, pwm = pwm, min.score = min.score) | |
# Split by nucleosome occupancy class | |
occ <- range_summary(bigwig = occ_bigwig, | |
gr = regions_pwm, range = 20, | |
log = FALSE, aggregate = FALSE) | |
occ_means <- rowMeans(occ) | |
set_occ_class <- function(x){ | |
dat <- NULL | |
if (x < 0.1) { | |
dat <- "A" | |
} else if (x >= 0.1 & x < 0.3) { | |
dat <- "B" | |
} else if (x >= 0.3 & x < 0.5) { | |
dat <- "C" | |
} else if (x > 0.5) { | |
dat <- "D" | |
} | |
return(dat) | |
} | |
occ_class <- lapply(occ_means, set_occ_class) %>% unlist() | |
## Calc ins / bias | |
calc_ins_over_bias <- function(x, bias, occ_bigwig, gr, range=150){ | |
# Get the bigwig file and lib size | |
bigwig <- ins_fls[x] | |
lib_size <- norm_factors[x] / 1e6 | |
# Extract ATAC signal | |
ins <- range_summary(bigwig = bigwig, gr = gr, range = range, | |
log = FALSE, aggregate = FALSE) | |
# Count normalise | |
ins <- ins / lib_size | |
# Get signal bias | |
bias_scores <- range_summary(bigwig = bias, gr = gr, range = range, | |
log = TRUE, aggregate = FALSE) | |
## INSERTION / BIAS calc on individual positions | |
# Normalise | |
ins_over_bias <- ins / bias_scores | |
ins_over_bias$class <- occ_class | |
dat_melt <- melt(ins_over_bias) | |
dat_mean <- dat_melt %>% | |
group_by(variable, class) %>% | |
summarise(yy=mean(value)) | |
# ######## AGGREGATE insertions / bias | |
# # Average for each class for ins and bias seperately calculation | |
# ins$class <- occ_class | |
# dat_melt <- melt(ins) | |
# dat_mean <- dat_melt %>% | |
# group_by(variable, class) %>% | |
# summarise(yy=mean(value)) | |
# | |
# bias_scores$class <- occ_class | |
# bias_melt <- melt(bias_scores) | |
# bias_mean <- bias_melt %>% | |
# group_by(variable, class) %>% | |
# summarise(yy=mean(value)) | |
# | |
# all(dat_mean$class == bias_mean$class) | |
# all(dat_mean$variable == bias_mean$variable) | |
# | |
# dat_mean$yy <- dat_mean$yy / bias_mean$yy | |
###### | |
# Mean calculation for each postion | |
#agg <- colMeans(ins_over_bias) | |
# Reformat | |
#df <- data.frame(x=-range:range, y=agg) | |
# Get the sample ID | |
id <- basename(bigwig) %>% str_split(pattern = "_") %>% unlist() | |
#df$id <- id[2] | |
dat_mean$id <- id[2] | |
dat_mean$variable <- as.character(dat_mean$variable) %>% as.numeric() | |
return(dat_mean) | |
} | |
footprints <- lapply(1:length(ins_fls), calc_ins_over_bias, | |
bias=atac_bias, gr=regions_pwm, | |
occ_bigwig=occ_bigwig) | |
footprints <- do.call(rbind, footprints) | |
footprints$id <- factor(footprints$id, levels=c("MEF", "d3", "d6", "d9", "d12", "iPSC")) | |
# Set the title with group sizes | |
sz <- table(occ_class) | |
sz <- str_c(names(sz), "=", sz) %>% | |
paste(sep="", collapse=", ") | |
main <- str_c(title, sz, sep = " ") | |
gg <- ggplot(footprints, aes(x = variable, y = yy, fill = 'blue')) + | |
#stat_smooth(span = 10) + | |
#geom_area(aes(fill = 'blue', stat = "bin")) + | |
geom_line(size=0.2) + | |
facet_grid(id~class) + | |
xlab("Distance from motif") + | |
ylab("Normalised insertions / insertion bias") + | |
ggtitle(main) + | |
theme_bw() + | |
theme(plot.background = element_blank(), | |
panel.border = element_blank(), | |
strip.background = element_rect(size = 0), | |
panel.grid.minor = element_blank(), | |
axis.line = element_line(), | |
axis.text = element_text(size=8)) | |
return(gg) | |
} | |
# Early footprints -------------------------------------------------------- | |
pdf("~/polo_iPSC/ATACseq/plots/footprinting/early_cluster_footprint_series.pdf", width = 5, height = 5) | |
plot_footprint_series(pwm = os, bed_file = early_bed, occ_bigwig = ips_occ, | |
min.score = "75%", title = "Oct4-Sox2") | |
plot_footprint_series(pwm = klf4, bed_file = early_bed, occ_bigwig = ips_occ, | |
min.score = "85%", title = "Klf4") | |
plot_footprint_series(pwm = ctcf, bed_file = early_bed, occ_bigwig = ips_occ, | |
min.score = "85%", title = "CTCF") | |
plot_footprint_series(pwm = tead4, bed_file = early_bed, occ_bigwig = ips_occ, | |
min.score = "85%", title = "TEAD4") | |
plot_footprint_series(pwm = tcf4, bed_file = early_bed, occ_bigwig = ips_occ, | |
min.score = "85%", title = "Tcf4") | |
dev.off() | |
pdf("~/polo_iPSC/ATACseq/plots/footprinting/stable_cluster_footprint_series.pdf") | |
plot_footprint_series(pwm = os, bed_file = stable_bed, occ_bigwig = ips_occ, | |
min.score = "75%", title = "Oct4-Sox2") | |
plot_footprint_series(pwm = klf4, bed_file = stable_bed, occ_bigwig = ips_occ, | |
min.score = "85%", title = "Klf4") | |
plot_footprint_series(pwm = ctcf, bed_file = stable_bed, occ_bigwig = ips_occ, | |
min.score = "75%", title = "CTCF") | |
plot_footprint_series(pwm = sp1, bed_file = stable_bed, occ_bigwig = ips_occ, | |
min.score = "85%", title = "SP1") | |
dev.off() | |
pdf("~/polo_iPSC/ATACseq/plots/footprinting/late_cluster_footprint_series.pdf") | |
plot_footprint_series(pwm = yy1, bed_file = late_bed, occ_bigwig = ips_occ, | |
min.score = "75%", title = "Yy1") | |
plot_footprint_series(pwm = nfy, bed_file = late_bed, occ_bigwig = ips_occ, | |
min.score = "80%", title = "Nfy") | |
dev.off() | |
pdf("~/polo_iPSC/ATACseq/plots/footprinting/transient_cluster_footprint_series.pdf") | |
plot_footprint_series(pwm = os, bed_file = trans_bed, occ_bigwig = d9_occ, | |
min.score = "75%", title = "Oct4-Sox2") | |
plot_footprint_series(pwm = klf4, bed_file = trans_bed, occ_bigwig = d9_occ, | |
min.score = "85%", title = "Klf4") | |
plot_footprint_series(pwm = olig1, bed_file = trans_bed, occ_bigwig = d9_occ, | |
min.score = "85%", title = "Olig1") | |
plot_footprint_series(pwm = ctcf, bed_file = trans_bed, occ_bigwig = d9_occ, | |
min.score = "85%", title = "CTCF") | |
plot_footprint_series(pwm = brachyury, bed_file = trans_bed, occ_bigwig = d9_occ, | |
min.score = "85%", title = "T (Brachyury)") | |
plot_footprint_series(pwm = eomes, bed_file = trans_bed, occ_bigwig = d9_occ, | |
min.score = "85%", title = "Eomes") | |
plot_footprint_series(pwm = nrf1, bed_file = trans_bed, occ_bigwig = d9_occ, | |
min.score = "85%", title = "Nrf1") | |
plot_footprint_series(pwm = tead4, bed_file = trans_bed, occ_bigwig = d9_occ, | |
min.score = "85%", title = "Tead4") | |
plot_footprint_series(pwm = klf5, bed_file = trans_bed, occ_bigwig = d9_occ, | |
min.score = "85%", title = "Klf5") | |
plot_footprint_series(pwm = myc, bed_file = trans_bed, occ_bigwig = d9_occ, | |
min.score = "85%", title = "c-Myc") | |
plot_footprint_series(pwm = ap1, bed_file = trans_bed, occ_bigwig = d9_occ, | |
min.score = "85%", title = "AP-1") | |
dev.off() | |
plot_footprint_series(pwm = olig1, bed_file = early_bed, occ_bigwig = ips_occ, | |
min.score = "75%", title = "Olig1") | |
plot_footprint_series(pwm = ctcf, bed_file = early_bed, occ_bigwig = ips_occ, | |
min.score = "75%", title = "CTCF") | |
plot_footprint_series(pwm = brachyury, bed_file = early_bed, occ_bigwig = ips_occ, | |
min.score = "75%", title = "T (Brachyury)") | |
plot_footprint_series(pwm = eomes, bed_file = early_bed, occ_bigwig = ips_occ, | |
min.score = "75%", title = "Eomes") | |
plot_footprint_series(pwm = nrf1, bed_file = early_bed, occ_bigwig = ips_occ, | |
min.score = "75%", title = "Nrf1") | |
plot_footprint_series(pwm = tead4, bed_file = early_bed, occ_bigwig = ips_occ, | |
min.score = "85%", title = "Tead4") | |
plot_footprint_series(pwm = klf5, bed_file = early_bed, occ_bigwig = ips_occ, | |
min.score = "85%", title = "Klf5") | |
dev.off() | |
# Late footprints --------------------------------------------------------- | |
pdf("~/polo_iPSC/ATACseq/plots/footprinting/late_cluster_footprint_series.pdf") | |
plot_footprint_series(pwm = os, bed_file = late_bed, occ_bigwig = ips_occ, | |
min.score = "75%", title = "Oct4-Sox2") | |
plot_footprint_series(pwm = klf4, bed_file = late_bed, occ_bigwig = ips_occ, | |
min.score = "85%", title = "Klf4") | |
plot_footprint_series(pwm = olig1, bed_file = late_bed, occ_bigwig = ips_occ, | |
min.score = "75%", title = "Olig1") | |
plot_footprint_series(pwm = ctcf, bed_file = late_bed, occ_bigwig = ips_occ, | |
min.score = "75%", title = "CTCF") | |
plot_footprint_series(pwm = brachyury, bed_file = late_bed, occ_bigwig = ips_occ, | |
min.score = "75%", title = "T (Brachyury)") | |
plot_footprint_series(pwm = eomes, bed_file = late_bed, occ_bigwig = ips_occ, | |
min.score = "75%", title = "Eomes") | |
plot_footprint_series(pwm = nrf1, bed_file = late_bed, occ_bigwig = ips_occ, | |
min.score = "75%", title = "Nrf1") | |
plot_footprint_series(pwm = tead4, bed_file = late_bed, occ_bigwig = ips_occ, | |
min.score = "85%", title = "Tead4") | |
plot_footprint_series(pwm = klf5, bed_file = late_bed, occ_bigwig = ips_occ, | |
min.score = "85%", title = "Klf5") | |
dev.off() | |
# Transient footprints ---------------------------------------------------- | |
# Loss footprints | |
pdf("~/polo_iPSC/ATACseq/plots/footprinting/loss_cluster_footprint_series.pdf") | |
plot_footprint_series(pwm = meox1, bed_file = loss_bed, occ_bigwig = mef_occ, | |
min.score = "85%", title = "Meox1") | |
plot_footprint_series(pwm = ap1, bed_file = loss_bed, occ_bigwig = mef_occ, | |
min.score = "85%", title = "AP1") | |
dev.off() | |
# Plot footprint heatmaps ------------------------------------------------- | |
calc_position_average <- function(bigwig_list, gr, name_list, pwm, range=500, | |
min.score="75%"){ | |
# Set the | |
gr <- motif_gr(gr = gr, pwm = pwm, min.score = min.score) | |
range_average <- function(x){ | |
df <- range_summary(bigwig = bigwig_list[x], gr = gr, range = range, | |
log = FALSE, aggregate = FALSE) %>% melt() | |
df$variable <- as.character(df$variable) | |
df <- df %>% dplyr::group_by(variable) %>% dplyr::summarise(yy=mean(value)) | |
df$variable <- as.character(df$variable) %>% as.numeric() | |
df$id <- name_list[x] | |
return(df) | |
} | |
dat <- lapply(1:length(bigwig_list), range_average) | |
dat <- do.call(rbind, dat) | |
return(dat) | |
} | |
## Heatmap footprints | |
name_list <- c("MEF", "d3", "d6", "d9", "d12", "iPSC") | |
heatmap_footprints <- function(bigwig_list, gr, name_list, pwm, title="", range=500, | |
min.score="75%", heat_cols=topo.colors(100)){ | |
df <- calc_position_average(bigwig_list = bigwig_list, gr = gr, | |
name_list = name_list, pwm = pwm, range = range, | |
min.score = min.score) | |
df$id <- factor(df$id, levels=c("MEF", "d3", "d6", "d9", "d12", "iPSC")) | |
colnames(df) <- c("variable", "Density", "id") | |
gg <- ggplot(df, aes(x = variable, y = 1, fill = Density)) + | |
facet_grid(id~.) + | |
geom_line() + | |
geom_raster(interpolate=TRUE) + | |
scale_fill_gradientn(colours = heat_cols) + | |
ylab("") + | |
xlab("Distance to motif") + | |
ggtitle(title) + | |
theme_bw() + | |
scale_x_continuous(expand = c(0, 0)) + | |
scale_y_continuous(expand = c(0, 0)) + | |
theme(axis.text = element_text(size=8), | |
axis.text.y = element_blank(), | |
axis.ticks.y = element_blank(), | |
panel.grid = element_blank(), | |
#panel.border = element_blank(), | |
strip.text.y = element_text(angle = 0), | |
strip.background = element_rect(fill = "white", | |
color = "white")) | |
gg | |
} | |
heatmap_normalised_insertions <- function(bigwig_list, gr, name_list, pwm, | |
heat_cols=topo.colors(100), | |
title="", range=range, min.score="75%"){ | |
calc_normalised_average <- function(bigwig_list, gr, name_list, pwm, | |
range=500, min.score="75%"){ | |
# Set the | |
gr <- motif_gr(gr = gr, pwm = pwm, min.score = min.score) | |
range_average <- function(x){ | |
df <- range_summary(bigwig = bigwig_list[x], gr = gr, range = range, | |
log = FALSE, aggregate = FALSE) %>% melt() | |
df <- df %>% group_by(variable) %>% summarise(yy=mean(value)) | |
df$variable <- as.character(df$variable) %>% as.numeric() | |
df$id <- name_list[x] | |
# Normalise insertion count by library size | |
norm_factors <- read.table("ATACseq/mapping_stats.txt")[c(6,2:4,1,5), 6] | |
lib_size <- norm_factors[x] / 1e6 | |
df$yy <- df$yy / lib_size | |
return(df) | |
} | |
dat <- lapply(1:length(bigwig_list), range_average) | |
dat <- do.call(rbind, dat) | |
return(dat) | |
} | |
df <- calc_normalised_average(bigwig_list = bigwig_list, gr = gr, | |
name_list = name_list, pwm = pwm, range = range, | |
min.score = min.score) | |
df$id <- factor(df$id, levels=c("MEF", "d3", "d6", "d9", "d12", "iPSC")) | |
colnames(df) <- c("variable", "Density", "id") | |
gg <- ggplot(df, aes(x = variable, y = 1, fill = Density)) + | |
facet_grid(id~.) + | |
#geom_line() + | |
geom_raster(interpolate=TRUE) + | |
scale_fill_gradientn(colours = heat_cols) + | |
ylab("") + | |
xlab("Distance to motif") + | |
ggtitle(title) + | |
theme_bw() + | |
theme(axis.text = element_text(size=8), | |
axis.text.y = element_blank(), | |
axis.ticks.y = element_blank(), | |
panel.grid = element_blank(), | |
panel.border = element_blank(), | |
panel.spacing = unit(0, "lines"), | |
strip.text.y = element_text(angle = 0), | |
strip.background = element_rect(fill = "white", | |
color = "white")) | |
gg | |
} | |
calc_meth_average <- function(bigwig_list, gr, name_list, pwm, range=500, | |
min.score="75%"){ | |
# Set the | |
gr <- motif_gr(gr = gr, pwm = pwm, min.score = min.score) | |
range_average <- function(x){ | |
df <- range_summary_meth(bigwig = bigwig_list[x], gr = gr, | |
range = range, log = FALSE, | |
aggregate = FALSE) %>% melt() | |
## Mean with NA's removed | |
df <- df %>% group_by(variable) %>% | |
summarise(yy=mean(value, na.rm = TRUE)) | |
df$variable <- as.character(df$variable) %>% as.numeric() | |
df$id <- name_list[x] | |
return(df) | |
} | |
dat <- lapply(1:length(bigwig_list), range_average) | |
dat <- do.call(rbind, dat) | |
return(dat) | |
} | |
heatmap_footprints_methylation <- function(bigwig_list, gr, name_list, pwm, title="", range=500, | |
min.score="75%", heat_cols=topo.colors(100)){ | |
df <- calc_meth_average(bigwig_list = bigwig_list, gr = gr, | |
name_list = name_list, pwm = pwm, range = range, | |
min.score = min.score) | |
df$id <- factor(df$id, levels=c("MEF", "d3", "d6", "d9", "d12", "iPSC")) | |
colnames(df) <- c("variable", "Density", "id") | |
gg <- ggplot(df, aes(x = variable, y = 1, fill = Density)) + | |
facet_grid(id~.) + | |
#geom_line() + | |
geom_raster(interpolate=TRUE) + | |
scale_fill_gradientn(colours = heat_cols) + | |
scale_x_continuous(expand = c(0, 0)) + | |
scale_y_continuous(expand = c(0, 0)) + | |
ylab("") + | |
xlab("Distance to motif") + | |
ggtitle(title) + | |
theme_bw() + | |
theme(axis.text = element_text(size=8), | |
axis.text.y = element_blank(), | |
axis.ticks.y = element_blank(), | |
panel.grid = element_blank(), | |
#panel.border = element_blank(), | |
strip.text.y = element_text(angle = 0), | |
strip.background = element_rect(fill = "white", | |
color = "white")) | |
gg | |
} | |
early <- bed_to_gr("~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/early.bed") | |
trans <- bed_to_gr("~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/trans.bed") | |
late <- bed_to_gr("~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/late.bed") | |
loss <- bed_to_gr("~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/loss.bed") | |
stable <- bed_to_gr("~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/stable.bed") | |
# ChIP-seq | |
oct_fls <- list.files(path = "/Volumes/Datasets", pattern = "_oct_subtract.bw", | |
full.names = TRUE)[c(10,2,4,6,1,9)] | |
sox_fls <- list.files(path = "/Volumes/Datasets", pattern = "_sox_subtract.bw", | |
full.names = TRUE)[c(10,3,5,6,1,9)] | |
# ATAC-seq | |
nuc_fls <- list.files("/Volumes/Datasets/", | |
pattern = ".nucleoatac_signal.bigwig", | |
full.names = TRUE)[c(6,2:4,1,5)] | |
occ_fls <- list.files("/Volumes/Datasets/", | |
pattern = "_combined_replicates.occ.bigwig", | |
full.names = TRUE)[c(6,2:4,1,5)] | |
ins_fls <- list.files("/Volumes/Datasets/", | |
pattern = "_combined_replicates.ins.bigwig", | |
full.names = TRUE)[c(6,2:4,1,5)] | |
# Methylation files | |
mc_fls <- list.files("/Volumes/Datasets/", pattern = "mmiPS", full.names = TRUE) | |
# Generate the plots | |
heatmap_footprints(bigwig_list = occ_fls, gr = early, range = 750, | |
title = "Early Oct4-Sox2 Nucleosome occupancy", | |
name_list = name_list, pwm = os) | |
heatmap_footprints(bigwig_list = nuc_fls, gr = early, range = 750, | |
title = "C1 Oct4-Sox2 NucleoATAC signal", | |
name_list = name_list, pwm = os_pwm) | |
reds <- colorRampPalette(colors = c('white', 'orange', 'red')) | |
blues <- colorRampPalette(colors = c('white', 'green', 'blue')) | |
cool_warm <- colorRampPalette(c('blue4', 'green4', 'yellow1')) | |
black_red <- colorRampPalette(c('black', 'orange', 'red')) | |
pdf(file = "~/polo_iPSC/ATACseq/plots/footprinting/OS_early_cluster_oct_sox_atac_heatmap.pdf", | |
height = 2.5, width = 3) | |
heatmap_normalised_insertions(bigwig_list = ins_fls, gr = early, range = 500, | |
title = "Early Oct4-Sox2 Tn5 insertions", | |
name_list = name_list, pwm = os) | |
heatmap_footprints(bigwig_list = oct_fls, gr = early, name_list = name_list, range = 1000, | |
heat_cols = reds(25), pwm = os, title = "Early Oct4 ChIP-seq") | |
heatmap_footprints(bigwig_list = sox_fls, gr = early, name_list = name_list, range = 1000, | |
heat_cols = reds(25), pwm = os, title = "Early Sox2 ChIP-seq") | |
heatmap_footprints_methylation(bigwig_list = mc_fls, gr = early, name_list = name_list, range = 4000, | |
heat_cols = black_red(25), pwm = os, title = "Early DNA methylation") | |
dev.off() | |
heatmap_footprints_methylation(bigwig_list = mc_fls, gr = early, name_list = name_list, range = 2000, | |
heat_cols = reds(25), pwm = os, title = "C1 DNA methylation") | |
pdf(file = "~/polo_iPSC/ATACseq/plots/footprinting/OS_early_cluster_insertions_heatmap.pdf", | |
height = 2, width = 3) | |
heatmap_normalised_insertions(bigwig_list = ins_fls, gr = early, range = 500, | |
title = "Early Oct4-Sox2 Tn5 insertions", | |
name_list = name_list, pwm = os) | |
heatmap_normalised_insertions(bigwig_list = ins_fls, gr = stable, range = 500, | |
title = "Stable Oct4-Sox2 Tn5 insertions", | |
name_list = name_list, pwm = os) | |
heatmap_normalised_insertions(bigwig_list = ins_fls, gr = stable, range = 800, | |
title = "Stable CTCF Tn5 insertions", | |
name_list = name_list, pwm = ctcf) | |
heatma | |
dev.off() | |
heatmap_normalised_insertions(bigwig_list = ins_fls, gr = early, range = 500, | |
title = "C1 Klf4 Tn5 insertions", | |
name_list = name_list, pwm = klf4) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment