Created
December 22, 2020 04:48
-
-
Save SamBuckberry/ee5c88c390cc6cbb75a52fb64b62d83f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(magrittr) | |
library(GenomicRanges) | |
library(BSgenome.Mmusculus.UCSC.mm10) | |
library(rtracklayer) | |
library(reshape2) | |
library(ggplot2) | |
library(dplyr) | |
library(JASPAR2016) | |
library(TFBSTools) | |
library(stringr) | |
source("~/polo_iPSC/R_functions/project_functions.R") | |
# Get the PWM data from the database and reformat | |
opts <- list() | |
opts[["all_versions"]] <- TRUE | |
opts[["collection"]] <- "CORE" | |
opts[["matrixtype"]] <- "PFM" | |
opts[["tax_group"]] <- "vertebrates" | |
# pwm_data <- getMatrixSet(JASPAR2014, opts) | |
# pwm_matrix_list_pfm <- Matrix(pwm_data) | |
# | |
# opts <- list() | |
# opts[["all_versions"]] <- TRUE | |
# opts[["collection"]] <- "PBM" | |
# opts[["matrixtype"]] <- "PFM" | |
# opts[["tax_group"]] <- "vertebrates" | |
# pwm_data <- getMatrixSet(JASPAR2014, opts) | |
# pwm_matrix_list_pbm <- Matrix(pwm_data) | |
#test <- getMatrixByName(JASPAR2016, name=c("T")) | |
pfmList <- getMatrixSet(JASPAR2016, opts) | |
#pfmList <- Matrix(pfmList) | |
tf_name <- pfmList@listData$MA0002.1@name | |
tf <- pfmList[names(pfmList) == "MA0002.1"] | |
save(pfmList, file = "~/polo_iPSC/resources/pfmList.Rda") | |
load(file = "~/polo_iPSC/resources/pfmList.Rda") | |
### Set the clusters into 4 groups | |
c1 <- read_bed("~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/cluster_1.bed") | |
c2 <- read_bed("~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/cluster_2.bed") | |
c3 <- read_bed("~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/cluster_3.bed") | |
c4 <- read_bed("~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/cluster_4.bed") | |
c5 <- read_bed("~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/cluster_5.bed") | |
c6 <- read_bed("~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/cluster_6.bed") | |
c7 <- read_bed("~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/cluster_7.bed") | |
c8 <- read_bed("~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/cluster_8.bed") | |
early <- c5 %>% gr_to_bed(out_path = "~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/early.bed") | |
late <- GRangesList(c3, c7) %>% unlist() %>% | |
gr_to_bed(out_path = "~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/late.bed") | |
trans <- GRangesList(c1, c6) %>% unlist() %>% | |
gr_to_bed(out_path = "~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/trans.bed") | |
loss <- GRangesList(c2, c4) %>% unlist() %>% | |
gr_to_bed(out_path = "~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/loss.bed") | |
stable <- c8 %>% gr_to_bed(out_path = "~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/stable.bed") | |
# Get mofif PWMs | |
olig1 <- pfmList$MA0826.1 | |
ctcf <- pfmList$MA0139.1 | |
klf4 <- pfmList$MA0039.2 | |
os <- pfmList$MA0142.1 | |
brachyury <- pfmList$MA0009.2 | |
eomes <- pfmList$MA0800.1 | |
meox1 <- pfmList$MA0661.1 | |
nrf1 <- pfmList$MA0506.1 | |
tead4 <- pfmList$MA0809.1 | |
klf5 <- pfmList$MA0599.1 | |
yy1 <- pfmList$MA0095.2 | |
sp1 <- pfmList$MA0079.2 | |
tcf4 <- pfmList$MA0830.1 | |
myc <- pfmList$MA0147.2 | |
ap1 <- pfmList$MA0099.1 | |
nfy <- pfmList$MA0060.1 | |
## Insertion files | |
ins_fls <- list.files("/Volumes/Datasets/", | |
pattern = "_combined_replicates.ins.bigwig", | |
full.names = TRUE)[c(6,2:4,1,5)] | |
mC_fls <- list.files("/Volumes/Datasets/", | |
pattern = ".CG.level.unstranded.bigwig", | |
full.names = TRUE) | |
## Library sizes | |
lib_sizes <- read.table("~/polo_iPSC/ATACseq/mapping_stats.txt")[c(6,2:4,1,5), ] | |
####****** Make sure ins_fls and lib sizes are in correct order*******########### | |
## Cluster bed files | |
cluster_beds <- c("~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/cluster_1.bed", | |
"~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/cluster_2.bed", | |
"~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/cluster_3.bed", | |
"~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/cluster_4.bed", | |
"~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/cluster_5.bed", | |
"~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/cluster_6.bed", | |
"~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/cluster_7.bed", | |
"~/polo_iPSC/ATACseq/processed_data/atac_cluster_peaks/c_means_peaks/cluster_8.bed") | |
## Nucleosome occupancy files | |
ips_occ <- "/Volumes/Datasets/atac_iPSC_combined_replicates.occ.bigwig" | |
d9_occ <- "/Volumes/Datasets/atac_d9_combined_replicates.occ.bigwig" | |
mef_occ <- "/Volumes/Datasets/atac_MEF_combined_replicates.occ.bigwig" | |
## Bias track | |
bias_bw <- "/Volumes/Datasets/mm10_bias.Scores.bigwig" | |
### Footprint analysis function | |
cluster <- 5 | |
pwm <- pfmList$MA0139.1 | |
occ_bigwig <- ips_occ | |
title <- "test" | |
min.score="85%" | |
width=45 | |
mC_bw <- "/Volumes/Datasets/mmiPS_1__0Day.CG.level.unstranded.bigwig" | |
run_footprint <- function(pwm, cluster, occ_bigwig, title="", | |
min.score="85%", width=300){ | |
# Get the cluster bed file to GRanges | |
cluster_gr <- bed_to_gr(cluster_beds[cluster]) | |
# Find the motif positions in the ranges | |
regions_pwm <- motif_gr(gr = cluster_gr, pwm = pwm, | |
min.score = min.score) | |
# Calculate footprint metrics | |
calc_footprint <- function(ins_bw, bias_bw, mC_bw, regions_pwm, occ_bigwig, | |
width=width, lib_size=1){ | |
# Split by nucleosome occupancy class | |
occ <- range_summary(bigwig = occ_bigwig, | |
gr = regions_pwm, range = 20, | |
log = FALSE, aggregate = FALSE) | |
occ_means <- rowMeans(occ) | |
set_occ_class <- function(x){ | |
dat <- NULL | |
if (x < 0.1) { | |
dat <- "A" | |
} else if (x >= 0.1 & x < 0.3) { | |
dat <- "B" | |
} else if (x >= 0.3 & x < 0.5) { | |
dat <- "C" | |
} else if (x > 0.5) { | |
dat <- "D" | |
} | |
return(dat) | |
} | |
occ_class <- lapply(occ_means, set_occ_class) %>% unlist() | |
# Get data frame of region and class for export | |
loci_id <- str_c(seqnames(regions_pwm), start(regions_pwm), sep = ":") %>% | |
str_c(end(regions_pwm), sep = "-") | |
region_class <- data.frame(loci=loci_id, occ_class=occ_class) | |
## Scale 0-1 function | |
range01 <- function(x){(x-min(x))/(max(x)-min(x))} | |
## Calculate footprint stats for each class | |
calc_range_summary <- function(class="A"){ | |
# insertion sumary | |
ins <- range_summary(bigwig = ins_bw, | |
gr = regions_pwm[occ_class == class], | |
range = width, log = FALSE, aggregate = FALSE) | |
# Bias summary | |
bias <- range_summary(bigwig = bias_bw, | |
gr = regions_pwm[occ_class == class], | |
range = width, log = TRUE, aggregate = FALSE) | |
mC <- range_summary_meth(bigwig = mC_bw, | |
gr = regions_pwm[occ_class == class], | |
range = width, log = FALSE, aggregate = FALSE) | |
mC_mean <- colMeans(mC, na.rm = TRUE) | |
# Bias correction | |
ins_over_bias <- ins / bias | |
ins_over_bias_cpm <- (ins / lib_size) / bias | |
df <- data.frame(position=as.numeric(colnames(ins)), | |
signal=colSums(ins), | |
bias=colSums(bias), | |
sob=colSums(ins_over_bias), | |
sob_cpm=colSums(ins_over_bias_cpm), | |
sob_scaled=range01(colSums(ins_over_bias) / lib_size), | |
bias_scaled=range01(colSums(bias)), | |
class=class, | |
mCG_mean=mC_mean) | |
return(df) | |
} | |
dat <- lapply(X = unique(occ_class), calc_range_summary) | |
dat <- do.call(rbind, dat) | |
## Calculate the footprint and flank average scores | |
motif_flank <- round(x = ncol(pwm) / 2, digits = 0) - 2 | |
dat$motif_flank <- NA | |
dat$motif_flank[dat$position <= -motif_flank] <- "UP" | |
dat$motif_flank[dat$position >= motif_flank] <- "DOWN" | |
dat$motif_flank[abs(dat$position) < motif_flank] <- "MOTIF" | |
calc_fp_scores <- function(class){ | |
## Calculate footprint quality 0-1 | |
flank_up_avg <- dat$sob_scaled[dat$motif_flank == "UP" & | |
dat$class == class] %>% mean() | |
flank_down_avg <- dat$sob_scaled[dat$motif_flank == "DOWN" & | |
dat$class == class] %>% mean() | |
flank_avg <- (flank_up_avg + flank_down_avg) / 2 | |
motif_avg <- dat$sob_scaled[dat$motif_flank == "MOTIF" & | |
dat$class == class] %>% mean() | |
fp_score <- flank_avg - motif_avg | |
## Calculate fp to flank ration | |
flank_unscaled_up_avg <- dat$sob[dat$motif_flank == "UP" & | |
dat$class == class] %>% mean() | |
flank_unscaled_down_avg <- dat$sob[dat$motif_flank == "DOWN" & | |
dat$class == class] %>% mean() | |
flank_unscaled_avg <- (flank_unscaled_up_avg + flank_unscaled_down_avg) / 2 | |
motif_unscaled_avg <- dat$sob[dat$motif_flank == "MOTIF" & | |
dat$class == class] %>% mean() | |
fp_ratio <- flank_unscaled_avg / motif_unscaled_avg | |
fp_diff <- flank_unscaled_avg - motif_unscaled_avg | |
# Number of reagions per class | |
n_regions <- sum(occ_class == class) | |
return(c(class=class, flank_avg=flank_avg, | |
motif_avg=motif_avg, fp_score=fp_score, | |
flank_unscaled_avg=flank_unscaled_avg, | |
motif_unscaled_avg=motif_unscaled_avg, | |
fp_ratio=fp_ratio, | |
fp_diff=fp_diff, | |
n_regions=n_regions)) | |
} | |
fp_scores <- lapply(unique(occ_class), calc_fp_scores) | |
fp_scores <- do.call(rbind, fp_scores) %>% data.frame() | |
gg <- ggplot(dat, aes(x = position, y = sob, group=class)) + | |
geom_line() + | |
facet_grid(class~.) | |
return(list(fp_scores=fp_scores, dat=dat, | |
region_class=region_class, | |
ins_bw=ins_bw, ggplot=gg)) | |
} | |
calc_series <- function(x, regions_pwm, occ_bigwig){ | |
series_dat <- lapply(ins_fls[x], FUN = calc_footprint, | |
mC_bw = mC_fls[x], | |
regions_pwm = regions_pwm, | |
occ_bigwig = occ_bigwig, | |
bias_bw = bias_bw, width = width, | |
lib_size=lib_sizes$records[x]) | |
return(series_dat) | |
} | |
series <- lapply(1:length(ins_fls), FUN = calc_series, | |
regions_pwm=regions_pwm, occ_bigwig=occ_bigwig) | |
names(series) <- c("MEF", "d3", "d6", "d9", "d12", "iPSC") | |
## Summaries data series | |
summarise_series <- function(x){ | |
d1 <- series[x][[1]] | |
d1 <- d1[[1]]$dat | |
d1$timepoint <- names(series[x]) | |
return(d1) | |
} | |
series_summary <- lapply(1:length(series), summarise_series) | |
series_summary <- do.call(rbind, series_summary) | |
## Get occupancy class numbers | |
occ_table <- table(series$MEF[[1]]$region_class$occ_class) %>% | |
data.frame() | |
## Summaries TF scores | |
summarise_score <- function(x){ | |
d1 <- series[x][[1]] | |
d1 <- d1[[1]]$fp_scores | |
d1$timepoint <- names(series[x]) | |
return(d1) | |
} | |
score_summary <- lapply(1:length(series), summarise_score) | |
score_summary <- do.call(rbind, score_summary) | |
## Generate plots | |
plot_fp_scores <- function(fp, title){ | |
get_fp_score <- function(x){ | |
dat <- fp[x] | |
dat <- dat[[1]][[1]]$fp_scores | |
dat <- dat[ ,c(1, 8, 9)] | |
dat$timepoint <- names(fp)[x] | |
return(dat) | |
} | |
fp_scores <- lapply(1:length(fp), get_fp_score) | |
fp_scores <- do.call(rbind, fp_scores) | |
fp_scores$fp_diff <- as.character(fp_scores$fp_diff) %>% | |
as.numeric() | |
fp_scores$timepoint <- factor(fp_scores$timepoint, | |
levels=c("MEF", "d3", "d6", "d9", "d12", "iPSC")) | |
gg <- ggplot(fp_scores, aes(x = timepoint, y = fp_diff, | |
group=class, colour=class)) + | |
geom_line(size=2) + | |
ylab("Footprint score") + | |
xlab("Timepoint") + | |
theme_bw() + | |
ggtitle(title) | |
return(gg) | |
} | |
plot_footprints <- function(fp, title){ | |
# Get the data for each timepoint | |
get_fp_data <- function(fl_name){ | |
dat <-fp[names(fp) == fl_name] | |
dat <- dat[[1]][[1]]$dat | |
dat$name <- str_replace(fl_name, | |
pattern = "_combined_replicates.ins.bigwig", | |
replacement = "") %>% | |
str_replace(pattern = "atac_", replacement = "") | |
return(dat) | |
} | |
fp_dat <- lapply(names(fp), get_fp_data) | |
fp_dat <- do.call(rbind, fp_dat) | |
# set levels fo classes | |
fp_dat$class <- factor(fp_dat$class, levels=c("A", "B", "C", "D")) | |
#fp_dat <- fp_dat[fp_dat$class == "C", ] | |
fp_dat$name <- factor(fp_dat$name, | |
levels=c("MEF", "d3", "d6", "d9", "d12", "iPSC")) | |
gg <- ggplot(fp_dat, aes(x = position, y = sob_cpm, | |
group = name, colour=class)) + | |
geom_line() + | |
facet_grid(name~class) + | |
ylab("Normalised insertions / bias") + | |
xlab("Relative position to motif centre") + | |
theme_bw() + | |
theme(panel.grid = element_blank()) | |
ggtitle(title) | |
return(gg) | |
} | |
plot_fp_range <- function(fp, title){ | |
get_fp_range <- function(x){ | |
dat <- fp[x] | |
dat <- dat[[1]][[1]]$fp_scores | |
dat <- dat[ ,c(1, 5, 6, 9)] | |
dat$timepoint <- names(fp)[x] | |
return(dat) | |
} | |
fp_range <- lapply(1:length(fp), get_fp_range) | |
fp_range <- do.call(rbind, fp_range) | |
fp_range$flank<- as.character(fp_range$flank_unscaled_avg) %>% | |
as.numeric() | |
fp_range$motif <- as.character(fp_range$motif_unscaled_avg) %>% | |
as.numeric() | |
fp_range$timepoint <- factor(fp_range$timepoint, | |
levels=c("MEF", "d3", "d6", "d9", "d12", "iPSC")) | |
fp_range$counts <- str_c(fp_range$class, "=", fp_range$n_regions) | |
gg <- ggplot(fp_range, aes(timepoint, ymin = motif, | |
ymax=flank, colour = counts)) + | |
#geom_linerange() + | |
geom_errorbar(width=0.5) + | |
geom_ribbon(aes(x=timepoint, ymin = motif, ymax=flank, group=class), | |
fill='grey80', colour=NA) + | |
geom_errorbar(width=0.5) + | |
facet_grid(class~.) + | |
theme_bw() + | |
xlab("Timepoint") + | |
ylab("Footprint score") + | |
theme(panel.grid = element_blank(), | |
axis.text.x = element_text(angle = 45, vjust = 0.5)) + | |
ggtitle(title) | |
return(gg) | |
} | |
plot_fp_class_A <- function(fp, title){ | |
# Get the data for each timepoint | |
get_fp_data <- function(fl_name){ | |
dat <-fp[names(fp) == fl_name] | |
dat <- dat[[1]][[1]]$dat | |
dat$name <- str_replace(fl_name, | |
pattern = "_combined_replicates.ins.bigwig", | |
replacement = "") %>% | |
str_replace(pattern = "atac_", replacement = "") | |
return(dat) | |
} | |
fp_dat <- lapply(names(fp), get_fp_data) | |
fp_dat <- do.call(rbind, fp_dat) | |
fp_dat <- fp_dat[fp_dat$class == "A", ] | |
fp_dat$name <- factor(fp_dat$name, | |
levels=c("MEF", "d3", "d6", "d9", "d12", "iPSC")) | |
gg <- ggplot(fp_dat, aes(x = position, y = sob_cpm, group = name)) + | |
geom_line() + | |
facet_grid(~name) + | |
ylab("") + | |
xlab("Position relative to motif centre") + | |
theme_bw() + | |
theme(panel.grid = element_blank(), | |
strip.background = element_blank()) + | |
ggtitle(str_c(title, " n=", occ_table[occ_table$Var1 == "A", 2])) | |
return(gg) | |
} | |
plot_fp_mc_level <- function(fp, title){ | |
# Get the data for each timepoint | |
get_fp_data <- function(fl_name){ | |
dat <-fp[names(fp) == fl_name] | |
dat <- dat[[1]][[1]]$dat | |
dat$name <- str_replace(fl_name, | |
pattern = "_combined_replicates.ins.bigwig", | |
replacement = "") %>% | |
str_replace(pattern = "atac_", replacement = "") | |
return(dat) | |
} | |
fp_dat <- lapply(names(fp), get_fp_data) | |
fp_dat <- do.call(rbind, fp_dat) | |
fp_dat <- fp_dat[fp_dat$class == "A", ] | |
fp_dat$name <- factor(fp_dat$name, | |
levels=c("MEF", "d3", "d6", "d9", "d12", "iPSC")) | |
gg <- ggplot(fp_dat, aes(x = position, y = mCG_mean, group = name)) + | |
#geom_line() + | |
geom_point(alpha=0.1) + | |
#geom_smooth() + | |
facet_grid(~name) + | |
ylab("mCG/CG") + | |
xlab("Position relative to motif centre") + | |
theme_bw() + | |
theme(panel.grid = element_blank(), | |
strip.background = element_blank()) + | |
ggtitle(str_c(title, " n=", occ_table[occ_table$Var1 == "A", 2])) | |
return(gg) | |
} | |
gg_scores <- plot_fp_scores(fp = series, title = title) | |
gg_fp <- plot_footprints(fp = series, title = title) | |
gg_range <- plot_fp_range(fp = series, title = title) | |
gg_class_A <- plot_fp_class_A(fp = series, title = title) | |
gg_mc_mean <- plot_fp_mc_level(fp = series, title = title) | |
## Output | |
out <- list(summary=series_summary, scores=score_summary, | |
occ_class=series$MEF[[1]]$region_class, | |
gg_scores=gg_scores, gg_fp=gg_fp, gg_mc_mean=gg_mc_mean, | |
gg_range=gg_range, gg_class_A=gg_class_A) | |
return(out) | |
} | |
# CTCF footprinting ------------------------------------------------------- | |
ctcf <- pfmList$MA0139.1 | |
ctcf_c5 <- run_footprint(pwm = ctcf, cluster = 5, occ_bigwig = ips_occ, | |
title = "C5 CTCF", min.score = "85%") | |
# NRF1 foot printing ------------------------------------------------------ | |
nrf1 <- pfmList$MA0506.1 | |
nrf1_c8 <- run_footprint(pwm = nrf1, cluster = 8, occ_bigwig = ips_occ, | |
title = "C8 NRF1", min.score = "85%") | |
nrf1_c8$gg_fp | |
nrf1_c8$gg_class_A + xlim(-50, 50) | |
nrf1_c8$gg_mc_mean + | |
geom_smooth(span=0.1, colour='red', se=FALSE, size=0.5) | |
# Oct4-Sox2 footprinting ------------------------------------------------- | |
os <- pfmList$MA0142.1 | |
os_c1 <- run_footprint(pwm = os, cluster = 1, occ_bigwig = d9_occ, | |
title = "C1 Oct4-Sox2", min.score = "75%") | |
os_c2 <- run_footprint(pwm = os, cluster = 2, occ_bigwig = mef_occ, | |
title = "C2 Oct4-Sox2", min.score = "75%") | |
os_c3 <- run_footprint(pwm = os, cluster = 3, occ_bigwig = ips_occ, | |
title = "C3 Oct4-Sox2", min.score = "75%") | |
# os_c4 <- run_footprint(pwm = os, cluster = 4, occ_bigwig = mef_occ, | |
# title = "C4 Oct4-Sox2", min.score = "75%") | |
os_c5 <- run_footprint(pwm = os, cluster = 5, occ_bigwig = ips_occ, | |
title = "C5 Oct4-Sox2", min.score = "75%") | |
os_c6 <- run_footprint(pwm = os, cluster = 6, occ_bigwig = d9_occ, | |
title = "C6 Oct4-Sox2", min.score = "75%") | |
os_c7 <- run_footprint(pwm = os, cluster = 7, occ_bigwig = ips_occ, | |
title = "C7 Oct4-Sox2", min.score = "75%") | |
os_c8 <- run_footprint(pwm = os, cluster = 8, occ_bigwig = ips_occ, | |
title = "C8 Oct4-Sox2", min.score = "75%") | |
os_c1$gg_class_A + xlim(-100, 100) | |
os_c1$gg_mc_mean + geom_smooth(span=0.1, colour='red', se=FALSE, size=0.5) | |
os_c5$gg_class_A + xlim(-100, 100) | |
os_c5$gg_mc_mean + geom_smooth(span=0.1, colour='red', se=FALSE, size=0.5) | |
os_c6$gg_class_A + xlim(-40, 40) | |
os_c6$gg_fp | |
os_c6$gg_mc_mean + geom_smooth(span=0.1, colour='red', se=FALSE, size=0.5) | |
# Sox2 footprinting ------------------------------------------------------ | |
# sox2 <- pfmList$MA0143.3 | |
# | |
# sox2_c1 <- run_footprint(pwm = sox2, cluster = 1, occ_bigwig = d9_occ, | |
# title = "C1 Sox2", min.score = "85%") | |
# sox2_c5 <- run_footprint(pwm = sox2, cluster = 5, occ_bigwig = ips_occ, | |
# title = "C5 Sox2", min.score = "85%") | |
# sox2_c6 <- run_footprint(pwm = sox2, cluster = 6, occ_bigwig = d9_occ, | |
# title = "C6 Sox2", min.score = "85%") | |
# sox2_c7 <- run_footprint(pwm = sox2, cluster = 7, occ_bigwig = ips_occ, | |
# title = "C7 Sox2", min.score = "85%") | |
# sox2_c8 <- run_footprint(pwm = sox2, cluster = 8, occ_bigwig = ips_occ, | |
# title = "C8 Sox2", min.score = "85%") | |
# Klf4 foot printing ------------------------------------------------------ | |
klf4 <- pfmList$MA0039.2 | |
klf4_c1 <- run_footprint(pwm = klf4, cluster = 1, occ_bigwig = d9_occ, | |
title = "C1 klf4") | |
klf4_c2 <- run_footprint(pwm = klf4, cluster = 2, occ_bigwig = mef_occ, | |
title = "C2 klf4") | |
klf4_c3 <- run_footprint(pwm = klf4, cluster = 3, occ_bigwig = ips_occ, | |
title = "C3 klf4") | |
klf4_c5 <- run_footprint(pwm = klf4, cluster = 5, occ_bigwig = ips_occ, | |
title = "C5 klf4") | |
klf4_c6 <- run_footprint(pwm = klf4, cluster = 6, occ_bigwig = d9_occ, | |
title = "C6 klf4") | |
klf4_c7 <- run_footprint(pwm = klf4, cluster = 7, occ_bigwig = ips_occ, | |
title = "C7 klf4") | |
klf4_c8 <- run_footprint(pwm = klf4, cluster = 8, occ_bigwig = ips_occ, | |
title = "C8 klf4") | |
# AP1 foot printing ------------------------------------------------------- | |
ap1 <- pfmList$MA0099.1 | |
ap1_c1 <- run_footprint(pwm = ap1, cluster = 1, occ_bigwig = d9_occ, | |
title = "C1 AP-1") | |
ap1_c2 <- run_footprint(pwm = ap1, cluster = 2, occ_bigwig = mef_occ, | |
title = "C2 AP-1") | |
ap1_c4 <- run_footprint(pwm = ap1, cluster = 4, occ_bigwig = mef_occ, | |
title = "C4 AP-1") | |
# Footprint plotting ------------------------------------------------------ | |
save(ctcf_c5, nrf1_c8, | |
os_c1, os_c2, os_c3, os_c5, os_c6, os_c7, os_c8, | |
klf4_c1, klf4_c2, klf4_c3, klf4_c5, klf4_c6, klf4_c7, klf4_c8, | |
ap1_c1, ap1_c2, ap1_c4, | |
file = "ATACseq/processed_data/footprint_data.Rdata") | |
load("ATACseq/processed_data/footprint_data.Rdata") | |
pdf("ATACseq/plots/footprinting/footprints_all.pdf", width = 6, height = 2) | |
ctcf_c5$gg_class_A + xlim(-40, 40) | |
ctcf_c5$gg_mc_mean + geom_smooth(span=0.1, colour='red', se=FALSE, size=0.5) | |
nrf1_c8$gg_class_A + xlim(-40, 40) | |
nrf1_c8$gg_mc_mean + geom_smooth(span=0.1, colour='red', se=FALSE, size=0.5) | |
os_c1$gg_class_A + xlim(-40, 40) | |
os_c1$gg_mc_mean + geom_smooth(span=0.1, colour='red', se=FALSE, size=0.5) | |
os_c2$gg_class_A + xlim(-40, 40) | |
os_c2$gg_mc_mean + geom_smooth(span=0.1, colour='red', se=FALSE, size=0.5) | |
os_c3$gg_class_A + xlim(-40, 40) | |
os_c3$gg_mc_mean + geom_smooth(span=0.1, colour='red', se=FALSE, size=0.5) | |
os_c5$gg_class_A + xlim(-40, 40) | |
os_c5$gg_mc_mean + geom_smooth(span=0.1, colour='red', se=FALSE, size=0.5) | |
os_c6$gg_class_A + xlim(-40, 40) | |
os_c6$gg_mc_mean + geom_smooth(span=0.1, colour='red', se=FALSE, size=0.5) | |
os_c7$gg_class_A + xlim(-40, 40) | |
os_c7$gg_mc_mean + geom_smooth(span=0.1, colour='red', se=FALSE, size=0.5) | |
os_c8$gg_class_A + xlim(-40, 40) | |
os_c8$gg_mc_mean + geom_smooth(span=0.1, colour='red', se=FALSE, size=0.5) | |
klf4_c1$gg_class_A + xlim(-40, 40) | |
klf4_c1$gg_mc_mean + geom_smooth(span=0.1, colour='red', se=FALSE, size=0.5) | |
klf4_c2$gg_class_A + xlim(-40, 40) | |
klf4_c2$gg_mc_mean + geom_smooth(span=0.1, colour='red', se=FALSE, size=0.5) | |
klf4_c3$gg_class_A + xlim(-40, 40) | |
klf4_c3$gg_mc_mean + geom_smooth(span=0.1, colour='red', se=FALSE, size=0.5) | |
klf4_c5$gg_class_A + xlim(-40, 40) | |
klf4_c5$gg_mc_mean + geom_smooth(span=0.1, colour='red', se=FALSE, size=0.5) | |
klf4_c6$gg_class_A + xlim(-40, 40) | |
klf4_c6$gg_mc_mean + geom_smooth(span=0.1, colour='red', se=FALSE, size=0.5) | |
klf4_c7$gg_class_A + xlim(-40, 40) | |
klf4_c7$gg_mc_mean + geom_smooth(span=0.1, colour='red', se=FALSE, size=0.5) | |
klf4_c8$gg_class_A + xlim(-40, 40) | |
klf4_c8$gg_mc_mean + geom_smooth(span=0.1, colour='red', se=FALSE, size=0.5) | |
ap1_c1$gg_class_A + xlim(-40, 40) | |
ap1_c1$gg_mc_mean + geom_smooth(span=0.1, colour='red', se=FALSE, size=0.5) | |
ap1_c2$gg_class_A + xlim(-40, 40) | |
ap1_c2$gg_mc_mean + geom_smooth(span=0.1, colour='red', se=FALSE, size=0.5) | |
ap1_c4$gg_class_A + xlim(-40, 40) | |
ap1_c4$gg_mc_mean + geom_smooth(span=0.1, colour='red', se=FALSE, size=0.5) | |
dev.off() | |
# Atf3 footprinting ------------------------------------------------------- | |
atf3 <- pfmList$MA0605.1 | |
atf3_c1 <- run_footprint(pwm = atf3, cluster = 1, occ_bigwig = d9_occ, | |
title = "C1 Atf3", width = 1000) | |
atf3_c2 <- run_footprint(pwm = atf3, cluster = 2, occ_bigwig = mef_occ, | |
title = "C2 Atf3", width = 1000) | |
atf3_c4 <- run_footprint(pwm = atf3, cluster = 4, occ_bigwig = mef_occ, | |
title = "C2 Atf3", width = 1000) | |
atf3_c1$gg_class_A + xlim(-50, 50) | |
atf3_c1$gg_mc_mean + geom_smooth(span=0.1, colour='red', se=FALSE, size=0.5) | |
atf3_c2$gg_class_A + xlim(-50, 50) | |
atf3_c2$gg_mc_mean + | |
geom_smooth(span=0.1, colour='red', se=FALSE, size=0.5) | |
atf3_c4$gg_class_A + xlim(-50, 50) | |
atf3_c4$gg_mc_mean + | |
geom_smooth(span=0.1, colour='red', se=FALSE, size=0.5) | |
# Nfy footprinting -------------------------------------------------------- | |
nfy <- pfmList$MA0060.1 | |
nfy_c3 <- run_footprint(pwm = nfy, cluster = 3, occ_bigwig = ips_occ, | |
title = "C3 NFY", width = 250) | |
nfy_c7 <- run_footprint(pwm = nfy, cluster = 7, occ_bigwig = ips_occ, | |
title = "C7 NFY", width = 250) | |
nfy_c3$gg_class_A + xlim(-40, 40) | |
nfy_c3$gg_mc_mean + geom_smooth(span=0.1, colour='red', se=FALSE, size=0.5) | |
nfy_c7$gg_class_A + xlim(-40, 40) | |
nfy_c7$gg_mc_mean + geom_smooth(span=0.1, colour='red', se=FALSE, size=0.5) | |
# AP-2 Tfap2c ------------------------------------------------------------- | |
ap2_g <- pfmList$MA0815.1 | |
ap2_g_c5 <- run_footprint(pwm = ap2_g, cluster = 5, occ_bigwig = ips_occ, | |
title = "C5 AP-2 gamma") | |
ap2_g_c6 <- run_footprint(pwm = ap2_g, cluster = 6, occ_bigwig = d9_occ, | |
title = "C6 AP-2 gamma") | |
ap2_g_c5$gg_class_A + xlim(-40, 40) | |
ap2_g_c5$gg_mc_mean + geom_smooth(span=0.1, colour='red', se=FALSE, size=0.5) | |
ap2_g_c6$gg_class_A + xlim(-40, 40) | |
ap2_g_c6$gg_mc_mean + geom_smooth(span=0.1, colour='red', se=FALSE, size=0.5) | |
# Klf5 -------------------------------------------------------------------- | |
klf5 <- pfmList$MA0599.1 | |
klf5_c1 <- run_footprint(pwm = klf5, cluster = 1, occ_bigwig = d9_occ, | |
title = "C1 klf5") | |
klf5_c2 <- run_footprint(pwm = klf5, cluster = 2, occ_bigwig = mef_occ, | |
title = "C2 klf5") | |
klf5_c3 <- run_footprint(pwm = klf5, cluster = 3, occ_bigwig = ips_occ, | |
title = "C3 klf5") | |
klf5_c5 <- run_footprint(pwm = klf5, cluster = 5, occ_bigwig = ips_occ, | |
title = "C5 klf5") | |
klf5_c6 <- run_footprint(pwm = klf5, cluster = 6, occ_bigwig = d9_occ, | |
title = "C6 klf5") | |
klf5_c7 <- run_footprint(pwm = klf5, cluster = 7, occ_bigwig = ips_occ, | |
title = "C7 klf5") | |
klf5_c8 <- run_footprint(pwm = klf5, cluster = 8, occ_bigwig = ips_occ, | |
title = "C8 klf5") | |
# Yy1 --------------------------------------------------------------------- | |
yy1 <- pfmList$MA0095.2 | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment