Last active
August 29, 2015 14:11
-
-
Save rtraborn/0f674eaadc2143641812 to your computer and use it in GitHub Desktop.
figure_2
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Code to generate Figure 2 ## current code generates a two-panel figure; the lower panel (SD) is problematic at this point (12/12/14) | |
#loading appropriate packages | |
library('GenomicRanges') | |
#loading R script | |
source("tsr_overlaps.R") | |
#setting the working directory | |
setwd("/DATA/GROUP/rtraborn/tsrchitect_test") | |
#removing scientific notation | |
options(scipen=999) | |
#importing the mapped, PEAT-derived TSSs from cagep | |
read.table(file="A_thaliana_PEAT.cagep",header=F) -> At_PEAT_TSSs | |
#importing the putative promoter annotations from TSRchitect | |
read.table(file="atpeatbed.bed",header=F) -> At_PEAT_TSRs | |
#naming the BED columns from the above | |
c("chr","start","end","id","Num_TSSs","strand") -> names(At_PEAT_TSRs) | |
#creating a GenomicRanges object from the At_PEAT_TSRs bed file | |
TSRs_bed <- with(At_PEAT_TSRs, GRanges(chr, IRanges(start, end), strand, Num_TSSs, id=id)) | |
#will print top of object #works in previous (12/8) commit | |
TSRs_bed | |
#naming the columns from the cagep file with TSSs | |
c("chr","id","strand","start","end") -> names(At_PEAT_TSSs) | |
#creating a GenomicRanges objects from the At_PEAT_TSSs file | |
TSSs_PEAT <- with(At_PEAT_TSSs, GRanges(chr, IRanges(start,end), strand,id=id)) | |
#this will print top of TSSs_PEAT object #worked without issue 12/9 | |
TSSs_PEAT | |
#finding the overlaps between TSRs and TSSs | |
findOverlaps(TSRs_bed,TSSs_PEAT) -> TSR_ol | |
#running tsr_overlaps | |
tsr_overlaps(TSR_ol,At_PEAT_TSSs,TSRs_bed) -> complete_df | |
#printing the output | |
print(head(complete_df)) | |
#setting working directory to our git repo | |
setwd("/home/rtraborn/manuscripts/figure_repos/Figure_2") | |
#preparing the figure | |
png(file="Figure_2_plot.png",height=1800,width=900) | |
par(mfrow=c(2, 1)) #setting up two figures, above and below | |
na.omit(complete_df$entropy) -> entropy_vec | |
na.omit(complete_df$var) -> var_vec | |
plot(density(entropy_vec), main = "Distribution of Promoter Entropy within promoters",xlab="Shannon Entropy of TSSs in Putative Promoters (TSRs)\n Most uniform -> Most Diverse",ylab="Density",col="red2") | |
plot(density(sd_vec), main = "Distribution of the variances of TSSs within promoters", xlab="Variances of TSSs in Putative Promoters (TSRs)\n (n=5097 genes) ", ylab="Density",col="blue2",xlim=c(0,100000)) | |
dev.off() | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
###iterates over a findOverlaps() GRanges object and calculates the shannon entropy score and standard deviation for each TSR | |
tsr_overlaps <- function(overlaps,cagepGR,TSR) { | |
library('entropy') | |
overlaps_df <- as.data.frame(overlaps) | |
TSS_df <- as.data.frame(cagepGR) | |
TSR_df <- as.data.frame(TSR) | |
final_df <- array(NA,c(nrow(TSR_df),4)) | |
for (i in unique(overlaps_df[,1])) { | |
i -> this_TSR | |
print(this_TSR) | |
which(overlaps_df[,1]==this_TSR) -> TSS_index | |
TSS_df[TSS_index,] -> TSS_slice | |
if (TSS_slice$strand[1]=="+") { #for positive strand | |
TSS_slice$start -> TSS_list | |
} | |
if (TSS_slice$strand[1]=="-") { #for negative strand | |
TSS_slice$end -> TSS_list | |
} | |
TSR_df$id[1] -> this_gene | |
as.numeric(TSS_list) -> TSS_list | |
var(TSS_list) -> var_TSS | |
#print(var_TSS) #for debugging | |
entropy(TSS_list,method="ML",unit="log2") -> shan_ent_TSS | |
#print(shan_ent_TSS) #for debugging | |
length(TSS_index) -> num_TSS | |
final_df[i,] <- c(this_gene,var_TSS,shan_ent_TSS,num_TSS) | |
} | |
final_df <- as.data.frame(final_df) | |
names(final_df) <- c("gene","var","entropy","nTSS") #adding the names to the data_frame | |
return(final_df) | |
} | |
The second panel (SDs distribution) needs a closer look. I'll address this tomorrow.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Fixing png() and par() so both panels render properly