Skip to content

Instantly share code, notes, and snippets.

@rtraborn
Last active August 29, 2015 14:11
Show Gist options
  • Save rtraborn/0f674eaadc2143641812 to your computer and use it in GitHub Desktop.
Save rtraborn/0f674eaadc2143641812 to your computer and use it in GitHub Desktop.
figure_2
## Code to generate Figure 2 ## current code generates a two-panel figure; the lower panel (SD) is problematic at this point (12/12/14)
#loading appropriate packages
library('GenomicRanges')
#loading R script
source("tsr_overlaps.R")
#setting the working directory
setwd("/DATA/GROUP/rtraborn/tsrchitect_test")
#removing scientific notation
options(scipen=999)
#importing the mapped, PEAT-derived TSSs from cagep
read.table(file="A_thaliana_PEAT.cagep",header=F) -> At_PEAT_TSSs
#importing the putative promoter annotations from TSRchitect
read.table(file="atpeatbed.bed",header=F) -> At_PEAT_TSRs
#naming the BED columns from the above
c("chr","start","end","id","Num_TSSs","strand") -> names(At_PEAT_TSRs)
#creating a GenomicRanges object from the At_PEAT_TSRs bed file
TSRs_bed <- with(At_PEAT_TSRs, GRanges(chr, IRanges(start, end), strand, Num_TSSs, id=id))
#will print top of object #works in previous (12/8) commit
TSRs_bed
#naming the columns from the cagep file with TSSs
c("chr","id","strand","start","end") -> names(At_PEAT_TSSs)
#creating a GenomicRanges objects from the At_PEAT_TSSs file
TSSs_PEAT <- with(At_PEAT_TSSs, GRanges(chr, IRanges(start,end), strand,id=id))
#this will print top of TSSs_PEAT object #worked without issue 12/9
TSSs_PEAT
#finding the overlaps between TSRs and TSSs
findOverlaps(TSRs_bed,TSSs_PEAT) -> TSR_ol
#running tsr_overlaps
tsr_overlaps(TSR_ol,At_PEAT_TSSs,TSRs_bed) -> complete_df
#printing the output
print(head(complete_df))
#setting working directory to our git repo
setwd("/home/rtraborn/manuscripts/figure_repos/Figure_2")
#preparing the figure
png(file="Figure_2_plot.png",height=1800,width=900)
par(mfrow=c(2, 1)) #setting up two figures, above and below
na.omit(complete_df$entropy) -> entropy_vec
na.omit(complete_df$var) -> var_vec
plot(density(entropy_vec), main = "Distribution of Promoter Entropy within promoters",xlab="Shannon Entropy of TSSs in Putative Promoters (TSRs)\n Most uniform -> Most Diverse",ylab="Density",col="red2")
plot(density(sd_vec), main = "Distribution of the variances of TSSs within promoters", xlab="Variances of TSSs in Putative Promoters (TSRs)\n (n=5097 genes) ", ylab="Density",col="blue2",xlim=c(0,100000))
dev.off()
###iterates over a findOverlaps() GRanges object and calculates the shannon entropy score and standard deviation for each TSR
tsr_overlaps <- function(overlaps,cagepGR,TSR) {
library('entropy')
overlaps_df <- as.data.frame(overlaps)
TSS_df <- as.data.frame(cagepGR)
TSR_df <- as.data.frame(TSR)
final_df <- array(NA,c(nrow(TSR_df),4))
for (i in unique(overlaps_df[,1])) {
i -> this_TSR
print(this_TSR)
which(overlaps_df[,1]==this_TSR) -> TSS_index
TSS_df[TSS_index,] -> TSS_slice
if (TSS_slice$strand[1]=="+") { #for positive strand
TSS_slice$start -> TSS_list
}
if (TSS_slice$strand[1]=="-") { #for negative strand
TSS_slice$end -> TSS_list
}
TSR_df$id[1] -> this_gene
as.numeric(TSS_list) -> TSS_list
var(TSS_list) -> var_TSS
#print(var_TSS) #for debugging
entropy(TSS_list,method="ML",unit="log2") -> shan_ent_TSS
#print(shan_ent_TSS) #for debugging
length(TSS_index) -> num_TSS
final_df[i,] <- c(this_gene,var_TSS,shan_ent_TSS,num_TSS)
}
final_df <- as.data.frame(final_df)
names(final_df) <- c("gene","var","entropy","nTSS") #adding the names to the data_frame
return(final_df)
}
@rtraborn
Copy link
Author

Fixing png() and par() so both panels render properly

@rtraborn
Copy link
Author

The second panel (SDs distribution) needs a closer look. I'll address this tomorrow.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment