Last active
August 29, 2015 14:13
-
-
Save rtraborn/7f37e35a0c7589ab0f5d to your computer and use it in GitHub Desktop.
Figure 5 Figures
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Code needed to generate Figure 5, which is a validation or correlation between annotated TSRs and elements consistent with promoter | |
## Written on January 24, 2015 | |
#loading the dependencies | |
library('GenomicRanges') | |
#setting the working directory as appropriate | |
setwd("/DATA/GROUP/rtraborn/tsrchitect_test/") | |
#importing the putative promoter annotations that were made by TSRchitect | |
At_PEAT_TSRs <- read.table(file="atPEAT_TSRs_mid.bed",header=T) | |
#naming the columns from the above BED file | |
names(At_PEAT_TSRs) <- c("chr","start","end","id","Num_TSSs","strand","midpoint") | |
#making sure the file was imported properly | |
head(At_PEAT_TSRs) | |
#creating a GRanges object | |
TSRs_GR <- makeGRangesFromDataFrame(At_PEAT_TSRs,seqnames.field="chr",start.field="start",end.field="end",strand.field="strand") | |
#making sure the GRanges object was created successfully | |
TSRs_GR | |
#adding flanking sequence [-500,200] to the TSS (start) of the object | |
TSRs_fl_GR <- promoters(TSRs_GR,upstream=500,downstream=250) | |
#moving into the appropriate directory | |
setwd("/DATA/GROUP/rtraborn/tsrchitect_test/at_stability_values") | |
#importing the PPred. Stability files | |
#read.table(file="at_Chr1_stb.txt",header=T) -> C1_stb | |
read.table(file="at_Chr2_stb.txt",header=T) -> C2_stb | |
#read.table(file="at_Chr3_stb.txt",header=T) -> C3_stb | |
#read.table(file="at_Chr4_stb.txt",header=T) -> C4_stb | |
#read.table(file="at_Chr5_stb.txt",header=T) -> C5_stb | |
#adding a new column to each dataframe | |
#C1_stb$chr <- "Chr1" | |
C2_stb$chr <- "Chr2" | |
#C3_stb$chr <- "Chr3" | |
#C4_stb$chr <- "Chr4" | |
#C5_stb$chr <- "Chr5" | |
#joining all five dataframes | |
#at_stb_df <- rbind(C1_stb,C2_stb,C3_stb,C4_stb,C5_stb) | |
#at_stb_df <- C1_stb #so far this is just for chromosome 1; need to remove this line once the appropriate unit testing has been completed | |
#at_stb_df | |
at_stb_df <- C2_stb | |
print(dim(at_stb_df)) | |
#creating another GRanges object from the stability data | |
at_stb_GR <- GRanges(seqnames=at_stb_df$chr,ranges=IRanges(start=at_stb_df$coord,end=at_stb_df$coord),score=at_stb_df$stb_value) | |
#checking to see what the new file looks like | |
at_stb_GR | |
#extracting the overlaps | |
data_OL <- findOverlaps(TSRs_fl_GR,at_stb_GR,type="any") | |
#checking the new GRanges object | |
data_OL | |
#converting to a GRanges object | |
OL_df <- as.data.frame(data_OL) | |
print(head(OL_df)) | |
#how many TSRs are there in the At_PEAT_TSRs file? | |
nStart <- min(OL_df$queryHits) | |
nTSRs <- max(OL_df$queryHits) | |
print(nTSRs) #for debugging | |
#creating a matrix that will contain all of the stability data | |
stb_matrix <- matrix(NA,nrow=length(nStart:nTSRs),ncol=750) #need to double-check / unit test this part (3/18) | |
#a loop that iterates over OL_df to retrieve the stability scores from the (giant) stb datafram ## also need to test this (3/18) | |
for (i in 1:length(nStart:nTSRs)){ | |
unique(OL_df$queryHits) -> unique_vector | |
unique_vector[i] -> thisTSR | |
which(OL_df[,1]==thisTSR) -> q_index | |
at_stb_df[q_index,2] -> stb_vector | |
stb_vector -> stb_matrix[i,] | |
} | |
#checking to see what stb_matrix looks like | |
print(head(stb_matrix)) | |
write.table(stb_matrix,file="stb_matrix_sample.txt",col.names=FALSE,row.names=FALSE) | |
#creating a vector with the average stablity value created at each position (from -500 to +250) | |
stb_mean_vec <- apply(stb_matrix,2,FUN=mean) | |
coord_vec <- -500:250 | |
coord_vec <- coord_vec[-501] #removing the 'zero' | |
#final data frame | |
stb_ave_df <- data.frame(coord=coord_vec,stb=stb_mean_vec) | |
#writing it to a file | |
write.table(stb_ave_df,file="stb_averages_TSRs",quote=F,row.names=FALSE,col.names=TRUE) | |
#graphing the output from figure_5_workflow.R | |
stb_averages <- stb_ave_df | |
#writing the file in my data home directory | |
setwd("/DATA/GROUP/rtraborn/manuscripts/figure_repos/tsrchitect_figures/Figure_5/") | |
#naming the file and setting up the plotting device | |
png(file="figure_5_stb_plot.png",height=700,width=960) | |
plot.new() | |
plot(stb_averages$coord,stb_averages$stb,col="darkblue",type="l",xlab="Genomic position relative to the TSS",ylab="Average DNA stablity value (free energy)") | |
dev.off() | |
print("Figure 5 Complete!") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#converts all 5 TAIR ChrN_stb files into separate readable genome_coordinate files | |
source(file="stb_convert.R") | |
setwd("/DATA/GROUP/rtraborn/tsrchitect_test/at_stability_values") | |
print("Importing PPred Stb Files") | |
read.table(file="TAIR10_chr1_stb.txt",header=TRUE,skip=3,row.names=1) -> Chr1_stb | |
read.table(file="TAIR10_chr2_stb.txt",header=TRUE,skip=3,row.names=1) -> Chr2_stb | |
read.table(file="TAIR10_chr3_stb.txt",header=TRUE,skip=3,row.names=1) -> Chr3_stb | |
read.table(file="TAIR10_chr4_stb.txt",header=TRUE,skip=3,row.names=1) -> Chr4_stb | |
read.table(file="TAIR10_chr5_stb.txt",header=TRUE,skip=3,row.names=1) -> Chr5_stb | |
print("Import Complete") | |
print("Coverting Chromosome 1") | |
stb_convert(Chr1_stb) -> Chr1_out | |
as.data.frame(Chr1_out) -> Chr1_out | |
c("coord","stb_value") -> names(Chr1_out) | |
print("Converting Chromosome 2") | |
stb_convert(Chr2_stb) -> Chr2_out | |
as.data.frame(Chr2_out) -> Chr2_out | |
c("coord","stb_value") -> names(Chr2_out) | |
print("Converting Chromosome 3") | |
stb_convert(Chr3_stb) -> Chr3_out | |
as.data.frame(Chr3_out) -> Chr3_out | |
c("coord","stb_value") -> names(Chr3_out) | |
print("Converting Chromosome 4") | |
stb_convert(Chr4_stb) -> Chr4_out | |
as.data.frame(Chr4_out) -> Chr4_out | |
c("coord","stb_value") -> names(Chr4_out) | |
print("Converting Chromosome 5") | |
stb_convert(Chr5_stb) -> Chr5_out | |
as.data.frame(Chr5_out) -> Chr5_out | |
c("coord","stb_value") -> names(Chr5_out) | |
print("Conversion complete") | |
print("Writing output tables") | |
write.table(Chr1_out,file="at_Chr1_stb.txt",quote=F,sep="\t",col.names=T,row.names=T) | |
write.table(Chr2_out,file="at_Chr2_stb.txt",quote=F,sep="\t",col.names=T,row.names=T) | |
write.table(Chr3_out,file="at_Chr3_stb.txt",quote=F,sep="\t",col.names=T,row.names=T) | |
write.table(Chr4_out,file="at_Chr4_stb.txt",quote=F,sep="\t",col.names=T,row.names=T) | |
write.table(Chr5_out,file="at_Chr5_stb.txt",quote=F,sep="\t",col.names=T,row.names=T) | |
print("Job Complete!") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#converts Prom Predict stb output files into a single table with nucleotide coordinates and stability values | |
#written on 1/23/15 | |
stb_convert <- function(stb_file){ | |
nRows <- nrow(stb_file) | |
#print(nRows) | |
Row_coord <- as.numeric(rownames(stb_file)) | |
stb_length <- (nRows*986) | |
print(stb_length) | |
stb_array <- array(NA,c(stb_length,2)) | |
#print(dim(stb_array)) | |
stb_values <- as.numeric(stb_file[1,]) | |
stb_array[1:986,2] <- stb_values | |
#print(head(stb_array)) | |
coord_labels <- 7:(stb_length+6) | |
#print(length(coord_labels)) | |
stb_array[,1] <- coord_labels | |
for (i in 2:nRows) { | |
as.numeric(Row_coord[i]-14) -> start_pos | |
as.numeric(start_pos+985) -> end_pos | |
as.numeric(stb_file[i,]) -> stb_values | |
#print(length(stb_values)) | |
#print(start_pos) | |
#print(end_pos) | |
#print(length(start_pos:end_pos)) | |
stb_values -> stb_array[start_pos:end_pos,2] | |
} | |
which(is.na(stb_array[,2])) -> out_index | |
stb_array <- stb_array[-out_index,] | |
return(stb_array) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment