Skip to content

Instantly share code, notes, and snippets.

@rtraborn
Last active August 29, 2015 14:13
Show Gist options
  • Save rtraborn/7f37e35a0c7589ab0f5d to your computer and use it in GitHub Desktop.
Save rtraborn/7f37e35a0c7589ab0f5d to your computer and use it in GitHub Desktop.
Figure 5 Figures

Validation of promoter annotations using DNA structural features

# Code needed to generate Figure 5, which is a validation or correlation between annotated TSRs and elements consistent with promoter
## Written on January 24, 2015
#loading the dependencies
library('GenomicRanges')
#setting the working directory as appropriate
setwd("/DATA/GROUP/rtraborn/tsrchitect_test/")
#importing the putative promoter annotations that were made by TSRchitect
At_PEAT_TSRs <- read.table(file="atPEAT_TSRs_mid.bed",header=T)
#naming the columns from the above BED file
names(At_PEAT_TSRs) <- c("chr","start","end","id","Num_TSSs","strand","midpoint")
#making sure the file was imported properly
head(At_PEAT_TSRs)
#creating a GRanges object
TSRs_GR <- makeGRangesFromDataFrame(At_PEAT_TSRs,seqnames.field="chr",start.field="start",end.field="end",strand.field="strand")
#making sure the GRanges object was created successfully
TSRs_GR
#adding flanking sequence [-500,200] to the TSS (start) of the object
TSRs_fl_GR <- promoters(TSRs_GR,upstream=500,downstream=250)
#moving into the appropriate directory
setwd("/DATA/GROUP/rtraborn/tsrchitect_test/at_stability_values")
#importing the PPred. Stability files
#read.table(file="at_Chr1_stb.txt",header=T) -> C1_stb
read.table(file="at_Chr2_stb.txt",header=T) -> C2_stb
#read.table(file="at_Chr3_stb.txt",header=T) -> C3_stb
#read.table(file="at_Chr4_stb.txt",header=T) -> C4_stb
#read.table(file="at_Chr5_stb.txt",header=T) -> C5_stb
#adding a new column to each dataframe
#C1_stb$chr <- "Chr1"
C2_stb$chr <- "Chr2"
#C3_stb$chr <- "Chr3"
#C4_stb$chr <- "Chr4"
#C5_stb$chr <- "Chr5"
#joining all five dataframes
#at_stb_df <- rbind(C1_stb,C2_stb,C3_stb,C4_stb,C5_stb)
#at_stb_df <- C1_stb #so far this is just for chromosome 1; need to remove this line once the appropriate unit testing has been completed
#at_stb_df
at_stb_df <- C2_stb
print(dim(at_stb_df))
#creating another GRanges object from the stability data
at_stb_GR <- GRanges(seqnames=at_stb_df$chr,ranges=IRanges(start=at_stb_df$coord,end=at_stb_df$coord),score=at_stb_df$stb_value)
#checking to see what the new file looks like
at_stb_GR
#extracting the overlaps
data_OL <- findOverlaps(TSRs_fl_GR,at_stb_GR,type="any")
#checking the new GRanges object
data_OL
#converting to a GRanges object
OL_df <- as.data.frame(data_OL)
print(head(OL_df))
#how many TSRs are there in the At_PEAT_TSRs file?
nStart <- min(OL_df$queryHits)
nTSRs <- max(OL_df$queryHits)
print(nTSRs) #for debugging
#creating a matrix that will contain all of the stability data
stb_matrix <- matrix(NA,nrow=length(nStart:nTSRs),ncol=750) #need to double-check / unit test this part (3/18)
#a loop that iterates over OL_df to retrieve the stability scores from the (giant) stb datafram ## also need to test this (3/18)
for (i in 1:length(nStart:nTSRs)){
unique(OL_df$queryHits) -> unique_vector
unique_vector[i] -> thisTSR
which(OL_df[,1]==thisTSR) -> q_index
at_stb_df[q_index,2] -> stb_vector
stb_vector -> stb_matrix[i,]
}
#checking to see what stb_matrix looks like
print(head(stb_matrix))
write.table(stb_matrix,file="stb_matrix_sample.txt",col.names=FALSE,row.names=FALSE)
#creating a vector with the average stablity value created at each position (from -500 to +250)
stb_mean_vec <- apply(stb_matrix,2,FUN=mean)
coord_vec <- -500:250
coord_vec <- coord_vec[-501] #removing the 'zero'
#final data frame
stb_ave_df <- data.frame(coord=coord_vec,stb=stb_mean_vec)
#writing it to a file
write.table(stb_ave_df,file="stb_averages_TSRs",quote=F,row.names=FALSE,col.names=TRUE)
#graphing the output from figure_5_workflow.R
stb_averages <- stb_ave_df
#writing the file in my data home directory
setwd("/DATA/GROUP/rtraborn/manuscripts/figure_repos/tsrchitect_figures/Figure_5/")
#naming the file and setting up the plotting device
png(file="figure_5_stb_plot.png",height=700,width=960)
plot.new()
plot(stb_averages$coord,stb_averages$stb,col="darkblue",type="l",xlab="Genomic position relative to the TSS",ylab="Average DNA stablity value (free energy)")
dev.off()
print("Figure 5 Complete!")
#converts all 5 TAIR ChrN_stb files into separate readable genome_coordinate files
source(file="stb_convert.R")
setwd("/DATA/GROUP/rtraborn/tsrchitect_test/at_stability_values")
print("Importing PPred Stb Files")
read.table(file="TAIR10_chr1_stb.txt",header=TRUE,skip=3,row.names=1) -> Chr1_stb
read.table(file="TAIR10_chr2_stb.txt",header=TRUE,skip=3,row.names=1) -> Chr2_stb
read.table(file="TAIR10_chr3_stb.txt",header=TRUE,skip=3,row.names=1) -> Chr3_stb
read.table(file="TAIR10_chr4_stb.txt",header=TRUE,skip=3,row.names=1) -> Chr4_stb
read.table(file="TAIR10_chr5_stb.txt",header=TRUE,skip=3,row.names=1) -> Chr5_stb
print("Import Complete")
print("Coverting Chromosome 1")
stb_convert(Chr1_stb) -> Chr1_out
as.data.frame(Chr1_out) -> Chr1_out
c("coord","stb_value") -> names(Chr1_out)
print("Converting Chromosome 2")
stb_convert(Chr2_stb) -> Chr2_out
as.data.frame(Chr2_out) -> Chr2_out
c("coord","stb_value") -> names(Chr2_out)
print("Converting Chromosome 3")
stb_convert(Chr3_stb) -> Chr3_out
as.data.frame(Chr3_out) -> Chr3_out
c("coord","stb_value") -> names(Chr3_out)
print("Converting Chromosome 4")
stb_convert(Chr4_stb) -> Chr4_out
as.data.frame(Chr4_out) -> Chr4_out
c("coord","stb_value") -> names(Chr4_out)
print("Converting Chromosome 5")
stb_convert(Chr5_stb) -> Chr5_out
as.data.frame(Chr5_out) -> Chr5_out
c("coord","stb_value") -> names(Chr5_out)
print("Conversion complete")
print("Writing output tables")
write.table(Chr1_out,file="at_Chr1_stb.txt",quote=F,sep="\t",col.names=T,row.names=T)
write.table(Chr2_out,file="at_Chr2_stb.txt",quote=F,sep="\t",col.names=T,row.names=T)
write.table(Chr3_out,file="at_Chr3_stb.txt",quote=F,sep="\t",col.names=T,row.names=T)
write.table(Chr4_out,file="at_Chr4_stb.txt",quote=F,sep="\t",col.names=T,row.names=T)
write.table(Chr5_out,file="at_Chr5_stb.txt",quote=F,sep="\t",col.names=T,row.names=T)
print("Job Complete!")
#converts Prom Predict stb output files into a single table with nucleotide coordinates and stability values
#written on 1/23/15
stb_convert <- function(stb_file){
nRows <- nrow(stb_file)
#print(nRows)
Row_coord <- as.numeric(rownames(stb_file))
stb_length <- (nRows*986)
print(stb_length)
stb_array <- array(NA,c(stb_length,2))
#print(dim(stb_array))
stb_values <- as.numeric(stb_file[1,])
stb_array[1:986,2] <- stb_values
#print(head(stb_array))
coord_labels <- 7:(stb_length+6)
#print(length(coord_labels))
stb_array[,1] <- coord_labels
for (i in 2:nRows) {
as.numeric(Row_coord[i]-14) -> start_pos
as.numeric(start_pos+985) -> end_pos
as.numeric(stb_file[i,]) -> stb_values
#print(length(stb_values))
#print(start_pos)
#print(end_pos)
#print(length(start_pos:end_pos))
stb_values -> stb_array[start_pos:end_pos,2]
}
which(is.na(stb_array[,2])) -> out_index
stb_array <- stb_array[-out_index,]
return(stb_array)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment