Last active
August 29, 2015 14:10
-
-
Save rtraborn/0c8b8370ccb906f3df2f to your computer and use it in GitHub Desktop.
TSRchitect_Figures
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
###Code to generate for Figure 1#### | |
#loading depdencies | |
source("http://bioconductor.org/biocLite.R") | |
biocLite() | |
#installing and loading the major package that we'll use | |
biocLite("Sushi") | |
library("Sushi") | |
#setting up the local directory | |
setwd("/DATA/GROUP/rtraborn/tsrchitect_test/") | |
#loading the data to be analyzed | |
load(file="TSS_distribution_figure.RData") | |
##generating the figure itself## | |
#file name | |
png(file="Figure_1_TSS_distribution.png",width=1000,height=1200) | |
#setting up the layout of the graphical device | |
m <- rbind(1,1,1,1,1,1,2,3,3,3,3,3) | |
layout(m) | |
par(mar=c(1,5,1,1)) | |
#entering the coordinates and chromosome number | |
chromend <- 100000 | |
chromstart <- 50000 #switched from 75000 | |
chrom <- c("Chr1") | |
#setting up the local directory | |
setwd("/home/rtraborn/manuscripts/figure_repos/tsrchitect_figures/Figure_1") | |
#plotting the TSS on the plus strand | |
plotBedgraph(At_Peat_bedgraph_plus, chrom,chromstart,100000,addscale=TRUE,color=SushiColors(2)(2)[1],transparency=0.5) | |
#y-axis | |
axis(side=2,las=2,tcl=.2) | |
mtext("Read Depth",side=2,line=2.75,cex=1,font=2) | |
#legend | |
legend("topright",inset=0.025,legend=c("Positive Strand","Negative Strand"),fill=opaque(SushiColors(2)(2)),border=SushiColors(2)(2),text.font=2,cex=1.0) | |
#plotting the gene structure | |
plotBed(beddata=TAIR10_bed,chrom="chr1",chromstart=chromstart,chromend=chromend,color="darkgreen",row="auto",wiggle=0.05, splitstrand=FALSE, plotbg="lightgrey") | |
#plotting the TSS on the negative strand (flipped) | |
plotBedgraph(At_Peat_bedgraph_minus,chrom,chromstart,chromend,addscale=TRUE,color=SushiColors(2)(2)[2],flip=TRUE,transparency=0.5) | |
#plotting the genomic coordinates | |
labelgenome(chrom,chromstart,chromend,side=3,n=3,scale="Kb") | |
#labels for the lower part of the plot | |
axis(side=2,las=2,tcl=.2,at=pretty(par("yaxp")[c(1,2)]),labels=-1*pretty(par("yaxp")[c(1,2)])) | |
mtext("Read Depth",side=2,line=2.75,cex=1,font=2) | |
dev.off() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
###Code to generate Figure 1b#### | |
#writeen on 1/25/2015 | |
#loading depdencies | |
source("http://bioconductor.org/biocLite.R") | |
#installing and loading the major package that we'll use | |
#biocLite("Sushi") #uncomment if you have not already installed Sushi | |
library("Sushi") | |
#setting up the local directory | |
setwd("/DATA/GROUP/rtraborn/tsrchitect_test/") | |
#loading the data to be analyzed | |
load(file="TSS_distribution_figure.RData") | |
names(At_Peat_bedgraph_minus) <- c("chr","start","end","score") | |
#which(At_Peat_bedgraph_minus[,1]=="Chr2") -> Chr2_index | |
#At_Peat_bedgraph_minus[Chr2_index,] -> At_Peat_bedgraph_minus | |
#print(head(At_Peat_bedgraph_plus)) | |
#print(tail(At_Peat_bedgraph_minus)) | |
#At_Peat_bedgraph_minus[(At_Peat_bedgraph_minus$start>16568187) & (At_Peat_bedgraph_minus$start<16568951),] -> At_Peat_bedgraph_minus | |
#sweeping out signal outside the promoter region (a quick fix) | |
#c("Chr2",0,16568240,0) -> dummy_vector | |
#rbind(dummy_vector,At_Peat_bedgraph_minus) -> At_Peat_bedgraph_minus2 | |
#c(19698289) -> At_Peat_bedgraph_minus2[137,3] | |
#c(1:137) -> rownames(At_Peat_bedgraph_minus2) | |
#print(head(At_Peat_bedgraph_minus2)) | |
#print(tail(At_Peat_bedgraph_minus2)) | |
#print(is(At_Peat_bedgraph_minus2)) | |
At_exons <- read.table(file="TAIR10_exons.bed") | |
#print(head(At_exons)) | |
##generating the figure itself## | |
#file name | |
png(file="Figure_1b_TSR_arch.png",width=1000,height=1200) | |
#setting up the layout of the graphical device | |
m <- rbind(1,1,1,1,1,1,2) | |
layout(m) | |
par(mar=c(1,5,1,1)) | |
#entering the coordinates and chromosome number | |
chromstart <- 16570000 | |
chromend <- 16573000 | |
chrom <- c("Chr2") | |
#setting up the local directory | |
setwd("/home/rtraborn/manuscripts/figure_repos/tsrchitect_figures/Figure_1") | |
#plotting the TSS on the negative strand | |
plotBedgraph(At_Peat_bedgraph_minus,chrom,chromstart,chromend,addscale=TRUE,color=SushiColors(2)(2)[1],transparency=0.5) | |
#y-axis | |
axis(side=2,las=2,tcl=.2) | |
mtext("Read Depth",side=2,line=2.75,cex=1,font=2) | |
#legend | |
#legend("topright",inset=0.025,legend=c("Positive Strand","Negative Strand"),fill=opaque(SushiColors(2)(2)),border=SushiColors(2)(2),text.font=2,cex=1.0) | |
#plotting the gene structure | |
plotBed(beddata=At_exons,chrom="Chr2",chromstart=chromstart,chromend=chromend,color="darkgreen",row="auto",wiggle=0.05, splitstrand=FALSE, plotbg="lightgrey") | |
#plotting the TSS on the positive strand | |
#plotBedgraph(At_Peat_bedgraph_plus,chrom,chromstart,chromend,addscale=TRUE,color=SushiColors(2)(2)[2],flip=TRUE,transparency=0.5) | |
#plotting the genomic coordinates | |
labelgenome(chrom,chromstart,chromend,side=3,n=3,scale="Kb") | |
#labels for the lower part of the plot | |
#axis(side=2,las=2,tcl=.2,at=pretty(par("yaxp")[c(1,2)]),labels=-1*pretty(par("yaxp")[c(1,2)])) | |
#mtext("Read Depth",side=2,line=2.75,cex=1,font=2) | |
dev.off() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
###Code to generate Figure 1c#### | |
#written on 2/1/2015 | |
#setting up the local directory | |
setwd("/DATA/GROUP/rtraborn/tsrchitect_test/") | |
#loading ggplot | |
library(ggplot2) | |
At_PEAT_out <- read.table(file="At_PEAT_output.txt",header=TRUE) | |
head(At_PEAT_out) | |
##generating the figure itself | |
#setting a new working directory | |
setwd("/home/rtraborn/manuscripts/figure_repos/tsrchitect_figures/Figure_1") | |
#file name | |
png(file="Figure_1c_TSR_properties.png",width=1000,height=1200) | |
m <- ggplot(At_PEAT_out,aes(x=TSR_Breadth)) | |
m + geom_histogram(colour="black",fill="orange2",binwidth=10) + xlim(0,250) + labs(x="Promoter breadth in base pairs (bp)",y="Number of promoters \n (n=5,097") | |
dev.off() #this will create the figure | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
###Code to generate Figure 1d#### | |
#This showns a rice gene -- LOC_Os01g36930 -- and its TSS/promoter | |
#written on 2/3/2015 | |
#loading depdencies | |
source("http://bioconductor.org/biocLite.R") | |
#installing and loading the major package that we'll use | |
#biocLite("Sushi") #uncomment if you have not already installed Sushi | |
library("Sushi") | |
#setting up the local directory | |
setwd("/DATA/GROUP/rtraborn/tsrchitect_test/") | |
#loading the data to be analyzed | |
osTSR <- read.table(file="os_example_gene.bed",header=TRUE) | |
osGenes <- read.table(file="o_sativa_genes.bed",header=FALSE) | |
##generating the figure itself## | |
#file name | |
png(file="Figure_1d_osTSR_arch.png",width=1000,height=1200) | |
#setting up the layout of the graphical device | |
# m <- rbind(1,1,1,1,1,1,2) | |
# layout(m) | |
# par(mar=c(1,5,1,1)) | |
#entering the coordinates and chromosome number | |
chromstart <- 20580000 | |
chromend <- 20590000 | |
chrom <- c("Chr1") | |
#setting up the local directory | |
setwd("/home/rtraborn/manuscripts/figure_repos/tsrchitect_figures/Figure_1") | |
#plotting the TSS on the negative strand | |
plot(hist(osTSR$start),xlim=c(chromstart,chromend),col="blue") | |
#y-axis | |
axis(side=2,las=2,tcl=.2) | |
#mtext("Read Depth",side=2,line=2.75,cex=1,font=2) | |
#plotting the gene structure | |
#plotBed(beddata=osGenes,chrom="Chr1",chromstart=chromstart,chromend=chromend,color="darkgreen",row="auto",wiggle=0.05, splitstrand=FALSE, plotbg="lightgrey") | |
#plotting the genomic coordinates | |
#labelgenome(chrom,chromstart,chromend,side=3,n=3,scale="Kb") | |
dev.off() | |
##need to create bedgraph file from rice EST data. Revisit later. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Code needed to generate Figure 1e, which shows the fraction of promoters in each class (i.e. TATA, TATA-less) | |
#loading the dependencies | |
library('ggplot2') | |
#setting the working directory as appropriate | |
setwd("/DATA/GROUP/rtraborn/tsrchitect_test/") | |
#importing the appropriate data files | |
atTSR_TATA <- read.table(file="distribution_TSR_TATA_output.txt",header=FALSE) | |
atTSR_Inr <- read.table(file="distribution_TSR_Inr_output.txt",header=FALSE) | |
atTSR_Ypatch <- read.table(file="distribution_TSR_YPatch_output.txt",header=FALSE) | |
#naming the appropriate columns for all three comparison files | |
names(atTSR_TATA) <- c("chr","start","end","name", "location","strand","midpoint","chr2","start2","end2","name2","score","strand2","distance") | |
names(atTSR_Inr) <- c("chr","start","end","name", "location","strand","midpoint","chr2","start2","end2","name2","score","strand2","distance") | |
names(atTSR_Ypatch) <- c("chr","start","end","name", "location","strand","midpoint","chr2","start2","end2","name2","score","strand2","distance") | |
atTATA_genes_index <- which(atTSR_TATA$distance < 0 & atTSR_TATA$distance > -200) | |
atTATA_genes <- atTSR_TATA[atTATA_genes_index,] | |
nrow(atTATA_genes) | |
nrow(atTSR_TATA) | |
#atInr_genes_index <- which(atTSR_Inr$distance < 0 & atTSR_Inr$distance > -200) | |
atTSR_TATA$TATA <- c(rep("TATA-less",nrow(atTSR_TATA))) | |
atTSR_TATA$TATA[atTATA_genes_index] <- c("TATA") | |
#atTSR_TATA$Inr <- c("Inr-less") | |
#atTSR_TATA$Inr[atInr_genes_index] <- c("Inr") | |
#atTSR_TATA$class[atTATA_genes_index] <- 1 | |
#atTSR_TATA$class <- 0 | |
atTSR_TATA$feature <- c("Promoter") | |
#changing the working directory | |
setwd("/home/rtraborn/manuscripts/figure_repos/tsrchitect_figures/Figure_1/") | |
#naming the figure | |
png(file="figure_1e_promoter_class.png") | |
#plotting the figure | |
qplot(factor(feature),data=atTSR_TATA,fill=factor(TATA)) | |
#ggplot(atTSR_TATA,aes(x=feature,y=TATA,width=20)) + geom_bar() | |
#this will create the figure in the new working directory | |
dev.off() | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
##creates a pileup heatmap of motifs observed in promoters | |
## Created on 13 March 2015 | |
#Dependencies (uncomment when necessary) | |
#install.packages( c("data.table","plyr","reshape2","ggplot2","gridBase","devtools")) | |
#source("http://bioconductor.org/biocLite.R") | |
#biocLite(c("GenomicRanges","rtracklayer","impute","Rsamtools")) | |
#install the packages | |
library(devtools) | |
library(impute) | |
library(GenomicRanges) | |
#install the data package to be able to run examples in the vignette | |
#install_github("frenkiboy/genomationData",build_vignettes=FALSE) | |
library(genomation) | |
# create GRanges objects for TSR and TATA elements, respectively | |
#set working directory: | |
setwd("/DATA/GROUP/rtraborn/tsrchitect_test") | |
TATA_bed <- read.table(file="TATA_1k_window.bed",header=FALSE) | |
TATA.ind <- which(TATA_bed[,7] >= -250 & TATA_bed[,7]<=50) | |
total_index <- 1:nrow(TATA_bed) | |
no.TATA <- total_index[-TATA.ind] | |
print(head(no.TATA)) | |
print(length(no.TATA)) | |
names(TATA_bed) <- c("chr","start", "end","id","score","strand","distance") | |
#importing the PEAT .bam file | |
bam.file <- system.file('/extdata//peat.sorted.bam', package='genomation') | |
#importing the TSRs intervals (vs 1k windows) file | |
TSRs_intervals <- read.table(file="atPEAT_TSRs.bed",header=FALSE) | |
#naming the columns from the above BED file | |
names(TSRs_intervals) <- c("chr","start","end","id","Num_TSSs","strand","midpoint") | |
#importing the putative promoter annotations that were made by TSRchitect | |
At_PEAT_TSRs <- read.table(file="atPEAT_TSRs_mid_flank2.bed",header=FALSE) | |
#naming the columns from the above BED file | |
names(At_PEAT_TSRs) <- c("chr","start","end","id","Num_TSSs","strand","midpoint") | |
#creating a GenomicRanges object from the At_PEAT_TSRs bed file | |
TSRs_inv <- with(TSRs_intervals, GRanges(chr, IRanges(start, end), strand, midpoint, id=id)) | |
TSRs_inv | |
TSR_breadths <- width(TSRs_inv) | |
total_index <- 1:length(TSR_breadths) | |
TSR_peaked <- which(TSR_breadths < 50) | |
TSR_broad <- total_index[-TSR_peaked] | |
#creating a GenomicRanges object from the At_PEAT_TSRs bed file | |
TSRs_bed <- with(At_PEAT_TSRs, GRanges(chr, IRanges(start, end), strand, midpoint, id=id)) | |
promoters <- TSRs_bed | |
#creating a score matrix | |
sm <- ScoreMatrix(target=bam.file,window=promoters, type = "bam",strand.aware=TRUE) | |
sm.scaled = scaleScoreMatrix(sm) | |
#visualizing the overlaps using a heat map | |
png(file="PEAT_pileup_heatmap.png",height=960,width=960) | |
heatMatrix(sm.scaled,xcoords=c(-1000,1000),group=list(Peaked=TSR_peaked,Broad=TSR_broad),order=TRUE,col = c("lightgray","blue")) | |
dev.off() | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment