Skip to content

Instantly share code, notes, and snippets.

@rtraborn
Last active August 29, 2015 14:10
Show Gist options
  • Save rtraborn/0c8b8370ccb906f3df2f to your computer and use it in GitHub Desktop.
Save rtraborn/0c8b8370ccb906f3df2f to your computer and use it in GitHub Desktop.
TSRchitect_Figures
###Code to generate for Figure 1####
#loading depdencies
source("http://bioconductor.org/biocLite.R")
biocLite()
#installing and loading the major package that we'll use
biocLite("Sushi")
library("Sushi")
#setting up the local directory
setwd("/DATA/GROUP/rtraborn/tsrchitect_test/")
#loading the data to be analyzed
load(file="TSS_distribution_figure.RData")
##generating the figure itself##
#file name
png(file="Figure_1_TSS_distribution.png",width=1000,height=1200)
#setting up the layout of the graphical device
m <- rbind(1,1,1,1,1,1,2,3,3,3,3,3)
layout(m)
par(mar=c(1,5,1,1))
#entering the coordinates and chromosome number
chromend <- 100000
chromstart <- 50000 #switched from 75000
chrom <- c("Chr1")
#setting up the local directory
setwd("/home/rtraborn/manuscripts/figure_repos/tsrchitect_figures/Figure_1")
#plotting the TSS on the plus strand
plotBedgraph(At_Peat_bedgraph_plus, chrom,chromstart,100000,addscale=TRUE,color=SushiColors(2)(2)[1],transparency=0.5)
#y-axis
axis(side=2,las=2,tcl=.2)
mtext("Read Depth",side=2,line=2.75,cex=1,font=2)
#legend
legend("topright",inset=0.025,legend=c("Positive Strand","Negative Strand"),fill=opaque(SushiColors(2)(2)),border=SushiColors(2)(2),text.font=2,cex=1.0)
#plotting the gene structure
plotBed(beddata=TAIR10_bed,chrom="chr1",chromstart=chromstart,chromend=chromend,color="darkgreen",row="auto",wiggle=0.05, splitstrand=FALSE, plotbg="lightgrey")
#plotting the TSS on the negative strand (flipped)
plotBedgraph(At_Peat_bedgraph_minus,chrom,chromstart,chromend,addscale=TRUE,color=SushiColors(2)(2)[2],flip=TRUE,transparency=0.5)
#plotting the genomic coordinates
labelgenome(chrom,chromstart,chromend,side=3,n=3,scale="Kb")
#labels for the lower part of the plot
axis(side=2,las=2,tcl=.2,at=pretty(par("yaxp")[c(1,2)]),labels=-1*pretty(par("yaxp")[c(1,2)]))
mtext("Read Depth",side=2,line=2.75,cex=1,font=2)
dev.off()
###Code to generate Figure 1b####
#writeen on 1/25/2015
#loading depdencies
source("http://bioconductor.org/biocLite.R")
#installing and loading the major package that we'll use
#biocLite("Sushi") #uncomment if you have not already installed Sushi
library("Sushi")
#setting up the local directory
setwd("/DATA/GROUP/rtraborn/tsrchitect_test/")
#loading the data to be analyzed
load(file="TSS_distribution_figure.RData")
names(At_Peat_bedgraph_minus) <- c("chr","start","end","score")
#which(At_Peat_bedgraph_minus[,1]=="Chr2") -> Chr2_index
#At_Peat_bedgraph_minus[Chr2_index,] -> At_Peat_bedgraph_minus
#print(head(At_Peat_bedgraph_plus))
#print(tail(At_Peat_bedgraph_minus))
#At_Peat_bedgraph_minus[(At_Peat_bedgraph_minus$start>16568187) & (At_Peat_bedgraph_minus$start<16568951),] -> At_Peat_bedgraph_minus
#sweeping out signal outside the promoter region (a quick fix)
#c("Chr2",0,16568240,0) -> dummy_vector
#rbind(dummy_vector,At_Peat_bedgraph_minus) -> At_Peat_bedgraph_minus2
#c(19698289) -> At_Peat_bedgraph_minus2[137,3]
#c(1:137) -> rownames(At_Peat_bedgraph_minus2)
#print(head(At_Peat_bedgraph_minus2))
#print(tail(At_Peat_bedgraph_minus2))
#print(is(At_Peat_bedgraph_minus2))
At_exons <- read.table(file="TAIR10_exons.bed")
#print(head(At_exons))
##generating the figure itself##
#file name
png(file="Figure_1b_TSR_arch.png",width=1000,height=1200)
#setting up the layout of the graphical device
m <- rbind(1,1,1,1,1,1,2)
layout(m)
par(mar=c(1,5,1,1))
#entering the coordinates and chromosome number
chromstart <- 16570000
chromend <- 16573000
chrom <- c("Chr2")
#setting up the local directory
setwd("/home/rtraborn/manuscripts/figure_repos/tsrchitect_figures/Figure_1")
#plotting the TSS on the negative strand
plotBedgraph(At_Peat_bedgraph_minus,chrom,chromstart,chromend,addscale=TRUE,color=SushiColors(2)(2)[1],transparency=0.5)
#y-axis
axis(side=2,las=2,tcl=.2)
mtext("Read Depth",side=2,line=2.75,cex=1,font=2)
#legend
#legend("topright",inset=0.025,legend=c("Positive Strand","Negative Strand"),fill=opaque(SushiColors(2)(2)),border=SushiColors(2)(2),text.font=2,cex=1.0)
#plotting the gene structure
plotBed(beddata=At_exons,chrom="Chr2",chromstart=chromstart,chromend=chromend,color="darkgreen",row="auto",wiggle=0.05, splitstrand=FALSE, plotbg="lightgrey")
#plotting the TSS on the positive strand
#plotBedgraph(At_Peat_bedgraph_plus,chrom,chromstart,chromend,addscale=TRUE,color=SushiColors(2)(2)[2],flip=TRUE,transparency=0.5)
#plotting the genomic coordinates
labelgenome(chrom,chromstart,chromend,side=3,n=3,scale="Kb")
#labels for the lower part of the plot
#axis(side=2,las=2,tcl=.2,at=pretty(par("yaxp")[c(1,2)]),labels=-1*pretty(par("yaxp")[c(1,2)]))
#mtext("Read Depth",side=2,line=2.75,cex=1,font=2)
dev.off()
###Code to generate Figure 1c####
#written on 2/1/2015
#setting up the local directory
setwd("/DATA/GROUP/rtraborn/tsrchitect_test/")
#loading ggplot
library(ggplot2)
At_PEAT_out <- read.table(file="At_PEAT_output.txt",header=TRUE)
head(At_PEAT_out)
##generating the figure itself
#setting a new working directory
setwd("/home/rtraborn/manuscripts/figure_repos/tsrchitect_figures/Figure_1")
#file name
png(file="Figure_1c_TSR_properties.png",width=1000,height=1200)
m <- ggplot(At_PEAT_out,aes(x=TSR_Breadth))
m + geom_histogram(colour="black",fill="orange2",binwidth=10) + xlim(0,250) + labs(x="Promoter breadth in base pairs (bp)",y="Number of promoters \n (n=5,097")
dev.off() #this will create the figure
###Code to generate Figure 1d####
#This showns a rice gene -- LOC_Os01g36930 -- and its TSS/promoter
#written on 2/3/2015
#loading depdencies
source("http://bioconductor.org/biocLite.R")
#installing and loading the major package that we'll use
#biocLite("Sushi") #uncomment if you have not already installed Sushi
library("Sushi")
#setting up the local directory
setwd("/DATA/GROUP/rtraborn/tsrchitect_test/")
#loading the data to be analyzed
osTSR <- read.table(file="os_example_gene.bed",header=TRUE)
osGenes <- read.table(file="o_sativa_genes.bed",header=FALSE)
##generating the figure itself##
#file name
png(file="Figure_1d_osTSR_arch.png",width=1000,height=1200)
#setting up the layout of the graphical device
# m <- rbind(1,1,1,1,1,1,2)
# layout(m)
# par(mar=c(1,5,1,1))
#entering the coordinates and chromosome number
chromstart <- 20580000
chromend <- 20590000
chrom <- c("Chr1")
#setting up the local directory
setwd("/home/rtraborn/manuscripts/figure_repos/tsrchitect_figures/Figure_1")
#plotting the TSS on the negative strand
plot(hist(osTSR$start),xlim=c(chromstart,chromend),col="blue")
#y-axis
axis(side=2,las=2,tcl=.2)
#mtext("Read Depth",side=2,line=2.75,cex=1,font=2)
#plotting the gene structure
#plotBed(beddata=osGenes,chrom="Chr1",chromstart=chromstart,chromend=chromend,color="darkgreen",row="auto",wiggle=0.05, splitstrand=FALSE, plotbg="lightgrey")
#plotting the genomic coordinates
#labelgenome(chrom,chromstart,chromend,side=3,n=3,scale="Kb")
dev.off()
##need to create bedgraph file from rice EST data. Revisit later.
# Code needed to generate Figure 1e, which shows the fraction of promoters in each class (i.e. TATA, TATA-less)
#loading the dependencies
library('ggplot2')
#setting the working directory as appropriate
setwd("/DATA/GROUP/rtraborn/tsrchitect_test/")
#importing the appropriate data files
atTSR_TATA <- read.table(file="distribution_TSR_TATA_output.txt",header=FALSE)
atTSR_Inr <- read.table(file="distribution_TSR_Inr_output.txt",header=FALSE)
atTSR_Ypatch <- read.table(file="distribution_TSR_YPatch_output.txt",header=FALSE)
#naming the appropriate columns for all three comparison files
names(atTSR_TATA) <- c("chr","start","end","name", "location","strand","midpoint","chr2","start2","end2","name2","score","strand2","distance")
names(atTSR_Inr) <- c("chr","start","end","name", "location","strand","midpoint","chr2","start2","end2","name2","score","strand2","distance")
names(atTSR_Ypatch) <- c("chr","start","end","name", "location","strand","midpoint","chr2","start2","end2","name2","score","strand2","distance")
atTATA_genes_index <- which(atTSR_TATA$distance < 0 & atTSR_TATA$distance > -200)
atTATA_genes <- atTSR_TATA[atTATA_genes_index,]
nrow(atTATA_genes)
nrow(atTSR_TATA)
#atInr_genes_index <- which(atTSR_Inr$distance < 0 & atTSR_Inr$distance > -200)
atTSR_TATA$TATA <- c(rep("TATA-less",nrow(atTSR_TATA)))
atTSR_TATA$TATA[atTATA_genes_index] <- c("TATA")
#atTSR_TATA$Inr <- c("Inr-less")
#atTSR_TATA$Inr[atInr_genes_index] <- c("Inr")
#atTSR_TATA$class[atTATA_genes_index] <- 1
#atTSR_TATA$class <- 0
atTSR_TATA$feature <- c("Promoter")
#changing the working directory
setwd("/home/rtraborn/manuscripts/figure_repos/tsrchitect_figures/Figure_1/")
#naming the figure
png(file="figure_1e_promoter_class.png")
#plotting the figure
qplot(factor(feature),data=atTSR_TATA,fill=factor(TATA))
#ggplot(atTSR_TATA,aes(x=feature,y=TATA,width=20)) + geom_bar()
#this will create the figure in the new working directory
dev.off()
##creates a pileup heatmap of motifs observed in promoters
## Created on 13 March 2015
#Dependencies (uncomment when necessary)
#install.packages( c("data.table","plyr","reshape2","ggplot2","gridBase","devtools"))
#source("http://bioconductor.org/biocLite.R")
#biocLite(c("GenomicRanges","rtracklayer","impute","Rsamtools"))
#install the packages
library(devtools)
library(impute)
library(GenomicRanges)
#install the data package to be able to run examples in the vignette
#install_github("frenkiboy/genomationData",build_vignettes=FALSE)
library(genomation)
# create GRanges objects for TSR and TATA elements, respectively
#set working directory:
setwd("/DATA/GROUP/rtraborn/tsrchitect_test")
TATA_bed <- read.table(file="TATA_1k_window.bed",header=FALSE)
TATA.ind <- which(TATA_bed[,7] >= -250 & TATA_bed[,7]<=50)
total_index <- 1:nrow(TATA_bed)
no.TATA <- total_index[-TATA.ind]
print(head(no.TATA))
print(length(no.TATA))
names(TATA_bed) <- c("chr","start", "end","id","score","strand","distance")
#importing the PEAT .bam file
bam.file <- system.file('/extdata//peat.sorted.bam', package='genomation')
#importing the TSRs intervals (vs 1k windows) file
TSRs_intervals <- read.table(file="atPEAT_TSRs.bed",header=FALSE)
#naming the columns from the above BED file
names(TSRs_intervals) <- c("chr","start","end","id","Num_TSSs","strand","midpoint")
#importing the putative promoter annotations that were made by TSRchitect
At_PEAT_TSRs <- read.table(file="atPEAT_TSRs_mid_flank2.bed",header=FALSE)
#naming the columns from the above BED file
names(At_PEAT_TSRs) <- c("chr","start","end","id","Num_TSSs","strand","midpoint")
#creating a GenomicRanges object from the At_PEAT_TSRs bed file
TSRs_inv <- with(TSRs_intervals, GRanges(chr, IRanges(start, end), strand, midpoint, id=id))
TSRs_inv
TSR_breadths <- width(TSRs_inv)
total_index <- 1:length(TSR_breadths)
TSR_peaked <- which(TSR_breadths < 50)
TSR_broad <- total_index[-TSR_peaked]
#creating a GenomicRanges object from the At_PEAT_TSRs bed file
TSRs_bed <- with(At_PEAT_TSRs, GRanges(chr, IRanges(start, end), strand, midpoint, id=id))
promoters <- TSRs_bed
#creating a score matrix
sm <- ScoreMatrix(target=bam.file,window=promoters, type = "bam",strand.aware=TRUE)
sm.scaled = scaleScoreMatrix(sm)
#visualizing the overlaps using a heat map
png(file="PEAT_pileup_heatmap.png",height=960,width=960)
heatMatrix(sm.scaled,xcoords=c(-1000,1000),group=list(Peaked=TSR_peaked,Broad=TSR_broad),order=TRUE,col = c("lightgray","blue"))
dev.off()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment