Skip to content

Instantly share code, notes, and snippets.

@rtraborn
Last active August 29, 2015 14:17
Show Gist options
  • Save rtraborn/669f01a59797107ed6f7 to your computer and use it in GitHub Desktop.
Save rtraborn/669f01a59797107ed6f7 to your computer and use it in GitHub Desktop.
Frequency of various motif elements in proximal promoters of various plants

##These workflows analyze and visualize the frequency of various motif elements in the proximal promoters (and flanking regions) of Arabidopsis.

#written on 26 March 2015
#TODO: the sample plot is generated successfully; but the frequency data needs to be 'binned' (ie each 5bp) before plotting to so as to be most clear.
#getting the list of .table motif frequency files in the current directory (generated by the shell scripts)
tableFiles <- list.files(path=".",pattern="\\.table")
#reading the files into four separate data.frames bundled inside a list object using sapply
tableList <- sapply(tableFiles,FUN=read.table,simplify=FALSE)
#adding names to the data frames (these are known prior to running the script; change if using a different combination than the four described here
myNames <- c("At_Inr","At_TATA","At_TC","At_YY")
names(tableList) <- myNames
binData <- -1000:250
freqData_TATA <- tableList$At_TATA[,2]
freqData_Inr <- tableList$At_Inr[,2]
freqData_TC <- tableList$At_TC[,2]
freqData_YY <- tableList$At_YY[,2]
#positionVector <- seq(from=1, to=1250, length.out=250)
#for (i in 1:249){
# positionVector[i] -> this_num
# positionVector[i+1] -> this_limit
# which(binData >= this_num & binData < this_limit) -> my.index
# sum(freqData[my.index]) -> thisSum
# thisSum -> binArray[i,2]
# }
#which(binData >= positionVector[250]) -> final.index
#sum(freqData[final.index]) -> thisSum
#thisSum -> binArray[250,2]
png(file="figure_6_motif_freq.png",height=960,width=1200)
par(mfrow=c(2,2))
plot(binData,freqData_TATA,type="p",col="blue",xlab="Genomic position relative to the TSS (in bp)",ylab="Frequency of TATA motif",xlim=c(-1000,200))
plot(binData,freqData_Inr,type="p",col="red",xlab="Genomic position relative to the TSS (in bp)",ylab="Frequency of Inr motif",xlim=c(-1000,200))
plot(binData,freqData_TC,type="p",col="darkgreen",xlab="Genomic position relative to the TSS (in bp)",ylab="Frequency of TC motif",xlim=c(-1000,200))
plot(binData,freqData_YY,type="p",col="gold",xlab="Genomic position relative to the TSS (in bp)",ylab="Frequency of Y Patch motif",xlim=c(-1000,200))
dev.off()
#/usr/bin/sh
echo "Removing any residual .table files in the working directory"
rm *_motif.table
echo "Creating a fasta file from TSR intervals"
bedtools getfasta -fi /DATA/GROUP/rtraborn/plant_promoters/A_thaliana/genome/TAIR10_all_nuc_chr.fa -bed /DATA/GROUP/rtraborn/tsrchitect_test/atPEAT_TSRs_mid_flank.bed -name -s -fo atPEAT_TSRs_flank.fa
echo "Analyzing the frequency of the TC element in flanking sequence elements"
fuzznuc -sequence atPEAT_TSRs_flank.fa -pattern TTCTTC -rformat table -rextension .txt -outfile At_TC_motif_prom.out
sh fuzznuc_parse_TC.sh
echo "Analyzing the frequency of the TATA element in flanking sequence elements"
fuzznuc -sequence atPEAT_TSRs_flank.fa -pattern TATAWA -rformat table -rextension .txt -outfile At_TATA_motif_prom.out
sh fuzznuc_parse_TATA.sh
echo "Analyzing the frequency of the TATA element in flanking sequence elements"
fuzznuc -sequence atPEAT_TSRs_flank.fa -pattern YYANWYY -rformat table -rextension .txt -outfile At_Inr_motif_prom.out
sh fuzznuc_parse_Inr.sh
echo "Analyzing the frequency of the Y-patch element in flanking sequence elements"
fuzznuc -sequence atPEAT_TSRs_flank.fa -pattern YYYYYY -rformat table -rextension .txt -outfile At_YY_motif_prom.out
sh fuzznuc_parse_YY.sh
R CMD BATCH figure_6_workflow.R
echo "Job complete!"
#/usr/bin/sh
read -p 'thisVar'
echo $thisVar
echo "Parsing the fuzznuc table output report file"
perl -ne 'if(m/^ +(\d.+) */){ print "$1\n" }' < At_YY_motif_prom.out > this_inter.out
for i in {1..1251}; do
echo $i
countVar="$(egrep -c "^$i[[:space:]]+" this_inter.out)"
echo $countVar
myTab=$'\t'
IFS=$'\n'
myLine="$(echo $i $myTab $countVar $IFS)"
echo $myLine >> At_YY_motif.table
done
rm this_inter.out
#/usr/bin/sh
echo "Parsing the fuzznuc table output report file"
perl -ne 'if(m/^ +(\d.+) */){ print "$1\n" }' < At_Inr_motif_prom.out > this_inter.out
for i in {1..1251}; do
countVar="$(egrep -c "^$i[[:space:]]+" this_inter.out)"
myTab=$'\t'
IFS=$'\n'
myLine="$(echo $i $myTab $countVar $IFS)"
echo $myLine >> At_Inr_motif.table
done
rm this_inter.out
#/usr/bin/sh
echo "Parsing the fuzznuc table output report file"
perl -ne 'if(m/^ +(\d.+) */){ print "$1\n" }' < At_TATA_motif_prom.out > this_inter.out
for i in {1..1251}; do
countVar="$(egrep -c "^$i[[:space:]]+" this_inter.out)"
myTab=$'\t'
IFS=$'\n'
myLine="$(echo $i $myTab $countVar $IFS)"
echo $myLine >> At_TATA_motif.table
done
rm this_inter.out
#/usr/bin/sh
echo "Parsing the fuzznuc table output report file"
perl -ne 'if(m/^ +(\d.+) */){ print "$1\n" }' < At_TC_motif_prom.out > this_inter.out
for i in {1..1251}; do
countVar="$(egrep -c "^$i[[:space:]]+" this_inter.out)"
myTab=$'\t'
IFS=$'\n'
myLine="$(echo $i $myTab $countVar $IFS)"
echo $myLine >> At_TC_motif.table
done
rm this_inter.out
#/usr/bin/sh
echo "Parsing the fuzznuc table output report file"
perl -ne 'if(m/^ +(\d.+) */){ print "$1\n" }' < At_YY_motif_prom.out > this_inter.out
for i in {1..1251}; do
countVar="$(egrep -c "^$i[[:space:]]+" this_inter.out)"
myTab=$'\t'
IFS=$'\n'
myLine="$(echo $i $myTab $countVar $IFS)"
echo $myLine >> At_YY_motif.table
done
rm this_inter.out
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment