##These workflows analyze and visualize the frequency of various motif elements in the proximal promoters (and flanking regions) of Arabidopsis.
Last active
August 29, 2015 14:17
-
-
Save rtraborn/669f01a59797107ed6f7 to your computer and use it in GitHub Desktop.
Frequency of various motif elements in proximal promoters of various plants
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#written on 26 March 2015 | |
#TODO: the sample plot is generated successfully; but the frequency data needs to be 'binned' (ie each 5bp) before plotting to so as to be most clear. | |
#getting the list of .table motif frequency files in the current directory (generated by the shell scripts) | |
tableFiles <- list.files(path=".",pattern="\\.table") | |
#reading the files into four separate data.frames bundled inside a list object using sapply | |
tableList <- sapply(tableFiles,FUN=read.table,simplify=FALSE) | |
#adding names to the data frames (these are known prior to running the script; change if using a different combination than the four described here | |
myNames <- c("At_Inr","At_TATA","At_TC","At_YY") | |
names(tableList) <- myNames | |
binData <- -1000:250 | |
freqData_TATA <- tableList$At_TATA[,2] | |
freqData_Inr <- tableList$At_Inr[,2] | |
freqData_TC <- tableList$At_TC[,2] | |
freqData_YY <- tableList$At_YY[,2] | |
#positionVector <- seq(from=1, to=1250, length.out=250) | |
#for (i in 1:249){ | |
# positionVector[i] -> this_num | |
# positionVector[i+1] -> this_limit | |
# which(binData >= this_num & binData < this_limit) -> my.index | |
# sum(freqData[my.index]) -> thisSum | |
# thisSum -> binArray[i,2] | |
# } | |
#which(binData >= positionVector[250]) -> final.index | |
#sum(freqData[final.index]) -> thisSum | |
#thisSum -> binArray[250,2] | |
png(file="figure_6_motif_freq.png",height=960,width=1200) | |
par(mfrow=c(2,2)) | |
plot(binData,freqData_TATA,type="p",col="blue",xlab="Genomic position relative to the TSS (in bp)",ylab="Frequency of TATA motif",xlim=c(-1000,200)) | |
plot(binData,freqData_Inr,type="p",col="red",xlab="Genomic position relative to the TSS (in bp)",ylab="Frequency of Inr motif",xlim=c(-1000,200)) | |
plot(binData,freqData_TC,type="p",col="darkgreen",xlab="Genomic position relative to the TSS (in bp)",ylab="Frequency of TC motif",xlim=c(-1000,200)) | |
plot(binData,freqData_YY,type="p",col="gold",xlab="Genomic position relative to the TSS (in bp)",ylab="Frequency of Y Patch motif",xlim=c(-1000,200)) | |
dev.off() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#/usr/bin/sh | |
echo "Removing any residual .table files in the working directory" | |
rm *_motif.table | |
echo "Creating a fasta file from TSR intervals" | |
bedtools getfasta -fi /DATA/GROUP/rtraborn/plant_promoters/A_thaliana/genome/TAIR10_all_nuc_chr.fa -bed /DATA/GROUP/rtraborn/tsrchitect_test/atPEAT_TSRs_mid_flank.bed -name -s -fo atPEAT_TSRs_flank.fa | |
echo "Analyzing the frequency of the TC element in flanking sequence elements" | |
fuzznuc -sequence atPEAT_TSRs_flank.fa -pattern TTCTTC -rformat table -rextension .txt -outfile At_TC_motif_prom.out | |
sh fuzznuc_parse_TC.sh | |
echo "Analyzing the frequency of the TATA element in flanking sequence elements" | |
fuzznuc -sequence atPEAT_TSRs_flank.fa -pattern TATAWA -rformat table -rextension .txt -outfile At_TATA_motif_prom.out | |
sh fuzznuc_parse_TATA.sh | |
echo "Analyzing the frequency of the TATA element in flanking sequence elements" | |
fuzznuc -sequence atPEAT_TSRs_flank.fa -pattern YYANWYY -rformat table -rextension .txt -outfile At_Inr_motif_prom.out | |
sh fuzznuc_parse_Inr.sh | |
echo "Analyzing the frequency of the Y-patch element in flanking sequence elements" | |
fuzznuc -sequence atPEAT_TSRs_flank.fa -pattern YYYYYY -rformat table -rextension .txt -outfile At_YY_motif_prom.out | |
sh fuzznuc_parse_YY.sh | |
R CMD BATCH figure_6_workflow.R | |
echo "Job complete!" | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#/usr/bin/sh | |
read -p 'thisVar' | |
echo $thisVar | |
echo "Parsing the fuzznuc table output report file" | |
perl -ne 'if(m/^ +(\d.+) */){ print "$1\n" }' < At_YY_motif_prom.out > this_inter.out | |
for i in {1..1251}; do | |
echo $i | |
countVar="$(egrep -c "^$i[[:space:]]+" this_inter.out)" | |
echo $countVar | |
myTab=$'\t' | |
IFS=$'\n' | |
myLine="$(echo $i $myTab $countVar $IFS)" | |
echo $myLine >> At_YY_motif.table | |
done | |
rm this_inter.out | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#/usr/bin/sh | |
echo "Parsing the fuzznuc table output report file" | |
perl -ne 'if(m/^ +(\d.+) */){ print "$1\n" }' < At_Inr_motif_prom.out > this_inter.out | |
for i in {1..1251}; do | |
countVar="$(egrep -c "^$i[[:space:]]+" this_inter.out)" | |
myTab=$'\t' | |
IFS=$'\n' | |
myLine="$(echo $i $myTab $countVar $IFS)" | |
echo $myLine >> At_Inr_motif.table | |
done | |
rm this_inter.out | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#/usr/bin/sh | |
echo "Parsing the fuzznuc table output report file" | |
perl -ne 'if(m/^ +(\d.+) */){ print "$1\n" }' < At_TATA_motif_prom.out > this_inter.out | |
for i in {1..1251}; do | |
countVar="$(egrep -c "^$i[[:space:]]+" this_inter.out)" | |
myTab=$'\t' | |
IFS=$'\n' | |
myLine="$(echo $i $myTab $countVar $IFS)" | |
echo $myLine >> At_TATA_motif.table | |
done | |
rm this_inter.out | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#/usr/bin/sh | |
echo "Parsing the fuzznuc table output report file" | |
perl -ne 'if(m/^ +(\d.+) */){ print "$1\n" }' < At_TC_motif_prom.out > this_inter.out | |
for i in {1..1251}; do | |
countVar="$(egrep -c "^$i[[:space:]]+" this_inter.out)" | |
myTab=$'\t' | |
IFS=$'\n' | |
myLine="$(echo $i $myTab $countVar $IFS)" | |
echo $myLine >> At_TC_motif.table | |
done | |
rm this_inter.out | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#/usr/bin/sh | |
echo "Parsing the fuzznuc table output report file" | |
perl -ne 'if(m/^ +(\d.+) */){ print "$1\n" }' < At_YY_motif_prom.out > this_inter.out | |
for i in {1..1251}; do | |
countVar="$(egrep -c "^$i[[:space:]]+" this_inter.out)" | |
myTab=$'\t' | |
IFS=$'\n' | |
myLine="$(echo $i $myTab $countVar $IFS)" | |
echo $myLine >> At_YY_motif.table | |
done | |
rm this_inter.out | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment