AndrewVos/TextAnalyzer-Ref

## TextAnalyzer-Ref
#!/usr/bin/ruby
# Text Analyzer
# Author: Zach
# Purpose: Utilize Ruby to analyze Text files
# and generate statistical information therein.

require 'cgi'

# Words ignored by non_stopwords and most_common_words; each line of
# conf/stop_words.txt becomes one entry. File.readlines is used because
# File.read returns a single String, which has no #map on Ruby 1.9 and later.
STOPWORDS = File.readlines('conf/stop_words.txt').map { |line| line.chomp }.freeze

# Number of characters in +text+ (whitespace included), as an Integer.
def count_characters(text)
  text.size
end

# Number of characters in +text+ once every run of whitespace is removed.
def count_characters_less_spaces(text)
  without_whitespace = text.gsub(/\s+/, '')
  count_characters(without_whitespace)
end

# Number of whitespace-separated words in +text+.
#
# String#split with no argument splits on runs of whitespace and ignores
# leading and trailing whitespace, so an empty or blank text has 0 words.
def count_words(text)
  text.split.length
end

# Number of sentences in +text+.
#
# A sentence is a run of text ended by one or more of ".", "?" or "!".
# Fragments that are empty or whitespace-only (for example the newline after
# the final full stop, or the gaps inside "...") are not counted.
def count_sentences(text)
  text.split(/[.?!]+/).reject { |fragment| fragment.strip.empty? }.length
end

# Number of paragraphs in +text+.
#
# Paragraphs are separated by one or more blank lines. Both "\n" and "\r\n"
# line endings are accepted, a line holding only spaces or tabs counts as
# blank, and empty pieces (leading or repeated blank lines) are not counted.
def count_paragraphs(text)
  text.split(/(?:\r?\n[ \t]*){2,}/).reject { |paragraph| paragraph.strip.empty? }.length
end

# Number of pieces left after splitting +text+ on "\n". String#split drops
# trailing empty pieces, so blank lines at the very end are not counted.
def count_lines(text)
  pieces = text.split("\n")
  pieces.size
end

# Words of +text+ (split on whitespace) that are not listed in STOPWORDS,
# in their original order and with duplicates kept.
#
# String#split with no argument is used so that leading whitespace does not
# yield an empty "" word, which split(/\s+/) would return.
def non_stopwords(text)
  text.split.reject { |word| STOPWORDS.include?(word) }
end

# The most frequent words of +text+ that are not in STOPWORDS, most frequent
# first, joined with "-- " into a single String.
#
# text  - String to analyse; words are separated by whitespace.
# limit - maximum number of words to include (defaults to 10).
#
# String#split with no argument is used so that leading whitespace does not
# add an empty "" entry to the ranking.
def most_common_words(text, limit = 10)
  words = text.split - STOPWORDS
  ranked = words.group_by { |word| word }.sort_by { |_word, hits| -hits.length }
  ranked.first(limit).map(&:first).join('-- ')
end

# Picks candidate "ideal" sentences from +sentences+ (an Array of Strings).
#
# The sentences are ordered by length; starting one seventh of the way into
# that ordering, a window of (one seventh + 1) sentences is taken. Of those,
# only sentences containing whitespace followed by "is" or "are" and then a
# non-word character are kept. Returns an Array.
def ideal_sentences(sentences)
  by_length = sentences.sort_by(&:length)
  seventh = by_length.length / 7
  window = by_length[seventh, seventh + 1]
  window.grep(/\sis\W|\sare\W/)
end

# Collects the :text entry of every stats Hash and returns them as an Array
# stored under the :names key of a new Hash.
def file_names(stats)
  names = stats.map { |entry| entry[:text] }
  { :names => names }
end

# Integer quotient shown in a report, or 'n/a' when it cannot be computed
# (either value is not a number, or the divisor is zero).
def report_quotient(numerator, denominator)
  return 'n/a' unless numerator.is_a?(Numeric) && denominator.is_a?(Numeric)
  return 'n/a' if denominator.zero?

  numerator / denominator
end

# Builds one plain-text report per entry of +stats+.
#
# stats - Array of Hashes; the keys read from each Hash are:
#         :text   - name shown in the report title
#         :chars  - character count
#         :chars2 - character count less whitespace
#         :wrd_c  - word count (printed as n/a when absent)
#         :sent   - sentence count
#         :para   - paragraph count
#         :kword  - Array of non-stop-words
#         :bsent  - Array of "ideal" sentences
#         :words  - most common words, already joined into a String
#
# Returns an Array of report Strings in the same order as +stats+.
def build_reports(stats)
  stats.map do |stat|
    word_count      = stat[:wrd_c]
    sentence_count  = stat[:sent]
    paragraph_count = stat[:para]
    keywords        = stat[:kword]
    # Share of the words that survived stop-word filtering, as a whole percent.
    percent_good_words = report_quotient(keywords && keywords.length * 100, word_count)

    <<-REPORT
    #{stat[:text]} Inaugural Speech - Analysis Results
    Total number of characters is: #{stat[:chars]}.
    Total number of characters less whitespace is: #{stat[:chars2]}
    Total number of words is: #{word_count || 'n/a'}.
    Total number of sentences is: #{sentence_count}
    Total number of paragraphs is: #{paragraph_count}
    The average sentences per paragraph is: #{report_quotient(sentence_count, paragraph_count)}
    The average words per sentence is:  #{report_quotient(word_count, sentence_count)}
    #{percent_good_words} % of all words in the text are non-fluff words.
    The ideal sentences are: #{Array(stat[:bsent]).join("-- ")}
    The top 10 most common words are: #{stat[:words]}

    REPORT
  end
end

# Reads every files/*.txt (relative to the current directory) and returns an
# Array with one Hash of statistics per file, ordered by file path.
#
# Keys of each Hash:
#   :text   - file name without its directory and ".txt" extension
#   :chars  - count_characters(text)
#   :chars2 - count_characters_less_spaces(text)
#   :wrd_c  - count_words(text); kept under its own key so it is not
#             overwritten by :words below
#   :sent   - count_sentences(text)
#   :para   - count_paragraphs(text)
#   :lines  - count_lines(text)
#   :kword  - non_stopwords(text)
#   :words  - most_common_words(text)
#   :bsent  - ideal_sentences(sentences)
def collect_stats
  Dir.glob('files/*.txt').sort.map do |text_file|
    # The file contents are analysed untouched; only the displayed name is
    # stripped of its directory and extension.
    text = File.read(text_file)
    # Collapse all whitespace so a sentence spread over several lines stays whole.
    sentences = text.gsub(/\s+/, ' ').strip.split(/\.|\?|!/)

    {
      :text   => File.basename(text_file, '.txt'),
      :chars  => count_characters(text),
      :chars2 => count_characters_less_spaces(text),
      :wrd_c  => count_words(text),
      :sent   => count_sentences(text),
      :para   => count_paragraphs(text),
      :lines  => count_lines(text),
      :kword  => non_stopwords(text),
      :words  => most_common_words(text),
      :bsent  => ideal_sentences(sentences)
    }
  end
end
# Gather the statistics for every text file found by collect_stats.
stats = collect_stats
# File names of the analysed texts (not referenced by the page built below).
names = file_names(stats)
# One plain-text report per analysed file.
reports = build_reports(stats)

header = <<HTML
<html>
<body>
<pre>
CS 132A Lab3
Inaugural Speech Analysis
HTML

# Close the tags in the reverse of the order in which the header opened them.
footer = <<HTML
</pre>
</body>
</html>
HTML

# The reports are joined explicitly: interpolating the Array itself prints its
# inspect form (brackets, quotes, escaped newlines) on Ruby 1.9 and later.
# They contain text taken from the input files, so it is HTML-escaped before
# being placed inside the page.
output = <<OUT
#{header}
#{CGI.escapeHTML(reports.join)}
#{footer}
OUT

# Send the page as the CGI response body.
cgi = CGI.new
cgi.out do
  output
end
	#!/usr/bin/ruby
	# Text Analyzer
	# Author: Zach
	# Purpose: Utilize Ruby to analyze Text files
	# and generate statistical information therein.

	require 'cgi'

	# Words ignored by non_stopwords and most_common_words; each line of
	# conf/stop_words.txt becomes one entry. File.readlines is used because
	# File.read returns a single String, which has no #map on Ruby 1.9 and later.
	STOPWORDS = File.readlines('conf/stop_words.txt').map { |line| line.chomp }.freeze

	# Number of characters in +text+ (whitespace included), as an Integer.
	def count_characters(text)
	  text.size
	end

	# Number of characters in +text+ once every run of whitespace is removed.
	def count_characters_less_spaces(text)
	  without_whitespace = text.gsub(/\s+/, '')
	  count_characters(without_whitespace)
	end

	# Number of whitespace-separated words in +text+.
	#
	# String#split with no argument splits on runs of whitespace and ignores
	# leading and trailing whitespace, so an empty or blank text has 0 words.
	def count_words(text)
	  text.split.length
	end

	# Number of sentences in +text+.
	#
	# A sentence is a run of text ended by one or more of ".", "?" or "!".
	# Fragments that are empty or whitespace-only (for example the newline after
	# the final full stop, or the gaps inside "...") are not counted.
	def count_sentences(text)
	  text.split(/[.?!]+/).reject { |fragment| fragment.strip.empty? }.length
	end

	# Number of paragraphs in +text+.
	#
	# Paragraphs are separated by one or more blank lines. Both "\n" and "\r\n"
	# line endings are accepted, a line holding only spaces or tabs counts as
	# blank, and empty pieces (leading or repeated blank lines) are not counted.
	def count_paragraphs(text)
	  text.split(/(?:\r?\n[ \t]*){2,}/).reject { |paragraph| paragraph.strip.empty? }.length
	end

	# Number of pieces left after splitting +text+ on "\n". String#split drops
	# trailing empty pieces, so blank lines at the very end are not counted.
	def count_lines(text)
	  pieces = text.split("\n")
	  pieces.size
	end

	# Words of +text+ (split on whitespace) that are not listed in STOPWORDS,
	# in their original order and with duplicates kept.
	#
	# String#split with no argument is used so that leading whitespace does not
	# yield an empty "" word, which split(/\s+/) would return.
	def non_stopwords(text)
	  text.split.reject { |word| STOPWORDS.include?(word) }
	end

	# The most frequent words of +text+ that are not in STOPWORDS, most frequent
	# first, joined with "-- " into a single String.
	#
	# text  - String to analyse; words are separated by whitespace.
	# limit - maximum number of words to include (defaults to 10).
	#
	# String#split with no argument is used so that leading whitespace does not
	# add an empty "" entry to the ranking.
	def most_common_words(text, limit = 10)
	  words = text.split - STOPWORDS
	  ranked = words.group_by { |word| word }.sort_by { |_word, hits| -hits.length }
	  ranked.first(limit).map(&:first).join('-- ')
	end

	# Picks candidate "ideal" sentences from +sentences+ (an Array of Strings).
	#
	# The sentences are ordered by length; starting one seventh of the way into
	# that ordering, a window of (one seventh + 1) sentences is taken. Of those,
	# only sentences containing whitespace followed by "is" or "are" and then a
	# non-word character are kept. Returns an Array.
	def ideal_sentences(sentences)
	  by_length = sentences.sort_by(&:length)
	  seventh = by_length.length / 7
	  window = by_length[seventh, seventh + 1]
	  window.grep(/\sis\W|\sare\W/)
	end

	# Collects the :text entry of every stats Hash and returns them as an Array
	# stored under the :names key of a new Hash.
	def file_names(stats)
	  names = stats.map { |entry| entry[:text] }
	  { :names => names }
	end

	# Integer quotient shown in a report, or 'n/a' when it cannot be computed
	# (either value is not a number, or the divisor is zero).
	def report_quotient(numerator, denominator)
	  return 'n/a' unless numerator.is_a?(Numeric) && denominator.is_a?(Numeric)
	  return 'n/a' if denominator.zero?

	  numerator / denominator
	end

	# Builds one plain-text report per entry of +stats+.
	#
	# stats - Array of Hashes; the keys read from each Hash are:
	#         :text   - name shown in the report title
	#         :chars  - character count
	#         :chars2 - character count less whitespace
	#         :wrd_c  - word count (printed as n/a when absent)
	#         :sent   - sentence count
	#         :para   - paragraph count
	#         :kword  - Array of non-stop-words
	#         :bsent  - Array of "ideal" sentences
	#         :words  - most common words, already joined into a String
	#
	# Returns an Array of report Strings in the same order as +stats+.
	def build_reports(stats)
	  stats.map do |stat|
	    word_count      = stat[:wrd_c]
	    sentence_count  = stat[:sent]
	    paragraph_count = stat[:para]
	    keywords        = stat[:kword]
	    # Share of the words that survived stop-word filtering, as a whole percent.
	    percent_good_words = report_quotient(keywords && keywords.length * 100, word_count)

	    <<-REPORT
	#{stat[:text]} Inaugural Speech - Analysis Results
	Total number of characters is: #{stat[:chars]}.
	Total number of characters less whitespace is: #{stat[:chars2]}
	Total number of words is: #{word_count || 'n/a'}.
	Total number of sentences is: #{sentence_count}
	Total number of paragraphs is: #{paragraph_count}
	The average sentences per paragraph is: #{report_quotient(sentence_count, paragraph_count)}
	The average words per sentence is: #{report_quotient(word_count, sentence_count)}
	#{percent_good_words} % of all words in the text are non-fluff words.
	The ideal sentences are: #{Array(stat[:bsent]).join("-- ")}
	The top 10 most common words are: #{stat[:words]}

	REPORT
	  end
	end

	# Reads every files/*.txt (relative to the current directory) and returns an
	# Array with one Hash of statistics per file, ordered by file path.
	#
	# Keys of each Hash:
	#   :text   - file name without its directory and ".txt" extension
	#   :chars  - count_characters(text)
	#   :chars2 - count_characters_less_spaces(text)
	#   :wrd_c  - count_words(text); kept under its own key so it is not
	#             overwritten by :words below
	#   :sent   - count_sentences(text)
	#   :para   - count_paragraphs(text)
	#   :lines  - count_lines(text)
	#   :kword  - non_stopwords(text)
	#   :words  - most_common_words(text)
	#   :bsent  - ideal_sentences(sentences)
	def collect_stats
	  Dir.glob('files/*.txt').sort.map do |text_file|
	    # The file contents are analysed untouched; only the displayed name is
	    # stripped of its directory and extension.
	    text = File.read(text_file)
	    # Collapse all whitespace so a sentence spread over several lines stays whole.
	    sentences = text.gsub(/\s+/, ' ').strip.split(/\.|\?|!/)

	    {
	      :text   => File.basename(text_file, '.txt'),
	      :chars  => count_characters(text),
	      :chars2 => count_characters_less_spaces(text),
	      :wrd_c  => count_words(text),
	      :sent   => count_sentences(text),
	      :para   => count_paragraphs(text),
	      :lines  => count_lines(text),
	      :kword  => non_stopwords(text),
	      :words  => most_common_words(text),
	      :bsent  => ideal_sentences(sentences)
	    }
	  end
	end
	# Gather the statistics for every text file found by collect_stats.
	stats = collect_stats
	# File names of the analysed texts (not referenced by the page built below).
	names = file_names(stats)
	# One plain-text report per analysed file.
	reports = build_reports(stats)

	# The page pieces are built from line Arrays rather than heredocs so that
	# the indentation of this code does not leak into the generated page.
	header = [
	  '<html>',
	  '<body>',
	  '<pre>',
	  'CS 132A Lab3',
	  'Inaugural Speech Analysis'
	].join("\n") + "\n"

	# Close the tags in the reverse of the order in which the header opened them.
	footer = [
	  '</pre>',
	  '</body>',
	  '</html>'
	].join("\n") + "\n"

	# The reports are joined explicitly: interpolating the Array itself prints its
	# inspect form (brackets, quotes, escaped newlines) on Ruby 1.9 and later.
	# They contain text taken from the input files, so it is HTML-escaped before
	# being placed inside the page.
	output = [header, CGI.escapeHTML(reports.join), footer].join("\n") + "\n"

	# Send the page as the CGI response body.
	cgi = CGI.new
	cgi.out do
	  output
	end