Last active
December 16, 2015 18:09
-
-
Save jnewman12/5475458 to your computer and use it in GitHub Desktop.
Basic text analyzer from Peter Cooper's book, "Beginning Ruby"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Basic text analyzer.
#
# Reads a text file and prints its line, character, word, sentence and
# paragraph counts, two integer averages, the percentage of words that are
# not stop words, and a short summary made of mid-length sentences.
#
# Usage: ruby <this script> [path]
# The file to analyze is the first command-line argument; when no argument is
# given the script falls back to "text.txt" in the current directory.

# Stop words are kept in lower case; each word is lower-cased before lookup so
# that capitalised occurrences ("The", "And", "I") are also treated as fluff.
stopwords = %w{the a by on for of are with just but and to my i has some in}

# ARGV[0] is nil when the script is started without arguments, and handing nil
# to File.readlines raises a TypeError, hence the explicit default.
source_path = ARGV[0] || "text.txt"
lines = File.readlines(source_path)
line_count = lines.size
text = lines.join

# Count the characters
character_count = text.length
character_count_nospaces = text.gsub(/\s+/, '').length

# Count the words, sentences & paragraphs
word_count = text.split.length
sentence_count = text.split(/\.|\?|!/).length
paragraph_count = text.split(/\n\n/).length

# Integer average that yields 0 instead of raising ZeroDivisionError when the
# divisor is 0 (empty file, or text consisting only of sentence punctuation).
average = lambda { |total, count| count.zero? ? 0 : total / count }

# Make a list of words in the text that are not stop words,
# count them, and work out the percentage of non stop words
# against all words
all_words = text.scan(/\w+/)
good_words = all_words.reject { |word| stopwords.include?(word.downcase) }
good_percentage =
  if all_words.empty?
    # 0.0 / 0.0 is NaN and NaN.to_i raises FloatDomainError, so report 0%.
    0
  else
    ((good_words.length.to_f / all_words.length.to_f) * 100).to_i
  end

# Summarize the text by cherry picking some sentences to use:
# collapse whitespace, split into sentences, sort them by length and take a
# slice starting one third of the way in, i.e. neither the shortest nor the
# longest sentences.
sentences = text.gsub(/\s+/, ' ').strip.split(/\.|\?|!/)
sentences_sorted = sentences.sort_by { |sentence| sentence.length }
one_third = sentences_sorted.length / 3
ideal_sentences = sentences_sorted.slice(one_third, one_third + 1)
# Keep only sentences containing "is" or "are". The pattern is unanchored, so
# it also matches inside longer words such as "this".
ideal_sentences = ideal_sentences.select { |sentence| sentence =~ /is|are/ }

# Give the user feedback.
puts "#{line_count} lines"
puts "#{character_count} Characters"
puts "#{character_count_nospaces} Characters excluding spaces"
puts "#{word_count} words"
puts "#{sentence_count} sentences"
puts "#{paragraph_count} paragraphs"
puts "#{average.call(sentence_count, paragraph_count)} sentences per paragraph (average)"
puts "#{average.call(word_count, sentence_count)} words per sentence (average)"
puts "#{good_percentage}% of words are non fluff words"
puts "Summary:\n\n" + ideal_sentences.join(". ")
puts "End of analysis"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment