# thoughtpunch/most_common_word.rb

## most_common_word.rb
# DAN'S METHOD + EXPLANATION - LET'S TAKE A LOOK AT HOW THIS WORKS

# lets start with a string
string = "Joe is a cool dude. I am also a cool dude. Cool dudes are the best."

# lets clean it up by removing punctuation and lowercasing everything.
#  - remember 'Dude' does not equal 'dude' nor 'dUdE', etc. String comparison is exact!
#  - '/\W/' is a regex that matches every "non-word" character, i.e. anything that is
#    NOT a letter, a digit or an underscore. 'gsub' replaces each of those matches with
#    something, in this case a single space ' '
#  - 'downcase' and 'gsub' each return a NEW string and leave the original untouched,
#    so we must store the result in a variable, otherwise the cleanup is thrown away
clean_string = string.downcase.gsub(/\W/, ' ')

# lets use Ruby's split method to turn the cleaned string into an Array of strings
# - will look like this: ["joe", "is", "a", "cool", "dude", "i", "am", ...
# - split(" ") splits on runs of whitespace, so the double spaces left behind where
#   punctuation used to be do not produce empty strings
array_of_strings = clean_string.split(" ")

# make an empty hash to hold our word frequencies
@words_by_frequency = {}

# * THE BIZNASS *
# now we will do the following....
# 1. iterate (i.e. 'loop') over all the 'array_of_strings'
# 2. add each word to the hash as a 'key' with a value starting at
#    1 like so: {'joe' => 1}
# 3. every time we come across the same word, add +1 to the value for that key

array_of_strings.each do |word|
	# 'word' here is each item in the array in turn
	# first it will be 'joe', then 'is', then 'a', etc..
	# (it has its own name so it does not shadow the 'string' variable above)

	# Now we are going to check the @words_by_frequency hash
	# - hashes store data in 'key' => 'value' pairs (i.e. {"name" => "Dan", "age" => 31} )
	# - you can get to the data in a hash by looking up its key (i.e. some_hash["name"] returns "Dan" )
	# - you can SET/STORE data in the hash by providing a key and value (i.e. some_hash["foo"] = "bar", makes {"foo" => "bar"} )

	# First, check to see if @words_by_frequency already has the 'word' in it
	# - a missing key returns nil, which counts as false
	# - every count we store is a number, which always counts as true
	if @words_by_frequency[word]
		# if we get to this point, the hash already had a key named whatever 'word' is (i.e. "joe" )
		# so lets add one more to the count...
		@words_by_frequency[word] += 1 # value is now something like { "joe" => 2 }
	else
		# if we get here, we haven't seen this word before, so we add it to the hash
		@words_by_frequency[word] = 1 # value is now something like { "joe" => 1 }
	end

	# the loop will now return to the top and run through it again with the next word
end

# sort by least to most frequent
# - sort_by returns an Array of [word, count] pairs
# - puts prints every word and every count on its own line
puts @words_by_frequency.sort_by { |_word, count| count }

#HERES HOW IT LOOKED MID LOOP
# 1st iteration: @words_by_frequency = {"joe" => 1}
# 2nd iteration: @words_by_frequency = {"joe" => 1, "is" => 1}
# 3rd iteration: @words_by_frequency = {"joe" => 1, "is" => 1, "a" => 1}
# ....
# On the 9th iteration we hit our first duplicate, the word "a", so the hash
# looked something like this...
# {"joe" => 1, "is" => 1, "a" => 2, "cool" => 1, ...
# ....
# By the time it runs out of words, it will look like this unsorted
# { "joe"=>1, "is"=>1, "a"=>2, "cool"=>3, "dude"=>2, "i"=>1, "am"=>1,
#   "also"=>1, "dudes"=>1, "are"=>1, "the"=>1, "best"=>1 }

#Hopefully that helps! Let me know if you still have any issues.

# Dan
	# DAN'S METHOD + EXPLANATION - LET'S TAKE A LOOK AT HOW THIS WORKS

	# lets start with a string
	string = "Joe is a cool dude. I am also a cool dude. Cool dudes are the best."

	# lets clean it up by removing punctuation and lowercasing everything.
	# - remember 'Dude' does not equal 'dude' nor 'dUdE', etc. String comparison is exact!
	# - '/\W/' is a regex that matches every "non-word" character, i.e. anything that is
	#   NOT a letter, a digit or an underscore. 'gsub' replaces each of those matches with
	#   something, in this case a single space ' '
	# - 'downcase' and 'gsub' each return a NEW string and leave the original untouched,
	#   so we must store the result in a variable, otherwise the cleanup is thrown away
	clean_string = string.downcase.gsub(/\W/, ' ')

	# lets use Ruby's split method to turn the cleaned string into an Array of strings
	# - will look like this: ["joe", "is", "a", "cool", "dude", "i", "am", ...
	# - split(" ") splits on runs of whitespace, so the double spaces left behind where
	#   punctuation used to be do not produce empty strings
	array_of_strings = clean_string.split(" ")

	# make an empty hash to hold our word frequencies
	@words_by_frequency = {}

	# * THE BIZNASS *
	# now we will do the following....
	# 1. iterate (i.e. 'loop') over all the 'array_of_strings'
	# 2. add each word to the hash as a 'key' with a value starting at
	#    1 like so: {'joe' => 1}
	# 3. every time we come across the same word, add +1 to the value for that key

	array_of_strings.each do |word|
		# 'word' here is each item in the array in turn
		# first it will be 'joe', then 'is', then 'a', etc..
		# (it has its own name so it does not shadow the 'string' variable above)

		# Now we are going to check the @words_by_frequency hash
		# - hashes store data in 'key' => 'value' pairs (i.e. {"name" => "Dan", "age" => 31} )
		# - you can get to the data in a hash by looking up its key (i.e. some_hash["name"] returns "Dan" )
		# - you can SET/STORE data in the hash by providing a key and value (i.e. some_hash["foo"] = "bar", makes {"foo" => "bar"} )

		# First, check to see if @words_by_frequency already has the 'word' in it
		# - a missing key returns nil, which counts as false
		# - every count we store is a number, which always counts as true
		if @words_by_frequency[word]
			# if we get to this point, the hash already had a key named whatever 'word' is (i.e. "joe" )
			# so lets add one more to the count...
			@words_by_frequency[word] += 1 # value is now something like { "joe" => 2 }
		else
			# if we get here, we haven't seen this word before, so we add it to the hash
			@words_by_frequency[word] = 1 # value is now something like { "joe" => 1 }
		end

		# the loop will now return to the top and run through it again with the next word
	end

	# sort by least to most frequent
	# - sort_by returns an Array of [word, count] pairs
	# - puts prints every word and every count on its own line
	puts @words_by_frequency.sort_by { |_word, count| count }

	#HERES HOW IT LOOKED MID LOOP
	# 1st iteration: @words_by_frequency = {"joe" => 1}
	# 2nd iteration: @words_by_frequency = {"joe" => 1, "is" => 1}
	# 3rd iteration: @words_by_frequency = {"joe" => 1, "is" => 1, "a" => 1}
	# ....
	# On the 9th iteration we hit our first duplicate, the word "a", so the hash
	# looked something like this...
	# {"joe" => 1, "is" => 1, "a" => 2, "cool" => 1, ...
	# ....
	# By the time it runs out of words, it will look like this unsorted
	# { "joe"=>1, "is"=>1, "a"=>2, "cool"=>3, "dude"=>2, "i"=>1, "am"=>1,
	# "also"=>1, "dudes"=>1, "are"=>1, "the"=>1, "best"=>1 }

	#Hopefully that helps! Let me know if you still have any issues.

	# Dan