Created
August 9, 2008 10:01
-
-
Save martinus/4667 to your computer and use it in GitHub Desktop.
Two Word Anagram Finder Algorithm (in Ruby)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/ruby | |
# created by Martin Ankerl http://martin.ankerl.com/ | |
class String | |
# creates an array of characters | |
def letters | |
unpack("c*") | |
end | |
end | |
class Array | |
# converts an array of letters back into a String | |
def word | |
pack("c*") | |
end | |
end | |
query = "documenting" | |
query_letters_sorted = query.letters.sort | |
txt = File.read('wordlist.txt').downcase | |
# to quickly check if a letter is part of the query word | |
used_letters = Array.new(256, nil) | |
query_letters_sorted.each do |letter| | |
used_letters[letter] = true | |
end | |
# Maps from cummulative hash of a word to a list of words that have this hash code. | |
hashToWords = Hash.new do |hash, key| | |
hash[key] = Array.new | |
end | |
query_hash = query.sum | |
prev = 0 | |
txt_size = txt.size | |
separator = 10 | |
idx = txt.index(separator, prev) | |
while prev < txt_size | |
letter_idx = prev | |
# no need to check end of word because it is \n | |
# which is not part of the word anyways | |
while used_letters[txt[letter_idx]] | |
letter_idx += 1 | |
end | |
# ignore word if the above quick check fails | |
if letter_idx == idx | |
word = txt[prev, idx-prev] | |
# check all key matches | |
key = word.sum | |
hashToWords[query_hash - key].each do |other_word| | |
if (word.letters + other_word.letters).sort == query_letters_sorted | |
puts "#{word} #{other_word}" | |
puts "#{other_word} #{word}" | |
end | |
end | |
# insert word | |
hashToWords[key] << word | |
end | |
prev = idx + 1 | |
# no need to check end of file because we have to end with new line | |
idx = txt.index(separator, prev) | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment