Last active
December 31, 2020 19:01
-
-
Save Grumblesaur/827ec1e512dc065d8c57097b40198838 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
from rapidfuzz import process | |
# Translate typos made by rigsters989 in [GNOME] NA to
# a slightly more recognizable version of English.
def small(w, size=4):
    '''Return True when `w` has at most `size` characters.

    Assume that Rigs probably won't spell really small words wrong, so
    such words are left alone by the rest of the program.

    w    -- the word (any sized object) to measure.
    size -- the largest length still considered "small" (default 4).
    '''
    # Compare against the parameter rather than a hard-coded 4 so that
    # callers passing a custom `size` actually get what they asked for.
    return len(w) <= size
def guess_from_anagrams(w, anagram_lists):
    '''Handle the case where Rigs only jumbles a word and doesn't
    miss or add letters.

    w             -- the (possibly jumbled) word to look up.
    anagram_lists -- mapping of sorted-letter string -> list of real words
                     made of exactly those letters.

    Returns the candidate picked by get_best_match, or None when no word
    in `anagram_lists` uses exactly the letters of `w`.
    '''
    norm = ''.join(sorted(w))
    # Only the lookup itself may legitimately miss; using .get() keeps a
    # KeyError raised inside the matcher from being mistaken for "no
    # anagram found".
    anagrams = anagram_lists.get(norm)
    if not anagrams:
        # Missing key, or an empty group with nothing to match against.
        return None
    return get_best_match(w, anagrams)
def build_anagram_lists(wordlist):
    '''Group the words of `wordlist` by their letters.

    Each key is a word's characters sorted and joined back into a string;
    each value is the list of words sharing that key, in the order they
    were encountered while iterating `wordlist`.
    '''
    groups = {}
    for entry in wordlist:
        signature = ''.join(sorted(entry))
        # Start a fresh list the first time a signature is seen, then
        # append to whichever list is stored under it.
        groups.setdefault(signature, []).append(entry)
    return groups
def get_best_match(word, wordlist):
    '''Extract the best match according to rapidfuzz.

    word     -- the string to find a match for.
    wordlist -- collection of candidate strings.

    Returns the highest-scoring candidate, or None when there are no
    candidates to choose from.
    '''
    if not wordlist:
        # Nothing to compare against; report "no match" instead of
        # failing with an IndexError on an empty result list.
        return None
    # extractOne yields the single best (choice, score, ...) result, so
    # there is no need to fetch several matches and sort them again.
    best = process.extractOne(word, wordlist)
    if best is None:
        return None
    return best[0]
def build_word_list(path='/usr/share/dict/words'):
    '''Read the word file at `path` into a set of words.

    Every line is stripped of surrounding whitespace; entries that
    small() reports as small are left out of the result.
    '''
    with open(path, 'r') as source:
        stripped = (raw.strip() for raw in source)
        # Build the set directly; duplicates collapse on their own.
        return {entry for entry in stripped if not small(entry)}
def main(*argv):
    '''Prompt for sentences in a loop and print each one corrected.

    The word list and its anagram index are built once up front. Each
    whitespace-separated word of an entered sentence is then replaced by
    an anagram guess if one exists, kept as-is if it is a known or small
    word, and otherwise replaced by the best fuzzy match from the word
    list. The loop ends on EOF or Ctrl-C. `argv` is accepted but unused.
    '''
    dictionary = build_word_list()
    by_letters = build_anagram_lists(dictionary)

    def fix(token):
        # Anagram lookup is tried first, then the "already fine" cases,
        # and only then the fuzzy search over the whole dictionary.
        candidate = guess_from_anagrams(token, by_letters)
        if candidate is not None:
            return candidate
        if token in dictionary or small(token):
            return token
        return get_best_match(token, dictionary)

    while True:
        # The try deliberately covers the processing as well as the
        # prompt, so an interrupt during matching also ends the loop.
        try:
            sentence = input("Enter Rigs' sentence: ")
            repaired = [fix(token) for token in sentence.split()]
            print(' '.join(repaired))
        except (EOFError, KeyboardInterrupt):
            break
    print('\nDone!')
# Start the interactive prompt only when this file is run as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main(*sys.argv)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment