Skip to content

Instantly share code, notes, and snippets.

@TheCrether
Created October 12, 2020 06:08
Show Gist options
  • Save TheCrether/b2f4d09ae90b4b8c137b1318b2dcbe90 to your computer and use it in GitHub Desktop.
Save TheCrether/b2f4d09ae90b4b8c137b1318b2dcbe90 to your computer and use it in GitHub Desktop.
REPLY 2020 Wells-Read
import re
from time import sleep
# Tokenise wells.txt: newlines are treated as spaces, the text is split on single
# spaces, and runs of non-word characters (or underscores) are trimmed from both
# ends of every token. Tokens that end up empty are discarded.
with open("wells.txt", "r") as f:
    raw_tokens = f.read().replace("\n", " ").split(" ")

begin = re.compile(r"^[\W_]+")
end = re.compile(r"[\W_]+$")

# Trim the leading run first, then the trailing one.
trimmed_tokens = (end.sub("", begin.sub("", token)) for token in raw_tokens)
wells = [token for token in trimmed_tokens if token.strip()]
# Read the dictionary (one entry per line) and lower-case every entry.
with open("words.txt", "r") as f:
    words = [entry.lower() for entry in f.read().split("\n")]
# Collect every token of the text whose lower-cased form is not in `words`.
# `words` is copied into a set once so that each membership test is a hash
# lookup instead of a linear scan over the whole dictionary.
known_words = set(words)
wrong1 = []
for word in wells:
    word = word.strip()
    if not word:
        # nothing left after stripping whitespace
        continue
    if word.lower() not in known_words:
        wrong1.append(word)
# Some rejected tokens are really several words glued together by punctuation,
# e.g. "like--a". Split each rejected token on runs of non-word characters and
# keep only the pieces whose lower-cased form is still missing from `words`.
nonword = re.compile(r"[\W]+")
# Built once: set membership is a hash lookup, list membership is a linear scan.
dictionary = set(words)
wrong2 = []
for word in wrong1:
    for token in nonword.split(word):
        if token.lower() not in dictionary:
            wrong2.append(token)
# Write the remaining unknown tokens to wrong.txt, one per line (each token is
# followed by a newline). Streaming the lines avoids building one large string
# through repeated concatenation.
with open("wrong.txt", "w") as f:
    f.writelines(word + "\n" for word in wrong2)
def get_diff(word1: str, word2: str) -> list[str]:
    """Return the characters of ``word1`` that do not match ``word2``.

    The two words are compared position by position. A character of
    ``word1`` is reported when ``word2`` holds a different character at the
    same index, or when ``word2`` is too short to have that index at all.
    Characters of ``word2`` beyond the length of ``word1`` are ignored.

    Args:
        word1: The word whose characters are examined and reported.
        word2: The word to compare against.

    Returns:
        The mismatching characters of ``word1`` in their original order;
        an empty list when every position of ``word1`` matches.
    """
    limit = len(word2)
    return [
        char
        for index, char in enumerate(word1)
        # An index past the end of word2 has no counterpart, so it is a difference.
        if index >= limit or char != word2[index]
    ]
# Load the dictionary in its original spelling and build an index-aligned
# lower-cased copy of it.
with open("words.txt", "r") as f:
    words = f.read().split("\n")
lower = [entry.lower() for entry in words]

# Load the unknown tokens, one per line.
with open("wrong.txt", "r") as f:
    wrong = f.read().split("\n")
# Compare every unknown token with each dictionary entry whose lower-cased form
# has the same length. When the token differs from the entry in exactly one
# position (as written, or after lower-casing the token), print both diffs,
# the token and the dictionary entry in its original spelling.
for candidate in wrong:
    lowered_candidate = candidate.lower()
    for entry, lowered_entry in zip(words, lower):
        if len(candidate) != len(lowered_entry):
            continue
        diff1 = get_diff(candidate, entry)
        diff2 = get_diff(lowered_candidate, lowered_entry)
        if len(diff1) == 1 or len(diff2) == 1:
            print(diff1)
            print(diff2)
            print(candidate)
            print(entry)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment