Skip to content

Instantly share code, notes, and snippets.

@benob
Created March 20, 2022 13:41
Show Gist options
  • Save benob/df806ee98120d5f62a5849aa109ef3ee to your computer and use it in GitHub Desktop.
Save benob/df806ee98120d5f62a5849aa109ef3ee to your computer and use it in GitHub Desktop.
Minimalist implementation of Symmetric Delete Spelling Correction
import re
from collections import Counter, defaultdict
# create lexicon with word frequency from big text
def words(text): return re.findall(r'\w+', text.lower())
WORDS = Counter(words(open('big.txt').read()))
# generate all deletion edits, plus original
def edits(word):
splits = [(word[:i], word[i:]) for i in range(len(word) + 1)]
deletes = [L + R[1:] for L, R in splits if R]
return deletes + [word]
# expand lexicon with edited words
EXPANDED = defaultdict(list)
for word in WORDS:
for deletion in edits(word):
EXPANDED[deletion].append(word)
# compute word frequency
def P(word, N=sum(WORDS.values())):
return WORDS[word] / N
# match all 1-deletion edits from word to all 1-deletion edits from lexicon
def correction(word):
candidates = []
for deletion in edits(word):
candidates += EXPANDED[deletion]
return max(set(candidates), key=P)
print('lenght =>', correction('lenght'))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment