# Last active May 5, 2022
import re

# Install: !pip install pyspellchecker
from spellchecker import SpellChecker
spell = SpellChecker()
# Text from 122001 of the wikiText data; modified to include typos:
# 'commentary' -> 'commentyra', 'gimmick' -> 'gimimick'
# curText = doc_set[122001]
# Use the spellchecker to identify and correct the typos
# Map every distinct token of curText to its suggested spelling. Tokens the
# checker already knows map to themselves, so the typos can be filtered out
# at the end by comparing key and value.
correctDict = {}
for val in re.split(r'[^\w]', curText):
    # Splitting on single non-word characters yields empty strings between
    # adjacent separators; skip those. Also skip tokens that were already
    # recorded, since the correction lookup is the costly step.
    if not val or val in correctDict:
        continue
    misspelled = spell.unknown([val])
    if misspelled:
        # NOTE(review): unknown() presumably returns the word in the checker's
        # normalised (lower-cased) form, so the key may differ in case from
        # `val` -- confirm against the installed pyspellchecker version.
        misWord = misspelled.pop()
        # NOTE(review): newer pyspellchecker releases appear to return None
        # when no candidate exists; such entries still show up as typos below.
        corrected = spell.correction(misWord)
        correctDict[misWord] = corrected
    else:
        # Known word: store the identity mapping. This must not run for a
        # misspelled token, or it would overwrite the correction stored above
        # whenever the token is already lower-case.
        correctDict[val] = val
# Parse out the typos: keep only entries whose suggestion differs from the
# token. Left as a bare expression so a notebook cell displays the result.
{k: v for k, v in correctDict.items() if k != v}
