Skip to content

Instantly share code, notes, and snippets.

@blcarlson01
Created April 14, 2023 13:43
Show Gist options
  • Save blcarlson01/c2b31c5520f1e6ce84f88b3759938818 to your computer and use it in GitHub Desktop.
Save blcarlson01/c2b31c5520f1e6ce84f88b3759938818 to your computer and use it in GitHub Desktop.
import re
import ahocorasick
from operator import itemgetter
# Demo: strip every occurrence of a fixed keyword set from a sentence,
# locating all keywords in a single pass with an Aho-Corasick automaton.
text = "The quick brown fox jumps over the lazy dog."

automaton = ahocorasick.Automaton()
for keyword in ('fox', 'quick', 'over', 'The'):
    # Each keyword is stored as its own payload so that iter() hands the
    # matched string back and its length can be used to find the match start.
    automaton.add_word(keyword, keyword)
automaton.make_automaton()

# iter() yields (index_of_last_matched_char, payload). Convert each hit into a
# half-open (start, end, keyword) span. Sorting by end offset, right-most
# first, means cutting a span never shifts the offsets of spans still to come.
matches = sorted(
    (
        (end_index - len(keyword) + 1, end_index + 1, keyword)
        for end_index, keyword in automaton.iter(text)
    ),
    key=itemgetter(1),
    reverse=True,
)
print(matches)
print(text)

# Offsets at or beyond `removed_from` have already been cut out. Clipping each
# span to it keeps overlapping or nested hits (e.g. keywords 'fox' and 'ox')
# from deleting characters that were never part of a match.
removed_from = len(text)
for start, end, keyword in matches:
    print('replacement word: ', keyword, start, end)
    end = min(end, removed_from)
    if start < end:
        text = text[:start] + text[end:]
        removed_from = start
    # NOTE(review): indentation was lost in the source; this per-step print is
    # assumed to belong to the loop body -- confirm against the original.
    print(text)

# Removing words leaves doubled and leading/trailing spaces behind.
text = text.strip()
pattern = re.compile(r' +')
text = pattern.sub(' ', text)
print('----')
# A bare `text` expression only displays in a REPL/notebook; print it so the
# final result is also visible when run as a script.
print(text)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment