Skip to content

Instantly share code, notes, and snippets.

@luthfibalaka
Created September 24, 2023 02:55
Show Gist options
  • Save luthfibalaka/03754633447b93662d2a49714bcc8ed8 to your computer and use it in GitHub Desktop.
Save luthfibalaka/03754633447b93662d2a49714bcc8ed8 to your computer and use it in GitHub Desktop.
Mencoba Algoritma Levenshtein Automata
from levenshtein_automata import *
# Asumsikan kita punya korpus kata-kata baku sebagai berikut
words_corpus = ["kucing", "kemoceng", "kacang", "kucir", "kucil"]
# Misal user punya query typo, kita buat automata-nya
query = "kucin"
automata = levenshtein_automata(query, 1)
# Cek setiap kata di korpus, mana saja yang menjadi kandidat dengan edit distance 1
for word in words_corpus:
current_states = automata.start_state
for char in word:
current_states = automata.next_state(current_states, char)
if automata.is_final(current_states):
print(f"Kata {word} adalah kandidat dari query {query}")
else:
print(f"Kata {word} bukan kandidat dari query {query}")
# Hasilnya ada 3 kandidat, yaitu kucing, kucir, dan kucil
"""
Implementasi Algoritma Levenshtein Automata (NFA)
Sumber: http://blog.notdot.net/2010/07/Damn-Cool-Algorithms-Levenshtein-Automata
"""
class NFA:
EPSILON = object()
ANY = object()
def __init__(self, start_state: tuple):
self.transitions = {}
self.final_states = set()
self._start_state = start_state
@property
def start_state(self):
return frozenset(self._expand(set([self._start_state])))
def add_transition(self, src, input, dest):
self.transitions.setdefault(src, {}).setdefault(input, set()).add(dest)
def add_final_state(self, state):
self.final_states.add(state)
def is_final(self, states):
return len(self.final_states.intersection(states)) > 0
def _expand(self, states):
frontier = set(states)
while frontier:
state = frontier.pop()
new_states = (
self.transitions.get(state, {})
.get(NFA.EPSILON, set())
.difference(states)
)
frontier.update(new_states)
states.update(new_states)
return states
def next_state(self, states, input):
dest_states = set()
for state in states:
state_transitions = self.transitions.get(state, {})
dest_states.update(state_transitions.get(input, []))
dest_states.update(state_transitions.get(NFA.ANY, []))
return frozenset(self._expand(dest_states))
def get_inputs(self, states):
inputs = set()
for state in states:
inputs.update(self.transitions.get(state, {}).keys())
return inputs
def levenshtein_automata(term: str, k=1):
"""
Membangun NFA untuk suatu term dengan edit distance k (defaultnya 1)
"""
nfa = NFA((0, 0))
for i, c in enumerate(term):
for e in range(k + 1):
# Correct character
nfa.add_transition((i, e), c, (i + 1, e))
if e < k:
# Deletion
nfa.add_transition((i, e), NFA.ANY, (i, e + 1))
# Insertion
nfa.add_transition((i, e), NFA.EPSILON, (i + 1, e + 1))
# Substitution
nfa.add_transition((i, e), NFA.ANY, (i + 1, e + 1))
for e in range(k + 1):
if e < k:
nfa.add_transition((len(term), e), NFA.ANY, (len(term), e + 1))
nfa.add_final_state((len(term), e))
return nfa
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment