Skip to content

Instantly share code, notes, and snippets.

@amankharwal
Created January 19, 2021 12:16
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save amankharwal/857a62a94a51ff1d23726677aac6f525 to your computer and use it in GitHub Desktop.
Save amankharwal/857a62a94a51ff1d23726677aac6f525 to your computer and use it in GitHub Desktop.
import numpy as np
lexicon = {}
def update_lexicon(current : str, next_word : str) -> None:
# Add the input word to the lexicon if it in there yet.
if current not in lexicon:
lexicon.update({current: {next_word: 1} })
return
# Recieve te probabilties of the input word.
options = lexicon[current]
# Check if the output word is in the propability list.
if next_word not in options:
options.update({next_word : 1})
else:
options.update({next_word : options[next_word] + 1})
# Update the lexicon
lexicon[current] = options
# Populate lexicon
with open('dataset.txt', 'r') as dataset:
for line in dataset:
words = line.strip().split(' ')
for i in range(len(words) - 1):
update_lexicon(words[i], words[i+1])
# Adjust propability
for word, transition in lexicon.items():
transition = dict((key, value / sum(transition.values())) for key, value in transition.items())
lexicon[word] = transition
# Predict next word
line = input('> ')
word = line.strip().split(' ')[-1]
if word not in lexicon:
print('Word not found')
else:
options = lexicon[word]
predicted = np.random.choice(list(options.keys()), p=list(options.values()))
print(line + ' ' + predicted)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment