Created
February 27, 2019 14:53
-
-
Save kylebgorman/e5ef47e148afb370388d5171c1e5b9ee to your computer and use it in GitHub Desktop.
Yeonju's solution to part one of MP1
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Yeonju's solution to part 1 of MP1.""" | |
import nltk | |
# Toy training corpus for mlp_second_order(): lowercase, whitespace-separated
# tokens, one phrase per line.
text = """du
du hast
du hast mich
du
du hast
du hast mich
du
du hast
du hast mich
du
du hast
du hast mich
du
du hast
du hast mich
du hast mich
du hast mich gefragt
du hast mich gefragt
du hast mich gefragt und ich hab nichts gesagt
willst du bis der tod uns scheidet
treue sein für alle tage
nein
nein"""
def mlp_second_order(text, target):
    """Print and return the second-order (trigram) MLE probability of `target`.

    Both strings are split on whitespace. N-grams are counted over `text` as
    one flat token stream, so they may span line breaks. The probability is
    assembled with the chain rule, each factor being a ratio of raw counts:

        P(w1) * P(w2 | w1) * P(w3 | w1 w2) * P(w4 | w2 w3) * ...

    The counts and value of every factor are printed, followed by the final
    product.

    Args:
        text: Training corpus.
        target: Whitespace-separated token sequence to score; any length of
            at least one token.

    Returns:
        The probability as a float. A factor whose history was never seen in
        `text` (or an empty corpus) contributes 0.0 instead of raising
        ZeroDivisionError.

    Raises:
        ValueError: If `target` contains no tokens.
    """
    from collections import Counter

    corpus = text.split()
    tokens = target.split()
    if not tokens:
        raise ValueError("target must contain at least one token")

    # counts[n] maps every n-gram (as an n-tuple) to its corpus frequency;
    # zipping n shifted copies of the corpus yields the sliding windows.
    counts = {
        n: Counter(zip(*(corpus[k:] for k in range(n)))) for n in (1, 2, 3)
    }

    result = 1.0
    for i in range(len(tokens)):
        # Condition on at most the two preceding tokens.
        ngram = tuple(tokens[max(0, i - 2) : i + 1])
        history = ngram[:-1]
        numerator = counts[len(ngram)][ngram]
        if history:
            history_text = " ".join(history)
            denominator = counts[len(history)][history]
            denominator_label = f"\nCount '{history_text}':"
            probability_label = "Probability:"
        else:
            # The first token has no history: normalise by the corpus size.
            denominator = len(corpus)
            denominator_label = "\nCount unigrams:"
            # Trailing space kept so existing output stays byte-identical.
            probability_label = "Probability: "

        if denominator:
            probability = numerator / denominator
            # Multiply before dividing to keep the original rounding order.
            result = result * numerator / denominator
        else:
            probability = 0.0
            result = 0.0

        ngram_text = " ".join(ngram)
        print(
            f"Count '{ngram_text}':",
            numerator,
            denominator_label,
            denominator,
        )
        print(probability_label, probability)
        print("=======================")

    print(f"The maximal likelihood probability of '{target}' is {result}")
    return result
# Score the example phrase against the corpus; the per-step counts and the
# final probability are printed to stdout.
target = "du hast mich gefragt"
mlp_second_order(text, target)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment