Created
March 21, 2019 15:35
-
-
Save yedhink/386912fc421e100def9be464976e977e to your computer and use it in GitHub Desktop.
NLP elective assignments
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import Counter, defaultdict

from nltk.corpus import brown
from nltk.util import ngrams
# Tagged Brown corpus, fetched once at import time. Every function in this
# file iterates it and reads each item as a (word, tag) pair.
tagged_words = brown.tagged_words()
# Question 1: count the tokens that carry a given tag.
def countTags(tag):
    """Return how many items of ``tagged_words`` are labelled with ``tag``.

    The second element of each item is compared to ``tag`` with plain
    equality; 0 is returned when nothing matches.
    """
    return sum(1 for pair in tagged_words if pair[1] == tag)
# Question 2: count the occurrences of a given word.
def countWords(word):
    """Return how many items of ``tagged_words`` have ``word`` as their word.

    The first element of each item is compared to ``word`` with plain
    equality (no case folding); 0 is returned when nothing matches.
    """
    return sum(1 for pair in tagged_words if pair[0] == word)
# Question 3: for a given word, count how often it appears with each tag.
def create_word_dictionary(our_word):
    """Return a ``{tag: count}`` dict of the tags attached to ``our_word``.

    Only the items of ``tagged_words`` whose word equals ``our_word`` are
    counted, so no per-word table is built for the rest of the vocabulary.

    Args:
        our_word: The word to look up (exact equality, no case folding).

    Returns:
        A plain ``dict`` mapping each tag seen with ``our_word`` to the
        number of times it was seen, in order of first appearance. The
        dict is empty when ``our_word`` never occurs.
    """
    tag_counts = Counter(
        tag for word, tag in tagged_words if word == our_word
    )
    return dict(tag_counts)
# Question 4: count which tags immediately follow a given tag.
def create_tag_dictionary(tag):
    """Return a ``{next_tag: count}`` dict of the tags that directly follow ``tag``.

    Tag bigrams are formed from adjacent items of ``tagged_words`` in their
    stored order; every bigram whose first tag equals ``tag`` contributes one
    count to its second tag.

    The previous implementation ignored ``tag`` and always used ``"AT"``;
    calling this with ``"AT"`` yields the same result as before.

    Args:
        tag: The leading tag of the bigrams to count.

    Returns:
        A plain ``dict`` mapping each following tag to its count, in order of
        first appearance. Empty when ``tag`` is never followed by anything.
    """
    # Feed ngrams a generator so the full list of tags is never materialized.
    tag_stream = (pos for _, pos in tagged_words)
    followers = Counter(
        second for first, second in ngrams(tag_stream, 2) if first == tag
    )
    return dict(followers)
# Question 5: combine a word-given-tag ratio with an "AT" -> tag bigram ratio.
def find_probablity(our_word, tag):
    """Return ``count(our_word, tag)/count(tag) * count("AT" -> tag)/count(tag)``.

    All three counts are gathered in a single pass over ``tagged_words``:

    * ``count(tag)``            -- items labelled ``tag``
    * ``count(our_word, tag)``  -- items that are ``our_word`` labelled ``tag``
    * ``count("AT" -> tag)``    -- items labelled ``tag`` whose immediate
      predecessor is labelled ``"AT"``

    The first ratio is printed as ``prob = <value>`` before returning, as in
    the original implementation.

    Args:
        our_word: The word whose ratio is wanted (exact equality).
        tag: The tag assigned to ``our_word``.

    Returns:
        The product of the two ratios as a float. When ``tag`` never occurs,
        ``0.0`` is printed and returned (the original code raised
        ``ZeroDivisionError`` in that case).
    """
    tag_total = 0
    word_with_tag = 0
    at_then_tag = 0
    previous_tag = None  # no predecessor before the first item

    for word, current_tag in tagged_words:
        if current_tag == tag:
            tag_total += 1
            if word == our_word:
                word_with_tag += 1
            if previous_tag == "AT":
                at_then_tag += 1
        previous_tag = current_tag

    if tag_total == 0:
        # Unseen tag: both ratios are undefined, so report zero.
        print("prob = {}".format(0.0))
        return 0.0

    man_tt = word_with_tag / tag_total
    print("prob = {}".format(man_tt))

    # NOTE(review): the "AT" -> tag count is divided by count(tag), exactly as
    # the original did. A conditional P(tag | "AT") would divide by
    # count("AT") instead -- confirm which one the assignment intends.
    at_ratio = at_then_tag / tag_total
    return man_tt * at_ratio
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment