Skip to content

Instantly share code, notes, and snippets.

@yedhink
Created March 21, 2019 15:35
Show Gist options
  • Save yedhink/386912fc421e100def9be464976e977e to your computer and use it in GitHub Desktop.
Save yedhink/386912fc421e100def9be464976e977e to your computer and use it in GitHub Desktop.
NLP elective assignments
from nltk.corpus import brown
from nltk.util import ngrams
from collections import defaultdict
# POS-tagged Brown corpus as (word, tag) pairs, fetched once at import time;
# the functions below index each item as x[0] (word) and x[1] (tag).
tagged_words = brown.tagged_words()
# question 1
def countTags(tag, corpus=None):
    """Count how many tokens carry the part-of-speech tag ``tag``.

    Args:
        tag: Tag to look for; compared with ``==`` against each token's tag.
        corpus: Optional iterable of ``(word, tag)`` pairs to scan instead of
            the module-level ``tagged_words``.

    Returns:
        The number of matching tokens (0 when the tag never occurs).
    """
    if corpus is None:
        corpus = tagged_words
    return sum(1 for _, token_tag in corpus if token_tag == tag)
# question 2
def countWords(word, corpus=None):
    """Count how many tokens have exactly the surface form ``word``.

    The comparison is a plain ``==``, so it is case-sensitive: ``"The"`` and
    ``"the"`` are counted separately.

    Args:
        word: Word form to look for.
        corpus: Optional iterable of ``(word, tag)`` pairs to scan instead of
            the module-level ``tagged_words``.

    Returns:
        The number of matching tokens (0 when the word never occurs).
    """
    if corpus is None:
        corpus = tagged_words
    return sum(1 for token, _ in corpus if token == word)
# question 3
def create_word_dictionary(our_word, corpus=None):
    """Return how often ``our_word`` occurs with each tag.

    Only the requested word is tallied, rather than building a table for the
    whole vocabulary and discarding all but one entry.

    Args:
        our_word: Word form to look up (exact, case-sensitive match).
        corpus: Optional iterable of ``(word, tag)`` pairs to scan instead of
            the module-level ``tagged_words``.

    Returns:
        A plain ``dict`` mapping tag -> occurrence count for ``our_word``;
        empty when the word never occurs.
    """
    if corpus is None:
        corpus = tagged_words
    tag_counts = defaultdict(int)
    for word, tag in corpus:
        if word == our_word:
            tag_counts[tag] += 1
    # Hand back a plain dict so callers do not create keys by reading them.
    return dict(tag_counts)
# question 4
def create_tag_dictionary(tag, corpus=None):
    """Return how often each tag immediately follows ``tag``.

    The previous version ignored its ``tag`` argument and always reported the
    followers of ``"AT"``; the argument is now honoured.

    Args:
        tag: The preceding tag whose successors are counted.
        corpus: Optional iterable of ``(word, tag)`` pairs to scan instead of
            the module-level ``tagged_words``.

    Returns:
        A plain ``dict`` mapping following-tag -> number of times it appears
        directly after ``tag``; empty when ``tag`` never occurs or is only
        ever the last token.
    """
    if corpus is None:
        corpus = tagged_words
    tags = [token_tag for _, token_tag in corpus]
    followers = defaultdict(int)
    # zip(tags, tags[1:]) walks every adjacent (previous, next) tag pair.
    for previous, following in zip(tags, tags[1:]):
        if previous == tag:
            followers[following] += 1
    return dict(followers)
# question 5
def find_probablity(our_word, tag, prev_tag="AT", corpus=None):
    """Score ``our_word`` tagged ``tag`` when preceded by ``prev_tag``.

    The result is the product of two relative frequencies:

    * ``count(our_word tagged tag) / count(tag)``, which is also printed as
      ``prob = ...``;
    * ``count(prev_tag immediately followed by tag) / count(tag)``.

    NOTE(review): the second factor is normalised by ``count(tag)``, not
    ``count(prev_tag)`` as a conditional P(tag | prev_tag) would be. This is
    kept exactly as the original computed it -- confirm it is intended.

    Args:
        our_word: Word form to score (exact, case-sensitive match).
        tag: Tag assigned to ``our_word``.
        prev_tag: Tag of the preceding token; defaults to ``"AT"``, the value
            that used to be hard-coded.
        corpus: Optional iterable of ``(word, tag)`` pairs to scan instead of
            the module-level ``tagged_words``.

    Returns:
        The product described above as a float; ``0.0`` when ``tag`` never
        occurs (this used to raise ``ZeroDivisionError``).
    """
    if corpus is None:
        corpus = tagged_words

    tags = []
    tag_total = 0       # tokens tagged `tag`
    word_with_tag = 0   # of those, tokens whose word is `our_word`
    for word, token_tag in corpus:
        tags.append(token_tag)
        if token_tag == tag:
            tag_total += 1
            if word == our_word:
                word_with_tag += 1

    emission = word_with_tag / tag_total if tag_total else 0.0
    print("prob = {}".format(emission))
    if not tag_total:
        # An unseen tag has no mass at all; avoid dividing by zero below.
        return 0.0

    pair_total = sum(
        1
        for previous, following in zip(tags, tags[1:])
        if previous == prev_tag and following == tag
    )
    return emission * (pair_total / tag_total)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment