# yedhink/assignment1.py

## assignment1.py
from nltk.corpus import brown
from nltk.util import ngrams
from collections import defaultdict

# Tagged-token sequence of the Brown corpus, bound once at module level.
# The functions in this file read item[0] as the word and item[1] as its tag.
tagged_words = brown.tagged_words()

# question 1
def countTags(tag):
    """Return how many items of ``tagged_words`` carry the given tag.

    The second element of each item is compared to ``tag`` with ``==``.
    The result is ``0`` when no item matches.
    """
    return sum(1 for token in tagged_words if token[1] == tag)

# question 2
def countWords(word):
    """Return how many items of ``tagged_words`` have the given word form.

    The first element of each item is compared to ``word`` with ``==``,
    so the match is exact (case-sensitive for strings).  The result is
    ``0`` when no item matches.
    """
    return sum(1 for token in tagged_words if token[0] == word)

# question 3
def create_word_dictionary(our_word):
    """Count the tags assigned to one word in ``tagged_words``.

    Args:
        our_word: The word form to look up.  It is compared with ``==``
            against the first element of each item, so matching is exact.

    Returns:
        A plain ``dict`` mapping each tag seen with ``our_word`` to the
        number of times that pairing occurs.  The dict is empty when the
        word never occurs.
    """
    # Tally only the requested word.  Building a word -> tag table for the
    # whole corpus just to read a single entry wastes memory.
    tag_counts = defaultdict(int)
    for word, tag in tagged_words:
        if word == our_word:
            tag_counts[tag] += 1
    return dict(tag_counts)

# question 4
def create_tag_dictionary(tag):
    """Count which tags immediately follow ``tag`` in ``tagged_words``.

    Args:
        tag: The tag whose successors are tallied.

    Returns:
        A plain ``dict`` mapping each tag that directly follows ``tag`` to
        the number of times that adjacent pair occurs.  The dict is empty
        when ``tag`` never occurs or is never followed by another token.
    """
    followers = defaultdict(int)
    # Sentinel that equals no real tag, so the first token has no predecessor.
    previous = object()
    for _, current in tagged_words:
        # Match the requested tag; a fixed literal here would silently
        # ignore the argument.
        if previous == tag:
            followers[current] += 1
        previous = current
    return dict(followers)

# question 5
def find_probablity(our_word, tag, prev_tag="AT"):
    """Multiply two relative frequencies estimated from ``tagged_words``.

    The first factor is count(``our_word`` tagged ``tag``) / count(``tag``).
    The second factor is count(``prev_tag`` immediately followed by
    ``tag``) / count(``tag``).

    NOTE(review): the second factor is normalised by count(``tag``), not
    by count(``prev_tag``).  This keeps the existing arithmetic; confirm
    which denominator is intended.

    Args:
        our_word: Word form, compared with ``==`` to each token's word.
        tag: Tag used in both factors.
        prev_tag: Tag required on the preceding token for the second
            factor.  Defaults to ``"AT"``, the value that used to be
            hard-coded.

    Returns:
        The product of the two factors, or ``0.0`` when ``tag`` never
        occurs (this case used to raise ``ZeroDivisionError``).

    Side effects:
        Prints the first factor as ``prob = <value>``.
    """
    tag_total = 0        # tokens tagged ``tag``
    word_with_tag = 0    # of those, tokens whose word is ``our_word``
    tag_after_prev = 0   # of those, tokens directly preceded by ``prev_tag``
    # Sentinel that equals no real tag, so the first token has no predecessor.
    previous = object()
    for word, current in tagged_words:
        if current == tag:
            tag_total += 1
            if word == our_word:
                word_with_tag += 1
            if previous == prev_tag:
                tag_after_prev += 1
        previous = current

    if tag_total == 0:
        # Nothing to normalise by: report zero instead of dividing by zero.
        man_tt = 0.0
        transition = 0.0
    else:
        man_tt = word_with_tag / tag_total
        transition = tag_after_prev / tag_total
    print("prob = {}".format(man_tt))
    return man_tt * transition
	from nltk.corpus import brown
	from nltk.util import ngrams
	from collections import defaultdict

	# Tagged-token sequence of the Brown corpus; the functions that follow
	# read item[0] as the word and item[1] as its tag.
	tagged_words = brown.tagged_words()

	# question 1
	def countTags(tag):
		"""Return how many items of ``tagged_words`` carry the given tag.

		The second element of each item is compared to ``tag`` with ``==``.
		The result is ``0`` when no item matches.
		"""
		# The body must be nested under the ``def``; with every line at the
		# same indent level the definition does not parse.
		return sum(1 for token in tagged_words if token[1] == tag)

	# question 2
	def countWords(word):
		"""Return how many items of ``tagged_words`` have the given word form.

		The first element of each item is compared to ``word`` with ``==``,
		so the match is exact.  The result is ``0`` when no item matches.
		"""
		# The body must be nested under the ``def``; with every line at the
		# same indent level the definition does not parse.
		return sum(1 for token in tagged_words if token[0] == word)

	# question 3
	def create_word_dictionary(our_word):
		"""Count the tags assigned to one word in ``tagged_words``.

		Args:
			our_word: The word form to look up.  It is compared with ``==``
				against the first element of each item, so matching is exact.

		Returns:
			A plain ``dict`` mapping each tag seen with ``our_word`` to the
			number of times that pairing occurs.  The dict is empty when the
			word never occurs.
		"""
		# Tally only the requested word.  Building a word -> tag table for
		# the whole corpus just to read a single entry wastes memory.
		tag_counts = defaultdict(int)
		for word, tag in tagged_words:
			if word == our_word:
				tag_counts[tag] += 1
		return dict(tag_counts)

	# question 4
	def create_tag_dictionary(tag):
		"""Count which tags immediately follow ``tag`` in ``tagged_words``.

		Args:
			tag: The tag whose successors are tallied.

		Returns:
			A plain ``dict`` mapping each tag that directly follows ``tag``
			to the number of times that adjacent pair occurs.  The dict is
			empty when ``tag`` never occurs or is never followed by another
			token.
		"""
		followers = defaultdict(int)
		# Sentinel that equals no real tag, so the first token has no
		# predecessor.
		previous = object()
		for _, current in tagged_words:
			# Match the requested tag; a fixed literal here would silently
			# ignore the argument.
			if previous == tag:
				followers[current] += 1
			previous = current
		return dict(followers)

	# question 5
	def find_probablity(our_word, tag, prev_tag="AT"):
		"""Multiply two relative frequencies estimated from ``tagged_words``.

		The first factor is count(``our_word`` tagged ``tag``) / count(``tag``).
		The second factor is count(``prev_tag`` immediately followed by
		``tag``) / count(``tag``).

		NOTE(review): the second factor is normalised by count(``tag``), not
		by count(``prev_tag``).  This keeps the existing arithmetic; confirm
		which denominator is intended.

		Args:
			our_word: Word form, compared with ``==`` to each token's word.
			tag: Tag used in both factors.
			prev_tag: Tag required on the preceding token for the second
				factor.  Defaults to ``"AT"``, the value that used to be
				hard-coded.

		Returns:
			The product of the two factors, or ``0.0`` when ``tag`` never
			occurs (this case used to raise ``ZeroDivisionError``).

		Side effects:
			Prints the first factor as ``prob = <value>``.
		"""
		tag_total = 0        # tokens tagged ``tag``
		word_with_tag = 0    # of those, tokens whose word is ``our_word``
		tag_after_prev = 0   # of those, tokens directly preceded by ``prev_tag``
		# Sentinel that equals no real tag, so the first token has no
		# predecessor.
		previous = object()
		for word, current in tagged_words:
			if current == tag:
				tag_total += 1
				if word == our_word:
					word_with_tag += 1
				if previous == prev_tag:
					tag_after_prev += 1
			previous = current

		if tag_total == 0:
			# Nothing to normalise by: report zero instead of dividing by zero.
			man_tt = 0.0
			transition = 0.0
		else:
			man_tt = word_with_tag / tag_total
			transition = tag_after_prev / tag_total
		print("prob = {}".format(man_tt))
		return man_tt * transition