h-alg/first.py Secret

## first.py
import nltk
from nltk.tag.sequential import *
from nltk.corpus import udhr
from nltk.corpus import PlaintextCorpusReader
from nltk.corpus import brown
from nltk.corpus.reader import CategorizedPlaintextCorpusReader
from nltk.corpus.reader import TaggedCorpusReader

def BuildCorpus(filename, train_size=5911):
    """Read a three-column token file and split it into train/test sentences.

    Each line of ``filename`` is split on whitespace. Lines that do not have
    exactly three fields are ignored; for the others the middle field is
    discarded, leaving a ``(token, tag)`` pair. The pairs are grouped into
    sentences: a pair whose token is ``'.'`` ends the current sentence and is
    not itself stored, and any pairs left after the last ``'.'`` form a final
    sentence.

    filename   -- path of the corpus file to read.
    train_size -- number of leading sentences that go into the training part
                  (default 5911); the remaining sentences form the testing
                  part.

    Returns ``train_test``, a two-item list ``[training, testing]``. Each item
    is a list of sentences and each sentence is a list of ``(token, tag)``
    tuples.
    """
    # The file is only read, so open it read-only; the with block closes it.
    pairs = []
    with open(filename, 'r') as myfile:
        for line in myfile:
            fields = line.split()
            if len(fields) == 3:
                pairs.append((fields[0], fields[2]))

    # Walk every pair, starting with the very first one so the opening token
    # of the file is part of the first sentence.
    sentences = []
    current = []
    for pair in pairs:
        if pair[0] == '.':
            # A '.' token closes the sentence (which may be empty).
            sentences.append(current)
            current = []
        else:
            current.append(pair)
    if current:
        # Tokens after the last '.' still form a sentence.
        sentences.append(current)

    return [sentences[:train_size], sentences[train_size:]]


# Building our corpus.

# Read the file name as plain text. Python 2's input() evaluates what is typed
# as an expression, so raw_input is used when it exists; on Python 3 input()
# already returns the text unchanged.
try:
    read_line = raw_input
except NameError:
    read_line = input

# Surrounding quotes are stripped so a name typed in quotes is still accepted.
filename = read_line("enter the file name please: ").strip().strip("\"'")
train_test = BuildCorpus(filename)

# training and testing are the two items returned by BuildCorpus.
training = train_test[0]
testing = train_test[1]

# test is the list for testing our taggers: the tokens of testing with the
# tags removed, flattened into one list (sentence boundaries are not kept).
test = [pair[0] for sentence in testing for pair in sentence]

# Our backoff tagger: trigram -> bigram -> unigram -> default tag 'O'.
default_tagger = nltk.DefaultTagger('O')
unigram_tagger = nltk.UnigramTagger(training, backoff=default_tagger)
bigram_tagger = nltk.BigramTagger(training, backoff=unigram_tagger)
trigram_tagger = nltk.TrigramTagger(training, backoff=bigram_tagger)


# evaluate scores our tagger: the tags are removed from testing, the tokens
# are retagged, and the result is compared with the original tags.
# tagger is our tagging function.

def tagger(data):
    """Tag ``data`` with the module-level trigram backoff tagger.

    ``data`` is handed unchanged to ``trigram_tagger.tag`` and the result of
    that call is returned.
    """
    tagged_tokens = trigram_tagger.tag(data)
    return tagged_tokens

# evaluate holds the score of the trigram backoff tagger on the tagged
# testing sentences.
evaluate = trigram_tagger.evaluate(testing)

# tagged.txt is the file we write our test tokens and their predicted tags
# to, one "token/tag" pair per line.
learned = tagger(test)
with open('tagged.txt', 'w') as taged:
    # The with block closes the file even if a write fails.
    for tag in learned:
        taged.write(str(tag[0]) + '/' + str(tag[1]) + '\n')

# Printing the evaluation score. A single parenthesised argument prints the
# same text under both Python 2 and Python 3.
print(" the evaluation is: %s" % (evaluate,))
	import nltk
	from nltk.tag.sequential import *
	from nltk.corpus import udhr
	from nltk.corpus import PlaintextCorpusReader
	from nltk.corpus import brown
	from nltk.corpus.reader import CategorizedPlaintextCorpusReader
	from nltk.corpus.reader import TaggedCorpusReader

	def BuildCorpus(filename, train_size=5911):
		"""Read a three-column token file and split it into train/test sentences.

		Each line of ``filename`` is split on whitespace. Lines that do not have
		exactly three fields are ignored; for the others the middle field is
		discarded, leaving a ``(token, tag)`` pair. The pairs are grouped into
		sentences: a pair whose token is ``'.'`` ends the current sentence and is
		not itself stored, and any pairs left after the last ``'.'`` form a final
		sentence.

		filename   -- path of the corpus file to read.
		train_size -- number of leading sentences that go into the training part
		              (default 5911); the remaining sentences form the testing
		              part.

		Returns ``train_test``, a two-item list ``[training, testing]``. Each item
		is a list of sentences and each sentence is a list of ``(token, tag)``
		tuples.
		"""
		# The file is only read, so open it read-only; the with block closes it.
		pairs = []
		with open(filename, 'r') as myfile:
			for line in myfile:
				fields = line.split()
				if len(fields) == 3:
					pairs.append((fields[0], fields[2]))

		# Walk every pair, starting with the very first one so the opening token
		# of the file is part of the first sentence.
		sentences = []
		current = []
		for pair in pairs:
			if pair[0] == '.':
				# A '.' token closes the sentence (which may be empty).
				sentences.append(current)
				current = []
			else:
				current.append(pair)
		if current:
			# Tokens after the last '.' still form a sentence.
			sentences.append(current)

		return [sentences[:train_size], sentences[train_size:]]


	# Building our corpus.

	# Read the file name as plain text. Python 2's input() evaluates what is typed
	# as an expression, so raw_input is used when it exists; on Python 3 input()
	# already returns the text unchanged.
	try:
		read_line = raw_input
	except NameError:
		read_line = input

	# Surrounding quotes are stripped so a name typed in quotes is still accepted.
	filename = read_line("enter the file name please: ").strip().strip("\"'")
	train_test = BuildCorpus(filename)

	# training and testing are the two items returned by BuildCorpus.
	training = train_test[0]
	testing = train_test[1]

	# test is the list for testing our taggers: the tokens of testing with the
	# tags removed, flattened into one list (sentence boundaries are not kept).
	test = [pair[0] for sentence in testing for pair in sentence]

	# Our backoff tagger: trigram -> bigram -> unigram -> default tag 'O'.
	default_tagger = nltk.DefaultTagger('O')
	unigram_tagger = nltk.UnigramTagger(training, backoff=default_tagger)
	bigram_tagger = nltk.BigramTagger(training, backoff=unigram_tagger)
	trigram_tagger = nltk.TrigramTagger(training, backoff=bigram_tagger)


	# evaluate scores our tagger: the tags are removed from testing, the tokens
	# are retagged, and the result is compared with the original tags.
	# tagger is our tagging function.

	def tagger(data):
		"""Tag ``data`` with the module-level trigram backoff tagger.

		``data`` is handed unchanged to ``trigram_tagger.tag`` and the result of
		that call is returned.
		"""
		return trigram_tagger.tag(data)

	# evaluate holds the score of the trigram backoff tagger on the tagged
	# testing sentences.
	evaluate = trigram_tagger.evaluate(testing)

	# tagged.txt is the file we write our test tokens and their predicted tags
	# to, one "token/tag" pair per line.
	learned = tagger(test)
	with open('tagged.txt', 'w') as taged:
		# The with block closes the file even if a write fails.
		for tag in learned:
			taged.write(str(tag[0]) + '/' + str(tag[1]) + '\n')

	# Printing the evaluation score. A single parenthesised argument prints the
	# same text under both Python 2 and Python 3.
	print(" the evaluation is: %s" % (evaluate,))