tejasvaidhyadev/test.jl

## gistfile1.txt
using CorpusLoaders
using TextAnalysis
using MultiResolutionIterators

# Evaluate the averaged-perceptron POS tagger on the CoNLL "test" split.
test = load(CoNLL(), "test")
# Flatten away the document level so `test[val]` yields a collection of tagged words.
test = flatten_levels(test, lvls(CoNLL, :document)) |> full_consolidate

testx = Vector{String}()  # words of the test split, in corpus order
testy = Vector{String}()  # gold POS tag for the word at the same index in `testx`
tagger = PerceptronTagger(true)  # `true` requests the pre-trained weights

# One pass over the corpus collects both the words and their gold tags.
for val in 1:length(test)
    for tagged_word in test[val]
        push!(testx, word(tagged_word))
        push!(testy, part_of_speech(tagged_word))
    end
end

# The whole test split is handed to the tagger as a single token sequence.
predictedy = predict(tagger, testx)

# Gold (word, tag) pairs laid out like `predictedy`, so entries compare directly.
check = collect(zip(testx, testy))

# Number of positions where the predicted pair equals the gold pair.
# `eachindex(check, predictedy)` throws a DimensionMismatch if the tagger returned a
# different number of entries than it was given, instead of comparing misaligned data.
# Using `count` also avoids reassigning a global from inside a top-level loop.
y = count(i -> check[i] == predictedy[i], eachindex(check, predictedy))

# Fraction of correctly tagged tokens (NaN when the test split is empty).
accuracy = y / length(check)


# Build the training set for an untrained averaged-perceptron POS tagger.
traindata = load(CoNLL(), "train")
# Flatten away the document level so `traindata[val]` yields a collection of tagged words.
traindata = flatten_levels(traindata, lvls(CoNLL, :document)) |> full_consolidate

# Each entry is a one-element list holding a single (word, tag) pair, as before.
# The vector starts out empty and typed, so no dummy ("1", "1") seed entry is needed
# and nothing has to be sliced off afterwards with a corpus-size-specific index.
# NOTE(review): every token becomes its own one-word "sentence"; if the tagger is meant
# to see whole sentences, push one list per `traindata[val]` instead - confirm intent.
train = Vector{Vector{Tuple{String,String}}}()
for val in 1:length(traindata)
    for tagged_word in traindata[val]
        push!(train, [(word(tagged_word), part_of_speech(tagged_word))])
    end
end

# Kept under its original name; it now refers to the same vector as `train`.
trainnew = train
tagger1 = PerceptronTagger(false)  # `false`: start without pre-trained weights

## test.jl
using CorpusLoaders
using TextAnalysis
using MultiResolutionIterators
# The block below measures the accuracy of the pre-trained tagger on the CoNLL "test" split.
test = load(CoNLL(), "test")
# Flatten away the document level so `test[val]` yields a collection of tagged words.
test = flatten_levels(test, lvls(CoNLL, :document)) |> full_consolidate

testx = Vector{String}()  # words of the test split, in corpus order
testy = Vector{String}()  # gold POS tag for the word at the same index in `testx`
tagger = PerceptronTagger(true)  # load the pre-trained weights

# One pass over the corpus collects both the words and their gold tags.
for val in 1:length(test)
    for tagged_word in test[val]
        push!(testx, word(tagged_word))
        push!(testy, part_of_speech(tagged_word))
    end
end

# Model prediction: `predictedy` holds (word, tag) pairs, one per input token.
predictedy = predict(tagger, testx)

# Gold (word, tag) pairs in the same shape as `predictedy`, for direct comparison.
check = collect(zip(testx, testy))

# Counter of correct predictions.
# `eachindex(check, predictedy)` throws a DimensionMismatch if the tagger returned a
# different number of entries than it was given, instead of comparing misaligned data.
# Using `count` also avoids reassigning a global from inside a top-level loop.
y = count(i -> check[i] == predictedy[i], eachindex(check, predictedy))

# Number of correct predictions / total number of predictions (NaN when empty).
accuracy = y / length(check)
###
# The block below trains the averaged-perceptron POS model on the CoNLL "train" split.
traindata = load(CoNLL(), "train")
# Flatten away the document level so `traindata[val]` yields a collection of tagged words.
traindata = flatten_levels(traindata, lvls(CoNLL, :document)) |> full_consolidate

# Each entry is a one-element list holding a single (word, tag) pair, as before.
# The vector starts out empty and typed, so no dummy ("1", "1") seed entry is needed
# and nothing has to be sliced off afterwards with a corpus-size-specific index
# (the old bound 203622 was simply the length of `train` for this particular corpus).
# NOTE(review): every token becomes its own one-word "sentence"; if the tagger is meant
# to see whole sentences, push one list per `traindata[val]` instead - confirm intent.
train = Vector{Vector{Tuple{String,String}}}()
for val in 1:length(traindata)
    for tagged_word in traindata[val]
        push!(train, [(word(tagged_word), part_of_speech(tagged_word))])
    end
end

# Kept under its original name; it now refers to the same vector as `train`.
trainnew = train
taggerfit = PerceptronTagger(false)  # tagger without pre-trained weights
fit!(taggerfit, trainnew)
# Known issue reported by the author: this run ends with the process being killed
# (cause not established here; presumably resource exhaustion - to be confirmed).
	using CorpusLoaders
	using TextAnalysis
	using MultiResolutionIterators

	# Evaluate the averaged-perceptron POS tagger on the CoNLL "test" split.
	test = load(CoNLL(), "test")
	# Flatten away the document level so `test[val]` yields a collection of tagged words.
	# (The pipe was previously written as `\|>`, a markdown escape that Julia cannot parse.)
	test = flatten_levels(test, lvls(CoNLL, :document)) |> full_consolidate

	testx = Vector{String}()  # words of the test split, in corpus order
	testy = Vector{String}()  # gold POS tag for the word at the same index in `testx`
	tagger = PerceptronTagger(true)  # `true` requests the pre-trained weights

	# One pass over the corpus collects both the words and their gold tags.
	for val in 1:length(test)
	    for tagged_word in test[val]
	        push!(testx, word(tagged_word))
	        push!(testy, part_of_speech(tagged_word))
	    end
	end

	# The whole test split is handed to the tagger as a single token sequence.
	predictedy = predict(tagger, testx)

	# Gold (word, tag) pairs laid out like `predictedy`, so entries compare directly.
	check = collect(zip(testx, testy))

	# Number of positions where the predicted pair equals the gold pair.
	# `eachindex(check, predictedy)` throws a DimensionMismatch if the tagger returned a
	# different number of entries than it was given, instead of comparing misaligned data.
	# Using `count` also avoids reassigning a global from inside a top-level loop.
	y = count(i -> check[i] == predictedy[i], eachindex(check, predictedy))

	# Fraction of correctly tagged tokens (NaN when the test split is empty).
	accuracy = y / length(check)


	# Build the training set for an untrained averaged-perceptron POS tagger.
	traindata = load(CoNLL(), "train")
	# Flatten away the document level so `traindata[val]` yields a collection of tagged words.
	# (The pipe was previously written as `\|>`, a markdown escape that Julia cannot parse.)
	traindata = flatten_levels(traindata, lvls(CoNLL, :document)) |> full_consolidate

	# Each entry is a one-element list holding a single (word, tag) pair, as before.
	# The vector starts out empty and typed, so no dummy ("1", "1") seed entry is needed
	# and nothing has to be sliced off afterwards with a corpus-size-specific index.
	# NOTE(review): every token becomes its own one-word "sentence"; if the tagger is meant
	# to see whole sentences, push one list per `traindata[val]` instead - confirm intent.
	train = Vector{Vector{Tuple{String,String}}}()
	for val in 1:length(traindata)
	    for tagged_word in traindata[val]
	        push!(train, [(word(tagged_word), part_of_speech(tagged_word))])
	    end
	end

	# Kept under its original name; it now refers to the same vector as `train`.
	trainnew = train
	tagger1 = PerceptronTagger(false)  # `false`: start without pre-trained weights