Last active
February 10, 2020 11:19
-
-
Save tejasvaidhyadev/01ea565ee8807a59c672866e4057c360 to your computer and use it in GitHub Desktop.
Tests the averaged perceptron part-of-speech (POS) tagger provided by the Julia package TextAnalysis.jl
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using CorpusLoaders | |
using TextAnalysis | |
using MultiResolutionIterators | |
# Measure the accuracy of the pre-trained perceptron POS tagger on the CoNLL "test" split.
test = load(CoNLL(), "test")
# Flatten away the document level; each element of `test` is then iterated below as a
# sequence of tagged words (presumably one sentence per element -- confirm against
# CorpusLoaders).
test = flatten_levels(test, lvls(CoNLL, :document)) |> full_consolidate

tagger = PerceptronTagger(true)  # `true` requests the pre-trained weights

# Gold-standard (word, tag) pairs, gathered in a single pass over the corpus.
check = Tuple{String,String}[]
for sentence in test, tagged_word in sentence
    push!(check, (word(tagged_word), part_of_speech(tagged_word)))
end
testx = String[w for (w, _) in check]      # words handed to the tagger
testy = String[tag for (_, tag) in check]  # gold POS tags

# `predict` is expected to return one (word, tag) tuple per input word.
# NOTE(review): the whole split is tagged as one long token sequence, as in the original
# script; tagging sentence by sentence may be closer to how the tagger is meant to be used.
predictedy = predict(tagger, testx)

# Refuse to score misaligned sequences instead of silently comparing a prefix.
length(predictedy) == length(check) || throw(DimensionMismatch(
    "got $(length(predictedy)) predictions for $(length(check)) gold tokens"))

# Number of tokens whose predicted (word, tag) pair equals the gold pair.
y = count(((gold, predicted),) -> gold == predicted, zip(check, predictedy))
# Fraction of correct predictions (NaN when the split is empty).
accuracy = y / length(check)
# Build the training set for a perceptron POS tagger from the CoNLL "train" split.
traindata = load(CoNLL(), "train")
traindata = flatten_levels(traindata, lvls(CoNLL, :document)) |> full_consolidate

# One vector of (word, tag) pairs per element of `traindata`, so that words stay grouped
# with their neighbours instead of each word becoming its own one-word "sentence".
# Assumes each element of `traindata` is one sentence -- confirm against CorpusLoaders.
# The typed empty vector replaces the old dummy seed element and the hard-coded
# `train[2:203622]` slice, which only worked for one exact corpus size.
train = Vector{Tuple{String,String}}[]
for sentence in traindata
    tagged = Tuple{String,String}[(word(tw), part_of_speech(tw)) for tw in sentence]
    push!(train, tagged)
end
trainnew = train  # kept under its old name; there is no placeholder left to strip

tagger1 = PerceptronTagger(false)  # `false`: start without pre-trained weights
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using CorpusLoaders | |
using TextAnalysis | |
using MultiResolutionIterators | |
# This block measures the accuracy of the pre-trained weights on the CoNLL "test" split.
test = load(CoNLL(), "test")
# Flatten away the document level; each element of `test` is then iterated below as a
# sequence of tagged words (presumably one sentence per element -- confirm against
# CorpusLoaders).
test = flatten_levels(test, lvls(CoNLL, :document)) |> full_consolidate

tagger = PerceptronTagger(true)  # load the pre-trained weights

# Gold-standard (word, tag) pairs in the same shape as the tagger output, gathered in a
# single pass over the corpus.
check = Tuple{String,String}[]
for sentence in test, tagged_word in sentence
    push!(check, (word(tagged_word), part_of_speech(tagged_word)))
end
testx = String[w for (w, _) in check]      # words
testy = String[tag for (_, tag) in check]  # gold POS tags

# Model prediction: `predict` is expected to return a vector of (word, tag) tuples.
# NOTE(review): the whole split is tagged as one long token sequence, as in the original
# script; tagging sentence by sentence may be closer to how the tagger is meant to be used.
predictedy = predict(tagger, testx)

# Refuse to score misaligned sequences instead of silently comparing a prefix.
length(predictedy) == length(check) || throw(DimensionMismatch(
    "got $(length(predictedy)) predictions for $(length(check)) gold tokens"))

# Counter of correct predictions: tokens whose predicted pair equals the gold pair.
y = count(((gold, predicted),) -> gold == predicted, zip(check, predictedy))
# Number of correct predictions / total number of predictions (NaN for an empty split).
accuracy = y / length(check)
###
# This block trains the averaged perceptron POS model on the CoNLL "train" split.
traindata = load(CoNLL(), "train")
traindata = flatten_levels(traindata, lvls(CoNLL, :document)) |> full_consolidate

# One vector of (word, tag) pairs per element of `traindata`, so that words stay grouped
# with their neighbours instead of each word becoming its own one-word "sentence".
# Assumes each element of `traindata` is one sentence -- confirm against CorpusLoaders.
# The typed empty vector replaces the old dummy seed element and the hard-coded
# `train[2:203622]` slice, which only worked for one exact corpus size.
train = Vector{Tuple{String,String}}[]
for sentence in traindata
    tagged = Tuple{String,String}[(word(tw), part_of_speech(tw)) for tw in sentence]
    push!(train, tagged)
end
trainnew = train  # kept under its old name; there is no placeholder left to strip

taggerfit = PerceptronTagger(false)  # tagger with no pre-trained weights
fit!(taggerfit, trainnew)
# Author's note on the original script (one-word training sentences): the run above
# ended in a killed process. Not re-verified with the per-sentence training data.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment