Skip to content

Instantly share code, notes, and snippets.

@tejasvaidhyadev
Last active February 10, 2020 11:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tejasvaidhyadev/01ea565ee8807a59c672866e4057c360 to your computer and use it in GitHub Desktop.
Save tejasvaidhyadev/01ea565ee8807a59c672866e4057c360 to your computer and use it in GitHub Desktop.
Testing the averaged perceptron POS tagger provided by Julia's TextAnalysis.jl
using CorpusLoaders
using TextAnalysis
using MultiResolutionIterators
# Load the CoNLL "test" split and drop the document level, so that `test` is
# iterated below as a collection of sentences, each a collection of tagged words.
test = load(CoNLL(), "test")
test = full_consolidate(flatten_levels(test, lvls(CoNLL, :document)))

# Parallel flat lists over every token in corpus order:
# `testx` holds the word forms, `testy` the gold part-of-speech tags.
testx = Vector{String}()
testy = Vector{String}()
for sentence in test, token in sentence
    push!(testx, word(token))
    push!(testy, part_of_speech(token))
end

# PerceptronTagger(true): the flag is presumably "load pretrained weights"
# (the commented copy of this script says so) -- confirm against TextAnalysis.jl.
tagger = PerceptronTagger(true)
# Tag the whole flattened token sequence in a single call.
predictedy = predict(tagger, testx)
# Gold (word, tag) pairs for every token of `test`, in corpus order. They are
# built as tuples so each one can be compared directly with the matching entry
# of `predictedy`.
check = Tuple{String,String}[]
for sentence in test, tagged_word in sentence
    push!(check, (word(tagged_word), part_of_speech(tagged_word)))
end

# Comparing position by position is only meaningful if both sequences line up.
length(check) == length(predictedy) || throw(DimensionMismatch(
    "expected $(length(check)) predictions, got $(length(predictedy))"))

# Number of positions where the predicted pair equals the gold pair.
# `count` replaces the manual counter loop, which lacked its `if`, had stray
# `end`s, and tried to add a scalar to a one-element Vector.
y = count(((gold, guess),) -> gold == guess, zip(check, predictedy))

# Fraction of correctly tagged tokens (NaN when the test set is empty).
accuracy = y / length(check)
# Load the CoNLL "train" split and drop the document level, leaving a
# collection of sentences, each a collection of tagged words.
traindata = load(CoNLL(), "train")
traindata = flatten_levels(traindata, lvls(CoNLL, :document)) |> full_consolidate

# Training set: one inner vector of (word, tag) pairs per sentence.
# The element type is stated up front, so no dummy seed element and no
# hard-coded `2:203622` slice (which only fits one exact corpus size) is needed.
# Tokens stay grouped by sentence instead of each being wrapped in its own
# one-element "sentence" -- NOTE(review): assumes fit! uses the surrounding
# tokens of a sentence as context; confirm against TextAnalysis.jl.
trainnew = Vector{Tuple{String,String}}[
    Tuple{String,String}[(word(tw), part_of_speech(tw)) for tw in sentence]
    for sentence in traindata
]

# PerceptronTagger(false): presumably a tagger without pretrained weights,
# as the commented copy of this script states -- confirm.
tagger1 = PerceptronTagger(false)
using CorpusLoaders
using TextAnalysis
using MultiResolutionIterators
# Evaluate the pretrained tagger on the CoNLL "test" split.
# Load the split and drop the document level, so that `test` is iterated below
# as a collection of sentences, each a collection of tagged words.
test = load(CoNLL(), "test")
test = full_consolidate(flatten_levels(test, lvls(CoNLL, :document)))

testx = Vector{String}()  # word forms, one per token, in corpus order
testy = Vector{String}()  # gold part-of-speech tags, parallel to `testx`
for sentence in test, token in sentence
    push!(testx, word(token))
    push!(testy, part_of_speech(token))
end

# Tagger initialised with pretrained weights (per the original author's note).
tagger = PerceptronTagger(true)
# Tag the whole flattened token sequence in a single call; per the original
# author's note the result is a list of ("word", "tag") pairs.
predictedy = predict(tagger, testx)
# Gold (word, tag) pairs for every token of `test`, in corpus order. They are
# built as tuples so each one can be compared directly with the matching entry
# of `predictedy`.
check = Tuple{String,String}[]
for sentence in test, tagged_word in sentence
    push!(check, (word(tagged_word), part_of_speech(tagged_word)))
end

# Comparing position by position is only meaningful if both sequences line up.
length(check) == length(predictedy) || throw(DimensionMismatch(
    "expected $(length(check)) predictions, got $(length(predictedy))"))

# Number of correct predictions: positions where the predicted pair equals the
# gold pair. `count` replaces the manual counter loop, which lacked its `if`,
# had stray `end`s, and tried to add a scalar to a one-element Vector.
y = count(((gold, guess),) -> gold == guess, zip(check, predictedy))

# Accuracy = correct predictions / total predictions (NaN for an empty test set).
accuracy = y / length(check)
###
# Train an averaged perceptron POS model from scratch on the CoNLL "train" split.
# Load the split and drop the document level, leaving a collection of
# sentences, each a collection of tagged words.
traindata = load(CoNLL(), "train")
traindata = flatten_levels(traindata, lvls(CoNLL, :document)) |> full_consolidate

# Training set: one inner vector of (word, tag) pairs per sentence.
# The element type is stated up front, so no dummy seed element and no
# hard-coded `2:203622` slice (which only fits one exact corpus size) is needed.
# Tokens stay grouped by sentence instead of each being wrapped in its own
# one-element "sentence" -- NOTE(review): assumes fit! uses the surrounding
# tokens of a sentence as context; confirm against TextAnalysis.jl.
trainnew = Vector{Tuple{String,String}}[
    Tuple{String,String}[(word(tw), part_of_speech(tw)) for tw in sentence]
    for sentence in traindata
]

taggerfit = PerceptronTagger(false)  # tagger with no pretrained weights
fit!(taggerfit, trainnew)
# NOTE: the original author reported that this training call ended with the
# process being killed; the cause has not been established.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment