This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# determine the labels
import pyvw #vw python interface | |
# Integer ids of the labels. The values are exactly those of the original
# snippet; the trailing "| |" page-extraction residue that made every line
# a syntax error has been removed.
DEST = 1
PROP = 2
FAC = 3
...  # further definitions are elided in the original snippet
# create the class for the Sequence Labeler
class SequenceLabeler(pyvw.SearchTask): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# create an index of all the words (words2idx)
...  # elided in the original snippet
# create an index of all the labels (labels2idx)
...  # elided in the original snippet

# query to classify
q = "hotel amsterdam wifi"

# labels to use: "prop", "dest", "fac"
# this will contain all our training data (starts out empty)
matrix = []
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# query to classify
q = "hotel amsterdam wifi"

# labels to use: "prop", "dest", "fac"
# this will contain all our training data
# NOTE(review): ner_trainer is neither defined nor imported in the visible
# snippet -- confirm which module provides it.
trainer = ner_trainer("total_word_feature_extractor.dat")

# split the string into words on whitespace
# (any other tokenizer can be used instead)
# The snippet originally read the undefined name `string_to_clasify` here,
# which raises NameError; the query string is bound to `q` above.
q_list = q.split()