Skip to content

Instantly share code, notes, and snippets.

@thmavri
Created December 13, 2016 11:27
Show Gist options
  • Save thmavri/b6d5e4438e5278d4dff286b0e46f8b2e to your computer and use it in GitHub Desktop.
Save thmavri/b6d5e4438e5278d4dff286b0e46f8b2e to your computer and use it in GitHub Desktop.
# Query to classify.
q = "hotel amsterdam wifi"
# Labels used below: "prop", "dest", "fac".
# NOTE(review): ner_trainer / ner_training_instance are not imported in this
# snippet; presumably they come from the MITIE Python bindings
# (`from mitie import *`) -- confirm and add the import at the top of the file.
# The trainer accumulates all of our training data.
trainer = ner_trainer("total_word_feature_extractor.dat")
# Split the string into words; any tokenizer can be used here.
q_list = q.split()
# Create one NER (named entity recognition) training instance for the query.
train_item = ner_training_instance(q_list)
# Entity ranges index TOKENS of q_list, not characters, so every single-word
# entity covers exactly one position: token i is the range [i, i + 1).
for token_index, label in enumerate(("prop", "dest", "fac")):
    train_item.add_entity(range(token_index, token_index + 1), label)
trainer.add(train_item)
# Take advantage of a multi-core CPU: set num_threads to the number of
# processing cores available on the training machine.
trainer.num_threads = 23
ner = trainer.train()
ner.save_to_disk('ner_model.dat')
# Print the classes (tags) the trained model can emit.
print("Possible tags: %s" % (ner.get_possible_ner_tags(),))
# Recognize the entities in an already tokenized query.
q = ["hotel", "amsterdam", "wifi"]
entities = ner.extract_entities(q)
# Print the results. Single-argument print(...) with %-formatting behaves the
# same as a Python 2 print statement and as the Python 3 print function.
print("\nEntities found: %s" % (entities,))
print("\nNumber of entities detected: %d" % len(entities))
for e in entities:
    # Index instead of unpacking so tuples with extra trailing fields still work.
    token_range = e[0]  # positions in q covered by this entity
    tag = e[1]
    entity_text = " ".join(q[i] for i in token_range)
    print(" " + tag + ":" + entity_text)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment