Skip to content

Instantly share code, notes, and snippets.

@claytantor
Last active March 23, 2017 01:25
Show Gist options
  • Save claytantor/ddaa1dbb1d4ce7ac8182b9bf2481d3b8 to your computer and use it in GitHub Desktop.
Save claytantor/ddaa1dbb1d4ce7ac8182b9bf2481d3b8 to your computer and use it in GitHub Desktop.
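Generates Rasa NLU training data from a simpler, normalized statement format. The native Rasa format seems to carry a lot of redundant information, so this script lets you write each statement once as a template with synonyms and expands it into the full set of examples.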
#!/usr/bin/env python
# coding: utf-8
import itertools
import json
import re
import sys
import traceback
def load_model(model_file):
    """Return the decoded JSON document stored in the file ``model_file``."""
    with open(model_file, 'r') as handle:
        return json.load(handle)
def make_rasa_model_from_statements(statements_model):
    """Wrap the examples generated from ``statements_model`` in a Rasa envelope.

    The returned dict has a single ``"rasa_nlu_data"`` key whose value holds
    the ``entity_examples`` and ``intent_examples`` lists produced by
    ``handle_statements_model``.
    """
    entity_examples, intent_examples = handle_statements_model(statements_model)
    payload = {}
    payload['entity_examples'] = entity_examples
    payload['intent_examples'] = intent_examples
    return {"rasa_nlu_data": payload}
def handle_model(intent_name, entities, template, start, merge_model, items, keys):
    """Expand ``template`` over every synonym combination of ``entities``.

    For each combination, the chosen synonym of every entity is stored in
    ``merge_model`` under the entity's ``name`` and ``add_items`` is called to
    render and record the example.

    Args:
        intent_name: intent label passed through to ``add_items``.
        entities: list of entity dicts; an entity takes part in the expansion
            only if it has a non-empty ``synonyms`` list.
        template: ``str.format`` template passed through to ``add_items``.
        start: index of the first entity to expand; entities before it keep
            whatever value ``merge_model`` already holds for them.
        merge_model: dict mutated in place; ``merge_model['entities']`` is set
            to ``entities`` and each expanded entity name is (re)assigned.
        items: list that ``add_items`` appends examples to.
        keys: list that ``add_items`` uses to de-duplicate rendered texts.
    """
    merge_model['entities'] = entities

    # Entities without synonyms never receive a value here, so they do not
    # contribute a dimension to the expansion.
    varying = [entity for entity in entities[start:] if entity.get('synonyms')]
    if not varying:
        # Nothing to substitute: no example is generated at all.
        return

    names = [entity['name'] for entity in varying]
    synonym_lists = [entity['synonyms'] for entity in varying]

    # product() walks the combinations with the last entity varying fastest,
    # visiting each combination exactly once.
    for combination in itertools.product(*synonym_lists):
        merge_model.update(zip(names, combination))
        add_items(intent_name, template, merge_model, items, keys)
def add_items(intent_name, template, merge_model, items, keys):
    """Render one example from ``template`` and record it if it is new.

    Args:
        intent_name: value stored under ``"intent"`` in the new example.
        template: ``str.format`` template filled from ``merge_model``.
        merge_model: dict of substitution values; ``merge_model['entities']``
            lists the entity dicts to locate in the rendered text.
        items: list the new example dict is appended to.
        keys: list of rendered texts already recorded; used to skip
            duplicates and extended with each new text.

    Any error while rendering the text or locating its entities is reported
    on stdout (the offending template/merge_model plus a short traceback) and
    the example is skipped; the error is not propagated.
    """
    try:
        merged_text = template.format(**merge_model)
        if merged_text in keys:
            return
        entities_parsed = [
            parse_item_entity(merged_text, entity)
            for entity in merge_model['entities']
        ]
    except Exception:
        # Best-effort generation: one bad statement must not abort the run,
        # but KeyboardInterrupt/SystemExit are deliberately not caught.
        print(json.dumps({"template": template, "merge_model": merge_model}, indent=4))
        print("*** print_exception:")
        traceback.print_exc(limit=2, file=sys.stdout)
        return

    items.append({
        "text": merged_text,
        "intent": intent_name,
        "entities": entities_parsed,
    })
    keys.append(merged_text)
def parse_item_entity(speech_text, entity):
    """Describe where one entity sits inside ``speech_text``.

    Args:
        speech_text: the rendered example text; words are separated by single
            spaces.
        entity: dict with ``word`` (index of the entity's word within the
            space-split text), ``value`` and ``name``.

    Returns:
        A dict with ``value`` and ``entity`` copied from ``entity['value']``
        and ``entity['name']``, plus ``start``/``end`` character offsets of
        the selected word (``end`` is exclusive).

    Raises:
        IndexError: if ``entity['word']`` is outside the split text.
        KeyError: if ``entity`` lacks ``word``, ``value`` or ``name``.
    """
    parts = speech_text.split(" ")
    word_index = entity['word']
    entity_word = parts[word_index]

    # Derive the offset from the words that precede the selected one (each
    # followed by one space). Searching the text for the word instead would
    # return the first substring hit (e.g. "push" inside "pushed") and would
    # misread words containing regex metacharacters.
    start = sum(len(part) + 1 for part in parts[:word_index])

    rasa_entity = {}
    rasa_entity['value'] = entity['value']
    rasa_entity['entity'] = entity['name']
    rasa_entity['start'] = start
    rasa_entity['end'] = start + len(entity_word)
    return rasa_entity
def handle_statements_model(statements_model):
    """Build the entity and intent example lists for every statement.

    Each entry of ``statements_model['statements']`` is expanded through
    ``handle_model`` into a shared list of examples shaped like::

        "text": "provider github push branchname",
        "intent": "provider_branchpush",
        "entities": [{
            "start": 0,
            "end": 8,
            "value": "provider",
            "entity": "element"
        }]

    Returns:
        A ``(entity_examples, intent_examples)`` tuple. ``entity_examples``
        holds the generated examples as-is; ``intent_examples`` holds one
        ``{"text", "intent"}`` dict per generated example, in the same order.
    """
    generated = []
    seen_texts = []
    for stmt in statements_model['statements']:
        # Every statement is expanded with its own, initially empty,
        # substitution map.
        handle_model(stmt['intent'], stmt['entities'], stmt['text'], 0, {}, generated, seen_texts)

    entity_examples = list(generated)
    intent_examples = [
        {"text": example['text'], "intent": example['intent']}
        for example in generated
    ]
    return entity_examples, intent_examples
def main(args):
    """Convert the statements file named by ``args[0]`` and print the result.

    Args:
        args: command-line arguments without the program name; the first one
            is the path of the statements JSON file.

    Raises:
        SystemExit: with a usage message when no file path was given.
    """
    if not args:
        raise SystemExit("usage: {0} <statements.json>".format(sys.argv[0]))
    statements_model = load_model(args[0])
    rasa_model = make_rasa_model_from_statements(statements_model)
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(json.dumps(rasa_model, indent=4))
# Script entry point: hand everything after the program name to main().
if __name__ == "__main__":
    cli_args = sys.argv[1:]
    main(cli_args)
{
"statements": [{
"text": "{element} {provider_subject_vcs} {provider_verb} {provider_object_branchname}",
"intent": "provider_branchpush",
"entities": [
{
"word": 0,
"value":"provider",
"synonyms": ["provider"],
"name": "element"
}, {
"word": 1,
"value":"vcs",
"synonyms": ["vcs","github", "travis", "stash", "bitbucket"],
"name": "provider_subject_vcs"
}, {
"word": 2,
"value":"push",
"synonyms": ["pushed", "push", "committed", "commit"],
"name": "provider_verb"
}, {
"word": 3,
"value":"branchname",
"synonyms": ["branchname"],
"name": "provider_object_branchname"
}]
},{
"text": "{element} {provider_subject_vcs} {provider_object_record} {provider_verb}",
"intent": "provider_prcreate",
"entities": [
{
"word": 0,
"value":"provider",
"synonyms": ["provider"],
"name": "element"
}, {
"word": 1,
"value":"vcs",
"synonyms": ["vcs","github", "travis", "stash", "bitbucket"],
"name": "provider_subject_vcs"
}, {
"word": 2,
"value":"pr",
"synonyms": ["pr", "pullrequest"],
"name": "provider_object_record"
}, {
"word": 3,
"value":"create",
"synonyms": ["create"],
"name": "provider_verb"
}]
}]
}
@kfezer
Copy link

kfezer commented Mar 23, 2017

Thanks for this!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment