Skip to content

Instantly share code, notes, and snippets.

@claytantor
Last active March 23, 2017 01:25
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save claytantor/ddaa1dbb1d4ce7ac8182b9bf2481d3b8 to your computer and use it in GitHub Desktop.
Save claytantor/ddaa1dbb1d4ce7ac8182b9bf2481d3b8 to your computer and use it in GitHub Desktop.
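Generates Rasa NLU training data from a simpler, normalized statement format. The native Rasa format seems to carry a lot of redundant information; here each statement is written once as a template plus per-entity synonym lists, and the script expands it into the full set of examples.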
#!/usr/bin/env python
# coding: utf-8
import sys
import json
import re
import traceback
def load_model(model_file):
    """Read the JSON document stored at ``model_file`` and return it decoded.

    :param model_file: path of the JSON file to open for reading.
    :return: the decoded JSON value.
    """
    with open(model_file, 'r') as handle:
        return json.load(handle)
def make_rasa_model_from_statements(statements_model):
    """Wrap the generated examples in the ``rasa_nlu_data`` envelope.

    :param statements_model: statements document, passed unchanged to
        ``handle_statements_model``.
    :return: dict with the single key ``rasa_nlu_data`` whose value holds
        ``entity_examples`` and ``intent_examples``.
    """
    with_entities, intents_only = handle_statements_model(statements_model)
    payload = {
        'entity_examples': with_entities,
        'intent_examples': intents_only,
    }
    return {"rasa_nlu_data": payload}
def handle_model(intent_name, entities, template, start, merge_model, items, keys):
    """Recursively substitute entity synonyms into ``merge_model``.

    For every entity from index ``start`` onwards that has a ``synonyms``
    list, each synonym is stored in ``merge_model`` under the entity's
    ``name``; the function then recurses on the following entities and
    hands the current ``merge_model`` to ``add_items``.

    NOTE(review): the indentation of this file was lost; ``add_items`` is
    assumed to run once per synonym, right after the recursive call --
    confirm against the original script.

    :param intent_name: intent label forwarded to ``add_items``.
    :param entities: list of entity dicts (``name``, optional ``synonyms``).
    :param template: format string forwarded to ``add_items``.
    :param start: index of the first entity to expand in this call.
    :param merge_model: dict mutated in place; also receives ``entities``.
    :param items: output list forwarded to ``add_items``.
    :param keys: list of already emitted texts forwarded to ``add_items``.
    """
    merge_model['entities'] = entities
    for index in range(start, len(entities)):
        current = entities[index]
        if 'synonyms' not in current:
            # entities without synonyms contribute nothing at this level
            continue
        for synonym in current['synonyms']:
            merge_model[current['name']] = synonym
            # index is always below len(entities) here, so the recursion
            # over the remaining entities is unconditional
            handle_model(intent_name, entities, template, index + 1,
                         merge_model, items, keys)
            add_items(intent_name, template, merge_model, items, keys)
def add_items(intent_name, template, merge_model, items, keys):
    """Render ``template`` with ``merge_model`` and record it as a new example.

    The rendered text is skipped when it is already present in ``keys``.
    Otherwise an item ``{"text", "intent", "entities"}`` is appended to
    ``items`` and the text is appended to ``keys``.

    Failures while rendering or parsing (for example a placeholder missing
    from ``merge_model``) are reported and the example is skipped, so one
    bad combination does not abort the whole run.

    :param intent_name: value stored under ``intent`` in the new item.
    :param template: ``str.format`` template for the example text.
    :param merge_model: placeholder values; ``merge_model['entities']`` is
        the list of entity dicts passed to ``parse_item_entity``.
    :param items: list that receives the new item (mutated in place).
    :param keys: list of texts already emitted (mutated in place).
    """
    try:
        merged_text = template.format(**merge_model)
        if merged_text in keys:
            return
        item = {
            "text": merged_text,
            "intent": intent_name,
            "entities": [
                parse_item_entity(merged_text, entity)
                for entity in merge_model['entities']
            ],
        }
    except Exception:
        # Diagnostics go to stderr: stdout carries the generated JSON
        # document, and mixing the two would make that output unparseable.
        # ``Exception`` (not a bare except) lets Ctrl-C / SystemExit through.
        sys.stderr.write(
            json.dumps({"template": template, "merge_model": merge_model},
                       indent=4) + "\n")
        sys.stderr.write("*** print_exception:\n")
        traceback.print_exc(limit=2, file=sys.stderr)
        return
    items.append(item)
    keys.append(merged_text)
def parse_item_entity(speech_text, entity):
    """Build the entity annotation for one word of ``speech_text``.

    The text is split on single spaces and the word at position
    ``entity['word']`` is located by summing the lengths of the words (and
    separating spaces) in front of it. This pins the offsets to that exact
    word even when the same word, or a word containing it, occurs earlier
    in the text, and it does not interpret the word as a regular expression.

    :param speech_text: the rendered example sentence.
    :param entity: dict with ``word`` (index into the space-split text),
        ``value`` and ``name``.
    :return: dict with ``value``, ``entity``, ``start`` and ``end`` where
        ``speech_text[start:end]`` is the selected word.
    :raises IndexError: if ``entity['word']`` is outside the split text.
    """
    parts = speech_text.split(" ")
    word_index = entity['word']
    entity_word = parts[word_index]
    if word_index < 0:
        # normalise negative indexes so the prefix slice below is correct
        word_index += len(parts)
    # every preceding word is followed by exactly one separator space
    start = sum(len(part) + 1 for part in parts[:word_index])
    return {
        'value': entity['value'],
        'entity': entity['name'],
        'start': start,
        'end': start + len(entity_word),
    }
def handle_statements_model(statements_model):
    """Expand every statement and split the result into the two example lists.

    Each entry of ``statements_model['statements']`` is expanded through
    ``handle_model`` into shared ``items``/``keys`` lists. Every produced
    item looks like::

        "text": "provider github push branchname",
        "intent": "provider_branchpush",
        "entities": [{
            "start": 0,
            "end": 8,
            "value": "provider",
            "entity": "element"
        }

    NOTE(review): the example lists are assumed to be built once, after all
    statements were expanded -- the source's indentation was lost; confirm.

    :param statements_model: dict with a ``statements`` list; each statement
        provides ``intent``, ``entities`` and ``text``.
    :return: tuple ``(entity_examples, intent_examples)``; the first holds
        the full items, the second only their ``text`` and ``intent``.
    """
    items = []
    keys = []
    for statement in statements_model['statements']:
        # each statement starts the recursion with a fresh substitution dict
        handle_model(statement['intent'], statement['entities'],
                     statement['text'], 0, {}, items, keys)

    entity_examples = list(items)
    intent_examples = [
        {"text": item['text'], "intent": item['intent']}
        for item in items
    ]
    return entity_examples, intent_examples
def main(args):
    """Convert a statements file to Rasa NLU JSON and print it to stdout.

    :param args: command-line arguments without the program name;
        ``args[0]`` is the path of the statements JSON file.
    :raises SystemExit: with status 2 when no path was given.
    """
    if not args:
        # fail with a usage hint instead of an IndexError traceback
        sys.stderr.write(
            "usage: {0} <statements_model.json>\n".format(sys.argv[0]))
        sys.exit(2)
    statements_model = load_model(args[0])
    rasa_model = make_rasa_model_from_statements(statements_model)
    # single-argument call form prints identically on Python 2 and 3
    print(json.dumps(rasa_model, indent=4))


if __name__ == "__main__":
    main(sys.argv[1:])
{
"statements": [{
"text": "{element} {provider_subject_vcs} {provider_verb} {provider_object_branchname}",
"intent": "provider_branchpush",
"entities": [
{
"word": 0,
"value":"provider",
"synonyms": ["provider"],
"name": "element"
}, {
"word": 1,
"value":"vcs",
"synonyms": ["vcs","github", "travis", "stash", "bitbucket"],
"name": "provider_subject_vcs"
}, {
"word": 2,
"value":"push",
"synonyms": ["pushed", "push", "committed", "commit"],
"name": "provider_verb"
}, {
"word": 3,
"value":"branchname",
"synonyms": ["branchname"],
"name": "provider_object_branchname"
}]
},{
"text": "{element} {provider_subject_vcs} {provider_object_record} {provider_verb}",
"intent": "provider_prcreate",
"entities": [
{
"word": 0,
"value":"provider",
"synonyms": ["provider"],
"name": "element"
}, {
"word": 1,
"value":"vcs",
"synonyms": ["vcs","github", "travis", "stash", "bitbucket"],
"name": "provider_subject_vcs"
}, {
"word": 2,
"value":"pr",
"synonyms": ["pr", "pullrequest"],
"name": "provider_object_record"
}, {
"word": 3,
"value":"create",
"synonyms": ["create"],
"name": "provider_verb"
}]
}]
}
@claytantor
Copy link
Author

claytantor commented Feb 7, 2017

I wrote this because the native Rasa format is difficult to craft: building the training model requires a ton of handwork and manual parsing of words. The file format above is simpler and is similar to API.ai's synonym-based training model, which I have never been able to get to work with Rasa.

$ python -m rasa_nlu.train -c rasa/config.json
$ python rasa/utils/generate_model.py rasa/data/dronze-taxonomy.json>/Users/claytongraham/data/github/claytantor/dronze-qlearn/rasa/data/dronze-rasa-gen1.json
$ python -m rasa_nlu.train -c rasa/config.json
$ python -m rasa_nlu.server -c rasa/config.json

to make a query:

curl -X POST -H "Content-Type: application/json" -H "Cache-Control: no-cache" -d '{"q":"provider bitbucket push feature-8"}' "http://localhost:5010/parse"

with the result:

{
  "text": "provider bitbucket push feature-8",
  "confidence": 0.9414104120837643,
  "intent": "provider_branchpush",
  "entities": [
    {
      "start": 0,
      "end": 8,
      "value": "provider",
      "entity": "element"
    },
    {
      "start": 9,
      "end": 18,
      "value": "bitbucket",
      "entity": "provider_subject_vcs"
    },
    {
      "start": 19,
      "end": 23,
      "value": "push",
      "entity": "provider_verb"
    },
    {
      "start": 24,
      "end": 33,
      "value": "feature-8",
      "entity": "provider_object_branchname"
    }
  ]
}

@kfezer
Copy link

kfezer commented Mar 23, 2017

Thanks for this!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment