claytantor/generate_model.py

## generate_model.py
#!/usr/bin/env python
# coding: utf-8
import sys
import json
import re
import traceback

def load_model(model_file):
    """Read a JSON document from disk and return the decoded object.

    Args:
        model_file: Path of the JSON file to read.

    Returns:
        The value decoded from the file's JSON content.
    """
    with open(model_file, 'r') as handle:
        return json.load(handle)

def make_rasa_model_from_statements(statements_model):
    """Wrap the generated examples in a ``rasa_nlu_data`` envelope.

    Args:
        statements_model: Statements document passed straight through to
            ``handle_statements_model``.

    Returns:
        ``{"rasa_nlu_data": {"entity_examples": [...],
        "intent_examples": [...]}}`` built from the two lists that
        ``handle_statements_model`` returns.
    """
    with_entities, intent_only = handle_statements_model(statements_model)
    return {
        "rasa_nlu_data": {
            'entity_examples': with_entities,
            'intent_examples': intent_only,
        }
    }


def handle_model(intent_name, entities, template, start, merge_model, items, keys):
    """Render ``template`` once for every combination of entity synonyms.

    Walks ``entities`` from index ``start``. The first entity that carries
    a non-empty ``'synonyms'`` list is expanded: each synonym is stored in
    ``merge_model`` under the entity's ``'name'`` and the remaining
    entities are expanded recursively. When no entity is left to expand,
    the current ``merge_model`` is handed to ``add_items``.

    Args:
        intent_name: Intent label forwarded to ``add_items``.
        entities: List of entity definition dicts.
        template: ``str.format`` template forwarded to ``add_items``.
        start: Index of the first entity still to be expanded.
        merge_model: Dict of template values; mutated in place. Its
            ``'entities'`` key is set to ``entities``.
        items: Output list forwarded to ``add_items``.
        keys: De-duplication list forwarded to ``add_items``.
    """
    merge_model['entities'] = entities

    for index in range(start, len(entities)):
        entity = entities[index]
        if 'synonyms' not in entity or not entity['synonyms']:
            # Nothing to vary for this entity; try the next one.
            continue

        for synonym in entity['synonyms']:
            merge_model[entity['name']] = synonym
            handle_model(intent_name, entities, template, index + 1,
                         merge_model, items, keys)

        # The recursive calls above already expanded every later entity,
        # so continuing this loop would only re-render combinations that
        # were produced already.
        return

    # No entity left to vary: merge_model now describes one combination.
    add_items(intent_name, template, merge_model, items, keys)


def add_items(intent_name, template, merge_model, items, keys):
    """Render one example from ``template`` and record it if it is new.

    Args:
        intent_name: Intent label stored on the generated example.
        template: ``str.format`` template for the example text.
        merge_model: Mapping passed as keyword arguments to
            ``template.format``. Its ``'entities'`` entry lists the entity
            definitions handed to ``parse_item_entity``.
        items: List that receives the new example dict (mutated in place).
        keys: List of texts already rendered (mutated in place); a text
            that is already present is skipped.

    Any failure while rendering or parsing is reported on stderr and the
    example is skipped, so one bad statement does not abort the run.
    """
    try:
        merged_text = template.format(**merge_model)
        if merged_text in keys:
            return

        entities_parsed = [
            parse_item_entity(merged_text, entity)
            for entity in merge_model['entities']
        ]
        items.append({
            "text": merged_text,
            "intent": intent_name,
            "entities": entities_parsed
        })
        keys.append(merged_text)

    except Exception:
        # Diagnostics go to stderr so they cannot interleave with JSON
        # that the script prints on stdout.
        exc_type, exc_value, exc_traceback = sys.exc_info()
        sys.stderr.write(
            json.dumps({"template": template, "merge_model": merge_model},
                       indent=4) + "\n")
        sys.stderr.write("*** print_exception:\n")
        traceback.print_exception(exc_type, exc_value, exc_traceback,
                                  limit=2, file=sys.stderr)

def parse_item_entity(speech_text, entity):
    """Build the entity annotation for one word of ``speech_text``.

    Args:
        speech_text: Rendered example text; it is split on single spaces.
        entity: Entity definition with ``'word'`` (index of the word in
            the split text), ``'value'`` and ``'name'``.

    Returns:
        Dict with ``'value'``, ``'entity'``, ``'start'`` and ``'end'``,
        where ``start``/``end`` are the character offsets of the indexed
        word inside ``speech_text``.

    Raises:
        IndexError: If ``entity['word']`` is outside the split text.
        KeyError: If ``entity`` lacks ``'word'``, ``'value'`` or ``'name'``.
    """
    # split the text into words
    parts = speech_text.split(" ")
    word_index = entity['word']
    entity_word = parts[word_index]

    # Derive the offset from the word position. Searching the text for the
    # word would hit the first substring match instead ("pr" inside
    # "provider") and would treat the word as a regular expression.
    start = sum(len(part) + 1 for part in parts[:word_index])

    return {
        'value': entity['value'],
        'entity': entity['name'],
        'start': start,
        'end': start + len(entity_word),
    }


def handle_statements_model(statements_model):
    """Expand every statement into entity and intent training examples.

    Args:
        statements_model: Dict whose ``'statements'`` list holds dicts with
            ``'intent'``, ``'entities'`` and ``'text'``.

    Returns:
        Tuple ``(entity_examples, intent_examples)``. The first list holds
        the generated items as they are; the second holds one
        ``{"text", "intent"}`` dict per item.
    """
    # Shape of one generated item:
    #   "text": "provider github push branchname",
    #   "intent": "provider_branchpush",
    #   "entities": [{
    #       "start": 0,
    #       "end": 8,
    #       "value": "provider",
    #       "entity": "element"
    #   }]
    generated = []
    seen_texts = []
    for statement in statements_model['statements']:
        # each statement is expanded recursively with a fresh value dict
        handle_model(statement['intent'], statement['entities'],
                     statement['text'], 0, {}, generated, seen_texts)

    # the entity examples are the items; intent examples drop the entities
    entity_examples = list(generated)
    intent_examples = [
        {"text": entry['text'], "intent": entry['intent']}
        for entry in generated
    ]
    return entity_examples, intent_examples


def main(args):
    """Load the statements file named by ``args[0]`` and print the model.

    Args:
        args: Command-line arguments without the program name; the first
            one is the path of the statements JSON file.

    Raises:
        SystemExit: If no path argument was supplied.
    """
    if not args:
        # sys.exit with a string prints it on stderr and exits non-zero.
        sys.exit("usage: generate_model.py <statements.json>")

    statements_model = load_model(args[0])
    rasa_model = make_rasa_model_from_statements(statements_model)
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print(json.dumps(rasa_model, indent=4))


if __name__ == "__main__":
    main(sys.argv[1:])

## taxonomy.json
{
    "statements": [{
        "text": "{element} {provider_subject_vcs} {provider_verb} {provider_object_branchname}",
        "intent": "provider_branchpush",
        "entities": [
        {
            "word": 0,
            "value":"provider",
            "synonyms": ["provider"],
            "name": "element"
        }, {
            "word": 1,
            "value":"vcs",
            "synonyms": ["vcs","github", "travis", "stash", "bitbucket"],
            "name": "provider_subject_vcs"
        }, {
            "word": 2,
            "value":"push",
            "synonyms": ["pushed", "push", "committed", "commit"],
            "name": "provider_verb"
        }, {
            "word": 3,
            "value":"branchname",
            "synonyms": ["branchname"],
            "name": "provider_object_branchname"
        }]
    },{
        "text": "{element} {provider_subject_vcs} {provider_object_record} {provider_verb}",
        "intent": "provider_prcreate",
        "entities": [
        {
            "word": 0,
            "value":"provider",
            "synonyms": ["provider"],
            "name": "element"
        }, {
            "word": 1,
            "value":"vcs",
            "synonyms": ["vcs","github", "travis", "stash", "bitbucket"],
            "name": "provider_subject_vcs"
        }, {
            "word": 2,
            "value":"pr",
            "synonyms": ["pr", "pullrequest"],
            "name": "provider_object_record"
        }, {
            "word": 3,
            "value":"create",
            "synonyms": ["create"],
            "name": "provider_verb"
        }]
    }]
}
	#!/usr/bin/env python
	# coding: utf-8
	import sys
	import json
	import re
	import traceback

	def load_model(model_file):
		"""Read a JSON document from disk and return the decoded object.

		Args:
			model_file: Path of the JSON file to read.

		Returns:
			The value decoded from the file's JSON content.
		"""
		with open(model_file, 'r') as f:
			return json.load(f)

	def make_rasa_model_from_statements(statements_model):
		"""Wrap the generated examples in a ``rasa_nlu_data`` envelope.

		Args:
			statements_model: Statements document passed straight through
				to ``handle_statements_model``.

		Returns:
			Dict with a single ``"rasa_nlu_data"`` key holding the
			``'entity_examples'`` and ``'intent_examples'`` lists.
		"""
		entity_examples, intent_examples = handle_statements_model(statements_model)
		rasa_model = {"rasa_nlu_data": {
			'entity_examples': entity_examples,
			'intent_examples': intent_examples
		}}
		return rasa_model


	def handle_model(intent_name, entities, template, start, merge_model, items, keys):
		"""Render ``template`` once for every combination of entity synonyms.

		Walks ``entities`` from index ``start``. The first entity that
		carries a non-empty ``'synonyms'`` list is expanded: each synonym is
		stored in ``merge_model`` under the entity's ``'name'`` and the
		remaining entities are expanded recursively. When no entity is left
		to expand, the current ``merge_model`` is handed to ``add_items``.

		Args:
			intent_name: Intent label forwarded to ``add_items``.
			entities: List of entity definition dicts.
			template: ``str.format`` template forwarded to ``add_items``.
			start: Index of the first entity still to be expanded.
			merge_model: Dict of template values; mutated in place. Its
				``'entities'`` key is set to ``entities``.
			items: Output list forwarded to ``add_items``.
			keys: De-duplication list forwarded to ``add_items``.
		"""
		merge_model['entities'] = entities

		for i in range(start, len(entities)):
			entity = entities[i]
			if 'synonyms' not in entity or not entity['synonyms']:
				# Nothing to vary for this entity; try the next one.
				continue

			for synonym in entity['synonyms']:
				merge_model[entity['name']] = synonym
				handle_model(intent_name, entities, template, i + 1, merge_model, items, keys)

			# The recursive calls above already expanded every later
			# entity, so continuing would only repeat combinations.
			return

		# No entity left to vary: merge_model describes one combination.
		add_items(intent_name, template, merge_model, items, keys)



	def add_items(intent_name, template, merge_model, items, keys):
		"""Render one example from ``template`` and record it if it is new.

		Args:
			intent_name: Intent label stored on the generated example.
			template: ``str.format`` template for the example text.
			merge_model: Mapping passed as keyword arguments to
				``template.format``. Its ``'entities'`` entry lists the
				entity definitions handed to ``parse_item_entity``.
			items: List that receives the new example dict (mutated).
			keys: List of texts already rendered (mutated); a text that
				is already present is skipped.

		Any failure while rendering or parsing is reported on stderr and
		the example is skipped.
		"""
		try:
			merged_text = template.format(**merge_model)
			if merged_text in keys:
				return

			entities_parsed = [
				parse_item_entity(merged_text, entity)
				for entity in merge_model['entities']
			]
			items.append({
				"text": merged_text,
				"intent": intent_name,
				"entities": entities_parsed
			})
			keys.append(merged_text)

		except Exception:
			# Diagnostics go to stderr so they cannot interleave with
			# JSON that the script prints on stdout.
			exc_type, exc_value, exc_traceback = sys.exc_info()
			sys.stderr.write(json.dumps({"template": template, "merge_model": merge_model}, indent=4) + "\n")
			sys.stderr.write("*** print_exception:\n")
			traceback.print_exception(exc_type, exc_value, exc_traceback,
				limit=2, file=sys.stderr)

	def parse_item_entity(speech_text, entity):
		"""Build the entity annotation for one word of ``speech_text``.

		Args:
			speech_text: Rendered example text; it is split on single spaces.
			entity: Entity definition with ``'word'`` (index of the word in
				the split text), ``'value'`` and ``'name'``.

		Returns:
			Dict with ``'value'``, ``'entity'``, ``'start'`` and ``'end'``,
			where ``start``/``end`` are the character offsets of the
			indexed word inside ``speech_text``.

		Raises:
			IndexError: If ``entity['word']`` is outside the split text.
			KeyError: If ``entity`` lacks ``'word'``, ``'value'`` or ``'name'``.
		"""
		# split the text into words
		parts = speech_text.split(" ")
		word_index = entity['word']
		entity_word = parts[word_index]

		# Derive the offset from the word position. Searching the text for
		# the word would hit the first substring match instead ("pr" inside
		# "provider") and would treat the word as a regular expression.
		start = sum(len(part) + 1 for part in parts[:word_index])

		return {
			'value': entity['value'],
			'entity': entity['name'],
			'start': start,
			'end': start + len(entity_word),
		}


	def handle_statements_model(statements_model):
		"""Expand every statement into entity and intent training examples.

		Args:
			statements_model: Dict whose ``'statements'`` list holds dicts
				with ``'intent'``, ``'entities'`` and ``'text'``.

		Returns:
			Tuple ``(entity_examples, intent_examples)``. The first list
			holds the generated items as they are; the second holds one
			``{"text", "intent"}`` dict per item.
		"""
		# Shape of one generated item:
		#   "text": "provider github push branchname",
		#   "intent": "provider_branchpush",
		#   "entities": [{
		#       "start": 0,
		#       "end": 8,
		#       "value": "provider",
		#       "entity": "element"
		#   }]
		items = []
		keys = []
		for statement in statements_model['statements']:
			# we are going to recurse from here
			merge_model = {}
			handle_model(statement['intent'], statement['entities'], statement['text'], 0, merge_model, items, keys)

		# now we have items, let's create the examples from each
		entity_examples = list(items)
		intent_examples = [
			{"text": item['text'], "intent": item['intent']}
			for item in items
		]
		return entity_examples, intent_examples


	def main(args):
		"""Load the statements file named by ``args[0]`` and print the model.

		Args:
			args: Command-line arguments without the program name; the
				first one is the path of the statements JSON file.

		Raises:
			SystemExit: If no path argument was supplied.
		"""
		if not args:
			# sys.exit with a string prints it on stderr and exits non-zero.
			sys.exit("usage: generate_model.py <statements.json>")

		statements_model = load_model(args[0])
		rasa_model = make_rasa_model_from_statements(statements_model)
		# Parenthesised single-argument print works on Python 2 and 3.
		print(json.dumps(rasa_model, indent=4))


	if __name__ == "__main__":
		main(sys.argv[1:])
	{
	"statements": [{
	"text": "{element} {provider_subject_vcs} {provider_verb} {provider_object_branchname}",
	"intent": "provider_branchpush",
	"entities": [
	{
	"word": 0,
	"value":"provider",
	"synonyms": ["provider"],
	"name": "element"
	}, {
	"word": 1,
	"value":"vcs",
	"synonyms": ["vcs","github", "travis", "stash", "bitbucket"],
	"name": "provider_subject_vcs"
	}, {
	"word": 2,
	"value":"push",
	"synonyms": ["pushed", "push", "committed", "commit"],
	"name": "provider_verb"
	}, {
	"word": 3,
	"value":"branchname",
	"synonyms": ["branchname"],
	"name": "provider_object_branchname"
	}]
	},{
	"text": "{element} {provider_subject_vcs} {provider_object_record} {provider_verb}",
	"intent": "provider_prcreate",
	"entities": [
	{
	"word": 0,
	"value":"provider",
	"synonyms": ["provider"],
	"name": "element"
	}, {
	"word": 1,
	"value":"vcs",
	"synonyms": ["vcs","github", "travis", "stash", "bitbucket"],
	"name": "provider_subject_vcs"
	}, {
	"word": 2,
	"value":"pr",
	"synonyms": ["pr", "pullrequest"],
	"name": "provider_object_record"
	}, {
	"word": 3,
	"value":"create",
	"synonyms": ["create"],
	"name": "provider_verb"
	}]
	}]
	}