# hosford42/pyramids_parser_example.py

## pyramids_parser_example.py
# Installation note: Unless you plan to write your own grammar, you will need to install not only the pyramids package
# itself, but the pyramids_categories package as well. The pyramids package should automatically detect the presence of
# the pyramids_categories package and use its contents to initialize the default parser instance.


import time

import pyramids


def main():
    """Parse one sentence typed by the user and print the best parses found.

    Prompts for a sentence, parses it with ``pyramids.parse`` under a 10-second limit, reports any timeout or
    emergency-disambiguation flags, prints up to three of the best parse forests, and finally adjusts the forest
    scores and saves the parser's scoring measures. Nothing is scored or saved when no proper parse is found.
    """
    # If your input method supports long sections of text, it is recommended that you first split your text into
    # sentences using another NLP tool before passing them to the parser. Parser performance may also degrade
    # significantly for very long inputs, particularly for those containing the words "and" or "or".
    sentence = input("> ")

    max_time = 10  # Maximum seconds to spend parsing.

    # Parsing takes place in two phases: identification and disambiguation. The parser reduces the combinatorial
    # explosion of possible combinations of subtrees that must be considered, by combining subtrees of similar type
    # together during the identification phase. Then, once a valid structure covering the entirety of the input has been
    # identified, the combined structures must be separated again, which is the disambiguation phase. Both phases
    # can be interrupted by the timeout. If there is insufficient time to disambiguate because the identification phase
    # timed out, a fast heuristic mechanism is used to pick out the most likely forest, to avoid returning nothing at
    # all. This is called "emergency disambiguation", and the like-named flag is returned to indicate whether the fast
    # heuristic mechanism had to be used. The timeout flags indicate whether the phase had to be interrupted before it
    # was finished considering every option.
    (
        forests,  # The actual parses. Each parse can contain multiple trees which together attempt to cover the input.
        emergency_disambiguation,  # Whether emergency disambiguation was used.
        identification_timed_out,  # Whether identification timed out before all options were considered.
        disambiguation_timed_out  # Whether disambiguation timed out before all options were considered.
    ) = pyramids.parse(
        sentence,  # The sentence to be parsed
        'sentence',  # The grammatical category to be parsed
        timeout=time.time() + max_time  # The (optional) time at which parsing is terminated if it is not complete
    )

    _report_parse_flags(emergency_disambiguation, identification_timed_out, disambiguation_timed_out)

    forests = _select_best_forests(forests)
    if not forests:
        print("No proper parses were identified.")
        return

    # Only the first three surviving forests are displayed.
    for index, forest in enumerate(forests[:3]):
        _print_forest(index + 1, forest)

    # You will likely have some additional criteria which help you to pick the best parse forest from among those
    # that were generated. If so, you can adjust the scoring for each parse by providing a score between 0 and 1.
    best_forest = forests[0]
    for forest in forests:
        forest.adjust_score(1 if forest is best_forest else 0)

    # If you want your scores to affect parsing on future runs, you have to save the parser's scoring measures
    # before your program terminates. Unfortunately, I haven't yet defined a function to access the default parser,
    # so you'll have to access a "private" variable to do this. I don't plan on removing _default_parser anytime in
    # the foreseeable future, so this should be safe despite its ugliness.
    pyramids._default_parser.save_scoring_measures()


def _report_parse_flags(emergency_disambiguation, identification_timed_out, disambiguation_timed_out):
    """Print a warning for each parser status flag that is set."""
    if emergency_disambiguation:
        print("Emergency disambiguation! The parse forest is probably incorrect.")
    if identification_timed_out:
        print("Identification phase timed out. The parse forest is less likely to be correct.")
    if disambiguation_timed_out:
        print("Disambiguation phase timed out. The parse forest is less likely to be correct.")


def _select_best_forests(forests):
    """Return the gap-free, non-empty forests that use the fewest trees, or an empty list if there are none."""
    # Eliminate any forest that contains zero trees or has gaps in its coverage of the input text.
    complete = [forest for forest in forests if forest.parse_trees and not forest.has_gaps()]
    if not complete:
        return complete

    # Eliminate any forests with more than the minimum number of trees required to cover the input text.
    best_size = min(len(forest.parse_trees) for forest in complete)
    return [forest for forest in complete if len(forest.parse_trees) == best_size]


def _print_forest(number, forest):
    """Print one parse forest: its 1-based number and score, then the graph and root details of each tree."""
    print("========")
    print()
    print("Parse #%s" % number)
    print("Score: %s avg / %s sum" % forest.get_weighted_score())
    graphs = pyramids.get_parse_graphs(forest)
    for tree, graph in zip(forest.parse_trees, graphs):
        print()
        for sentence_type in ('command', 'question', 'statement'):
            if tree.category.has_properties(sentence_type):
                print("Possible %s." % sentence_type)
        print("Graph:")
        print(graph)
        print()
        _print_root_details(graph)
        print()
    print("========")
    print()
    print()


def _print_root_details(graph):
    """Print the root token of a parse graph, its categories, its span, and its outbound edges."""
    root_index = graph.root_index

    # The graph can be accessed as an indexed sequence, ordered by token position. There are also several
    # different methods for getting additional information about a token position.
    (
        _,  # Disregard
        spelling,  # The spelling for the token at the root node in the tree.
        span,  # The span of the token in the original input string
        category  # The grammatical category for the token
    ) = graph[root_index]

    print("Root token: %s" % spelling)
    print("Root token position: %s" % root_index)
    print("Root token category: %s" % category)
    print("Root phrase category: %s" % graph.get_phrase_category(root_index))
    print("Root token starts at string index %s and ends at string index %s." % span)

    # Sort the sink positions once and reuse the list for both the summary line and the edge description.
    sink_indices = sorted(graph.get_sinks(root_index))
    print("Root token has outbound directed edges to these token positions: %s" % sink_indices)

    # Describe the labels of just the first outbound edge (the lowest sink position).
    # NOTE(review): confirm that showing a single edge, rather than every edge, is intended.
    if sink_indices:
        first_sink = sink_indices[0]
        labels = graph.get_labels(root_index, first_sink)
        print("There is a directed edge from %s to %s with the following labels: %s" %
              (root_index, first_sink, sorted(labels)))


# Run the interactive example only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()
	# Installation note: Unless you plan to write your own grammar, you will need to install not only the pyramids package
	# itself, but the pyramids_categories package as well. The pyramids package should automatically detect the presence of
	# the pyramids_categories package and use its contents to initialize the default parser instance.


	import time

	import pyramids


	def main():
		"""Parse one sentence typed by the user and print the best parses found.

		Prompts for a sentence, parses it with ``pyramids.parse`` under a 10-second limit, reports any timeout or
		emergency-disambiguation flags, prints up to three of the best parse forests, and finally adjusts the forest
		scores and saves the parser's scoring measures. Nothing is scored or saved when no proper parse is found.
		"""
		# If your input method supports long sections of text, it is recommended that you first split your text into
		# sentences using another NLP tool before passing them to the parser. Parser performance may also degrade
		# significantly for very long inputs, particularly for those containing the words "and" or "or".
		sentence = input("> ")

		max_time = 10  # Maximum seconds to spend parsing.

		# Parsing takes place in two phases: identification and disambiguation. The parser reduces the combinatorial
		# explosion of possible combinations of subtrees that must be considered, by combining subtrees of similar type
		# together during the identification phase. Then, once a valid structure covering the entirety of the input has been
		# identified, the combined structures must be separated again, which is the disambiguation phase. Both phases
		# can be interrupted by the timeout. If there is insufficient time to disambiguate because the identification phase
		# timed out, a fast heuristic mechanism is used to pick out the most likely forest, to avoid returning nothing at
		# all. This is called "emergency disambiguation", and the like-named flag is returned to indicate whether the fast
		# heuristic mechanism had to be used. The timeout flags indicate whether the phase had to be interrupted before it
		# was finished considering every option.
		(
			forests,  # The actual parses. Each parse can contain multiple trees which together attempt to cover the input.
			emergency_disambiguation,  # Whether emergency disambiguation was used.
			identification_timed_out,  # Whether identification timed out before all options were considered.
			disambiguation_timed_out  # Whether disambiguation timed out before all options were considered.
		) = pyramids.parse(
			sentence,  # The sentence to be parsed
			'sentence',  # The grammatical category to be parsed
			timeout=time.time() + max_time  # The (optional) time at which parsing is terminated if it is not complete
		)

		if emergency_disambiguation:
			print("Emergency disambiguation! The parse forest is probably incorrect.")
		if identification_timed_out:
			print("Identification phase timed out. The parse forest is less likely to be correct.")
		if disambiguation_timed_out:
			print("Disambiguation phase timed out. The parse forest is less likely to be correct.")

		# Eliminate any forest that contains zero trees or has gaps in its coverage of the input text.
		forests = [forest for forest in forests if forest.parse_trees and not forest.has_gaps()]

		if not forests:
			print("No proper parses were identified.")
		else:
			# Eliminate any forests with more than the minimum number of trees required to cover the input text.
			best_size = min(len(forest.parse_trees) for forest in forests)
			forests = [forest for forest in forests if len(forest.parse_trees) == best_size]

			for index, forest in enumerate(forests[:3]):
				print("========")
				print()
				print("Parse #%s" % (index + 1))
				print("Score: %s avg / %s sum" % forest.get_weighted_score())
				graphs = pyramids.get_parse_graphs(forest)
				for tree, graph in zip(forest.parse_trees, graphs):
					print()
					if tree.category.has_properties('command'):
						print("Possible command.")
					if tree.category.has_properties('question'):
						print("Possible question.")
					if tree.category.has_properties('statement'):
						print("Possible statement.")
					print("Graph:")
					print(graph)
					print()

					# The graph can be accessed as an indexed sequence, ordered by token position. There are also several
					# different methods for getting additional information about a token position.
					(
						_,  # Disregard
						spelling,  # The spelling for the token at the root node in the tree.
						span,  # The span of the token in the original input string
						category  # The grammatical category for the token
					) = graph[graph.root_index]

					print("Root token: %s" % spelling)
					print("Root token position: %s" % graph.root_index)
					print("Root token category: %s" % category)
					print("Root phrase category: %s" % graph.get_phrase_category(graph.root_index))
					print("Root token starts at string index %s and ends at string index %s." % span)
					print("Root token has outbound directed edges to these token positions: %s" %
						sorted(graph.get_sinks(graph.root_index)))
					# NOTE(review): the loop stops after the first edge; confirm that showing only one edge is intended.
					for sink_index in sorted(graph.get_sinks(graph.root_index)):
						labels = graph.get_labels(graph.root_index, sink_index)
						print("There is a directed edge from %s to %s with the following labels: %s" %
							(graph.root_index, sink_index, sorted(labels)))
						break

					print()
				print("========")
				print()
				print()

			# You will likely have some additional criteria which help you to pick the best parse forest from among those
			# that were generated. If so, you can adjust the scoring for each parse by providing a score between 0 and 1.
			best_forest = forests[0]
			for forest in forests:
				if forest is best_forest:
					score = 1
				else:
					score = 0
				forest.adjust_score(score)

			# If you want your scores to affect parsing on future runs, you have to save the parser's scoring measures
			# before your program terminates. Unfortunately, I haven't yet defined a function to access the default parser,
			# so you'll have to access a "private" variable to do this. I don't plan on removing _default_parser anytime in
			# the foreseeable future, so this should be safe despite its ugliness.
			pyramids._default_parser.save_scoring_measures()


	# Run the interactive example only when this file is executed as a script, not when it is imported.
	if __name__ == "__main__":
		main()