Last active
December 29, 2015 15:08
-
-
Save rns/7688275 to your computer and use it in GitHub Desktop.
cross-sentence disambiguation with ASF's — first cut
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ambiguities via ASF's ambiguities_show() | |
Ambiguous symch at Glade=7, Symbol=<sentence>: | |
The ambiguity is from line 1, column 1 to line 1, column 25 | |
Text is: Time flies like an arrow. | |
There are 2 symches | |
Symch 0 is a rule: sentence ::= subject verb adjunct period | |
Symch 1 is a rule: sentence ::= subject verb object period | |
Ambiguous symch at Glade=5, Symbol=<sentence>: | |
The ambiguity is from line 3, column 1 to line 3, column 26 | |
Text is: Fruit flies like a banana. | |
There are 2 symches | |
Symch 0 is a rule: sentence ::= subject verb adjunct period | |
Symch 1 is a rule: sentence ::= subject verb object period | |
# ambiguities via array_display() in ASF's synopsis: | |
Glade 2, paragraph ::= sentence + | |
Glade 7 has 2 symches | |
Glade 7, Symch 0, sentence ::= subject verb adjunct period | |
Glade 14, subject ::= noun | |
Glade 69, noun ::= adjective_noun_lex | |
Glade 71, Symbol adjective_noun_lex: "Time" | |
Glade 13, verb ::= verb_lex | |
Glade 73, Symbol verb_lex: "flies" | |
Glade 12, adjunct ::= preposition object | |
Glade 75, preposition ::= preposition_lex | |
Glade 76, Symbol preposition_lex: "like" | |
Glade 16, object ::= article noun | |
Glade 68, article ::= article_lex | |
Glade 78, Symbol article_lex: "an" | |
Glade 77, noun ::= adjective_noun_lex | |
Glade 81, Symbol adjective_noun_lex: "arrow" | |
Glade 11, Symbol period: "." | |
Glade 7, Symch 1, sentence ::= subject verb object period | |
Glade 18, subject ::= adjective noun | |
Glade 1, adjective ::= adjective_noun_lex | |
Glade 83, Symbol adjective_noun_lex: "Time" | |
Glade 19, noun ::= adjective_noun_lex | |
Glade 86, Symbol adjective_noun_lex: "flies" | |
Glade 17, verb ::= verb_lex | |
Glade 88, Symbol verb_lex: "like" | |
Glade 16 revisited | |
Glade 15, Symbol period: "." | |
Glade 6, sentence ::= subject verb adjunct period | |
Glade 23, subject ::= noun | |
Glade 25, noun ::= adjective_noun_lex | |
Glade 26, Symbol adjective_noun_lex: "Time" | |
Glade 22, verb ::= verb_lex | |
Glade 27, Symbol verb_lex: "flies" | |
Glade 21, adjunct ::= adverb | |
Glade 29, adverb ::= adverb_lex | |
Glade 31, Symbol adverb_lex: "fast" | |
Glade 20, Symbol period: "." | |
Glade 5 has 2 symches | |
Glade 5, Symch 0, sentence ::= subject verb adjunct period | |
Glade 39, subject ::= noun | |
Glade 91, noun ::= adjective_noun_lex | |
Glade 93, Symbol adjective_noun_lex: "Fruit" | |
Glade 38, verb ::= verb_lex | |
Glade 96, Symbol verb_lex: "flies" | |
Glade 37, adjunct ::= preposition object | |
Glade 58, preposition ::= preposition_lex | |
Glade 98, Symbol preposition_lex: "like" | |
Glade 41, object ::= article noun | |
Glade 54, article ::= article_lex | |
Glade 101, Symbol article_lex: "a" | |
Glade 100, noun ::= adjective_noun_lex | |
Glade 104, Symbol adjective_noun_lex: "banana" | |
Glade 36, Symbol period: "." | |
Glade 5, Symch 1, sentence ::= subject verb object period | |
Glade 43, subject ::= adjective noun | |
Glade 106, adjective ::= adjective_noun_lex | |
Glade 109, Symbol adjective_noun_lex: "Fruit" | |
Glade 107, noun ::= adjective_noun_lex | |
Glade 112, Symbol adjective_noun_lex: "flies" | |
Glade 42, verb ::= verb_lex | |
Glade 114, Symbol verb_lex: "like" | |
Glade 41 revisited | |
Glade 40, Symbol period: "." | |
Glade 4, sentence ::= subject verb object period | |
Glade 48, subject ::= adjective noun | |
Glade 51, adjective ::= adjective_noun_lex | |
Glade 53, Symbol adjective_noun_lex: "Fruit" | |
Glade 50, noun ::= adjective_noun_lex | |
Glade 55, Symbol adjective_noun_lex: "flies" | |
Glade 47, verb ::= verb_lex | |
Glade 57, Symbol verb_lex: "spoil" | |
Glade 46, object ::= article noun | |
Glade 60, article ::= article_lex | |
Glade 62, Symbol article_lex: "a" | |
Glade 59, noun ::= adjective_noun_lex | |
Glade 65, Symbol adjective_noun_lex: "banana" | |
Glade 45, Symbol period: "." |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl | |
# Copyright 2013 Jeffrey Kegler | |
# This file is part of Marpa::R2. Marpa::R2 is free software: you can | |
# redistribute it and/or modify it under the terms of the GNU Lesser | |
# General Public License as published by the Free Software Foundation, | |
# either version 3 of the License, or (at your option) any later version. | |
# | |
# Marpa::R2 is distributed in the hope that it will be useful, | |
# but WITHOUT ANY WARRANTY; without even the implied warranty of | |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
# Lesser General Public License for more details. | |
# | |
# You should have received a copy of the GNU Lesser | |
# General Public License along with Marpa::R2. If not, see | |
# http://www.gnu.org/licenses/. | |
# This example is from Ralf Muschall, who clearly knows English | |
# grammar better than most native speakers. I've reworked the | |
# terminology to follow _A Comprehensive Grammar of the English | |
# Language_, by Quirk, Greenbaum, Leech and Svartvik. My edition | |
# was the "Seventh (corrected) impression 1989". | |
# | |
# When it is not a verb, I treat "like" | |
# as a preposition in an adjunct of manner, | |
# as per 8.79, p. 557; 9.4, pp. 661; and 9.48, pp. 698-699. | |
# | |
# The saying "time flies like an arrow; fruit flies like a banana" | |
# is attributed to Groucho Marx, but there is no reason to believe | |
# he ever said it. Apparently, the saying | |
# first appeared on the Usenet on net.jokes in 1982. | |
# I've documented this whole thing on Wikipedia: | |
# http://en.wikipedia.org/wiki/Time_flies_like_an_arrow | |
# | |
# The permalink is: | |
# http://en.wikipedia.org/w/index.php?title=Time_flies_like_an_arrow&oldid=311163283 | |
use 5.010; | |
use strict; | |
use warnings; | |
use English qw( -no_match_vars ); | |
use Test::More tests => 1; | |
use lib 'inc'; | |
use Marpa::R2::Test; | |
use Marpa::R2; | |
## no critic (Subroutines::RequireArgUnpacking) | |
use YAML; | |
sub do_paragraph { shift; return "para(\n " . join("\n ", @_) . "\n)\n" } | |
sub do_sva_sentence { return "sva($_[1];$_[2];$_[3])" } | |
sub do_svo_sentence { return "svo($_[1];$_[2];$_[3])" } | |
sub do_nominal_adjunct { return "adju($_[1];$_[2])" } | |
sub do_adverbial_adjunct { return "adju($_[1])" } | |
sub do_adjective { return "adje($_[1])" } | |
sub do_qualified_subject { return "s($_[1];$_[2])" } | |
sub do_bare_subject { return "s($_[1])" } | |
sub do_noun { return "n($_[1])" } | |
sub do_verb { return "v($_[1])" } | |
sub do_adverb { return "adv($_[1])" } | |
sub do_object { return "o($_[1];$_[2])" } | |
sub do_article { return "art($_[1])" } | |
sub do_preposition { return "pr($_[1])" } | |
## use critic | |
my $grammar = Marpa::R2::Scanless::G->new( | |
{ | |
source => \(<<'END_OF_SOURCE'), | |
paragraph ::= sentence+ action => do_paragraph | |
sentence ::= subject verb adjunct period action => do_sva_sentence | |
sentence ::= subject verb object period action => do_svo_sentence | |
adjunct ::= preposition object action => do_nominal_adjunct | |
adjunct ::= adverb action => do_adverbial_adjunct | |
adjective ::= adjective_noun_lex action => do_adjective | |
subject ::= adjective noun action => do_qualified_subject | |
subject ::= noun action => do_bare_subject | |
noun ::= adjective_noun_lex action => do_noun | |
adverb ::= adverb_lex action => do_adverb | |
verb ::= verb_lex action => do_verb | |
object ::= article noun action => do_object | |
article ::= article_lex action => do_article | |
preposition ::= preposition_lex action => do_preposition | |
preposition_lex ~ unicorn | |
verb_lex ~ unicorn | |
adjective_noun_lex ~ unicorn | |
article_lex ~ unicorn | |
adverb_lex ~ unicorn | |
period ~ unicorn | |
unicorn ~ [^\s\S] | |
END_OF_SOURCE | |
} | |
); | |
my $expected = <<'EOS'; | |
para( | |
sva(s(n(Time));v(flies);adju(pr(like);o(art(an);n(arrow)))) | |
sva(s(n(Time));v(flies);adju(adv(fast))) | |
svo(s(adje(Fruit);n(flies));v(like);o(art(a);n(banana))) | |
svo(s(adje(Fruit);n(flies));v(spoil);o(art(a);n(banana))) | |
) | |
EOS | |
my @actual = (); | |
my %lexical_class = ( | |
'preposition_lex' => 'like', | |
'verb_lex' => 'like flies spoil', | |
'adjective_noun_lex' => 'fruit banana time arrow flies', | |
'article_lex' => 'a an', | |
'adverb_lex' => 'fast', | |
'period' => '.' | |
); | |
my %vocabulary = (); | |
while ( my ( $lexical_class, $tokens ) = each %lexical_class ) { | |
for my $token ( split q{ }, $tokens ) { | |
push @{ $vocabulary{$token} }, $lexical_class; | |
} | |
} | |
sub asf_to_basic_tree { | |
my ( $asf, $glade ) = @_; | |
my $peak = $asf->peak(); | |
return glade_to_basic_tree( $asf, $peak, [] ); | |
} ## end sub asf_to_basic_tree | |
sub glade_to_basic_tree { | |
my ( $asf, $glade, $seen ) = @_; | |
return bless ["Glade $glade revisited"], 'My_Revisit' | |
if $seen->[$glade]; | |
$seen->[$glade] = 1; | |
my $grammar = $asf->grammar(); | |
my @symches = (); | |
my $symch_count = $asf->glade_symch_count($glade); | |
SYMCH: for ( my $symch_ix = 0; $symch_ix < $symch_count; $symch_ix++ ) { | |
my $rule_id = $asf->symch_rule_id( $glade, $symch_ix ); | |
if ( $rule_id < 0 ) { | |
my $literal = $asf->glade_literal($glade); | |
my $symbol_id = $asf->glade_symbol_id($glade); | |
my $display_form = $grammar->symbol_display_form($symbol_id); | |
push @symches, | |
bless [qq{Glade $glade, Symbol $display_form: "$literal"}], | |
'My_Token'; | |
next SYMCH; | |
} ## end if ( $rule_id < 0 ) | |
# ignore any truncation of the factorings | |
my $factoring_count = | |
$asf->symch_factoring_count( $glade, $symch_ix ); | |
my @symch_description = ("Glade $glade"); | |
push @symch_description, "Symch $symch_ix" if $symch_count > 1; | |
push @symch_description, $grammar->rule_show($rule_id); | |
my $symch_description = join q{, }, @symch_description; | |
my @factorings = ($symch_description); | |
for ( | |
my $factoring_ix = 0; | |
$factoring_ix < $factoring_count; | |
$factoring_ix++ | |
) | |
{ | |
my $downglades = | |
$asf->factoring_downglades( $glade, $symch_ix, | |
$factoring_ix ); | |
push @factorings, | |
bless [ map { glade_to_basic_tree( $asf, $_, $seen ) } | |
@{$downglades} ], 'My_Rule'; | |
} ## end for ( my $factoring_ix = 0; $factoring_ix < $factoring_count...) | |
if ( $factoring_count > 1 ) { | |
push @symches, | |
bless [ | |
"Glade $glade, symch $symch_ix has $factoring_count factorings", | |
@factorings | |
], | |
'My_Factorings'; | |
next SYMCH; | |
} ## end if ( $factoring_count > 1 ) | |
push @symches, bless [ @factorings[ 0, 1 ] ], 'My_Factorings'; | |
} ## end SYMCH: for ( my $symch_ix = 0; $symch_ix < $symch_count; ...) | |
return bless [ "Glade $glade has $symch_count symches", @symches ], | |
'My_Symches' | |
if $symch_count > 1; | |
return $symches[0]; | |
} ## end sub glade_to_basic_tree | |
sub array_display { | |
my ($array) = @_; | |
my ( undef, @lines ) = @{ array_lines_display($array) }; | |
my $text = q{}; | |
for my $line (@lines) { | |
my ( $indent, $body ) = @{$line}; | |
$indent -= 6; | |
$text .= ( q{ } x $indent ) . $body . "\n"; | |
} | |
return $text; | |
} ## end sub array_display | |
sub array_lines_display { | |
my ($array) = @_; | |
my $reftype = Scalar::Util::reftype($array) // '!undef!'; | |
return [ [ 0, $array ] ] if $reftype ne 'ARRAY'; | |
my @lines = (); | |
ELEMENT: for my $element ( @{$array} ) { | |
for my $line ( @{ array_lines_display($element) } ) { | |
my ( $indent, $body ) = @{$line}; | |
push @lines, [ $indent + 2, $body ]; | |
} | |
} ## end ELEMENT: for my $element ( @{$array} ) | |
return \@lines; | |
} ## end sub array_lines_display | |
my $recce = Marpa::R2::Scanless::R->new( { | |
grammar => $grammar , | |
semantics_package => 'main', | |
} ); | |
die 'Failed to create recognizer' if not $recce; | |
my $data = <<END_OF_DATA; | |
Time flies like an arrow. | |
Time flies fast. | |
Fruit flies like a banana. | |
Fruit flies spoil a banana. | |
END_OF_DATA | |
$recce->read( \$data, 0, 0 ); | |
my @tokens = grep { $_ } split /(\s+|\W)/, $data; | |
my $start = 0; | |
for my $token ( @tokens ) { | |
if (exists $vocabulary{lc $token} ){ | |
for my $lexical_class ( @{ $vocabulary{lc $token} } ) { | |
# say "'$token':$lexical_class @ $start:", length $token; | |
eval { $recce->lexeme_alternative( $lexical_class, $token ) }; | |
die $@, $recce->show_progress(), ' at ', __LINE__ if $@; | |
} | |
eval { $recce->lexeme_complete( $start, length $token ) }; | |
die $@, $recce->show_progress(), ' at ', __LINE__ if $@; | |
} | |
$start += length ($token); | |
} ## end for my $token ( split q{ }, $data ) | |
# report ambiguities via ASF's ambiguities_show() | |
if ( $recce->ambiguity_metric() > 1 ) { | |
my $asf = Marpa::R2::ASF->new( { slr => $recce } ); | |
die 'No ASF' if not defined $asf; | |
my $ambiguities = Marpa::R2::Internal::ASF::ambiguities($asf); | |
my $actual_value = 'Application grammar is ambiguous'; | |
my $actual_result = Marpa::R2::Internal::ASF::ambiguities_show( $asf, $ambiguities ); | |
say "# ambiguities via ASF's ambiguities_show()\n$actual_result"; | |
# report ambiguities via array_display() in ASF's synopsis | |
my $output_as_array = asf_to_basic_tree($asf); | |
my $actual_output = array_display($output_as_array); | |
say "# ambiguities via array_display() in ASF's synopsis:\n$actual_output" ; | |
} ## end if ( $recce->ambiguity_metric() > 1 ) | |
1; # In case used as "do" file |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Good catch. There's a problem in the code you copied from my example in the test suite. Here's a patch: