Skip to content

Instantly share code, notes, and snippets.

@cognominal
Last active January 9, 2022 02:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cognominal/cdc05adc82ad98272ea7 to your computer and use it in GitHub Desktop.
Save cognominal/cdc05adc82ad98272ea7 to your computer and use it in GitHub Desktop.
An oddity that foils my attempts at syntactical hiligthing
use Perl6::Grammar;
use Perl6::Actions;
use Perl6::Compiler;
# test
# trying to revive this old gist
# it seems I want to acess a nqp match from raku
# This is an experiment in generating json suitable for a dynamic syntax
# higlighting background. The challenge is to get a compact representation which
# will be easily traversed to produce the annotated string.
# If we were in Perl 6, we would use C<Match.chunks>.
#
# now prints correctly in 3 formats (linear, pseudojson, html)
my &OOnlyHash := -> $m { $m.hash == 1 && nqp::ishash($m<O>) };
my &Leaf := -> $m { !$m.hash && !$m.list };
sub dump_match_rec($m, $indent) {
for sortMatch($m) {
my $sm := $_[1];
my $sm1 := subst($sm, /\n/, '\n', :global);
say($indent ~ $_[0] ~ ":'" ~ $sm1 ~ "' " ~ from($sm) ~ '..' ~ to($sm));
if !Leaf($sm) {
dump_match_rec($sm, $indent ~ " ");
}
}
}
sub from($e) {
my $from := $e.from;
for @($e) {
if $_.from < $from {
$from := $_.from;
}
}
$from
}
sub to($e) {
my $to := $e.to;
for @($e) {
if $_.to > $to {
$to := $_.to;
}
}
$to
}
sub chop($s) { $s := nqp::substr($s, 0, nqp::chars($s)-1) }
sub map(@a, &r) { my @res; @res.push: r($_) for @a; @res; }
sub indent($n) {
my $s := "\n";
$s := $s ~ ' ' while $n--;
$s;
}
sub hashsay($h) {
say($_ ~ ':' ~ $_.value) for $h;
}
  # =begin pod
# given a Match C<$m>, return an ordered array of
# its first level captured submatches.
# The element are of the form C<[ $name, $submatch ]> with C<$name> being
# either the submatch name or its indice as a string
# order. The ordered is given by the C<$submatch.from> order and when equal
# the reverse C<$submatch.to order. In case of equality we have a problem
# because we can reverse the embedding. We would need the parse tree of the
# regex to figure out.
# =end pod
sub sortMatch($m) {
my @a ;
my $i := 0;
sub smartPush($key, $val) {
$val := [ $val ] unless nqp::islist($val);
@a.push([$key, $_]) for $val;
}
if $m.list {
smartPush( ~$i++, $_) for $m.list ;
}
if $m.hash {
for $m.hash {
smartPush($_.key, $_.value) if !nqp::ishash($_.value);
}
}
sub cmp($a, $b) {
my $eq := from($a[1]) == from($b[1]);
my $sup := from($a[1]) > from($b[1]);
if $eq {
my $eq := to($a[1]) == to($b[1]);
my $sup := to($a[1]) < to($b[1]);
}
my $res := $eq ?? 0 !! $sup ?? 1 !! -1 ;
# say( "$res " ~ $a[0] ~ ":'" ~ $a[1] ~ "' " ~ $b[0] ~ ":'" ~ $b[1] ~ "'");
$res ;
}
my @sorted := @a.sort(&cmp);
my $from := 0;
my $to := 0;
my $last := Mu;
my @uniq;
my @nm;
# fold identical matches
# we currently assume matches with same .to and .from are identical.
# that may be wrong
my &pushFoldedMatches := -> $m { @uniq.push: [ nqp::join(',', @nm), $m[1] ]; }
for @sorted {
if $last && cmp($_, $last) {
pushFoldedMatches($last);
@nm := [$_[0]];
} else {
@nm.push: $_[0];
}
$last := $_;
}
pushFoldedMatches($last) if @nm;
@uniq;
}
class MatchShow {
has $!pos;
has $!m;
# =begin pod
#
# =end pod
method pushChunk(@a, $nm, $to) {
my $len := $to - $!pos;
if nqp::substr($nm, 0, 1) ne '~' || $len>0 {
my $substr := nqp::substr($!m.orig, $!pos, $len);
@a.push: [ $nm, self.textFormat($substr)];
$!pos := $!pos + $len ;
}
}
# These 2 method really are abstract
# method showStr($s) { }
# method format() { }
method display($nm, $m) {
$!pos := 0;
$!m := $m;
self.displayRecurse($m);
}
method displayRecurse($m) {
# say( $m ~ "'$m' " ~ +$m.list ~ +$m.hash);
my @a;
my $len;
for sortMatch($m) {
my $submatchNm := $_[0];
my $submatch := $_[1];
# non captured
self.pushChunk: @a, '~', from($submatch);
my &nonOHash := -> $m { $m.hash && !nqp::ishash($m<O>) };
my &nonLeaf := -> $m { nonOHash($m) || $m.list };
if nonLeaf($submatch) {
my $sm := $submatch;
my $prevsm := $sm;
my @nms := [ $submatchNm ];
while 1 {
last if !nqp::istype($sm, Match) || $sm.hash != 1 ;
$prevsm := $sm;
for $sm.hash {
@nms.push: $_.key;
$sm := $_.value;
}
}
my $joinedNm := nqp::join(' ', @nms);
@a.push: [$joinedNm, self.displayRecurse($prevsm) ];
} else {
self.pushChunk: @a, $submatchNm, to($submatch);
}
}
self.pushChunk: @a, '~', to($m);
my $res := self.format(@a);
$res;
}
my $*indent := 0;
method value($v, $k?) {}
method array($v) {}
method hash($v) {}
}
class MatchToJson is MatchShow {
method textFormat($s) {
$s := subst( $s, /\"/, '\\"', :global);
'"' ~ $s ~ '"';
}
method format(@a) {
'[' ~
nqp::join(',',
map(@a, -> $_ { self.textFormat($_[0]) ~ ':' ~ $_[1] } )
)
~ ']'
}
}
class MatchToHTML is MatchShow {
method format(@a) {
nqp::join(',', map(@a,
-> $_ { '<span class="' ~ $_[0] ~ '">' ~ $_[1] ~ "</span>" } ));
}
method textFormat($text) {
$text := subst( $text, /\&/, '&amp;', :global);
$text := subst( $text, /\</, '&lt', :global);
$text := subst( $text, /\>/, '&gt;', :global);
$text;
}
}
class Perl6ToJSON is Perl6::Compiler {
method json_dump($parse, *%adverbs) {
# dump_match($parse<statementlist><statement>[0]<EXPR>);
# dump_match($parse<statementlist><statement>[0]<EXPR>[0]<value><number><numish><integer>);
dump_match_rec($parse, '');
# nqp::exit(0);
# say(MatchToJson.new.value($parse, :topMost));
my $mtj := MatchToJson.new;
say($mtj.display('TOP', $parse));
$mtj := MatchToHTML.new;
say($mtj.display('TOP', $parse));
nqp::exit(0);
}
};
sub MAIN(@ARGS) {
# Create and configure compiler object.
my $comp := Perl6ToJSON.new();
$comp.language('perl6');
$comp.parsegrammar(Perl6::Grammar);
$comp.parseactions(Perl6::Actions);
$comp.addstage('json_dump', :before<past>);
hll-config($comp.config);
my $COMPILER_CONFIG := $comp.config;
# Add extra command line options.
my @clo := $comp.commandline_options();
@clo.push('parsetrace');
@clo.push('setting=s');
@clo.push('n');
@clo.push('p');
@clo.push('doc=s?');
@clo.push('optimize=s?');
@clo.push('c');
@clo.push('I=s');
@clo.push('M=s');
# Set up module loading trace
my @*MODULES := [];
# Set up END block list, which we'll run at exit.
my @*END_PHASERS := [];
# Enter the compiler.
$comp.command_line(@ARGS, :encoding('utf8'), :transcode('ascii iso-8859-1'));
# Run any END blocks before exiting.
for @*END_PHASERS { $_() }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment