-
-
Save cognominal/cdc05adc82ad98272ea7 to your computer and use it in GitHub Desktop.
An oddity that foils my attempts at syntactical hiligthing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use Perl6::Grammar; | |
use Perl6::Actions; | |
use Perl6::Compiler; | |
# test | |
# trying to revive this old gist | |
# it seems I want to acess a nqp match from raku | |
# This is an experiment in generating json suitable for a dynamic syntax | |
# higlighting background. The challenge is to get a compact representation which | |
# will be easily traversed to produce the annotated string. | |
# If we were in Perl 6, we would use C<Match.chunks>. | |
# | |
# now prints correctly in 3 formats (linear, pseudojson, html) | |
my &OOnlyHash := -> $m { $m.hash == 1 && nqp::ishash($m<O>) }; | |
my &Leaf := -> $m { !$m.hash && !$m.list }; | |
sub dump_match_rec($m, $indent) { | |
for sortMatch($m) { | |
my $sm := $_[1]; | |
my $sm1 := subst($sm, /\n/, '\n', :global); | |
say($indent ~ $_[0] ~ ":'" ~ $sm1 ~ "' " ~ from($sm) ~ '..' ~ to($sm)); | |
if !Leaf($sm) { | |
dump_match_rec($sm, $indent ~ " "); | |
} | |
} | |
} | |
sub from($e) { | |
my $from := $e.from; | |
for @($e) { | |
if $_.from < $from { | |
$from := $_.from; | |
} | |
} | |
$from | |
} | |
sub to($e) { | |
my $to := $e.to; | |
for @($e) { | |
if $_.to > $to { | |
$to := $_.to; | |
} | |
} | |
$to | |
} | |
sub chop($s) { $s := nqp::substr($s, 0, nqp::chars($s)-1) } | |
sub map(@a, &r) { my @res; @res.push: r($_) for @a; @res; } | |
sub indent($n) { | |
my $s := "\n"; | |
$s := $s ~ ' ' while $n--; | |
$s; | |
} | |
sub hashsay($h) { | |
say($_ ~ ':' ~ $_.value) for $h; | |
} | |
# =begin pod | |
# given a Match C<$m>, return an ordered array of | |
# its first level captured submatches. | |
# The element are of the form C<[ $name, $submatch ]> with C<$name> being | |
# either the submatch name or its indice as a string | |
# order. The ordered is given by the C<$submatch.from> order and when equal | |
# the reverse C<$submatch.to order. In case of equality we have a problem | |
# because we can reverse the embedding. We would need the parse tree of the | |
# regex to figure out. | |
# =end pod | |
sub sortMatch($m) { | |
my @a ; | |
my $i := 0; | |
sub smartPush($key, $val) { | |
$val := [ $val ] unless nqp::islist($val); | |
@a.push([$key, $_]) for $val; | |
} | |
if $m.list { | |
smartPush( ~$i++, $_) for $m.list ; | |
} | |
if $m.hash { | |
for $m.hash { | |
smartPush($_.key, $_.value) if !nqp::ishash($_.value); | |
} | |
} | |
sub cmp($a, $b) { | |
my $eq := from($a[1]) == from($b[1]); | |
my $sup := from($a[1]) > from($b[1]); | |
if $eq { | |
my $eq := to($a[1]) == to($b[1]); | |
my $sup := to($a[1]) < to($b[1]); | |
} | |
my $res := $eq ?? 0 !! $sup ?? 1 !! -1 ; | |
# say( "$res " ~ $a[0] ~ ":'" ~ $a[1] ~ "' " ~ $b[0] ~ ":'" ~ $b[1] ~ "'"); | |
$res ; | |
} | |
my @sorted := @a.sort(&cmp); | |
my $from := 0; | |
my $to := 0; | |
my $last := Mu; | |
my @uniq; | |
my @nm; | |
# fold identical matches | |
# we currently assume matches with same .to and .from are identical. | |
# that may be wrong | |
my &pushFoldedMatches := -> $m { @uniq.push: [ nqp::join(',', @nm), $m[1] ]; } | |
for @sorted { | |
if $last && cmp($_, $last) { | |
pushFoldedMatches($last); | |
@nm := [$_[0]]; | |
} else { | |
@nm.push: $_[0]; | |
} | |
$last := $_; | |
} | |
pushFoldedMatches($last) if @nm; | |
@uniq; | |
} | |
class MatchShow { | |
has $!pos; | |
has $!m; | |
# =begin pod | |
# | |
# =end pod | |
method pushChunk(@a, $nm, $to) { | |
my $len := $to - $!pos; | |
if nqp::substr($nm, 0, 1) ne '~' || $len>0 { | |
my $substr := nqp::substr($!m.orig, $!pos, $len); | |
@a.push: [ $nm, self.textFormat($substr)]; | |
$!pos := $!pos + $len ; | |
} | |
} | |
# These 2 method really are abstract | |
# method showStr($s) { } | |
# method format() { } | |
method display($nm, $m) { | |
$!pos := 0; | |
$!m := $m; | |
self.displayRecurse($m); | |
} | |
method displayRecurse($m) { | |
# say( $m ~ "'$m' " ~ +$m.list ~ +$m.hash); | |
my @a; | |
my $len; | |
for sortMatch($m) { | |
my $submatchNm := $_[0]; | |
my $submatch := $_[1]; | |
# non captured | |
self.pushChunk: @a, '~', from($submatch); | |
my &nonOHash := -> $m { $m.hash && !nqp::ishash($m<O>) }; | |
my &nonLeaf := -> $m { nonOHash($m) || $m.list }; | |
if nonLeaf($submatch) { | |
my $sm := $submatch; | |
my $prevsm := $sm; | |
my @nms := [ $submatchNm ]; | |
while 1 { | |
last if !nqp::istype($sm, Match) || $sm.hash != 1 ; | |
$prevsm := $sm; | |
for $sm.hash { | |
@nms.push: $_.key; | |
$sm := $_.value; | |
} | |
} | |
my $joinedNm := nqp::join(' ', @nms); | |
@a.push: [$joinedNm, self.displayRecurse($prevsm) ]; | |
} else { | |
self.pushChunk: @a, $submatchNm, to($submatch); | |
} | |
} | |
self.pushChunk: @a, '~', to($m); | |
my $res := self.format(@a); | |
$res; | |
} | |
my $*indent := 0; | |
method value($v, $k?) {} | |
method array($v) {} | |
method hash($v) {} | |
} | |
class MatchToJson is MatchShow { | |
method textFormat($s) { | |
$s := subst( $s, /\"/, '\\"', :global); | |
'"' ~ $s ~ '"'; | |
} | |
method format(@a) { | |
'[' ~ | |
nqp::join(',', | |
map(@a, -> $_ { self.textFormat($_[0]) ~ ':' ~ $_[1] } ) | |
) | |
~ ']' | |
} | |
} | |
class MatchToHTML is MatchShow { | |
method format(@a) { | |
nqp::join(',', map(@a, | |
-> $_ { '<span class="' ~ $_[0] ~ '">' ~ $_[1] ~ "</span>" } )); | |
} | |
method textFormat($text) { | |
$text := subst( $text, /\&/, '&', :global); | |
$text := subst( $text, /\</, '<', :global); | |
$text := subst( $text, /\>/, '>', :global); | |
$text; | |
} | |
} | |
class Perl6ToJSON is Perl6::Compiler { | |
method json_dump($parse, *%adverbs) { | |
# dump_match($parse<statementlist><statement>[0]<EXPR>); | |
# dump_match($parse<statementlist><statement>[0]<EXPR>[0]<value><number><numish><integer>); | |
dump_match_rec($parse, ''); | |
# nqp::exit(0); | |
# say(MatchToJson.new.value($parse, :topMost)); | |
my $mtj := MatchToJson.new; | |
say($mtj.display('TOP', $parse)); | |
$mtj := MatchToHTML.new; | |
say($mtj.display('TOP', $parse)); | |
nqp::exit(0); | |
} | |
}; | |
sub MAIN(@ARGS) { | |
# Create and configure compiler object. | |
my $comp := Perl6ToJSON.new(); | |
$comp.language('perl6'); | |
$comp.parsegrammar(Perl6::Grammar); | |
$comp.parseactions(Perl6::Actions); | |
$comp.addstage('json_dump', :before<past>); | |
hll-config($comp.config); | |
my $COMPILER_CONFIG := $comp.config; | |
# Add extra command line options. | |
my @clo := $comp.commandline_options(); | |
@clo.push('parsetrace'); | |
@clo.push('setting=s'); | |
@clo.push('n'); | |
@clo.push('p'); | |
@clo.push('doc=s?'); | |
@clo.push('optimize=s?'); | |
@clo.push('c'); | |
@clo.push('I=s'); | |
@clo.push('M=s'); | |
# Set up module loading trace | |
my @*MODULES := []; | |
# Set up END block list, which we'll run at exit. | |
my @*END_PHASERS := []; | |
# Enter the compiler. | |
$comp.command_line(@ARGS, :encoding('utf8'), :transcode('ascii iso-8859-1')); | |
# Run any END blocks before exiting. | |
for @*END_PHASERS { $_() } | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment