anonymous / _desc.pl
Created

Embed URL

HTTPS clone URL

SSH clone URL

You can clone with HTTPS or SSH.

Download Gist

PPIx::Regexp::xplain because YAPE::Regex::Explain is dead; file ppixregexplain.pl needs _desc.pl

View _desc.pl
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940
our %desc = (
"PPIx::Regexp::Node" =>
[
"xRe::Node",
"a container",
#~ "PPIx::Regexp::Dumper->new( 'qr{(foo)}' )->print",
],
 
"PPIx::Regexp::Node::Range" =>
[
"xRe::Node::Range",
"a character range in a character class",
],
 
"PPIx::Regexp::Structure" =>
[
"xRe::Structure",
"a structure.",
],
 
"PPIx::Regexp::Structure::Assertion" =>
[
"xRe::Structure::Assertion",
"a parenthesized assertion ",
"(the grouptype below explains which one)", ##nah## TODO?!?! explain it here
],
 
"PPIx::Regexp::Structure::BranchReset" =>
[
"xRe::Structure::BranchReset",
"a branch reset group",
],
 
"PPIx::Regexp::Structure::Capture" =>
[
"xRe::Structure::Capture",
"Represent capture parentheses.",
],
 
"PPIx::Regexp::Structure::CharClass" =>
[
"xRe::Structure::CharClass",
"a character class",
],
 
"PPIx::Regexp::Structure::Code" =>
[
"xRe::Structure::Code",
"Represent one of the code structures.",
'WARNING: This extended regular expression feature is considered experimental, and may be changed without notice',
],
 
'(?p{ code })' => [
'',
#~ http://perl5.git.perl.org/perl.git/commit/14455d6cc193f1e4316f87b9dbe258db24ceb714
#~ /(?p{})/ changed to /(??{})/, per Larry's suggestion
#~ (?p{}) has been deprecated for a long time.
 
'warning (?p{}) has been removed Use (??{}) instead. L<http://search.cpan.org/dist/perl/pod/perl5100delta.pod#%28?p{}%29_has_been_removed>',
q{L<http://search.cpan.org/dist/perl/pod/perlre.pod#%28??{_code_}%29|perlre/(??{ code })>},
q{This is a "postponed" regular subexpression. This zero-width assertion executes any embedded Perl code. It always succeeds, and that its return value, rather than being assigned to $^R, is treated as a pattern, compiled if it's a string (or used as-is if its a qr// object), then matched as if it were inserted instead of this construct.},
],
'(??{ code })' => [
q{This is a "postponed" regular subexpression. This zero-width assertion executes any embedded Perl code. It always succeeds, and that its return value, rather than being assigned to $^R, is treated as a pattern, compiled if it's a string (or used as-is if its a qr// object), then matched as if it were inserted instead of this construct.},
q{L<http://search.cpan.org/dist/perl/pod/perlre.pod#%28??{_code_}%29>},
q{L<perlre/(??{ code })>},
],
'(?{ code })' => [
'This zero-width assertion executes any embedded Perl code. It always succeeds, and its return value is set as $^R',
'L<http://search.cpan.org/dist/perl/pod/perlre.pod#(?{_code_})>',
'L<perlre/(?{ code })>',
'L<perlvar/$^R>',
],
 
"PPIx::Regexp::Structure::Main" =>
[
"xRe::Structure::Main",
"a regular expression proper, or a substitution",
],
 
"PPIx::Regexp::Structure::Modifier" =>
[
"xRe::Structure::Modifier",
"Represent modifying parentheses",
"group but do not capture; Basic clustering",
],
 
"PPIx::Regexp::Structure::NamedCapture" =>
[
"xRe::Structure::NamedCapture",
"a named capture",
'L<<< perlre/(?<NAME>pattern) >>>',
'L<perlvar/%+>',
],
 
"PPIx::Regexp::Structure::Quantifier" =>
[
"xRe::Structure::Quantifier",
"Represent curly bracket quantifiers",
],
 
"PPIx::Regexp::Structure::RegexSet" =>
[
"xRe::Structure::RegexSet",
"a regexp character set",
'L<perlre/(?[ ])>',
'L<perlrecharclass/Extended Bracketed Character Classes>',
'L<http://search.cpan.org/dist/perl-5.18.0/pod/perlrecharclass.pod#Extended_Bracketed_Character_Classes>',
'no warnings "experimental::regex_sets";',
],
 
"PPIx::Regexp::Structure::Regexp" =>
[
"xRe::Structure::Regexp",
"Represent the top-level regular expression",
],
 
"PPIx::Regexp::Structure::Replacement" =>
[
"xRe::Structure::Replacement",
"Represent the replacement in s///",
],
 
"PPIx::Regexp::Structure::Subexpression" =>
[
"xRe::Structure::Subexpression",
"Represent an independent subexpression",
 
## redundant
'(?>pattern)',
'L<perlre/(?>pattern)>',
'It may also be useful in places where the "ratchet" or',
'"grab all you can, and do not give anything back" semantic is desirable.',
],
 
"PPIx::Regexp::Structure::Switch" =>
[
"xRe::Structure::Switch",
"a switch",
#~ 'L<perlre/(?(condition)yes-pattern|no-pattern)>',
#~ 'L<http://perldoc.perl.org/perlre.html#%28?%28condition%29yes-pattern%7Cno-pattern%29|perlre/(?(condition)yes-pattern|no-pattern)>',
'L<perlre/(?(condition)yes-pattern)>',
'L<http://p3rl.org/perlre#(?(condition)yes-pattern|no-pattern)>',
],
 
"PPIx::Regexp::Structure::Unknown" =>
[
"xRe::Structure::Unknown",
"Represent an unknown structure. (ERROR!TYPO!NONSENSE!)",
#~ "the following tokens aren't what you wanted",
 
## TODO be clever like perl?!? TODO ASK or TEST if I can assume this?
#~ $ perl -we " qr{(?(foo)bar|baz|burfle)}smx "
#~ Unknown switch condition (?(fo in regex; marked by <-- HERE in m/(?( <-- HERE foo)bar|baz|burfle)/ at -e line 1.
"ERROR die Unknown switch condition in regex;",
"for valid conditions see L<perlre/(?(condition)yes-pattern)>",
 
#~ THIS IS A LIE :) TODO REPORT BUG C<PPIx::Regexp::Structure::Unknown> has no descendants.
],
 
"PPIx::Regexp::Token::Assertion" =>
[
"xRe::Token::Assertion",
#~ "a simple assertion.",
#~ "a simple assertion (ex: \\A \\Z \\G ...).",
#~ "simple zero-width assertion (ex: \\A \\Z \\G ...).",
"simple zero-width assertion (zero-length, between pos()itions)",
],
 
"PPIx::Regexp::Token::Backreference" =>
[
"xRe::Token::Backreference",
"a back reference",
'L<perlglossary/backreference>',
#~ TODO REPORT BUG \g10 UNRECOGNIZED AS A REFERENCE misparsed as PPIx::Regexp::Token::Literal
#~ TODO BACKREFERENCE
#~ \k'NAME'
#~ /(.)(.)(.)(.)(.)(.)(.)(.)(.)\g10/ # \g10 is a backreference
#~ /(.)(.)(.)(.)(.)(.)(.)(.)(.)\10/ # \10 is octal
#~ /((.)(.)(.)(.)(.)(.)(.)(.)(.))\10/ # \10 is a backreference
#~ /((.)(.)(.)(.)(.)(.)(.)(.)(.))\010/ # \010 is octal
],
 
"PPIx::Regexp::Token::Backtrack" =>
[
"xRe::Token::Backtrack",
"Represent backtrack control.",
'L<perlre/Special Backtracking Control Verbs>',
'WARNING: These patterns are experimental and subject to change or removal in a future version of Perl.',
'Their usage in production code should be noted to avoid problems during upgrades.',
],
 
"PPIx::Regexp::Token::CharClass" =>
[
"xRe::Token::CharClass",
"a character class",
],
 
"PPIx::Regexp::Token::CharClass::POSIX" =>
[
"xRe::Token::CharClass::POSIX",
"a POSIX character class",
],
 
"PPIx::Regexp::Token::CharClass::POSIX::Unknown" =>
[
"xRe::Token::CharClass::POSIX::Unknown",
"Represent an unknown or unsupported POSIX character class",
],
 
"PPIx::Regexp::Token::CharClass::Simple" =>
[
"xRe::Token::CharClass::Simple",
"This class represents a simple character class", ## TODO IMPROVE? REMOVE?
],
 
"PPIx::Regexp::Token::Code" =>
[
"xRe::Token::Code",
"a chunk of Perl embedded in a regular expression.",
],
 
"PPIx::Regexp::Token::Comment" =>
[
"xRe::Token::Comment",
#~ "a comment.",
],
 
"PPIx::Regexp::Token::Condition" =>
[
"xRe::Token::Condition",
"Represent the condition of a switch",
'Checks if a specific capture group (or pattern) has matched something.',
],
 
"PPIx::Regexp::Token::Control" =>
[
"xRe::Token::Control",
"Case and quote control.",
'L<perlre/\F\l\u\L\U\Q\E>',,
 
#~ "PPIx::Regexp::Dumper->new( 'qr{\\Ufoo\\E}smx' )->print",
],
 
"PPIx::Regexp::Token::Delimiter" =>
[
"xRe::Token::Delimiter",
"Represent the delimiters of the regular expression",
],
 
"PPIx::Regexp::Token::Greediness" =>
[
"xRe::Token::Greediness",
"a greediness qualifier.",
#~ "PPIx::Regexp::Dumper->new( 'qr{foo*+}smx' )->print",
],
 
"PPIx::Regexp::Token::GroupType" =>
[
"xRe::Token::GroupType",
"a grouping parenthesis type.",
#~ "PPIx::Regexp::Dumper->new( 'qr{(?i:foo)}smx' )->print",
],
 
"PPIx::Regexp::Token::GroupType::Assertion" =>
[
"xRe::Token::GroupType::Assertion",
"a look ahead or look behind assertion",
#~ "PPIx::Regexp::Dumper->new( 'qr{foo(?=bar)}smx' )->print",
],
 
"PPIx::Regexp::Token::GroupType::BranchReset" =>
[
"xRe::Token::GroupType::BranchReset",
"a branch reset specifier",
"L<perlre/(?|pattern)>",
"L<perlre/(?E<verbar>pattern)>",
"capture groups are numbered from the same starting point in each alternation branch",
#~ "PPIx::Regexp::Dumper->new( 'qr{(?|(foo)|(bar))}smx' )->print",
],
 
"PPIx::Regexp::Token::GroupType::Code" =>
[
"xRe::Token::GroupType::Code",
"Represent one of the embedded code indicators",
],
 
"PPIx::Regexp::Token::GroupType::Modifier" =>
[
"xRe::Token::GroupType::Modifier",
"Represent the modifiers in a modifier group.",
#~ "PPIx::Regexp::Dumper->new( 'qr{(?i:foo)}smx' )->print",
],
 
"PPIx::Regexp::Token::GroupType::NamedCapture" =>
[
"xRe::Token::GroupType::NamedCapture",
#~ "a named capture",
#~ 'L<perlre/(?<NAME>pattern)>',
#~ 'L<<< perlre/(?<NAME>pattern) >>>',
##'L<perlre/(?&NAME)>', ## not this
#~ 'L<perlvar/%+>', ## redundant
#~ "PPIx::Regexp::Dumper->new( 'qr{(?<baz>foo)}smx' )->print",
"",
],
 
"PPIx::Regexp::Token::GroupType::Subexpression" =>
[
"xRe::Token::GroupType::Subexpression",
"Represent an independent subexpression marker",
## redundant
#~ '(?>pattern)',
#~ 'It may also be useful in places where the "ratchet" or',
#~ '"grab all you can, and do not give anything back" semantic is desirable.',
 
#~ "PPIx::Regexp::Dumper->new( 'qr{foo(?>bar)}smx' )->print",
],
 
"PPIx::Regexp::Token::GroupType::Switch" =>
[
"xRe::Token::GroupType::Switch",
"Represent the introducing characters for a switch",
#~ "PPIx::Regexp::Dumper->new( 'qr{(?(1)foo|bar)}smx' )->print",
 
 
## TODO be clever like perl?!? TODO ASK or TEST if I can assume this?
#~ $ perl -we " qr{(?(foo)bar|baz|burfle)}smx "
#~ Unknown switch condition (?(fo in regex; marked by <-- HERE in m/(?( <-- HERE foo)bar|baz|burfle)/ at -e line 1.
#~ "die Unknown switch condition in regex;",
"for valid conditions see L<perlre/(?(condition)yes-pattern)>",
],
 
"PPIx::Regexp::Token::Interpolation" =>
[
"xRe::Token::Interpolation",
"Represent an interpolation in the PPIx::Regexp package.",
#~ 'It is a variable! whose contents are used as a pattern, subject to \F\l\u\L\U\Q\E',
'It is a variable! subject to \F\l\u\L\U\Q\E',
#~ and L<perlre/Modifiers>',
'L<perlre/\F\l\u\L\U\Q\E>',
#~ 'L<perlre/\Q\U\E\F\u\l\L>',
#~ 'L<perlre/\Q\U\E\L\F\u\l>', #<<<
#~ 'subject to L<perlre/Modifiers>',
 
#~ "PPIx::Regexp::Dumper->new('qr{\$foo}smx')->print",
],
 
#~ 2013-08-05-04:22:50
"PPIx::Regexp::Token::Interpolation-Regexp" => ['It is a variable! whose contents are used as a pattern ','subject to L<perlre/Modifiers>'],
"PPIx::Regexp::Token::Interpolation-Substitution" => ['It is a variable! whose contents are used as a REPLACEMENT string'],
 
 
"PPIx::Regexp::Token::Literal" =>
[
"xRe::Token::Literal",
"a literal character",
#~ "PPIx::Regexp::Dumper->new( 'qr{foo}smx' )->print",
],
 
#~ $ perl -Mre=debug -wle " $_ = q{aBaBaBaB}; m{((?i)ab)ab}"
"token_modifier_propagates_right" => "These modifiers PROPAGATE to the right for the remainder of the pattern, or the remainder of the enclosing pattern group (if any). Ex: /((?i)CASeINSeNSITIVeHeRe)CASESENSITIVE/",
"PPIx::Regexp::Token::Modifier" =>
[
"xRe::Token::Modifier",
#~ "Represent (trailing) modifiers.",
#~ "Modifier for one of these operators: match(m//) or substitution (s///) or regexp-constructor (qr//)",
#~ "Represent (trailing) modifiers for match, substitution, regexp constructor",
"Represent 1)embedded pattern-match modifiers or 2)(trailing) modifiers for operators match, substitution, regexp constructor ",
#~ "Represent (trailing) modifiers for m// s/// qr///", ## 2013-06-20-08:02:53 dontwanna
### TODO qr// needs better name than regex compile
#~ 2013-06-12-03:55:26 Regexp constructor sounds good
#~ "PPIx::Regexp::Dumper->new( 'qr{foo}smx' )->print",
],
 
"PPIx::Regexp::Token::Operator" =>
[
"xRe::Token::Operator",
"Represent an operator.",
#~ "PPIx::Regexp::Dumper->new( 'qr{foo|bar}smx' )->print",
],
 
"PPIx::Regexp::Token::Quantifier" =>
[
"xRe::Token::Quantifier",
"Represent an atomic quantifier.",
],
 
"PPIx::Regexp::Token::Recursion" =>
[
"xRe::Token::Recursion",
"a recursion",
#~ "(?PARNO)",
#~ "L<perlre/(?PARNO)>", ## perlre problematic
"L<perlre/(?PARNO) (?-PARNO) (?+PARNO) (?R) (?0)>", ## perlre problematic
#~ "PPIx::Regexp::Dumper->new( 'qr{(foo(?1)?)}smx' )->print",
],
 
"PPIx::Regexp::Token::Reference" =>
[
"xRe::Token::Reference",
"a reference to a capture",
#~ "PPIx::Regexp::Dumper->new( 'qr{\\1}smx' )->print",
],
 
"PPIx::Regexp::Token::Structure" =>
[
"xRe::Token::Structure",
"Represent structural elements.",
#~ 'Represent structural elements. (like "[","]", "{","}" "(",")" delimiters)', ## m too
#~ ' ',
],
 
"PPIx::Regexp::Token::Unknown" =>
[
"xRe::Token::Unknown",
"Represent an unknown token (A FAILURE; AN ERROR)",
],
 
"PPIx::Regexp::Token::Unmatched" =>
[
"xRe::Token::Unmatched",
"Represent an unmatched right bracket (a TYPO!)",
],
 
"PPIx::Regexp::Token::Whitespace" =>
[
"xRe::Token::Whitespace",
"Represent whitespace",
],
 
 
'?u' => [
"according to unicode semantics",
],
 
'?d' => [
'according to "Depends" or "Dodgy" or "Default" semantics',
],
 
'?a' => [
'according to ASCII-restrict (or ASCII-safe) semantics',
],
 
'?aa' => [
'according to stricter-ASCII-restrict (or stricter-ASCII-safe) semantics',
],
 
 
 
'*' => 'match preceding pattern 0 or more times',
'+' => 'match preceding pattern 1 or more times',
#~ "PPIx::Regexp::Token::Quantifier".'+' => 'match preceding pattern 1 or more times', ## didn't work, has to do
'?' => 'match preceding pattern 0 or 1 times; is optional',
#~ 2013-06-20-02:39:00
most_possible => '(matching the most amount possible)',
least_possible => '(matching the least amount possible)',
only_last_n => 'WARNING only the LAST repetition of the captured pattern will be stored in %%%%',
## greediness
#~ '+?' => ["and matching (preceding pattern) the least amount possible",'Match shortest string first'],
#~ '++' => ["and give nothing back (ratchet);","modifies preceding quantifier so preceding pattern doesn't backtrack", 'Match longest string and give nothing back'],
"PPIx::Regexp::Token::Greediness".'?' => ["and matching (preceding pattern) the least amount possible",'Match shortest string first'],
"PPIx::Regexp::Token::Greediness".'+' => ["and give nothing back (ratchet);","modifies preceding quantifier so preceding pattern doesn't backtrack", 'Match longest string and give nothing back'],
 
 
#~ 2013-06-13-02:59:39
"\\d" => [
"\\d Match a decimal digit character.",
"[0-9]",
"L<perldebguts/DIGIT>",
],
"\\D" => [
"\\D Match a non-decimal-digit character.",
"L<perldebguts/NDIGIT>",
'Match not "[0-9]" meaning match "[^0-9]";',
],
"\\Da" => [
"\\D Match a non-decimal-digit character (/a ASCII-restrict semantics).",
"L<perldebguts/NDIGITA>",
'Match not "[0-9]" meaning match "[^0-9]";',
],
"\\da" => [
#~ "\\d Match a decimal digit character.",
"\\d Match a decimal digit character (/a ASCII-restrict semantics).",
"Match exactly [0-9]",
"L<perldebguts/DIGITA>",
],
"\\Dl" => [
"\\D Match a non-decimal-digit character.",
"L<perldebguts/NDIGITL>",
],
"\\dl" => [
"\\d Match a decimal digit character.",
"L<perldebguts/DIGITL>",
],
"\\du" => [
"Match a decimal digit character.",
"matches exactly what \\p{Digit} matches.",
],
#~ "\\H" => "\\H Match a character that isn't horizontal whitespace.",
#~ "\\H" => "\\H Match a character that is NOT horizontal whitespace.",
#~ "\\h" => "\\h Match a horizontal whitespace character.",
"\\H" => "\\H Match a character that is NOT horizontal whitespace. \\P{HorizSpace} or [^\\N{U+0009}\\N{U+0020}\\N{U+00A0}\\N{U+1680}\\N{U+180E}\\N{U+2000}-\\N{U+200A}\\N{U+202F}\\N{U+205F}\\N{U+3000}]",
"\\h" => "\\h Match a horizontal whitespace character. \\p{HorizSpace} or [\\N{U+0009}\\N{U+0020}\\N{U+00A0}\\N{U+1680}\\N{U+180E}\\N{U+2000}-\\N{U+200A}\\N{U+202F}\\N{U+205F}\\N{U+3000}]",
#~ 2013-06-14-18:16:35
#~ http://perl5.git.perl.org/perl.git/blob?f=regcomp.c#l9910
"PPIx::Regexp::Token::Unknown"."\\N" => 'ERROR die \N in a character class must be a named character: \N{...} in regex;',
#~ http://perl5.git.perl.org/perl.git/blob?f=regexec.c#l3673
#~ http://perl5.git.perl.org/perl.git/blob?f=regexec.c#l6688
"\\N" => [
#~ "\\N Match a character that isn't a newline (\\n). Experimental.",
"\\N Match a character that is NOT a newline (\\n). Experimental.",
'L<perldebguts/REG_ANY>',
],
"\\R" => [
"generic newline;",
"anything considered a linebreak sequence by Unicode;",
"L<perlrecharclass/Backslash Sequences>",
"anything that can be considered a newline under Unicode",
"[\\x{000A}\\x{000C}\\x{000D}\\x{0085}\\x{2028}\\x{2029}]",
#~ http://perl5.git.perl.org/perl.git/blob?f=regcharclass.h
'LNBREAK: Line Break: \R',
'\p{VertSpace} and "\x0D\x0A" # CRLF - Network (Windows) line ending',
 
],
"\\sa" => "\\s Match a whitespace character. ASCII-restrict; Match exactly [ \\f\\n\\r\\t] (and in perl5.18 vertical tab chr(11))",
"\\s" => "\\s Match a whitespace character.",
"\\S" => "\\S Match a non-whitespace character.",
"\\Sa" => "\\S Match a non-whitespace character. ASCII-restrict; Match exactly [^ \\f\\n\\r\\t] (and in perl5.18 NOT vertical tab chr(11))",
#~ "\\v" => "\\v Match a vertical whitespace character.",
"\\v" => "\\v Match a vertical whitespace character. \\p{VertSpace} or [\\N{U+000A}-\\N{U+000D}\\N{U+0085}\\N{U+2028}-\\N{U+2029}]",
#~ "\\V" => "\\V Match a character that isn't vertical whitespace.",
#~ "\\V" => "\\V Match a character that is NOT vertical whitespace.",
"\\V" => "\\V Match a character that is NOT vertical whitespace. \\P{VertSpace} or [^\\N{U+000A}-\\N{U+000D}\\N{U+0085}\\N{U+2028}-\\N{U+2029}]",
"\\W" => [
"\\W Match a non-\"word\" character.",
"L<perldebguts/NALNUM>",
"L<perlrecharclass/\\W>",
"\\W matches not [a-zA-Z0-9_] meaning [^a-zA-Z0-9_].",
],
"\\Wa" => [
"\\W Match a non-\"word\" character.",
"L<perldebguts/NALNUMA>",
"L<perlrecharclass/\\W>",
"\\W matches not [a-zA-Z0-9_] meaning [^a-zA-Z0-9_].",
],
"\\Wl" => [
"\\W Match a non-\"word\" character; according to use locale;",
"L<perlrecharclass/\\W>",
"L<perldebguts/NALNUML>",
],
"\\Wu" => [
"\\W Match a non-\"word\" character; according to unicode semantics",
"L<perlrecharclass/\\W>",
"\\W matches exactly what \\P{Word} matches (not \\p{Word}).",
"L<perldebguts/NALNUMU>",
],
"\\w" => [
"\\w Match a \"word\" character.",
"L<perlrecharclass/\\w>",
"\\w matches [a-zA-Z0-9_].",
"L<perldebguts/ALNUM>",
],
"\\wa" => [
"\\w Match a \"word\" character. (?a:\w)",
"L<perlrecharclass/\\w>",
"\\w matches [a-zA-Z0-9_].",
"L<perldebguts/ALNUMA>",
],
"\\wl" => [
"\\w Match a \"word\" character; according to use locale; (?l:\w)",
"L<perlrecharclass/\\w>",
"L<perldebguts/ALNUML>",
],
"\\wu" => [
"\\w Match a \"word\" character; according to unicode semantics; (?u:\w)",
"L<perlrecharclass/\\w>",
"\\w matches exactly what \\p{Word} matches.",
"L<perldebguts/ALNUMU>",
],
 
'.' => 'any character except \n',
#~ '.s' => 'any character (including \n)' . join( ' aka ', '', '[\w\W]', '[\s\S]' , '[\d\D]', '\p{All}' ),
'.s' => 'any character (including \n)' . join( ' alias ', '', '[\w\W]', '[\s\S]' , '[\d\D]', '\p{All}' ),
 
#~ 2013-06-13-03:39:54
'[:alpha:]' => [ 'letters', '\p{PosixAlpha}',],
'[:alnum:]' => ['letters and digits','\p{PosixAlpha}'],
'[:ascii:]' => ['all ASCII characters (\000 - \177)', ],
'[:cntrl:]' => ['control characters (those with ASCII values less than 32)',, '\p{PosixCntrl}', ],
'[:digit:]' => ['digits (like \d)',, '\p{PosixDigit}', ],
'[:graph:]' => ['alphanumeric and punctuation characters',, '\p{PosixGraph}', ],
'[:lower:]' => ['lowercase letters',, '\p{PosixLower}', ],
'[:print:]' => ['alphanumeric, punctuation, and whitespace characters',, '\p{PosixPrint}', ],
'[:punct:]' => ['punctuation characters',, '\p{PosixPunct}', ],
'[:space:]' => ['whitespace characters (like \s)',, '\p{PosixSpace}', ],
'[:upper:]' => ['uppercase letters',, '\p{PosixUpper}', ],
'[:word:]' => ['alphanumeric and underscore characters (like \w)',, '\p{PosixWord}', ],
'[:xdigit:]' => ['hexadecimal digits (a-f, A-F, 0-9)',, '\p{Posix}', ],
 
"(*ACCEPT)" => [
'WARNING: This feature is highly experimental. It is not recommended for production code.',
"(*ACCEPT) Causes match to succeed at the point of the (*ACCEPT)",
'L<perlre/(*ACCEPT)>',
],
"(*COMMIT)" => ["(*COMMIT) Causes match failure when backtracked into on failure",'L<perlre/(*COMMIT)>',],
"(*ACCEPT:NAME)" => ["ERROR die Verb pattern 'ACCEPT' may not have an argument in regex;",],
"(*COMMIT:NAME)" => ["ERROR die Verb pattern 'COMMIT' may not have an argument in regex;",],
"(*F:NAME)" => ["ERROR die Verb pattern 'F' may not have an argument in regex;",],
"(*FAIL:NAME)" => ["ERROR die Verb pattern 'FAIL' may not have an argument in regex;",],
"(*F)" => ["(*FAIL) Always fails, forcing backtrack", 'L<perlre/(*FAIL) (*F)>', ],
"(*FAIL)" => ["(*FAIL) Always fails, forcing backtrack", 'L<perlre/(*FAIL) (*F)>', ],
#~ "(*MARK)" => ["(*MARK) Name branches of alternation, target for (*SKIP)",'L<perlre/(*MARK) (*MARK:NAME)>',],
"(*MARK)" => ["(*MARK) Name branches of alternation, target for (*SKIP)",'L<perlre/(*MARK:NAME) (*:NAME)>',],
"(*MARK:NAME)" => ["(*MARK:NAME) Name branches of alternation, target for (*SKIP)",'L<perlre/(*MARK:NAME) (*:NAME)>',],
"(*PRUNE)" => ["(*PRUNE) Prevent backtracking past here on failure",'L<perlre/(*PRUNE) (*PRUNE:NAME)>', ],
"(*PRUNE:NAME)" => ["(*PRUNE:NAME) Prevent backtracking past here on failure",'L<perlre/(*PRUNE) (*PRUNE:NAME)>', ],
"(*SKIP)" => ["(*SKIP) Like (*PRUNE) but also discards match to this point", 'L<perlre/(*SKIP) (*SKIP:NAME)>',],
"(*SKIP:NAME)" => ["(*SKIP:NAME) Like (*PRUNE) but also discards match to this point", 'L<perlre/(*SKIP) (*SKIP:NAME)>',],
"(*THEN)" => ["(*THEN) Forces next alternation on failure", 'L<perlre/(*THEN) (*THEN:NAME)>',],
"(*THEN:NAME)" => ["(*THEN:NAME) Forces next alternation on failure", 'L<perlre/(*THEN) (*THEN:NAME)>',],
#~ "(*UNKNOWN)" => "ERROR warn UNRECOGNIZED VERB (%%%%)",
#~ "(*UNKNOWN:NAME)" => "ERROR warn UNRECOGNIZED VERB:NAME (%%%%:%%%%)",
#~ 2013-08-06-01:47:38
"(*UNKNOWN)" => "ERROR die Unknown verb pattern '%%%%' in regex;",
"(*UNKNOWN:NAME)" => "ERROR die Unknown verb pattern '%%%%:%%%%' in regex;",
 
'$' => 'match before an optional \n, and the end of the string', ## todo ANCHOR ANCHOR DESC
'$m' => 'match before an optional \n, and the end of a "line"',
'\A' => 'match the beginning of the string',
'^' => 'match the beginning of the string',
'^m' => 'match the beginning of a "line"',
'\z' => 'match the end of the string',
'\Z' => 'match before an optional \n, and the end of the string',
#~ '\G' => 'match where the last m//g left off',
'\G' => 'match where the last m//g left off; \G Match only at pos() (e.g. at the end-of-match position of prior m//g)',
'\b' => 'match the boundary between a word char (\w) and something that is not a word char (\W); OUTSIDE a "word"',
'\bl' => 'match the boundary between a word char (\w) and something that is not a word char (\W); according to use locale;; OUTSIDE a "word"',
'\bu' => 'match the boundary between a word char (\p{Word}) and something that is not a word char (\P{Word}); according to unicode semantics; OUTSIDE a "word"',
'\ba' => 'match the boundary between a word char (\w) and something that is not a word char (\W); according to ASCII-restrict; OUTSIDE a "word"',
#~ 2013-08-11-19:59:39 #~ TODO #~ touching OUTSIDE a "word"
#~ perlre
#~ \b Match a word boundary
#~ \B Match except at a word boundary
#~ perlrebackslash
#~ \b Word/non-word boundary. (Backspace in []).
#~ \B Not a word/non-word boundary. Not in [].
#~ perlrequick and perlretut
#~ The word anchor \b matches a boundary between a word character and a non-word character \w\W or \W\w :
#~ perlretut
#~ Similarly, the word boundary anchor \b matches wherever a character matching \w is next to a character that doesn't, but it doesn't eat up any characters itself.
#~ \b looks both ahead and behind, to see if the characters on either side differ in their "word-ness".
## perldebguts
#~ /\B/u \Bu NBOUNDU Match "" at any word non-boundary using Unicode semantics
 
#~
#~ '\B' => 'match the boundary between two word chars (\w) or two non-word chars (\W)',
#~ '\Bl' => 'match the boundary between two word chars (\w) or two non-word chars (\W); according to use locale;',
'\B' => 'match the boundary between two word chars (\w); INSIDE a "word"',
'\Bl' => 'match the boundary between two word chars (\w); according to use locale;; INSIDE a "word"',
'\Bu' => 'match the boundary between two word chars (\\p{Word}); according to unicode semantics; INSIDE a "word"',
'\Ba' => 'match the boundary between two word chars (\\p{Word}); according to ASCII-restrict; INSIDE a "word"',
#~ 2013-06-14-17:20:08 doesn't affect capture groups
#~ $ perl -Mre=debug -le " $_=q{12345}; m{(.{4}\K)\K(.)}; print $1,$2 "
"\\K" => [
'A zero-width positive look-behind assertion.',
'"(?<=pattern)" "\\K"',
'L<perlre/Look-Around Assertions>',
'"keep" everything matched prior to the \K and do not include it in $& ',
'match left of \K and discard (not-Keep) from $& ',
],
 
'?=' => [ '(?=pattern)', 'L<perlre/(?=pattern)>', 'A zero-width positive look-ahead assertion.', 'For example, C</\w+(?=\t)/> matches a word followed by a tab, without including the tab in C<$&>.' ],
'?<=' => [
'(?<=pattern)',
'L<perlre/(?<=pattern)>',
'C<(?<=pattern)> C<\K>',
'A zero-width positive look-behind assertion.',
'For example, C</(?<=\t)\w+/> matches a word that follows a tab, without including the tab in C<$&>.',
'Works only for fixed-width look-behind.',
],
'?!' => [ '(?!pattern)', 'L<perlre/(?!pattern)>',
'A zero-width negative look-ahead assertion.',
#~ q{For example C</foo(?!bar)/> matches any occurrence of "foo" that isn't followed by "bar". },
q{For example C</foo(?!bar)/> matches any occurrence of "foo" that is NOT followed by "bar". },
'Note however that look-ahead and look-behind are NOT the same thing. ',
'You cannot use this for look-behind.',
],
'?<!' => [ '(?<!pattern)', 'L<perlre/(?<!pattern)>',
 
'A zero-width negative look-behind assertion.',
'For example C</(?<!bar)foo/> matches any occurrence of "foo" that does not follow "bar".',
'Works only for fixed-width look-behind.',
 
],
#~ 'errn?<!' => 'ERROR die Variable length lookbehind not implemented in regex (** maybe false positive, detection not bulletproof, not hanlde (?i:a) )',
#~ 'errn?<=' => 'ERROR die Variable length lookbehind not implemented in regex (** maybe false positive, detection not bulletproof, not hanlde (?i:a) )',
'errn?<!' => 'ERROR die Variable length lookbehind not implemented in regex; you used variable length alterations like "a|aa" or you used variable-length quantifiers like "*", "+" or "?"',
'errn?<=' => 'ERROR die Variable length lookbehind not implemented in regex; you used variable length alterations like "a|aa" or you used variable-length quantifiers like "*", "+" or "?"',
 
#~ 2013-06-13-04:15:45
# flags
 
#~ 2013-07-26-16:38:29
#~ 'mods.i' => '/i case-insensitive',
#~ 'mods.-i' => '?-i: case-sensitive',
#~ 'mods.m' => '/m with ^ and $ matching start and end of line',
#~ 'mods.-m' => '?-m: with ^ and $ matching normally (start and end of string)',
#~ 'mods.s' => '/s with . matching \n',
#~ 'mods.-s' => '?-s: with . not matching \n',
#~ 'mods.x' => '/x disregarding whitespace and comments',
#~ 'mods.-x' => '?-x: matching whitespace and # normally',
#~ 'mods.u' => '/u sets the character set to Unicode.',
 
'mods.i' => '(?i) case-insensitive',
'mods.-i' => '(?-i) case-sensitive',
'mods.m' => '(?m) with ^ and $ matching start and end of line',
'mods.-m' => '(?-m) with ^ and $ matching normally (start and end of string)',
'mods.s' => '(?s) with . matching \n',
'mods.-s' => '(?-s) with . not matching \n',
'mods.x' => '(?x) disregarding whitespace and comments',
'mods.-x' => '(?-x) matching whitespace and # normally',
'mods/x' => '/x disregarding whitespace and comments',
'match_semantics.u' => '(?u) sets the character set to Unicode.',
#~ http://search.cpan.org/~rjbs/perl-5.18.0/pod/perlre.pod#/a_%28and_/aa%29
'match_semantics.a' => [ ## match_semantics
#~ '/a is ASCII-restrict (or ASCII-safe); https://metacpan.org/module/perlre#a-and-aa',
#~ '/a is ASCII-restrict (or ASCII-safe); http://search.cpan.org/dist/perl/pod/perlre.pod#/a_%28and_/aa%29',
'/a is ASCII-restrict (or ASCII-safe); L<<<http://search.cpan.org/dist/perl/pod/perlre.pod#/a_%28and_/aa%29|/a (and /aa)>>>',
'/a it causes the sequences \d, \s, \w, and the Posix character classes to match only in the ASCII range.',
'/a also sets the character set to Unicode, BUT adds several restrictions for ASCII-safe matching.',
],
#~ 'match_semantics.aa' => '/aa forbids the intermixing of ASCII and non-ASCII; ASCII-restrict-strict ; ASCII-safe-strict;',
#~ 'match_semantics.aa' => '/aa forbids the intermixing of ASCII and non-ASCII; ASCII-restrict-insensitive; Prevents this match "k" =~ /N{KELVIN SIGN}/aia',
#~ 'match_semantics.aa' => '/aa ASCII-restrict-case-insensitive; forbids the intermixing of ASCII and non-ASCII; Prevents this match "kk" =~ /\N{KELVIN SIGN}\N{U+212A}/i',
#~ _aa.pl
#~ 'match_semantics.aa' => '/aa ASCII-restrict-case-insensitive; Prevents ASCII-range from matching non-ASCII-range case-insensitively. Prevents this match "kk" =~ /\N{KELVIN SIGN}\N{U+212A}/i; https://metacpan.org/module/perlre#a-and-aa',
#~ 'match_semantics.aa' => '/aa ASCII-restrict-case-insensitive; Prevents ASCII-range from matching non-ASCII-range case-insensitively. Prevents this match "kk" =~ /\N{KELVIN SIGN}\N{U+212A}/i; http://search.cpan.org/dist/perl/pod/perlre.pod#/a_%28and_/aa%29',
'match_semantics.aa' => '/aa ASCII-restrict-case-insensitive; Prevents ASCII-range from matching non-ASCII-range case-insensitively. Prevents this match "kk" =~ /\N{KELVIN SIGN}\N{U+212A}/i; L<<<http://search.cpan.org/dist/perl/pod/perlre.pod#/a_%28and_/aa%29|/a (and /aa)>>>',
#~ because CORE::fc("\N{KELVIN SIGN}\N{U+212A}") eq "\F\N{KELVIN SIGN}\N{U+212A}\Q" eq "kk"
#~ http://search.cpan.org/~rjbs/perl-5.18.0/pod/perlre.pod#/d
'match_semantics.^' => '(?^) is (?d) is "Depends" or "Dodgy" or "Default"; L<perlunicode/The "Unicode Bug">;',
'match_semantics.d' => [
'(?d) is "Depends" or "Dodgy" or "Default"; L<perlunicode/The "Unicode Bug">;',
'(?d) is the old, problematic, pre-5.14 Default character set behavior. Its only use is to force that old behavior.',
],
'match_semantics.l' => "/l sets the character set to current locale. See L<perllocale>",
'match_semantics.u' => "/u sets the character set to Unicode.",
'mods/l' => [ "/l WARNING NOT RECOMMENDED instead use locale; ", ],
'mods/u' => [ "/u WARNING NOT RECOMMENDED instead use feature 'unicode_strings'", ],
'mods.o' => [ "ERROR warn Useless (?o) - use /o modifier in regex; /o Compile pattern only once.", ],
'mods/o' => [ "/o Compile pattern only once.", ],
'mods.p' => [ '(?p) Preserve the string matched such that ${^PREMATCH}, ${^MATCH}, and ${^POSTMATCH} are available for use after matching. GLOBAL!TRICKY!(RT#117135)',],
'mods/p' => [ '/p Preserve the string matched such that ${^PREMATCH}, ${^MATCH}, and ${^POSTMATCH} are available for use after matching. GLOBAL!TRICKY!(RT#117135)',],
#~ Note also that the p modifier is special in that its presence anywhere in a pattern has a global effect.
'mods.-a' => [ 'ERROR die Regexp modifier "a" may not appear after the "-" in regex;', ],
 
#~ 2013-08-12-01:50:18
#~ untrippable
#~ TODO REPORT BUG? SHOULD BE RECOGNIZED AS Token::Modifier/GroupType::Modifier
#~ 'qr{(?-^)(?-^:u}', ## #~ xRe::Structure::Capture #~ /1/C1/C58/C0 ; xRe::Token::Unknown
#~ 'mods.-^' => [ 'ERROR die Regexp modifier "^" aka "d" may not appear after the "-" in regex;', ],
'mods.-d' => [ 'ERROR die Regexp modifier "d" may not appear after the "-" in regex;', ],
'mods.-l' => [ 'ERROR die Regexp modifier "l" may not appear after the "-" in regex;', ],
'mods.-u' => [ 'ERROR die Regexp modifier "u" may not appear after the "-" in regex;', ],
'mods.-p' => [ 'THINKO warn Useless use of (?-p) in regex;', ],
'mods.-p' => [ 'THINKO warn Useless use of (?-p) in regex; You cant turn (?p) off; (?p) is global', ],
 
#~ evaln
'mods/s/e' => '(/e) Evaluate the right side as an expression.',
'mods/s/ee' => '(/ee) Evaluate the right side as a string then eval the result (maybe %%%% times). WARNING DANGEROUS L<perlfunc/eval>',
'mods/s/r' => '(/r) Return substitution and leave the original string untouched; not modify $foo in $foo =~ s///r',
 
#~ 2013-07-28-20:13:01
'mods/i' => '(/i) case-insensitive',
#~ untrippable #~ 'mods/-i' => '(/-i) case-sensitive',
'mods/m' => '(/m) with ^ and $ matching start and end of line',
#~ untrippable #~ 'mods/-m' => '(/-m) with ^ and $ matching normally (start and end of string)',
'mods/s' => '(/s) with . matching \n',
## 2013-07-26-02:44:01
#~ 'mods.twice' => 'ERROR die Regexp modifier "%%%%" may not appear twice in regex;',
'mods.nottwice' => 'ERROR die Regexp modifier "%%%%" may not appear twice in regex;',
'mods.twicemax' => 'ERROR die Regexp modifier "%%%%" may appear a maximum of twice in regex;',
'mods.exclusive' => 'ERROR die Regexp modifiers "%%%%" and "%%%%" are mutually exclusive',
#~ 'mods.unknown' => 'ERROR UNKNOWN MODIFIER "%%%%" (?%%%%)',
'mods.unknown' => 'ERROR UNKNOWN MODIFIER "%%%%" die Sequence (?%%%%...) not recognized in regex;',
'mods/unknown' => 'ERROR UNKNOWN MODIFIER "%%%%" die Having no space between pattern and following word is deprecated',
#~ 2013-07-26-03:31:02
#~ 'mods/g' => [ "/g Match globally, i.e., find all occurrences.", ],
'mods/g' => [ "/g Match globally, i.e., find all occurrences.", 'in list context (@matches=m//g) return all matches; in scalar context($count=m//g) return number of matches' ],
'mods/c' => [ "/c Do not reset search position on a failed match when /g is in effect.", ],
#~ '|' => ['OR ; Alternation (outside character class)'],
#~ '|' => ['OR ; Alternation (outside character class); match_left OR match_right'],
'|' => ['OR ; Alternation ; match_left OR match_right'],
 
#~ 2013-06-14-00:39:34
'parsing_failures' => [
"# ERROR WARNING PARSING FAILURE, EXPLANATIONS UNREAL ie IMAGINED ie WRONG ",
"# WARNING PARSING FAILURE, EXPLANATIONS UNREAL ",
"# WARNING PARSING FAILURE, EXPLANATIONS IMAGINED ",
"# WARNING PARSING FAILURE, EXPLANATIONS UNRELIABLE ",
"# WARNING PARSING FAILURE, EXPLANATIONS WRONG ",
"# ",
],
'matches_as_follows' => 'matches as follows:',
'the_regex' => 'The regular expression ',
'm_pat_at_add' => 'match the preceding pattern at address=',
#~ $ perl -Mre=debug -we " qr{*}" ## never used because its Token::Unknown
'quant_f_not' => 'IMPOSSIBLE ERROR die Quantifier follows nothing in regex;',
'm_recur_ata' => 'MATCH RECURSION at address=',
'f_e_r_n_exit' => "FATAL ERROR die Reference to nonexistent group in regex;",
'n_exist_group' => "FATAL ERROR die Reference to nonexistent group in regex;",
 
#~ 2013-06-14-04:23:22
 
"(DEFINE)" => [
"L<perlre/(DEFINE)>",
'define subpatterns which will be executed only by the recursion mechanism',
'It is recommended that you put DEFINE block at the end of the pattern,',
'and that you name any subpatterns defined within it.',
'the yes-pattern is never directly executed, and no no-pattern is allowed',
'Similar in spirit to (?{0}) but more efficient.',
],
'(DEFINE)pointless' => 'WARNING a (DEFINE) section without (?<NAMEd>patterns) is pointless; an empty (DEFINE) section is pointless',
#~ 2013-06-14-04:58:49
 
'regexset.!' => 'complement (everything NOT in following set)',
'regexset.&' => 'intersection',
'regexset.+' => 'union',
'regexset.|' => "another name for '+', hence means union",
'regexset.-' => "subtraction (matched by left operand (above), excluding right operand (below))",
'regexset.^' => [
"symmetric difference (the union minus the intersection);",
"like exclusive or;",
"set of code points that are matched by either, but not both, of the operands.",
],
#~ 2013-08-12-01:20:21 dupes
#~ 'PPIx::Regexp::Structure::RegexSet!' => 'complement (everything NOT in following set)',
#~ 'PPIx::Regexp::Structure::RegexSet&' => 'intersection',
#~ 'PPIx::Regexp::Structure::RegexSet+' => 'union',
#~ 'PPIx::Regexp::Structure::RegexSet|' => "another name for '+', hence means union",
#~ 'PPIx::Regexp::Structure::RegexSet-' => "subtraction (matched by left operand (above), excluding right operand (below))",
#~ 'PPIx::Regexp::Structure::RegexSet^' => [
#~ "symmetric difference (the union minus the intersection);",
#~ "like exclusive or;",
#~ "set of code points that are matched by either, but not both, of the operands.",
#~ ],
"PPIx::Regexp::Structure::CharClass".'^' => "Character class inversion (all characters except the following)",
"PPIx::Regexp::Node::Range".'-' => "'-' is character range operator (a-z means all characters from a to z)",
#~ '-' => "Character range (inside character class)", ## 2013-06-20-07:17:11
#~ 2013-06-14-06:07:00
#~ '{n}' => ['Match exactly n times', 'L<perlre/Quantifiers>', ],
#~ '{n,}' => ['Match at least n times', 'L<perlre/Quantifiers>', ],
#~ '{n,m}' => ['Match at least n but not more than m times', 'L<perlre/Quantifiers>', ],
#~ 2013-06-14-06:24:37
#~ '{n}' => 'Match exactly n times',
#~ '{n,}' => 'Match at least n times',
#~ '{n,m}' => 'Match at least n but not more than m times',
#~ 2013-08-03-17:05:44
'{n}' => '{n} Match exactly (%%%%) times',
'{n,}' => '{n,} Match at least (%%%%) times',
'{n,m}' => '{n,m} Match at least (%%%%) but not more than (%%%%) times',
'{,m}' => 'ERROR:P', ## \Q{,m}\E
'(n)' => [ 'Checks if the numbered capturing group has matched something.', "L<perlre/(1) (2) ...>", ],
'(<NAME>)' => ['Checks if a group with the given name has matched something.', "L<perlre/(<NAME>) ('NAME')>", ],
#~ "('NAME')" => 'Checks if a group with the given name has matched something.',
"(R)" => [
"Checks if the expression has been evaluated inside of recursion.",
"L<perlre/(R)>",
],
"(Rn)" => ["Checks if the expression has been evaluated while executing directly inside of the n-th capture group.",'L<perlre/(R1) (R2) ...>'],
"(R&NAME)" => [
"L<perlre/(R&NAME)>",
"Similar to (R1) , this predicate checks to see if we're executing directly inside of the leftmost group with a given name ",
'(this is the same logic used by (?&NAME) to disambiguate).',
'It does not check the full stack, but only the name of the innermost active recursion.',
],
"posix_inside" => "ERROR warn POSIX syntax [: :] belongs inside character classes in regex; like this [[:word:]]", ## [::][:unknown:]
"\\C" => ['Single octet, even under UTF-8. Not in [].','L<perldebguts/CANY>',],
#~ 2013-06-14-19:04:04
'\l' => ['Lowercase next character. Not in [].', "L<perlfunc/lcfirst>",, ],
'\u' => ['Uppercase next character. Not in [].', "L<perlfunc/ucfirst>" ],
'\L' => ['Lowercase till \E. Not in [].', "L<perlfunc/lc>", ],
'\U' => ['Uppercase till \E. Not in [].', "L<perlfunc/uc>", ],
'\Q' => ['quotemeta till \E. Not in [].', 'Quote (disable) pattern metacharacters till \E.', 'L<perlfunc/quotemeta>', ],
'\E' => ['Turn off \Q, \L and \U processing. Not in [].', '', ],
'\F' => ['Foldcase till \E. Not in [].', '', ],
#~ 2013-06-14-20:55:08
#~ m{\a[\b]\e
'\a' => q('\a' (alarm)),
"\b" => q('\b' (backspace)), ## NOT '\b'
'\e' => q('\e' (escape)),
'\f' => q('\f' (form feed)),
'\n' => q('\n' (newline)),
'\r' => q('\r' (carriage return)),
'\t' => q('\t' (tab)),
#~ '\X' => q{Unicode "eXtended grapheme cluster"; leter and diacritic mark; Not in [].},
'\X' => q{Unicode "eXtended grapheme cluster"; leter and diacritic mark; Multiple code points that add up to a single visual character. L<perldebguts/CLUMP>},
 
#~ 2013-06-16-02:57:43
'check_prefix' => 'Checks to see if the following has matched. ',
mnext_nth_capture => "MATCH the NEXT nth (%%%%) capture group from this position ",
mprev_nth_capture => "MATCH the PREVIOUS nth (%%%%) capture group from this position ",
#~ match_the_capture => 'MATCH THE (%%%%) capture ; MATCH "\\%%%%" aka (in replacement) "$%%%%" ',
match_the_capture => 'MATCH THE (%%%%) capture ; MATCH "\\%%%%" alias (in replacement) "$%%%%" ',
#~ check_the_capture => 'Checks to see if the following has matched. (%%%%) capture ; MATCH "\\%%%%" aka (in replacement) "$%%%%" ',
#~ check_the_capture => 'Checks to see if the (%%%%) capture has matched; The backreference "\\%%%%" aka (in replacement) "$%%%%" ',
check_the_capture => 'Checks to see if the (%%%%) capture has matched; The backreference "\\%%%%" alias (in replacement) "$%%%%" ',
 
#~ cnext_nth_capture => "Checks to see if the following has matched. The NEXT nth (%%%%) capture group from this position ",
#~ cprev_nth_capture => "Checks to see if the following has matched. The PREVIOUS nth (%%%%) capture group from this position ",
 
 
cnext_nth_capture => "Checks to see if the NEXT nth (%%%%) capture group from this position has matched.",
cprev_nth_capture => "Checks to see if the PREVIOUS nth (%%%%) capture group from this position has matched.",
#~ 'cm_recur_ata' => 'Checks to see if the following has matched RECURSION at address=%%%%',
'cm_recur_ata' => 'Checks to see if we are executing directly inside the capture at address=%%%%',
#~ 2013-06-16-04:06:54
#~ check_n_capture => 'Checks to see if the following has matched. "\g{%%%%}" aka "(?&%%%%)" aka "(?P>%%%%)"',
#~ check_n_capture => 'Checks to see if the following capture has matched: "\g{%%%%}" aka "(?&%%%%)" aka "(?P>%%%%)"',
#~ check_n_capture => 'Checks to see if we are executing directly inside the capture "\g{%%%%}" aka "(?&%%%%)" aka "(?P>%%%%)"',
check_n_capture => 'Checks to see if we are executing directly inside the capture "\g{%%%%}" alias "(?&%%%%)" alias "(?P>%%%%)"',
'\P' => ['L<perlrecharclass/Unicode Properties>','TODO warn REPORT BUG for PPIx::Regexp; \PP is \P{Prop} ; for example \PN is \P{Number}; ' ],
'\p' => ['L<perlrecharclass/Unicode Properties>','TODO warn REPORT BUG for PPIx::Regexp; \pP is \p{Prop} ; for example \pN is \p{Number}; ' ],
'eo_grouping' => 'end of grouping for %%%%',
'dodgy-u-name' => '(/d) dodgy forced to (/u) unicode semantics because \N{} found in pattern',
'dodgy-u-prop' => '(/d) dodgy forced to (/u) unicode semantics because \p{} found in pattern',
'dodgy-u-255' => '(/d) dodgy forced to (/u) unicode semantics because code point above 255 found in pattern',
#~ 'dodgy-u-rset' => '(/d) dodgy forced to (/u) unicode semantics because L<perlre/(?[ ])> found in pattern',
'dodgy-u-rset' => '(/d) dodgy forced to (/u) unicode semantics because (?[ ]) found in pattern',
); ## our %desc
#~ die scalar %desc; ## 201/512 ## 2013-08-11-18:43:53
View _desc.pl
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666
#!/usr/bin/perl --
use strict;
use warnings;
use Data::Dump qw/ dd pp /;
use charnames (); ## for ord
use HTML::Entities();
use Unicode::UCD();
 
 
use Getopt::Long();
use PPI::Document;
use PPIx::Regexp::Dumper;
 
use vars qw/ $opt_pmshortcut $opt_coverage /;
 
unless( caller ){
Main( @ARGV );
exit( 0 );
}
 
sub Main { goto &MainXplain }
sub MainXplain {
my %opt;
Getopt::Long::GetOptionsFromArray(
\@_,
\%opt,
q{text|t!},
q{html!},
q{dumper|ddr!},
q{dumpee|dde!},
q{help|h!},
q{perlmonks|pmshortcut|pm|p!},
q{coverage|c!},
);
$opt{help} and return print Usage();
@_ or return print Usage();
$opt{text} or $opt{html}=1;
$opt{coverage} and $opt_coverage=1;
unshift @_, \%opt;
goto &Mexplain;
}
 
sub Usage {
"
Usage:
$0
$0 --help
$0 --html qr/\\d\\w/u
$0 --text qr/\\d\\w/u
$0 \\d\\w qr/\\d\\w/u s/\\d\\w/rand/gex m/\\d\\w/aax ...
# --html is default
# bare pattern becomes qr//
# remember to quote args according to your shell rules
"
}
 
sub Mexplain {
my $args = shift;
local $opt_pmshortcut = $$args{perlmonks};
for my $restr ( @_ ){
my $pdoc = PPI::Document->new( \$restr );
my @res = map { PPIx::Regexp->new( $_ ) } @{
$pdoc->find(
sub {
return 1 if ref($_[1]) =~ m{
PPI::Token::QuoteLike::Regexp
| PPI::Token::Regexp::Match
| PPI::Token::Regexp::Substitute
}ix;
},
)
|| []
};
@res or @res = PPIx::Regexp->new( "qr{$restr}" );
for my $re ( @res ){
if( $$args{html} ){
darnhtml( $re->xplain );
} else {
darntext( $re->xplain );
}
$$args{dumper} and dd( $re );
$$args{dumpee} and dd( $re->xplain );
}
undef @res; undef $pdoc;
}
}
 
 
sub darntext {
my( $ref ) = @_;
my @lols;
if( ref $ref eq 'HASH' ){
my $start = $ref->{start};
my $con = $ref->{start_con};
my $hr = $ref->{start_hr};
my $depth = $ref->{depth} || 0;
my $dent = ' ' x $depth;
$_ = "# $dent $_" for grep { defined $_ } @$start;
$_ = "$dent $_" for grep { defined $_ } $con;
my $conhr = [ grep { defined $_ } $con, $hr ];
my $chits = $ref->{chits};
@lols = ( $start, $conhr, $chits );
} else {
@lols = $ref;
}
for my $lol ( @lols ){
for my $l ( @$lol ){
if( ref $l ){
darntext( $l );
} else {
print "$l\n";
}
}
}
return;
}
 
 
 
sub darnhtml{
print "<!DOCTYPE html><html><head><title> title </title></head><body>\n";;;
#~ <base href="http://perlmonks.com/">
&darnhtmltable;
print "</body></html>\n";
}
 
sub darnhtmltable {
my( $ref ) = @_;
my $depth = 0;
my @lols;
if( ref $ref eq 'HASH' ){
my $start = $ref->{start};
my $con = $ref->{start_con};
my $hr = $ref->{start_hr};
$depth = $ref->{depth} || 0;
darn_table( $depth, $con, $start , $hr );
@lols = $ref->{chits};
} else {
@lols = $ref;
}
for my $lol ( @lols ){
for my $l ( @$lol ){
if( ref $l ){
darnhtmltable( $l );
} else {
darn_table( $depth, " ", [$l] );
}
}
}
return;
}
 
 
sub enent {
my $ret = HTML::Entities::encode_entities( $_[0] );
$ret =~ s{\[}{&#91;}g;
$ret =~ s{\]}{&#93;}g;
$ret =~ s{\|}{&#124;}g;
return $ret;
}
sub darn_table {
my( $depth, $con, $desc, $hr ) = @_;
defined $con or $con= ' ';
print "<table>\n";
#~ print "<table border=1>\n";
print "<tr>";
print "<td><pre>", ' ' x ($depth+3) , enent($con),"</pre></td>\n";
print '<td>', '&nbsp;' x ($depth+3) , "</td>\n";
print "<td>";
#~ shift @$desc;;; ### ditch address= TODO make it { address => '' }
#~ shift @$desc;;; ### ditch token xRE:: TODO makeit { token => [ '','', ] }
 
my( @three ) = splice @$desc, 0, 3; ## make it a tooltip?!??!??!?
#~ unshift @$desc, '# '.join ' ; ', @three;
unshift @$desc, 3==@three ? ( '# '.join ' ; ', @three ) : ( @three );
## TO?DO? http://www.thecssninja.com/css/css-tree-menu# Pure CSS collapsible tree menu | The CSS Ninja - All things CSS, JavaScript & HTML
local$_; for(@$desc){
if( m{\sat\saddress=\s*(\S+)} ){
$_ = sprintf q{<a href="#%s">%s</a>}, enent("$1"), enent($_);
} elsif( m{\baddress=\s*(\S+)} ){
$_ = sprintf q{<a name="%s">%s</a>}, enent("$1"), enent($_);
#~ $_ = sprintf q{<a name="%s"></a>}, enent("$1"), ;
#~ } elsif( m{L<(.*)>} ){
#~ } elsif( m{L<{1,3}(.*)>{1,3}} ){
#~ } elsif( m{L<+\b(.*)\b>+} ){
#~ } elsif( m{L<++(.*?)>+} ){
#~ } elsif( m{L(?:<<<|<)(.*?)(?:>>>|>)}x ){
} elsif( m{L(?:<<<|<)(.*[^>])(?:>>>|>)}x ){
my $odoc = $1;
$odoc =~ s/^\s+|\s+$//g;
my $doc = enent($odoc);
#~ $doc =~ s{^(.+?)/(.+)$}{$1#$2};
if( my( $one, $frag ) = $doc =~ m{^(.+?)/(.+)$} ){
$frag =~ s/\s/-/g;
$frag =~ s{\[}{%5b}g;
$frag =~ s{\]}{%5d}g;
$frag =~ s{\|}{%7c}g;
$doc = "$one#$frag";
}
#~ my $href = "http://perlmonks.com/?node=doc://$doc";
#~ $href = enent($odoc) if $odoc =~ m{^http://}i;
#~ $_ = qq{<a href="$href">}.enent($_).'</a>';
#~
my $href = "http://perlmonks.com/?node=doc://$doc";
if( $odoc =~ m{^http://}i ){
my( $oleft, $oright ) = split /\|/, $odoc;
#~ $href = enent($odoc) ;
$href = enent($oleft) ;
$_ = qq{<a href="$href">}.enent($_).'</a>';
} else {
if( $opt_pmshortcut ){
$_ = "[doc://$doc|".enent($_)."]";
} else {
my $href = "http://perlmonks.com/?node=doc://$doc";
$href = enent($odoc) if $odoc =~ m{^http://}i;
$_ = qq{<a href="$href">}.enent($_).'</a>';
}
}
#~ } elsif( m{^\s*.*?(?:\bdie\b|\bwarn)}i ){
#~ $_ = '<b>'.enent($_).'</b>';
} else {
$_ = enent($_);
}
$_ = "<b>$_</b>" if /\bdie\b|\bwarn/i;
}
if( @$desc>1 and $desc->[1] !~ /^#/ ){ $_="# $_" for @$desc; }
$con and $con=~/\x22,\s*$/ and push @$desc, '#<b>'.enent($con).'</b>';
$hr and push @$desc, $hr;
print map { "$_<br>\n" } @$desc;
print "</td></tr></table>\n";
}
 
#~ package PPIx::Regexp::Element; ## dumb
sub PPIx::Regexp::Element::xplain_desc {
goto &PPIx::Regexp::Node::xplain_desc
}
#~ package PPIx::Regexp::Node; ## dumb
#~ 2013-07-23-05:37:02 dumb for umlclass.bat
{ package PPIx::Regexp::Element; package PPIx::Regexp::Node; package PPIx::Regexp::Structure::Capture; package PPIx::Regexp::Structure::CharClass; package PPIx::Regexp::Structure::Code; package PPIx::Regexp::Structure::Modifier; package PPIx::Regexp::Structure::Quantifier; package PPIx::Regexp::Structure::Replacement; package PPIx::Regexp::Structure; package PPIx::Regexp::Token::Backreference; package PPIx::Regexp::Token::Backtrack; package PPIx::Regexp::Token::CharClass::POSIX; package PPIx::Regexp::Token::CharClass::Simple; package PPIx::Regexp::Token::Code; package PPIx::Regexp::Token::Comment; package PPIx::Regexp::Token::Condition; package PPIx::Regexp::Token::Delimiter; package PPIx::Regexp::Token::Greediness; package PPIx::Regexp::Token::GroupType::Modifier; package PPIx::Regexp::Token::GroupType::Switch; package PPIx::Regexp::Token::Interpolation; package PPIx::Regexp::Token::Literal; package PPIx::Regexp::Token::Modifier; package PPIx::Regexp::Token::Operator; package PPIx::Regexp::Token::Quantifier; package PPIx::Regexp::Token::Recursion; package PPIx::Regexp::Token::Structure; package PPIx::Regexp::Token::Unknown; package PPIx::Regexp::Token::Whitespace; package PPIx::Regexp::Token; package PPIx::Regexp; package main; }
 
our %desc; BEGIN { require '_desc.pl'; }
sub PPIx::Regexp::Node::xplain_desc {
my @ret = &PPIx::Regexp::Node::xplain_desc_real;
#~ @ret and warn "# @_ => ",pp(\@ret), "\n";
if( @ret ){ ## successfull descriptions, stuff we actually used
my $self = shift;
#~ warn "# $self\n", pp( \@_ ), " => ", pp(\@ret), ",\n\n";
$opt_coverage and warn "# $self\n", pp( \@_ ), " => ", pp(\@ret), ",\n\n";
}
return wantarray ? @ret : join('', @ret );
}
sub PPIx::Regexp::Node::xplain_desc_real {
#~ sub PPIx::Regexp::Node::xplain_desc {
#~ warn "@_\n"; ## grogelinginadequate
my( $self, $key , @repos ) = @_;
%desc or require '_desc.pl';
if( my $ret = $desc{ $key } ){
if( ref $ret ){
return @$ret;
} else {
my $ix = 0; ## yick
@repos and $ret =~ s/%%%%/my$r=$repos[$ix++]; defined $r?$r:'%%%%'/ge;;
return $ret;
}
}
return;
}
 
sub PPIx::Regexp::Element::xplain_start {
goto &PPIx::Regexp::Node::xplain_start
}
sub PPIx::Regexp::Node::xplain_start {
my( $self, %args ) = @_;
my $depth = $args{depth} || 0;
my @ret;
push @ret, 'address='. $self->address;
push @ret, $self->xplain_desc( ref $self );;
push @ret, $self->xplain_perl_version;;
#~ push @ret, 'address='. $self->address;
if( defined( my $ord = eval { $self->ordinal } ) ){
#~ my $unicode10 = Unicode::UCD::charinfo( $ord )->{unicode10} ;
my $unicode10 = unicode10( $ord );
#~ $unicode10 or warn pp( {crapola => Unicode::UCD::charinfo( $ord ) } ); ## 2013-08-18-04:12:49
push @ret,
#~ join ' aka ',
join ' alias ',
"ordinal= ord( chr( $ord ) )",
sprintf('\\N{U+%04.4X}', $ord ),
sprintf('\%03o', $ord ),
( $unicode10 ? $unicode10 : () ),
chr( $ord ),
;;;;;;;;;
}
if( not $args{no_mods} ){
if( $self->xmods_susceptible ){
push @ret, xplain_modifiers( $self );
}
push @ret, 'is_case_sensitive' if eval { $self->is_case_sensitive };
}
 
my $con = eval { $self->{content} };
my $xmods = eval { $self->xmods };
my $semantics = eval { $$xmods{match_semantics} };
#~ msixpodualgcer
my @cons = grep { $$xmods{$_} } qw{ m s i x p o d u a l g c e r };
if( $con and not $args{no_con_desc} ){
@cons = map { $con.$_ } grep { defined $_ } @cons, $semantics, '';;
#~ push @cons, ref($self).$con; ## TODO REFUDGE
#~ warn pp { con => $con, semantics => $semantics, cons => \@cons };
#~ 2013-08-11-19:51:28
#~ warn "\n\n", pp( [ qw/FUDGE REFUDGE /, con => $con, semantics => $semantics, cons => \@cons ],[]),"\n\n";
my @desc;
for my $con ( @cons ){
@desc = $self->xplain_desc( $con );;;
if( @desc ){
last;
}
}
@desc and push @ret, @desc;
}
return \@ret;
}
 
sub PPIx::Regexp::Element::address { goto &PPIx::Regexp::Node::address }
our $__rootaddr = 0;
sub PPIx::Regexp::Node::address {
my( $self, %args ) = @_;
my $addr = $$self{_xaddr} || '';
return $addr if $addr;
my @pops = $self;
my $prev = $self;
while( $prev = $prev->parent ){
unshift @pops, $prev;
}
my $root = shift @pops;
my $rootaddr = eval { $root->{__rootaddr}||=++$__rootaddr } ;
$rootaddr ||= sprintf(' 0+%x',$root);
#~ return join '',
$addr = join '',
'/'.$rootaddr,
( map {
my ( $method, $inx ) = $_->_my_inx();
$method = uc$1 if $method =~ m{^(.)};
"/$method$inx";
} @pops ), ' ';
#~ $$self{_xaddr}=$addr;
return $addr;
}
 
 
sub PPIx::Regexp::xplain {
my( $self, %args ) = @_;
$self->{xfailures}=0;
$self->xmods_propagate;
my $depth = $args{depth} || 0;
my @ret;
my $ret = { depth => $depth, start => [@ret], chits => \@ret };
undef @ret;
for my $child ( eval { $self->children } ){
if( eval{ $child->children } ){ #$haskids
push @ret, $child->xplain( %args, depth => $depth + 3 );
} else {
push @ret, $child->xplain( %args, depth => $depth + 2 ); ## GRRRRRR
}
}
{ ## the_ter_rible xfailures
my @ter = (
"my \$regstr = join '', ",
);;;
my $source = $self->source ;
my $sourceref = ref $source ;
$sourceref = '' if not defined $sourceref;
$source = '' if not defined $source;
if( $source ){
my $rr = $self->xplain_desc("the_regex");
if( $sourceref ){
$rr .= "($sourceref)";
my $con = $source->content;
$con or $con = $self->content;;
$con or $con = "";
$con or die dd $self;
$source = $con;
}
$rr .=":\n\n";
$rr .= $source;
$rr .="\n\n" . $self->xplain_desc("matches_as_follows")."\n";
$rr =~ s{^}{# }gm;
push @ter, split /\n/, $rr;
}
#~ if( my $fail = eval { $self->failures } ){
if( my $totalfail = eval { $self->failures + $self->{xfailures} } ){
push @ter, pp( $self );
#~ push @ret,"# failures=$fail";
my $fail = eval { $self->failures } ||0;
my $xfail = eval { $self->{xfailures} } ||0;
push @ter,"# failures=$totalfail == fail($fail) + xfail($xfail)";
push @ter, $self->xplain_desc("parsing_failures");
}
push @ter, join '', "#r: ", join ' / ', grep { defined $_ } ref( $self ), $sourceref;
push @ter, join '', "#r= ",Data::Dump::quote( $self->content );
push @ter, '# ';
unshift @{ $$ret{start} }, @ter;
}
if( my(@why) = xdodgy_unicode_override( $self ) ){
push @{$ret[-1]{start}}, map { $self->xplain_desc($_) } @why; ## yick
}
push @ret, { depth => $depth, start => [";;;;;;;;;;\n\n"], };
return $ret;;
} ## end of sub PPIx::Regexp::xplain
 
sub PPIx::Regexp::Node::parent_modifiers {
my( $self ) = @_;
my $root = $self->root;
my $kid = $root->last_element;
return eval { $kid->modifiers };
}
 
sub PPIx::Regexp::Element::root {
my( $self ) = @_;
my $root = $self;
while( my $newp = $root->_parent ){
$root = $newp ;
}
return $root;
}
 
 
sub PPIx::Regexp::Element::xplain_perl_version {
my( $self ) = @_;
my @ret;
if( my $val = eval { $self->perl_version_introduced } ){
#~ if ( $val ne '5.000' ){
if ( $val > 5.006 ){ ## otherwise its waaay too much :)
push @ret, "perl_version_introduced=$val";
}
}
if( my $val = eval { $self->perl_version_removed } ){
push @ret, "warning perl_version_removed =$val";
}
return @ret;
}
 
sub PPIx::Regexp::Structure::Replacement::xplain {
#~ push @_ , ( in_replacement => 1, hr_length => 6 );
push @_ , ( in_replacement => 1, );
goto &PPIx::Regexp::Structure::xplain;; ## cause it deletes start_con
}
 
 
sub PPIx::Regexp::Structure::xplain {
#~ push @_, ( hr_length => 6 );
my $ret = &PPIx::Regexp::Node::xplain;
delete $ret->{start_con};
#~ $$ret{start_hr} = join '', "# ", '-' x 6 ;
$$ret{start_hr} = join '', "# ", '-' x ( 2 * $$ret{depth} ) ;
return $ret;
}
 
sub PPIx::Regexp::Structure::Quantifier::xn_comma_m { ## n_comma_m {n,m}
my( $self ) = @_;
my( $n, $comma, $m ) = map { eval { $_->{content} } } $self->children;
my @ncm = ('n',',','m'); # {n,m}
defined $n or $ncm[0]='';
defined $comma or $ncm[1]='';
defined $m or $ncm[2]='';
return $n, $comma, $m , @ncm;
}
sub PPIx::Regexp::Structure::Quantifier::xplain {
my( $self ) = @_;
push @_, ( no_con_desc => 1 ); #jic
my $ret = &PPIx::Regexp::Token::Quantifier::xplain;
my( $n, $comma, $m , @ncm ) = $self->xn_comma_m;
push @{$$ret{start}},
$self->xplain_desc( join('', '{',@ncm,'}'), 'n', 'm' ),
$self->xplain_desc( join('', '{',@ncm,'}'), $n, $m ),
'L<perlre/Quantifiers>',
;;;;;;;
return $ret;
}
 
sub PPIx::Regexp::Token::Quantifier::xplain {
my( $self, %args ) = @_;
my $ret = $self->PPIx::Regexp::Node::xplain( %args, no_elements => 1 );
my $pp = $self->preceding_pattern_address;;
my $ns = $self->next_sibling;
my $nsc = eval { $ns->{content} };
my $nsgreedy = eval { $ns->isa("PPIx::Regexp::Token::Greediness") };
if( not($nsgreedy) or ( $nsgreedy and $nsc ne '?') ){
push @{$$ret{start}}, $self->xplain_desc('most_possible');
}elsif( $nsc eq '?'){
push @{$$ret{start}}, $self->xplain_desc('least_possible');
}
my $ps = $self->previous_sibling;
if( eval { $ps->isa("PPIx::Regexp::Structure::Capture") } ){
my $number = eval { $ps->number } ;
my $name = eval { $ps->name };
my $only = "";
$number and $only .= qq{"\$$number"};
$name and $only .= qq{ or "\$+{$name}"};;
push @{$$ret{start}}, $self->xplain_desc('only_last_n', $only );
}
push @{$$ret{start}},
$pp ? $self->xplain_desc( 'm_pat_at_add' ) . $pp
: $self->xplain_desc( 'quant_f_not' ) ;;;
#~ $$ret{start_hr} = join '', "# ", '-' x ( 2 * $$ret{depth} ) ;
#~ $nsgreedy or $$ret{start_hr} = join '', "# ", '-' x ( 2 * $$ret{depth} ) ;
$$ret{start_hr} = $nsgreedy ? '' : join '', "# ", '-' x ( 2 * $$ret{depth} ) ;
$ret;
}
 
 
 
 
sub PPIx::Regexp::Token::Greediness::xplain {
my( $self, %args ) = @_;
$args{no_con_desc}=1;
my $ret = $self->PPIx::Regexp::Node::xplain( %args );
push @{$$ret{start}}, $self->xplain_desc( ref($self).$self->{content} ); ## grr
push @{$$ret{start}}, $self->xplain_desc( 'm_pat_at_add' ) . $self->preceding_pattern_address;
$ret;
}
 
sub PPIx::Regexp::Element::preceding_pattern_address {
my( $self, %args ) = @_;
#~ return eval { $self->previous_sibling->address }; ## its always this isn't it? TODO figure it out
## bug, for greediness previous_sibling is quantifier, so go again
my $ps = $self->previous_sibling;
if( $ps->isa('PPIx::Regexp::Token::Quantifier' ) ){
$ps = $ps->previous_sibling;
}
return eval { $ps->address };
}
 
sub PPIx::Regexp::Structure::Code::xplain {
my( $self, %args ) = @_;
$args{no_elements}=1;
my $ret = $self->PPIx::Regexp::Node::xplain( %args );
my $type = $self->type->content; ## PPIx::Regexp::Token::GroupType::Code
my $key = "(${type}{ code })";
push @{$$ret{start}}, $self->xplain_desc( $key );
return $ret;
}
 
 
sub PPIx::Regexp::Token::Structure::xplain {
push @_, ( no_elements => 1 , no_mods => 1, fudgeme => 0, );
goto &PPIx::Regexp::Node::xplain;
}
 
sub PPIx::Regexp::Token::Code::xplain {
my( $self, %args ) = @_;
$args{no_elements} = 1 if $self->ancestor_isa('PPIx::Regexp::Structure::Replacement');
return $self->PPIx::Regexp::Node::xplain( %args );
}
 
#~ 2013-06-14-05:22:25
#~ TO??DO?? unicharproptoregexrange-unicode-regex-range-character-class-ucd.pl
sub PPIx::Regexp::Token::CharClass::Simple::xplain {
my( $self , %args ) = @_;
$args{no_mods}=1;
my $ret = $self->PPIx::Regexp::Node::xplain( %args );
if( my $con = eval{$self->{content}} ){
if( $con =~ /^\\[pP]\{/ ){
push @{$$ret{start}}, "L<perluniprops/$con>";;; ## autogenerated
} else {
push @{$$ret{start}}, "L<perlrecharclass/$con>";;;
push @{$$ret{start}}, "L<perlrebackslash/$con>";;;
}
push @{$$ret{start}}, xplain_modifiers( $self, { GONERS => [qw/ i /] } );
}
$$ret{start_hr} = join '', "# ", '-' x ( 2 * $$ret{depth} ) ;
return $ret;
}
 
sub PPIx::Regexp::Token::CharClass::POSIX::xplain {
push @_, ( no_con_desc => 0 );
goto &PPIx::Regexp::Token::CharClass::Simple::xplain;
}
 
sub PPIx::Regexp::Structure::CharClass::xplain {
my( $self ) = @_;
push @_, ( no_con_desc => 1 );
my $ret = &PPIx::Regexp::Structure::xplain;
if( $self->children > 1 ){
my $colons = join '', map { $_->content } $self->schild(0), $self->schild(-1) ;
if( $colons eq '::'){
#~ if( $colons eq '::' and $self->children > 1 ){
push @{$$ret{start}}, $self->xplain_desc('posix_inside');
}
}
return $ret;
}
 
sub PPIx::Regexp::Token::Interpolation::xplain {
#~ goto &PPIx::Regexp::Token::xplain ## start_content
my( $self ) = @_;
my $ret = &PPIx::Regexp::Token::xplain;
my $key = $self->ancestor_isa('PPIx::Regexp::Structure::Replacement')
? 'PPIx::Regexp::Token::Interpolation-Substitution'
: 'PPIx::Regexp::Token::Interpolation-Regexp' ;
push @{$$ret{start}}, $self->xplain_desc( $key );
return $ret;
}
sub PPIx::Regexp::Token::Literal::xplain { goto &PPIx::Regexp::Token::xplain; }
sub PPIx::Regexp::Token::xplain {
my( $self, %args ) = @_;
my $ret = $self->PPIx::Regexp::Node::xplain( %args, no_elements => 1, no_mods => 0, );
my $con = $self->{content};
if( $con =~ m{\\g\d+} ){
push @{$$ret{start}}, (
"TODO warn REPORT BUG \\g10 UNRECOGNIZED AS A PPIx::Regexp::Token::Backreference misparsed as PPIx::Regexp::Token::Literal (\\g10 is not \\10, can't be treated as octal)",
$self->xplain_desc('n_exist_group')
);
}
{ no warnings 'uninitialized';
if( $self->ancestor_isa('PPIx::Regexp::Structure::CharClass') and $con =~ m{^\\\d+$} and eval { $self->next_sibling->{content} eq '-' } ){
push @{$$ret{start}}, 'ERROR warn TODO REPORTBUG octals NOT PARSED AS PPIx::Regexp::Node::Range';
}
}
return $ret;
}
 
sub PPIx::Regexp::Token::Recursion::xplain {
#~ my( $self ) = @_;
#~ my $ret = &PPIx::Regexp::Node::xplain;
 
my( $self , %args ) = @_;
## guessing ## $ perl -le " warn int m{(lc(?i:(?1)|end)+)}smx for qw/ lcend lclcend lclcEND lcLCend /"
#~ $ perl -e " print int m{(lc(?i:(?1)|end)+)}smx for qw/ lcend lclcend lclcEND lcLCend /"
#~ 1110
$args{no_mods}=1;
my $ret = $self->PPIx::Regexp::Node::xplain( %args );
my $absolute = eval { $self->absolute } ;
my $number = eval { $self->number } ;
if( $number ){
my $padd = eval{ $self->find_recursion_number( $absolute )->address };
if( my ( $direction ) = $number =~ /^([\-\+])/ ){
my $desckey = $direction eq '+' ? 'mnext_nth_capture' : 'mprev_nth_capture';
push @{$$ret{start}}, $self->xplain_desc( $desckey, $number );
}
$absolute and push @{$$ret{start}}, $self->xplain_desc('match_the_capture', ($absolute) x 3 );
if( $padd ){
push @{$$ret{start}}, $self->xplain_desc('m_recur_ata').$padd;
} else {
push @{$$ret{start}}, $self->xplain_desc('n_exist_group')
}
}
if( my $name = eval { $self->name } ){
push @{$$ret{start}}, 'name='.$name.join(' alias ','', '"(?&'.$name.')"', '"(?P>'.$name.')"' );
my $padd = eval{ $self->find_recursion_number( $name )->address };
if( $padd ){
push @{$$ret{start}}, $self->xplain_desc('m_recur_ata').$padd;
} else {
push @{$$ret{start}}, $self->xplain_desc('n_exist_group')
}
}
return $ret;
}
 
sub PPIx::Regexp::Token::Backreference::xplain {
my( $self ) = @_;
push @_, ( no_mods => 0 ); ## susceptible except /x
my $ret = &PPIx::Regexp::Node::xplain;
my $absolute = eval { $self->absolute } ;
my $number = eval { $self->number } ;
if( $number ){
my $padd = eval{ $self->find_recursion_number( $absolute )->address };
if( my ( $direction ) = $number =~ /^([\-\+])/ ){
my $desckey = $direction eq '+' ? 'mnext_nth_capture' : 'mprev_nth_capture';
push @{$$ret{start}}, $self->xplain_desc( $desckey, $number );
}
$absolute and push @{$$ret{start}}, $self->xplain_desc('match_the_capture', ($absolute) x 3 );
if( $padd ){
push @{$$ret{start}}, $self->xplain_desc('m_recur_ata').$padd;
} else {
push @{$$ret{start}}, $self->xplain_desc('n_exist_group');;
}
}
if( my $name = eval { $self->name } ){
push @{$$ret{start}}, 'L<perlre/(?P=NAME)>';
#~ push @{$$ret{start}}, 'L<perldebguts/NREF>'; ## TODO?? NREFF? NREFFL? NREFFU? NREFFA? unlinkable
push @{$$ret{start}}, 'MATCH "\\g{'.$name.'}"'.join(' alias ','', '"\\k<'.$name.'>"', '"(?&'.$name.')"', '"(?P>'.$name.')"' );
if( my $capture = $self->find_recursion_number( $name ) ){
push @{$$ret{start}}, $self->xplain_desc('m_recur_ata').$capture ->address;
} else {
push @{$$ret{start}}, $self->xplain_desc('n_exist_group');
}
}
return $ret;
}
 
sub PPIx::Regexp::Token::Condition::xplain {
#~ push @_, ( no_con_desc => 0 ); ## want it, its not no doubling
#~ my( $self ) = @_;
#~ my $ret = &PPIx::Regexp::Node::xplain;
my( $self , %args ) = @_;
$args{no_mods}=1; ## 2013-08-05-04:38:50
delete $args{no_con_desc}; ## 2013-08-11-19:19:21
my $ret = $self->PPIx::Regexp::Node::xplain( %args );
my $absolute = eval { $self->absolute } ;
my $number = eval { $self->number } ;
my $name = eval { $self->name } ;
my $con = eval { $self->content } ;
my $check_prefix = $self->xplain_desc('check_prefix');
if( my( $left, $right) = $con =~ m{
^
\(
( # $1
<
|
'
|
R\&?
)? ## first alteration is optional
( # $2
[^\)]*
)
\)
}x ){
my $gotr = defined $left ? ( $left =~ /^R/ ) : 0 ;
my $desc = join '',
( $gotr ? $left : () ),
( $number ? 'n' : () ),
( $name ? 'NAME' : () ),
;;;;;;;;;;;;;
if($name and not $gotr) {
$desc = "(<$desc>)" ;
} else {
$desc = "($desc)";
}
push @{$$ret{start}}, $self->xplain_desc( $desc );
}
if( $number ){
my $padd = eval{ $self->find_recursion_number( $absolute )->address };
#~ 2013-08-11-20:40:29 apparently this will never happen because (?(1)) is legal but (?(-1)) is not legal
if( my ( $direction ) = $number =~ /^([\-\+])/ ){
my $desckey = $direction eq '+' ? 'cnext_nth_capture' : 'cprev_nth_capture';
push @{$$ret{start}}, $self->xplain_desc( $desckey, $number );
}
$absolute and push @{$$ret{start}}, $self->xplain_desc('check_the_capture', ($absolute) x 3 );
if( $padd ){
push @{$$ret{start}}, $self->xplain_desc('cm_recur_ata', $padd );
} else {
push @{$$ret{start}}, $self->xplain_desc('n_exist_group')
}
}
if( $name ){
push @{$$ret{start}}, 'L<perlre/(?(condition)yes-pattern)>';
#~ push @{$$ret{start}}, $check_prefix.'"\\g{'.$name.'}"'.join(' aka ','', '"(?&'.$name.')"', '"(?P>'.$name.')"' );
push @{$$ret{start}}, $self->xplain_desc('check_n_capture', ($name) x 3 );
if( my $capture = $self->find_recursion_number( $name ) ){
push @{$$ret{start}}, $self->xplain_desc('cm_recur_ata', $capture ->address );
} else {
push @{$$ret{start}}, $self->xplain_desc('n_exist_group');
}
}
if( $con eq '(DEFINE)'){
## grr #~ Can't locate object method "find" via package "PPIx::Regexp::Token::Condition"
#~ if( not $self->find(sub{ return 1 if $_[0]->isa('PPIx::Regexp::Structure::NamedCapture'); return 0; }, ) ){
#~ if( not $self->find_first(sub{ return 1 if $_[0]->isa('PPIx::Regexp::Structure::NamedCapture'); return 0; }, ) ){
### belongs in parent?
if( not $self->parent->find_first(sub{ return 1 if $_[0]->isa('PPIx::Regexp::Structure::NamedCapture'); return 0; }, ) ){
push @{$$ret{start}}, $self->xplain_desc('(DEFINE)pointless');
}
}
return $ret;
} ## end of fudgy sub PPIx::Regexp::Token::Condition::xplain
 
 
sub PPIx::Regexp::Element::find_recursion_number {
goto &PPIx::Regexp::Node::find_recursion_number
}
 
sub PPIx::Regexp::Node::find_recursion_number {
my( $self, $n ) = @_;
$self->root->find_first( sub {
my $type = $_[1]->isa('PPIx::Regexp::Structure::NamedCapture')
|| $_[1]->isa('PPIx::Regexp::Structure::Capture')
|| $_[1]->isa('PPIx::Regexp::Token::GroupType::NamedCapture')
;;;
if( $type ){
if( my $number = eval{$_[1]->number} ){
return 1 if $number eq $n ;## found a good one
}
if( my $name = eval{$_[1]->name} ){
return 1 if $name eq $n ;## found a good one
}
}
return 0; ## keep searching
}, );
}
 
sub PPIx::Regexp::Structure::Capture::xplain {
my( $self ) = @_;
my $ret = &PPIx::Regexp::Node::xplain;
my $number = eval { $self->number } ;
my $name = eval { $self->name };
$name and $name = 'name='.$name.join(' alias ','', '"\\g{'.$name.'}"', '"\\k<'.$name.'>"', '"(?&'.$name.')"', '"(?P>'.$name.')"' , qq{"\$+{$name}"} );;
$number and $number = 'number='.$number.' alias "$'.$number.'" or "\\'.$number.'"';
$number and push @{$$ret{start}}, $number ;
$name and push @{$$ret{start}}, $name;
delete $ret->{start_con};
$$ret{start_hr} = join '', "# ", '-' x ( 2 * $$ret{depth} ) ;
## the fudgyness goes on
$number and push @{$ret->{chits}[-1]{start}}, $self->xplain_desc('eo_grouping', $number );
$name and push @{$ret->{chits}[-1]{start}}, $self->xplain_desc('eo_grouping', $name );
return $ret;
}
 
sub PPIx::Regexp::Element::xplain { goto &PPIx::Regexp::Node::xplain }
sub PPIx::Regexp::Node::xplain {
my( $self, %args ) = @_;
my $depth = $args{depth} || 0;
my $start = $self->xplain_start( %args );
my @chits;
my $ret = { depth => $depth, start => $start , chits => \@chits };
$$ret{start_hr} = join '', "# ", '-' x ( $args{hr_length} || 66 );
$$ret{start_con} = Data::Dump::pp( $self->content ).',';
if( $args{no_elements} ){
delete $$ret{chits};
return $ret;
}
for my $start ( eval { $self->start } ){
push @chits, $start->xplain( %args, depth => $depth );
}
 
for my $type ( eval { $self->type } ){
if( my @exp = $type->xplain( %args, depth => $depth + 1 ) ){
push @chits, @exp;
}
}
 
for my $child ( eval { $self->children } ){
if( eval{ $child->children } ){ #$haskids
push @chits, $child->xplain( %args, depth => $depth + 3 );
} else {
push @chits, $child->xplain( %args, depth => $depth + 2 );
}
}
 
for my $finish ( eval { $self->finish } ){
push @chits, $finish->xplain( %args, depth => $depth );
}
if( not @$ret{chits} ){
delete $$ret{chits};
}
return $ret;
 
} ## end of sub PPIx::Regexp::Node::xplain
 
 
sub PPIx::Regexp::Structure::Modifier::xplain {
push @_, no_mods => 1;
goto &PPIx::Regexp::Structure::xplain
}
 
sub PPIx::Regexp::Token::GroupType::Modifier::xplain {
my( $self , %args ) = @_;
my $ret = $self->PPIx::Regexp::Node::xplain( %args, no_elements => 1 , no_mods => 0 );
if( my @mods = eval { $self->modifiers } ){ ## yick
my %mods = @mods; %mods and push@{$$ret{start}}, join(' ', 'mods(', map { " $_ = $mods{$_} "} keys%mods ).')';;
my @exp = xplain_modifiers( $self );
@exp and push @{$$ret{start}}, @exp;
}
##fudgy
return $ret;
}
 
sub PPIx::Regexp::Token::Modifier::xplain {
my( $self , %args ) = @_;
delete $args{no_mods};
my $ret = $self->PPIx::Regexp::Node::xplain( %args );
push @{$$ret{start}}, xplain_modifiers( $self );
my $con = eval{ $self->{content}};
if( length $con and $self != $self->root->modifier and $con =~ m{^\(.+\)$}sm ){
push @{$$ret{start}}, $self->xplain_desc( "token_modifier_propagates_right" );; ## 2013-07-30-03:02:24 TODO MORE RET KEYS
}
$ret;
}
 
 
sub PPIx::Regexp::Token::Operator::xplain {
my( $self , %args ) = @_;
$args{no_elements} = 1;
$args{no_mods} = 1;
$args{no_con_desc} = 1;
my $ret = $self->PPIx::Regexp::Node::xplain( %args );
my @desc;
if( @desc = eval { $self->xplain_desc( ref($self->parent). $self->{content} ) } )
{
push @{$$ret{start}}, @desc ;
my $refpc = ref($self->parent). $self->{content};
my ( $method, $inx ) = $self->_my_inx();
if( $refpc eq "PPIx::Regexp::Structure::CharClass".'^' and $method ne 'type' ){
push @{$$ret{start}}, "ERROR warn TODO REPORT BUG THIS IS LITERAL ^ NOT NEGATION";
}
if( $refpc eq "PPIx::Regexp::Structure::CharClass".'^'
and $method eq 'type'
and eval { $self->parent->child( 0 )->content eq ']' }
){
push @{$$ret{start}}, "ERROR warn TODO REPORT BUG A LONE ^ IN A CHARCLASS IS A IS LITERAL ^ NOT NEGATION";
}
} elsif( $self->ancestor_isa('PPIx::Regexp::Structure::RegexSet') and @desc = eval { $self->xplain_desc( 'regexset.'.$self->{content} ) } )
{
push @{$$ret{start}}, @desc ;
} elsif( @desc = eval { $self->xplain_desc( $self->{content} ) } )
{
push @{$$ret{start}}, @desc ;
}
return $ret;
}
 
sub PPIx::Regexp::Element::ancestor_isa {
my( $self, $type ) = @_;
my $root = $self;
while( my $newp = $root->_parent ){
return 1 if $type eq ref $newp;
$root = $newp ;
}
return 0;
}
 
 
sub PPIx::Regexp::Token::Backtrack::xplain {
my( $self ) = @_;
push @_, ( no_con_desc => 1 ); ## no doubling
my $ret = &PPIx::Regexp::Node::xplain;
my( $con ) = $ret->{start_con} =~ m{ \( ( [^\)]+ ) \) }x;
my( $verb, $arg ) = split /\:/, $con;
if( $verb and $arg ){
if( my @desc = $self->xplain_desc( "($verb:NAME)" ) ){
push @{$$ret{start}}, @desc;
} else {
push @{$$ret{start}}, $self->xplain_desc( "(*UNKNOWN:NAME)", $verb, $arg ) ;
}
}
if( $verb ){
if( my @desc = $self->xplain_desc( "($verb)" ) ){
push @{$$ret{start}}, @desc;
} else {
push @{$$ret{start}}, $self->xplain_desc( "(*UNKNOWN)", $verb ) ;
}
}
$ret;
}
 
 
sub PPIx::Regexp::Token::Unknown::xplain {
my( $self ) = @_;
my $ret = &PPIx::Regexp::Node::xplain;
push @{$$ret{start}}, $self->xplain_desc( ref($self).$self->{content} ); ## grr
if( '?' eq $self->{content} ){
push @{$$ret{start}}, $self->xplain_desc( 'quant_f_not' ); ## meh
}
return $ret;
}
 
sub PPIx::Regexp::Token::GroupType::Switch::xplain {
push @_, ( no_con_desc => 1 ); ## no doubling
goto &PPIx::Regexp::Node::xplain;
}
sub PPIx::Regexp::Token::Delimiter::xplain {
push @_, ( no_con_desc => 1 ); ## s\\\sex \ with parent modifiers becomes \s
goto &PPIx::Regexp::Node::xplain;
}
 
#~ $ perl -Mre=debug -wle " qr/[a-z]/i"
#~ $ perl lolabe.pl -t -ddr " qr/[a-z]/i"
#~ $ perl lolabe.pl -t " qr/[a-z]/i"
 
sub unicode10 {
my $it = Unicode::UCD::charinfo( $_[0] );
my $uc10 = $it->{unicode10} || $it->{name};
return $uc10;
}
 
sub PPIx::Regexp::Node::Range::xplain {
#~ my( $self ) = @_;
#~ my $ret = &PPIx::Regexp::Node::xplain;
my( $self , %args ) = @_;
$args{no_elements} = 1; ## doesn't interfere with wxPPI
my $ret = $self->PPIx::Regexp::Node::xplain( %args );
my( $left, $right ) = map{ $self->schild($_)->ordinal } 0 , -1;
#~ push @{$$ret{start}}, sprintf("code points %d to %d", $left, $right );
push @{$$ret{start}}, sprintf("code points ord(chr( %d )) to ord(chr( %d ))", $left, $right );
#~ push @{$$ret{start}}, sprintf("code points \\%03o to \\%03o", $left, $right );
#~ push @{$$ret{start}}, sprintf("code points '\\N{U+%04.4X}' to '\\N{U+%04.4X}'", $left, $right );
push @{$$ret{start}}, sprintf('code points "\N{U+%04.4X}" to "\N{U+%04.4X}"', $left, $right );
$left = unicode10( $left );
$right = unicode10( $right );
if( $left and $right ){
push @{$$ret{start}}, sprintf('characters "%s" to "%s"', $left, $right );
}
$ret;
}
 
#~ sub PPIx::Regexp::Token::Comment::xplain { goto &PPIx::Regexp::Token::Whitespace::xplain }
sub PPIx::Regexp::Token::Comment::xplain {
my( $self, %args ) = @_;
$args{no_con_desc}=1;
my $ret = $self->PPIx::Regexp::Token::xplain( %args );
}
sub PPIx::Regexp::Token::Whitespace::xplain {
#~ return Data::Dump::pp( $_[0]->content ).','; ## content
return { start_con => Data::Dump::pp( $_[0]->content ).',' }; ## content
}
 
 
sub PPIx::Regexp::Element::xmods { goto &PPIx::Regexp::Node::xmods }
sub PPIx::Regexp::Node::xmods {
my $xmods = $_[0]->{xmods};
return $xmods
? wantarray
? %{ $xmods }
: $xmods
: ();
}
 
 
sub PPIx::Regexp::Element::xmods_susceptible { goto &PPIx::Regexp::Node::xmods_susceptible }
sub PPIx::Regexp::Node::xmods_susceptible {
use List::Util();
return List::Util::first {
$_[0]->isa($_)
} qw/ PPIx::Regexp::Token::Literal
PPIx::Regexp::Token::Reference
PPIx::Regexp::Token::CharClass
PPIx::Regexp::Token::Interpolation
PPIx::Regexp::Token::Assertion
/
;;;;
}
 
 
#~ 2013-07-26-03:43:03
#~ sub PPIx::Regexp::is_qr { goto &PPIx::Regexp::is_compile }
#~ sub PPIx::Regexp::is_compile { return !! $_[0]->root->child( 0 )->content eq 'qr' }
#~ sub PPIx::Regexp::is_substitute { return !! $_[0]->root->replacement }
#~ sub PPIx::Regexp::is_match { return !( $_[0]->is_substitute && $_[0]->is_compile ) }
#~ #~ is_compile { eval { $_[0]->root->source->isa('PPI::Token::QuoteLike::Regexp') } }
#~ #~ is_match { eval { $_[0]->root->source->isa('PPI::Token::Regexp::Match') } }
#~ #~ is_substitute { eval { $_[0]->root->source->isa('PPI::Token::Regexp::Substitute') } }
 
 
 
#~ sub PPIx::Regexp::is_compile { return !! $_[0]->child( 0 )->content eq 'qr' }
sub PPIx::Regexp::is_compile { return !! $_[0]->type->content eq 'qr' }
sub PPIx::Regexp::is_substitute { return !! $_[0]->replacement }
sub PPIx::Regexp::is_match { return !( $_[0]->is_substitute && $_[0]->is_compile ) }
#~ sub PPIx::Regexp::xtype {
#~ return $_[0]->is_substitute
#~ ? 's'
#~ : $_[0]->is_compile
#~ ? 'qr'
#~ : 'm'
#~ ;;;;;;;;
#~ }
sub PPIx::Regexp::xtype {
return
$_[0]->is_substitute ? 's'
: $_[0]->is_compile ? 'qr'
: 'm';
}
 
 
#~ 2013-07-28-16:31:40
#~ (?see-n) would return
#~ my( $modorder, $modcount ) = [ qw/ s e e n / ], { s => 1 , e => 2, n => 0 }
#~
#~ if( exists $modcount->{e} and my $count = $modcount->{e} ){
#~ ## we saw it, and it was on this many times
#~ }
#~
#~
sub PPIx::Regexp::Token::Modifier::xmods_explode {
my $notroot = int eval { $_[0] != $_[0]->root->modifier };
my $con = $_[0]->{content};
my @mods;
my %mods;
$con =~ s{
^
(?:
\Q(?\E
|
\Q?\E
)
|
(?:
\Q:\E
|\Q)\E
|\Q:)\E
) $
}{}gx;
my( $onners, $offers ) = ( split( '-', $con ), '','' );
my @onners = $onners =~ m/(.)/g;
my @offers = $offers =~ m/(.)/g;
$mods{$_}++ for @onners;
#~ delete @mods{@offers} ; ## OFFERS TRUMP ONNERS
$mods{$_}=0 for @offers ; ## OFFERS TRUMP ONNERS
@mods = ( @onners, @offers );
if( $notroot ){
if( my @goners = grep { exists $mods{ $_ } } '?', ':', '(', ')', ){ ## JIC
delete @mods{ @goners } ;
$_[0]->root->{xfailures}+=int@goners ; ## GRR
}
}
if( ( my @two = grep { $mods{$_} } qw/ a d l u / ) > 1 ){
$_[0]->root->{xfailures}+=int@two; ## GRR
}
return \@mods, \%mods;
}
 
### walks tree, can->modifiers
sub PPIx::Regexp::xmods_propagate {
my( $node , $depth, $xmods ) = @_;
$depth ||= 0;
$xmods ||= {};
if( $node->isa('PPIx::Regexp') ){
my($arraymods, $hashmods ) = $node->modifier->xmods_explode;
%{$xmods} = %{$hashmods};
$node->root->{xmods} = {%{$xmods}}; ## markit
delete $xmods->{o}; ## do not propagate
PPIx::Regexp::xmods_propagate( $node->regular_expression, 0, $xmods );
} else {
my @kids = eval { $node->elements };
if( not @kids ){
@kids = map { eval { $node->$_ } } qw{ start type children finish };
}
for my $kid ( @kids ){
if( $kid->can('modifiers') ){
my( $arraymods, $hashmods ) = $kid->xmods_explode;
my %newmods = $kid->xmerge_mods( $xmods, $hashmods );
if( $kid->isa( 'PPIx::Regexp::Token::GroupType::Modifier' ) ){
$xmods = \%newmods; ### propagate to children (?i:)
} else { ## propagate to siblings (?i)
%{$xmods} = $kid->xmerge_mods( $xmods, $hashmods );
}
#~ if( my $p = delete $xmods->{p} ){ ## TODO NOT WORKING
#~ $node->root->{xmods}{p} = $p; ## cause (?p)/p is global, propagates UP
#~ }
}
if( $kid->xmods_susceptible ){
delete $xmods->{o}; ## do not propagate, mods.o, error but not UNKNOWN , grrr
$kid->{xmods}={%{$xmods}};
}
PPIx::Regexp::xmods_propagate( $kid , $depth + 1, $xmods );
}
}
return;
}
 
###
sub PPIx::Regexp::Element::xmerge_mods {
my( $self , $old, $new ) = @_;
my %mods = %$old;
while( my( $mod, $on ) = each %$new ){
if( $on ){
$mods{$mod} = $on;
} else {
#~ $mods{$mod} = !!0; #off
$mods{$mod} = 0; #off
}
}
if( my $ms = $mods{match_semantics} ){
$mods{ $ms } ||= 1;
}
### ^ a d l u p match_semantics proper merging propagation
### if new ones, nuke existing ones
if( my @mss = grep { $new->{ $_ } } qw/ ^ a d l u / ){
delete @mods{ qw/ ^ a d l u / };
for my $ms ( @mss ){
if( my $msv = $new->{ $ms } ){ ## and propagate new one (cause ONE)
$mods{ $ms } = $msv;
#~ $mods{ match_semantics } = $ms;
$mods{ match_semantics } = $ms eq '^' ? 'd' : $ms;
last;
}
}
}
return wantarray ? %mods : \%mods;
}
 
## for ->can('modifiers') explodes {content} and explains all options, even the mistakes
## for non-can-modifiers explains xmods, but not UNKNOWNS (mistakes without descriptions)
## increments xfailures (grr)
sub xplain_modifiers {
my( $self , $OPTS ) = @_;
undef $OPTS if not ref $OPTS;
my @ret;
my $can_modifiers = $self->can('modifiers');
my $specialEEE = 0;
my %seen;
my %mods = eval { $self->xmods };
my @mods = keys %mods;
if( !%mods and $can_modifiers ){
my( $arraymods, $hashmods ) = $self->xmods_explode;
@mods = @$arraymods;
%mods = %$hashmods;
}
#~ (?adlupimsx-imsx) <<< perlre
#~ (?^alupimsx) <<< perlre
#~ (?^msixp..ual) <<< perlre
#~ (?msixp.dual-imsx) <<< perlre
#~ qr/STRING/msixpodual <<< perlop
#~ m/PATTERN/msixpodualgc <<< perlop
#~ m?PATTERN?msixpodualgc <<< perlop
#~ s/PATTERN/REPLACEMENT/msixpodualgcer <<< perlop
my @prefix = ( 'mods.', 'match_semantics.' ); ## for inline, for (?adlupimsx-imsx)
if( $self == eval { $self->root->modifier } ){
if( $self->root->is_substitute ){
@prefix = ( 'mods/s/', 'mods/', 'match_semantics.' , ); # s///eee
$specialEEE ++;
} else {
@prefix = ( 'mods/', 'match_semantics.', ); ## m//x qr//x
}
} else {
#~ delete @mods{qw/ p /}; ## (?p) is global like /p is global in 5.16, so JIC don't propagate this past root
#~ 2013-07-30-03:13:33 mistake is mistake, propagate it, don't explain it except in PPIx::Regexp::Token::Modifier
}
delete $mods{match_semantics}; ## not used by xplain_modifiers
 
if( grep { $self->isa($_) } qw/ PPIx::Regexp::Token::Literal PPIx::Regexp::Token::CharClass / ){
delete @mods{qw/ p m s x /};
## LITERALS are susceptible to ^ a d l u for case-insensitivity BUT NOT p m s x
}
 
#~ if( grep { $self->isa($_) } qw/ PPIx::Regexp::Token::CharClass::Simple / ){
#~ delete @mods{qw/ i /}; ## \d \w aren't susceptible to case , never case sensitive
#~ }
#~ \p{Latin} is susceptible, grrr
#~ \p{Latin} is perl_version_introduced=5.006001
#~ BUT \p{Latin} currently doesn't get called modifiers
 
#~ 2013-07-22-03:29:41 GUESSING!!!!
if( grep { $self->isa($_) } qw/ PPIx::Regexp::Token::Recursion / ){
#~ delete @mods{qw/ p match_semantics /};
#~ 2013-08-03-16:40:48
#~ $ perl -e " print int m{(lc(?i:(?1)|end)+)}smx for qw/ lcend lclcend lclcEND lcLCend /"
#~ 1110
undef %mods;
}
#~ 2013-08-03-16:43:40
## susceptible to /i , not /x, cause its a string-literal not a pattern
#~ $ perl -e " print int m{(lc)(?i:\1)} for qw/ lclc lcLC / "
#~ 11
### might interact somehow with a/aa that needs explaining
if( grep { $self->isa( $_ ) } qw/ PPIx::Regexp::Token::Backreference / ){
delete @mods{qw[ x m s p a d l u ]};
}
delete @mods{ eval{ @{ $OPTS->{GONERS} } } }; ###### m/\w/i
@mods = grep { exists $mods{$_ } } @mods;
### THE LEGITIMATE DOUBLES (/aa, /ee)
if( my $count = $mods{a} ){
if( $count > 1 ){
push @ret, $self->xplain_desc("match_semantics.aa" ); ## TODO more
}
}
if( $specialEEE and $can_modifiers and $mods{e} and 1 != ( my $count = $mods{e} ) ){
push @ret, $self->xplain_desc( 'mods/s/ee', $count-1 );
}
MODSLOOP:
for my $mod ( @mods ){
my $count = $mods{ $mod } ;
my $sufix = $count ? $mod : '-'.$mod;
next if not defined $count; ## exists $mods{ $mod } ## cause /(?-x:i)/x
for my $prefix( @prefix ){
next MODSLOOP if $seen{$mod};
if( my @desc = $self->xplain_desc( "$prefix$sufix" ) ){
push @ret, @desc;
$seen{$mod}++;
next MODSLOOP;
}
}
if( $can_modifiers and !exists$seen{$mod} ){
$self->root->{xfailures} ++;
push @ret, $self->xplain_desc( "$prefix[-2]unknown", $sufix, $sufix ); ## ICK!!!
}
} ## end MODSLOOP
if( $can_modifiers ){
for my $twice ( qw/ d l u / ){
if( exists $mods{$twice} and $mods{$twice} > 1 ){
push @ret, $self->xplain_desc("mods.nottwice", $twice ); ## perlbug ## $ perl -e " m/(?ad)/ "
}
}
if( $mods{a} ){
if( $mods{a} > 2 or int( grep { exists $mods{$_} } qw/ d l u / ) ){
push @ret, $self->xplain_desc("mods.twicemax", "a" ); ## $ perl -e " m/(?aaa)/ "
}
}
if( ( my @two = grep { $mods{$_} } qw/ a d l u / ) > 1 ){
$self->root->{xfailures}++; ## YUCK
while( @two > 1 ){
push @ret, $self->xplain_desc("mods.exclusive", @two[0,1] );
splice @two, 1,1;
}
}
}
return @ret;
} ## end of sub xplain_modifiers
 
 
#~ 2013-08-03-16:59:33
#~ todo sub ... xis_variable determines if fixed width pattern or variable
#~ todo sub ... xlength ditches structure/modifiers to return length of literals and charclasses
#~ todo sub ... xsets nodes/groups literals seperated by operators
#~ todo sub ... xis_fixed_width determines if fixed width
#~ todo sub ... xis_variable_width
#~ todo sub ... quantized
#~ todo sub ... quantified group quantified nodes, establish children/finish quantifier relationship,
#~ no extra indentation levels, no address changes???
#~ todo PPIx::Regexp::Structure::Quantized
#~ todo PPIx::Regexp::Structure::String (2+ literal tokens)
#~ todo PPIx::Regexp::Structure::Literals (2+ literal tokens)
 
#~ 2013-08-05-01:31:56 false positive on /(?<!a|(i:a))/
#~ 2013-08-11-03:08:48 false positive no more
sub PPIx::Regexp::Node::xis_fixed_width {
my( $self ) = @_;
my $is_variable = 0;
#~ $is_variable++ if $self->find_first( sub { return 1 if $_[1]->isa('PPIx::Regexp::Token::Quantifier') ; }, );
my $problems = $self->find( sub {
return 1 if grep { $_[1]->isa( $_ ) } qw/
PPIx::Regexp::Token::Quantifier
PPIx::Regexp::Structure::Quantifier
PPIx::Regexp::Token::Reference
/;
return 0;
},
);
$problems ||= [];
for my $prob ( @$problems ){
if( $prob->can('xn_comma_m') ){
my( $n, $comma, $m , @ncm ) = $prob->xn_comma_m;
if( $n != $m ){
$is_variable++;
}
} else {
$is_variable++;
}
}
$is_variable and return ! $is_variable; ## SAVE SOME WORK
 
#~ perl lolabe.pl -t -ddr " m/(?<!a|aa)E/" >2
#~ perl lolabe.pl -t -ddr " m/(?<!a?)E/" >2
#~ perl lolabe.pl -t -ddr " m/(?<!a{2,3})E/" >2
#~ perl lolabe.pl -t -ddr " m/(?<!a|aa)E/; m/(?<!a?)E/; m/(?<!a{2,3})E/; m/(?<=a|(?i:a)|a)/" >2
#~ perl lolabe.pl -t -ddr " m/(?<=a|(?i:a)|a)/" >2
#~ perl lolabe.pl -t -ddr " m/\d++\d{1,2}(?<=a|(?i:a)|a)/" >2
#~ perl lolabe.pl -t -ddr " m/(?<!a|(?-i:(?i:a)))(?<=a|(?i:a))/" >2
#~ 2013-08-11-02:29:24
#~ variable PPIx::Regexp::Token::Reference #~ $ perl -Mre=debug -wle "m/(.)(?<=a|\1)/"
#~ 2013-08-11-02:44:30 2013-08-11-03:04:35 gah incremental
#~ perl lolabe.pl -t -ddr " m/(?<!a|(?-i:(?i:a)))(?<=a|(?i:a))/" >2
#~ 2013-08-11-03:19:56
#~ typo #~ perl lolabe.pl -t -ddr " m/(?<!a|(?-i:(?i:(?[a]))))(?<=a|(?i:a))/" >2
#~ #~ perl lolabe.pl -t -ddr " m/(?<!a|(?-i:(?i:(?[[a]]))))(?<=a|(?i:a))/" >2
#~ #~ perl lolabe.pl -t -ddr " m/(?<!a|(?-i:(?i:(?[\w]))))(?<=a|(?i:a))/" >2
#~ #~ perl lolabe.pl -t -ddr " m/(?<!aa|(?[[a]])(?-i:(?i:(?[\w]))))(?<=a|(?i:a))/" >2
#~ 2013-08-11-03:40:23
#~ #~ perl lolabe.pl -t -ddr " m/(?<!aaa|\w(?[[a]])(?-i:(?i:(?[\w]))))(?<=a|(?i:a))/" >2
my @lengths = ( 0 );
#~ for my $kid ( $self->elements ){
for my $kid ( $self->children ){
if( $kid->isa('PPIx::Regexp::Token::Operator') ){ ## assume alteration
push @lengths, 0;
} else {
#~ $lengths[-1] += length( $kid->content );
$lengths[-1] += length( $kid->content ) - xstf_length( $kid );
#~ my $lc = length( $kid->content ) ;
#~ my $xc = xstf_length( $kid );
#~ $kid->{xlc}=$lc;
#~ $kid->{xxc}=$xc;
#~ $lengths[-1] += $lc - $xc;
#~ warn pp { damn =>scalar( $kid->content), lc => $lc , xc => $xc } ;
#~ warn pp( my $c = $kid->content );
#~ $lengths[-1] += length( $c );
}
}
$self->{xxls}=[@lengths];
#~ warn pp\@lengths;
while( @lengths > 1 ){
if( $lengths[-1] != $lengths[-2] ){
$is_variable++;
}
pop @lengths;
}
## grr, too many tokens can_be_quantified
#~ PPIx::Regexp::Token::Structure can_be_quantified even though its structural
#~ PPIx::Regexp::Token::Operator can_be_quantified even though its an OPERATOR
return ! $is_variable;
}
 
sub PPIx::Regexp::Structure::Assertion::xplain {
my( $self, %args ) = @_;
delete $args{no_elements};
my $ret = $self->PPIx::Regexp::Structure::xplain( %args );
if( not $self->xis_fixed_width ){
push @{$$ret{start}}, $self->xplain_desc('errn'.$self->type->content);
}
return $ret;
}
 
#~ dodgy , force unicode if: 1) string is utf8 2) pattern is utf8 3) pattern mentions codepoint above 255 4)pat uses unicode name \N{} 5)pat uses unicode property \p{} 6) pat uses RegexSet (?[ ])
#~ force unicode semantics if
#~ #~ 1 the target string is encoded in UTF-8; or
#~ #~ 2 the pattern is encoded in UTF-8; or
#~ #~ 3 the pattern explicitly mentions a code point that is above 255 (say by \x{100} ); or
#~ #~ 4 the pattern uses a Unicode name (\N{...} ); or
#~ #~ 5 the pattern uses a Unicode property (\p{...} ); or
#~ #~ 6 the pattern uses (?[ ])
sub xdodgy_unicode_override {
my( $self ) = @_;
#~ perl lolabe.pl -t -ddr "m/\pN\p{N}\x{100}\N{Kelvin}/d" >2
my $unicode = 0;
my %why;
$self->find( sub {
#~ $unicode += grep { $_[1]->isa( $_ ) } qw/ PPIx::Regexp::Structure::RegexSet / ;
if( $_[1]->isa('PPIx::Regexp::Structure::RegexSet') ){
$unicode++;
$why{'dodgy-u-rset'}++;
}elsif( $_[1]->isa('PPIx::Regexp::Token::Literal') ){
my $ord = $_[1]->ordinal;
#~ if( not defined $ord or $ord > 255 or ( $_[1]->{content} =~ m{^\\[pPN]} ) ){
if( defined $ord and $ord > 255 ){
$unicode++;
#~ push @why, 'dodgy-u-255';
$why{'dodgy-u-255'}++;
}elsif( $_[1]->{content} =~ m{^\\[pP]} ){
$unicode++;
$why{'dodgy-u-prop'}++;
}elsif( $_[1]->{content} =~ m{^\\[N]} ){
$why{'dodgy-u-name'}++;
$unicode++;
}
}elsif( $_[1]->isa('PPIx::Regexp::Token::CharClass::Simple') ){
if( my $con = eval{$self->{content}} ){
if( $con =~ /^\\[pP]\{/ ){
$why{'dodgy-u-prop'}++;
$unicode++;
}
}
}
return 0;
} );
## pattern encoded in UTF-8 check
#~ return $unicode ;
return keys %why;
}
1;
 
sub PPIx::Regexp::Token::GroupType::NamedCapture::xplain {
my( $self, %args ) = @_;
return {
depth => $args{depth},
start => [
'address='. $self->address,
$self->xplain_desc( ref $self),
],
start_con => Data::Dump::pp( $self->content ).',',
start_hr => join '', "# ", '-' x ( $args{hr_length} || 66 ),
};
}
 
 
 
#~ 2013-08-11-01:50:27
#~ PPIx::Regexp::Token::Structure
#~ PPIx::Regexp::Token::GroupType
#~ sub is_start { $_[0]->address =~ m{/[S]\d+$}i }
#~ sub is_type { $_[0]->address =~ m{/[T]\d+$}i }
#~ sub is_finish { $_[0]->address =~ m{/[F]\d+$}i }
#~ 2013-08-11-01:57:59
## cant use Find because of PPIx::Regexp::Structure::Quantifier \d{2,3} 2,3 are contents
## and don't want to think about that
#~ 2013-08-11-02:11:54
#~ stf PPIx::Regexp::Structure::Assertion
#~ stf PPIx::Regexp::Structure::BranchReset
#~ stf PPIx::Regexp::Structure::Capture
#~ C #~ stf+operators PPIx::Regexp::Structure::CharClass
#~ c PPIx::Regexp::Structure::Code
#~ 0E PPIx::Regexp::Structure::Main
#~ stf PPIx::Regexp::Structure::Modifier
#~ c #~ stf PPIx::Regexp::Structure::Quantifier
#~ stf PPIx::Regexp::Structure::Subexpression
#~ stf PPIx::Regexp::Structure::Switch
#~ stf PPIx::Regexp::Structure::Unknown
#~ 2013-08-11-03:18:57
#~ c0 PPIx::Regexp::Structure::RegexSet
##
#~ sspm PPIx::Regexp::Structure::Assertion PPIx::Regexp::Structure::BranchReset PPIx::Regexp::Structure::Capture PPIx::Regexp::Structure::CharClass PPIx::Regexp::Structure::Code PPIx::Regexp::Structure::Main PPIx::Regexp::Structure::Modifier PPIx::Regexp::Structure::Quantifier PPIx::Regexp::Structure::Subexpression PPIx::Regexp::Structure::Switch PPIx::Regexp::Structure::Unknown
##
#~ 2013-08-11-02:32:41
##
#~ s length of start
#~ t length of type
#~ f length of finish
#~ c length of content
#~ 0 length 0
#~ Q length is VARIABLE (ITS A QUANTIFIER)
#~ E error (shouldn't encounter this)
##
#~ first letter is length to be removed to be subtracted to obtain real-literal-fixed-width-length
#~ second letter is length to be added to obtain real-literal-fixed-width-length
#~ E is an error means its not fixed-width or its impossible situation so forget it
##
#~ c0 PPIx::Regexp::Token::Assertion
#~ c0 PPIx::Regexp::Token::Backtrack
#~ cQ PPIx::Regexp::Token::CharClass
#~ cE PPIx::Regexp::Token::Code
#~ c0 PPIx::Regexp::Token::Comment
#~ c0 PPIx::Regexp::Token::Control
#~ cEQ PPIx::Regexp::Token::Greediness
#~ c0 PPIx::Regexp::Token::GroupType
#~ c1 PPIx::Regexp::Token::Literal
#~ cE PPIx::Regexp::Token::Modifier
#~ c0 PPIx::Regexp::Token::Operator
#~ cE PPIx::Regexp::Token::Quantifier
#~ cE PPIx::Regexp::Token::Reference
#~ c0 PPIx::Regexp::Token::Structure
#~ c0 PPIx::Regexp::Token::Unknown
#~ cE PPIx::Regexp::Token::Unmatched
#~ c0 PPIx::Regexp::Token::Whitespace
#~ 2013-08-11-03:42:02
#~ c1 PPIx::Regexp::Token::CharClass::Simple
##
#~ sspm PPIx::Regexp::Token::Assertion PPIx::Regexp::Token::Backtrack PPIx::Regexp::Token::CharClass PPIx::Regexp::Token::Code PPIx::Regexp::Token::Comment PPIx::Regexp::Token::Control PPIx::Regexp::Token::Greediness PPIx::Regexp::Token::GroupType PPIx::Regexp::Token::Literal PPIx::Regexp::Token::Modifier PPIx::Regexp::Token::Operator PPIx::Regexp::Token::Quantifier PPIx::Regexp::Token::Reference PPIx::Regexp::Token::Structure PPIx::Regexp::Token::Unknown PPIx::Regexp::Token::Unmatched PPIx::Regexp::Token::Whitespace
#~ 2013-08-11-02:39:11
#~ length of structural elements to be removed
#~ to be subtracted from length of content
#~ to obtain real-literal-fixed-width-length
sub xstf_length {
my( $node , $depth ) = @_;
$depth ||= 0;
my $length = 0;
my @kids = eval { $node->elements };
if( not @kids ){
@kids = map { eval { $node->$_ } } qw{ start type children finish };
}
my $count_content = grep {
!! $node->isa($_)
} qw'
PPIx::Regexp::Structure::CharClass
PPIx::Regexp::Structure::Code
PPIx::Regexp::Structure::Quantifier
PPIx::Regexp::Structure::RegexSet
';;;;
 
my $count_once = grep {
!! $node->isa($_)
} qw'
PPIx::Regexp::Structure::CharClass
PPIx::Regexp::Structure::RegexSet
PPIx::Regexp::Token::CharClass
PPIx::Regexp::Token::Literal
';;;;
if( $count_once and not @kids and not $count_content ){ ## ICK! \w has length 2 but quantifies as 1
return length( $node->content )-1; ## \x{} has length > 2 but quantifies as 1
}
if( $count_content ){
return $length + length( $node->content ) - $count_once; ## cause charclasses/regexsets quantify as 1
}
for my $kid ( @kids ){
if( $kid->address =~ m{/[STF]\d+\s$}i ){
$length += length $kid->content;
#~ } elsif( $kid->children ) { ## inadequate
} elsif( $kid->isa('PPIx::Regexp::Structure') ) { ## should be stf type
$length += xstf_length( $kid, $depth+1 );
}
}
return $length;
} ## end of sub xstf_length
#~ self-fulfilling enlightenment or gratifying ignorance.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Something went wrong with that request. Please try again.