Skip to content

Instantly share code, notes, and snippets.

Created November 5, 2013 19:37
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save anonymous/7324838 to your computer and use it in GitHub Desktop.
Save anonymous/7324838 to your computer and use it in GitHub Desktop.
PPIx::Regexp::xplain because YAPE::Regex::Explain is dead; file ppixregexplain.pl needs _desc.pl
our %desc = (
"PPIx::Regexp::Node" =>
[
"xRe::Node",
"a container",
#~ "PPIx::Regexp::Dumper->new( 'qr{(foo)}' )->print",
],
"PPIx::Regexp::Node::Range" =>
[
"xRe::Node::Range",
"a character range in a character class",
],
"PPIx::Regexp::Structure" =>
[
"xRe::Structure",
"a structure.",
],
"PPIx::Regexp::Structure::Assertion" =>
[
"xRe::Structure::Assertion",
"a parenthesized assertion ",
"(the grouptype below explains which one)", ##nah## TODO?!?! explain it here
],
"PPIx::Regexp::Structure::BranchReset" =>
[
"xRe::Structure::BranchReset",
"a branch reset group",
],
"PPIx::Regexp::Structure::Capture" =>
[
"xRe::Structure::Capture",
"Represent capture parentheses.",
],
"PPIx::Regexp::Structure::CharClass" =>
[
"xRe::Structure::CharClass",
"a character class",
],
"PPIx::Regexp::Structure::Code" =>
[
"xRe::Structure::Code",
"Represent one of the code structures.",
'WARNING: This extended regular expression feature is considered experimental, and may be changed without notice',
],
'(?p{ code })' => [
'',
#~ http://perl5.git.perl.org/perl.git/commit/14455d6cc193f1e4316f87b9dbe258db24ceb714
#~ /(?p{})/ changed to /(??{})/, per Larry's suggestion
#~ (?p{}) has been deprecated for a long time.
'warning (?p{}) has been removed Use (??{}) instead. L<http://search.cpan.org/dist/perl/pod/perl5100delta.pod#%28?p{}%29_has_been_removed>',
q{L<http://search.cpan.org/dist/perl/pod/perlre.pod#%28??{_code_}%29|perlre/(??{ code })>},
q{This is a "postponed" regular subexpression. This zero-width assertion executes any embedded Perl code. It always succeeds, and that its return value, rather than being assigned to $^R, is treated as a pattern, compiled if it's a string (or used as-is if its a qr// object), then matched as if it were inserted instead of this construct.},
],
'(??{ code })' => [
q{This is a "postponed" regular subexpression. This zero-width assertion executes any embedded Perl code. It always succeeds, and that its return value, rather than being assigned to $^R, is treated as a pattern, compiled if it's a string (or used as-is if its a qr// object), then matched as if it were inserted instead of this construct.},
q{L<http://search.cpan.org/dist/perl/pod/perlre.pod#%28??{_code_}%29>},
q{L<perlre/(??{ code })>},
],
'(?{ code })' => [
'This zero-width assertion executes any embedded Perl code. It always succeeds, and its return value is set as $^R',
'L<http://search.cpan.org/dist/perl/pod/perlre.pod#(?{_code_})>',
'L<perlre/(?{ code })>',
'L<perlvar/$^R>',
],
"PPIx::Regexp::Structure::Main" =>
[
"xRe::Structure::Main",
"a regular expression proper, or a substitution",
],
"PPIx::Regexp::Structure::Modifier" =>
[
"xRe::Structure::Modifier",
"Represent modifying parentheses",
"group but do not capture; Basic clustering",
],
"PPIx::Regexp::Structure::NamedCapture" =>
[
"xRe::Structure::NamedCapture",
"a named capture",
'L<<< perlre/(?<NAME>pattern) >>>',
'L<perlvar/%+>',
],
"PPIx::Regexp::Structure::Quantifier" =>
[
"xRe::Structure::Quantifier",
"Represent curly bracket quantifiers",
],
"PPIx::Regexp::Structure::RegexSet" =>
[
"xRe::Structure::RegexSet",
"a regexp character set",
'L<perlre/(?[ ])>',
'L<perlrecharclass/Extended Bracketed Character Classes>',
'L<http://search.cpan.org/dist/perl-5.18.0/pod/perlrecharclass.pod#Extended_Bracketed_Character_Classes>',
'no warnings "experimental::regex_sets";',
],
"PPIx::Regexp::Structure::Regexp" =>
[
"xRe::Structure::Regexp",
"Represent the top-level regular expression",
],
"PPIx::Regexp::Structure::Replacement" =>
[
"xRe::Structure::Replacement",
"Represent the replacement in s///",
],
"PPIx::Regexp::Structure::Subexpression" =>
[
"xRe::Structure::Subexpression",
"Represent an independent subexpression",
## redundant
'(?>pattern)',
'L<perlre/(?>pattern)>',
'It may also be useful in places where the "ratchet" or',
'"grab all you can, and do not give anything back" semantic is desirable.',
],
"PPIx::Regexp::Structure::Switch" =>
[
"xRe::Structure::Switch",
"a switch",
#~ 'L<perlre/(?(condition)yes-pattern|no-pattern)>',
#~ 'L<http://perldoc.perl.org/perlre.html#%28?%28condition%29yes-pattern%7Cno-pattern%29|perlre/(?(condition)yes-pattern|no-pattern)>',
'L<perlre/(?(condition)yes-pattern)>',
'L<http://p3rl.org/perlre#(?(condition)yes-pattern|no-pattern)>',
],
"PPIx::Regexp::Structure::Unknown" =>
[
"xRe::Structure::Unknown",
"Represent an unknown structure. (ERROR!TYPO!NONSENSE!)",
#~ "the following tokens aren't what you wanted",
## TODO be clever like perl?!? TODO ASK or TEST if I can assume this?
#~ $ perl -we " qr{(?(foo)bar|baz|burfle)}smx "
#~ Unknown switch condition (?(fo in regex; marked by <-- HERE in m/(?( <-- HERE foo)bar|baz|burfle)/ at -e line 1.
"ERROR die Unknown switch condition in regex;",
"for valid conditions see L<perlre/(?(condition)yes-pattern)>",
#~ THIS IS A LIE :) TODO REPORT BUG C<PPIx::Regexp::Structure::Unknown> has no descendants.
],
"PPIx::Regexp::Token::Assertion" =>
[
"xRe::Token::Assertion",
#~ "a simple assertion.",
#~ "a simple assertion (ex: \\A \\Z \\G ...).",
#~ "simple zero-width assertion (ex: \\A \\Z \\G ...).",
"simple zero-width assertion (zero-length, between pos()itions)",
],
"PPIx::Regexp::Token::Backreference" =>
[
"xRe::Token::Backreference",
"a back reference",
'L<perlglossary/backreference>',
#~ TODO REPORT BUG \g10 UNRECOGNIZED AS A REFERENCE misparsed as PPIx::Regexp::Token::Literal
#~ TODO BACKREFERENCE
#~ \k'NAME'
#~ /(.)(.)(.)(.)(.)(.)(.)(.)(.)\g10/ # \g10 is a backreference
#~ /(.)(.)(.)(.)(.)(.)(.)(.)(.)\10/ # \10 is octal
#~ /((.)(.)(.)(.)(.)(.)(.)(.)(.))\10/ # \10 is a backreference
#~ /((.)(.)(.)(.)(.)(.)(.)(.)(.))\010/ # \010 is octal
],
"PPIx::Regexp::Token::Backtrack" =>
[
"xRe::Token::Backtrack",
"Represent backtrack control.",
'L<perlre/Special Backtracking Control Verbs>',
'WARNING: These patterns are experimental and subject to change or removal in a future version of Perl.',
'Their usage in production code should be noted to avoid problems during upgrades.',
],
"PPIx::Regexp::Token::CharClass" =>
[
"xRe::Token::CharClass",
"a character class",
],
"PPIx::Regexp::Token::CharClass::POSIX" =>
[
"xRe::Token::CharClass::POSIX",
"a POSIX character class",
],
"PPIx::Regexp::Token::CharClass::POSIX::Unknown" =>
[
"xRe::Token::CharClass::POSIX::Unknown",
"Represent an unknown or unsupported POSIX character class",
],
"PPIx::Regexp::Token::CharClass::Simple" =>
[
"xRe::Token::CharClass::Simple",
"This class represents a simple character class", ## TODO IMPROVE? REMOVE?
],
"PPIx::Regexp::Token::Code" =>
[
"xRe::Token::Code",
"a chunk of Perl embedded in a regular expression.",
],
"PPIx::Regexp::Token::Comment" =>
[
"xRe::Token::Comment",
#~ "a comment.",
],
"PPIx::Regexp::Token::Condition" =>
[
"xRe::Token::Condition",
"Represent the condition of a switch",
'Checks if a specific capture group (or pattern) has matched something.',
],
"PPIx::Regexp::Token::Control" =>
[
"xRe::Token::Control",
"Case and quote control.",
'L<perlre/\F\l\u\L\U\Q\E>',,
#~ "PPIx::Regexp::Dumper->new( 'qr{\\Ufoo\\E}smx' )->print",
],
"PPIx::Regexp::Token::Delimiter" =>
[
"xRe::Token::Delimiter",
"Represent the delimiters of the regular expression",
],
"PPIx::Regexp::Token::Greediness" =>
[
"xRe::Token::Greediness",
"a greediness qualifier.",
#~ "PPIx::Regexp::Dumper->new( 'qr{foo*+}smx' )->print",
],
"PPIx::Regexp::Token::GroupType" =>
[
"xRe::Token::GroupType",
"a grouping parenthesis type.",
#~ "PPIx::Regexp::Dumper->new( 'qr{(?i:foo)}smx' )->print",
],
"PPIx::Regexp::Token::GroupType::Assertion" =>
[
"xRe::Token::GroupType::Assertion",
"a look ahead or look behind assertion",
#~ "PPIx::Regexp::Dumper->new( 'qr{foo(?=bar)}smx' )->print",
],
"PPIx::Regexp::Token::GroupType::BranchReset" =>
[
"xRe::Token::GroupType::BranchReset",
"a branch reset specifier",
"L<perlre/(?|pattern)>",
"L<perlre/(?E<verbar>pattern)>",
"capture groups are numbered from the same starting point in each alternation branch",
#~ "PPIx::Regexp::Dumper->new( 'qr{(?|(foo)|(bar))}smx' )->print",
],
"PPIx::Regexp::Token::GroupType::Code" =>
[
"xRe::Token::GroupType::Code",
"Represent one of the embedded code indicators",
],
"PPIx::Regexp::Token::GroupType::Modifier" =>
[
"xRe::Token::GroupType::Modifier",
"Represent the modifiers in a modifier group.",
#~ "PPIx::Regexp::Dumper->new( 'qr{(?i:foo)}smx' )->print",
],
"PPIx::Regexp::Token::GroupType::NamedCapture" =>
[
"xRe::Token::GroupType::NamedCapture",
#~ "a named capture",
#~ 'L<perlre/(?<NAME>pattern)>',
#~ 'L<<< perlre/(?<NAME>pattern) >>>',
##'L<perlre/(?&NAME)>', ## not this
#~ 'L<perlvar/%+>', ## redundant
#~ "PPIx::Regexp::Dumper->new( 'qr{(?<baz>foo)}smx' )->print",
"",
],
"PPIx::Regexp::Token::GroupType::Subexpression" =>
[
"xRe::Token::GroupType::Subexpression",
"Represent an independent subexpression marker",
## redundant
#~ '(?>pattern)',
#~ 'It may also be useful in places where the "ratchet" or',
#~ '"grab all you can, and do not give anything back" semantic is desirable.',
#~ "PPIx::Regexp::Dumper->new( 'qr{foo(?>bar)}smx' )->print",
],
"PPIx::Regexp::Token::GroupType::Switch" =>
[
"xRe::Token::GroupType::Switch",
"Represent the introducing characters for a switch",
#~ "PPIx::Regexp::Dumper->new( 'qr{(?(1)foo|bar)}smx' )->print",
## TODO be clever like perl?!? TODO ASK or TEST if I can assume this?
#~ $ perl -we " qr{(?(foo)bar|baz|burfle)}smx "
#~ Unknown switch condition (?(fo in regex; marked by <-- HERE in m/(?( <-- HERE foo)bar|baz|burfle)/ at -e line 1.
#~ "die Unknown switch condition in regex;",
"for valid conditions see L<perlre/(?(condition)yes-pattern)>",
],
"PPIx::Regexp::Token::Interpolation" =>
[
"xRe::Token::Interpolation",
"Represent an interpolation in the PPIx::Regexp package.",
#~ 'It is a variable! whose contents are used as a pattern, subject to \F\l\u\L\U\Q\E',
'It is a variable! subject to \F\l\u\L\U\Q\E',
#~ and L<perlre/Modifiers>',
'L<perlre/\F\l\u\L\U\Q\E>',
#~ 'L<perlre/\Q\U\E\F\u\l\L>',
#~ 'L<perlre/\Q\U\E\L\F\u\l>', #<<<
#~ 'subject to L<perlre/Modifiers>',
#~ "PPIx::Regexp::Dumper->new('qr{\$foo}smx')->print",
],
#~ 2013-08-05-04:22:50
"PPIx::Regexp::Token::Interpolation-Regexp" => ['It is a variable! whose contents are used as a pattern ','subject to L<perlre/Modifiers>'],
"PPIx::Regexp::Token::Interpolation-Substitution" => ['It is a variable! whose contents are used as a REPLACEMENT string'],
"PPIx::Regexp::Token::Literal" =>
[
"xRe::Token::Literal",
"a literal character",
#~ "PPIx::Regexp::Dumper->new( 'qr{foo}smx' )->print",
],
#~ $ perl -Mre=debug -wle " $_ = q{aBaBaBaB}; m{((?i)ab)ab}"
"token_modifier_propagates_right" => "These modifiers PROPAGATE to the right for the remainder of the pattern, or the remainder of the enclosing pattern group (if any). Ex: /((?i)CASeINSeNSITIVeHeRe)CASESENSITIVE/",
"PPIx::Regexp::Token::Modifier" =>
[
"xRe::Token::Modifier",
#~ "Represent (trailing) modifiers.",
#~ "Modifier for one of these operators: match(m//) or substitution (s///) or regexp-constructor (qr//)",
#~ "Represent (trailing) modifiers for match, substitution, regexp constructor",
"Represent 1)embedded pattern-match modifiers or 2)(trailing) modifiers for operators match, substitution, regexp constructor ",
#~ "Represent (trailing) modifiers for m// s/// qr///", ## 2013-06-20-08:02:53 dontwanna
### TODO qr// needs better name than regex compile
#~ 2013-06-12-03:55:26 Regexp constructor sounds good
#~ "PPIx::Regexp::Dumper->new( 'qr{foo}smx' )->print",
],
"PPIx::Regexp::Token::Operator" =>
[
"xRe::Token::Operator",
"Represent an operator.",
#~ "PPIx::Regexp::Dumper->new( 'qr{foo|bar}smx' )->print",
],
"PPIx::Regexp::Token::Quantifier" =>
[
"xRe::Token::Quantifier",
"Represent an atomic quantifier.",
],
"PPIx::Regexp::Token::Recursion" =>
[
"xRe::Token::Recursion",
"a recursion",
#~ "(?PARNO)",
#~ "L<perlre/(?PARNO)>", ## perlre problematic
"L<perlre/(?PARNO) (?-PARNO) (?+PARNO) (?R) (?0)>", ## perlre problematic
#~ "PPIx::Regexp::Dumper->new( 'qr{(foo(?1)?)}smx' )->print",
],
"PPIx::Regexp::Token::Reference" =>
[
"xRe::Token::Reference",
"a reference to a capture",
#~ "PPIx::Regexp::Dumper->new( 'qr{\\1}smx' )->print",
],
"PPIx::Regexp::Token::Structure" =>
[
"xRe::Token::Structure",
"Represent structural elements.",
#~ 'Represent structural elements. (like "[","]", "{","}" "(",")" delimiters)', ## m too
#~ ' ',
],
"PPIx::Regexp::Token::Unknown" =>
[
"xRe::Token::Unknown",
"Represent an unknown token (A FAILURE; AN ERROR)",
],
"PPIx::Regexp::Token::Unmatched" =>
[
"xRe::Token::Unmatched",
"Represent an unmatched right bracket (a TYPO!)",
],
"PPIx::Regexp::Token::Whitespace" =>
[
"xRe::Token::Whitespace",
"Represent whitespace",
],
'?u' => [
"according to unicode semantics",
],
'?d' => [
'according to "Depends" or "Dodgy" or "Default" semantics',
],
'?a' => [
'according to ASCII-restrict (or ASCII-safe) semantics',
],
'?aa' => [
'according to stricter-ASCII-restrict (or stricter-ASCII-safe) semantics',
],
'*' => 'match preceding pattern 0 or more times',
'+' => 'match preceding pattern 1 or more times',
#~ "PPIx::Regexp::Token::Quantifier".'+' => 'match preceding pattern 1 or more times', ## didn't work, has to do
'?' => 'match preceding pattern 0 or 1 times; is optional',
#~ 2013-06-20-02:39:00
most_possible => '(matching the most amount possible)',
least_possible => '(matching the least amount possible)',
only_last_n => 'WARNING only the LAST repetition of the captured pattern will be stored in %%%%',
## greediness
#~ '+?' => ["and matching (preceding pattern) the least amount possible",'Match shortest string first'],
#~ '++' => ["and give nothing back (ratchet);","modifies preceding quantifier so preceding pattern doesn't backtrack", 'Match longest string and give nothing back'],
"PPIx::Regexp::Token::Greediness".'?' => ["and matching (preceding pattern) the least amount possible",'Match shortest string first'],
"PPIx::Regexp::Token::Greediness".'+' => ["and give nothing back (ratchet);","modifies preceding quantifier so preceding pattern doesn't backtrack", 'Match longest string and give nothing back'],
#~ 2013-06-13-02:59:39
"\\d" => [
"\\d Match a decimal digit character.",
"[0-9]",
"L<perldebguts/DIGIT>",
],
"\\D" => [
"\\D Match a non-decimal-digit character.",
"L<perldebguts/NDIGIT>",
'Match not "[0-9]" meaning match "[^0-9]";',
],
"\\Da" => [
"\\D Match a non-decimal-digit character (/a ASCII-restrict semantics).",
"L<perldebguts/NDIGITA>",
'Match not "[0-9]" meaning match "[^0-9]";',
],
"\\da" => [
#~ "\\d Match a decimal digit character.",
"\\d Match a decimal digit character (/a ASCII-restrict semantics).",
"Match exactly [0-9]",
"L<perldebguts/DIGITA>",
],
"\\Dl" => [
"\\D Match a non-decimal-digit character.",
"L<perldebguts/NDIGITL>",
],
"\\dl" => [
"\\d Match a decimal digit character.",
"L<perldebguts/DIGITL>",
],
"\\du" => [
"Match a decimal digit character.",
"matches exactly what \\p{Digit} matches.",
],
#~ "\\H" => "\\H Match a character that isn't horizontal whitespace.",
#~ "\\H" => "\\H Match a character that is NOT horizontal whitespace.",
#~ "\\h" => "\\h Match a horizontal whitespace character.",
"\\H" => "\\H Match a character that is NOT horizontal whitespace. \\P{HorizSpace} or [^\\N{U+0009}\\N{U+0020}\\N{U+00A0}\\N{U+1680}\\N{U+180E}\\N{U+2000}-\\N{U+200A}\\N{U+202F}\\N{U+205F}\\N{U+3000}]",
"\\h" => "\\h Match a horizontal whitespace character. \\p{HorizSpace} or [\\N{U+0009}\\N{U+0020}\\N{U+00A0}\\N{U+1680}\\N{U+180E}\\N{U+2000}-\\N{U+200A}\\N{U+202F}\\N{U+205F}\\N{U+3000}]",
#~ 2013-06-14-18:16:35
#~ http://perl5.git.perl.org/perl.git/blob?f=regcomp.c#l9910
"PPIx::Regexp::Token::Unknown"."\\N" => 'ERROR die \N in a character class must be a named character: \N{...} in regex;',
#~ http://perl5.git.perl.org/perl.git/blob?f=regexec.c#l3673
#~ http://perl5.git.perl.org/perl.git/blob?f=regexec.c#l6688
"\\N" => [
#~ "\\N Match a character that isn't a newline (\\n). Experimental.",
"\\N Match a character that is NOT a newline (\\n). Experimental.",
'L<perldebguts/REG_ANY>',
],
"\\R" => [
"generic newline;",
"anything considered a linebreak sequence by Unicode;",
"L<perlrecharclass/Backslash Sequences>",
"anything that can be considered a newline under Unicode",
"[\\x{000A}\\x{000C}\\x{000D}\\x{0085}\\x{2028}\\x{2029}]",
#~ http://perl5.git.perl.org/perl.git/blob?f=regcharclass.h
'LNBREAK: Line Break: \R',
'\p{VertSpace} and "\x0D\x0A" # CRLF - Network (Windows) line ending',
],
"\\sa" => "\\s Match a whitespace character. ASCII-restrict; Match exactly [ \\f\\n\\r\\t] (and in perl5.18 vertical tab chr(11))",
"\\s" => "\\s Match a whitespace character.",
"\\S" => "\\S Match a non-whitespace character.",
"\\Sa" => "\\S Match a non-whitespace character. ASCII-restrict; Match exactly [^ \\f\\n\\r\\t] (and in perl5.18 NOT vertical tab chr(11))",
#~ "\\v" => "\\v Match a vertical whitespace character.",
"\\v" => "\\v Match a vertical whitespace character. \\p{VertSpace} or [\\N{U+000A}-\\N{U+000D}\\N{U+0085}\\N{U+2028}-\\N{U+2029}]",
#~ "\\V" => "\\V Match a character that isn't vertical whitespace.",
#~ "\\V" => "\\V Match a character that is NOT vertical whitespace.",
"\\V" => "\\V Match a character that is NOT vertical whitespace. \\P{VertSpace} or [^\\N{U+000A}-\\N{U+000D}\\N{U+0085}\\N{U+2028}-\\N{U+2029}]",
"\\W" => [
"\\W Match a non-\"word\" character.",
"L<perldebguts/NALNUM>",
"L<perlrecharclass/\\W>",
"\\W matches not [a-zA-Z0-9_] meaning [^a-zA-Z0-9_].",
],
"\\Wa" => [
"\\W Match a non-\"word\" character.",
"L<perldebguts/NALNUMA>",
"L<perlrecharclass/\\W>",
"\\W matches not [a-zA-Z0-9_] meaning [^a-zA-Z0-9_].",
],
"\\Wl" => [
"\\W Match a non-\"word\" character; according to use locale;",
"L<perlrecharclass/\\W>",
"L<perldebguts/NALNUML>",
],
"\\Wu" => [
"\\W Match a non-\"word\" character; according to unicode semantics",
"L<perlrecharclass/\\W>",
"\\W matches exactly what \\P{Word} matches (not \\p{Word}).",
"L<perldebguts/NALNUMU>",
],
"\\w" => [
"\\w Match a \"word\" character.",
"L<perlrecharclass/\\w>",
"\\w matches [a-zA-Z0-9_].",
"L<perldebguts/ALNUM>",
],
"\\wa" => [
"\\w Match a \"word\" character. (?a:\w)",
"L<perlrecharclass/\\w>",
"\\w matches [a-zA-Z0-9_].",
"L<perldebguts/ALNUMA>",
],
"\\wl" => [
"\\w Match a \"word\" character; according to use locale; (?l:\w)",
"L<perlrecharclass/\\w>",
"L<perldebguts/ALNUML>",
],
"\\wu" => [
"\\w Match a \"word\" character; according to unicode semantics; (?u:\w)",
"L<perlrecharclass/\\w>",
"\\w matches exactly what \\p{Word} matches.",
"L<perldebguts/ALNUMU>",
],
'.' => 'any character except \n',
#~ '.s' => 'any character (including \n)' . join( ' aka ', '', '[\w\W]', '[\s\S]' , '[\d\D]', '\p{All}' ),
'.s' => 'any character (including \n)' . join( ' alias ', '', '[\w\W]', '[\s\S]' , '[\d\D]', '\p{All}' ),
#~ 2013-06-13-03:39:54
'[:alpha:]' => [ 'letters', '\p{PosixAlpha}',],
'[:alnum:]' => ['letters and digits','\p{PosixAlpha}'],
'[:ascii:]' => ['all ASCII characters (\000 - \177)', ],
'[:cntrl:]' => ['control characters (those with ASCII values less than 32)',, '\p{PosixCntrl}', ],
'[:digit:]' => ['digits (like \d)',, '\p{PosixDigit}', ],
'[:graph:]' => ['alphanumeric and punctuation characters',, '\p{PosixGraph}', ],
'[:lower:]' => ['lowercase letters',, '\p{PosixLower}', ],
'[:print:]' => ['alphanumeric, punctuation, and whitespace characters',, '\p{PosixPrint}', ],
'[:punct:]' => ['punctuation characters',, '\p{PosixPunct}', ],
'[:space:]' => ['whitespace characters (like \s)',, '\p{PosixSpace}', ],
'[:upper:]' => ['uppercase letters',, '\p{PosixUpper}', ],
'[:word:]' => ['alphanumeric and underscore characters (like \w)',, '\p{PosixWord}', ],
'[:xdigit:]' => ['hexadecimal digits (a-f, A-F, 0-9)',, '\p{Posix}', ],
"(*ACCEPT)" => [
'WARNING: This feature is highly experimental. It is not recommended for production code.',
"(*ACCEPT) Causes match to succeed at the point of the (*ACCEPT)",
'L<perlre/(*ACCEPT)>',
],
"(*COMMIT)" => ["(*COMMIT) Causes match failure when backtracked into on failure",'L<perlre/(*COMMIT)>',],
"(*ACCEPT:NAME)" => ["ERROR die Verb pattern 'ACCEPT' may not have an argument in regex;",],
"(*COMMIT:NAME)" => ["ERROR die Verb pattern 'COMMIT' may not have an argument in regex;",],
"(*F:NAME)" => ["ERROR die Verb pattern 'F' may not have an argument in regex;",],
"(*FAIL:NAME)" => ["ERROR die Verb pattern 'FAIL' may not have an argument in regex;",],
"(*F)" => ["(*FAIL) Always fails, forcing backtrack", 'L<perlre/(*FAIL) (*F)>', ],
"(*FAIL)" => ["(*FAIL) Always fails, forcing backtrack", 'L<perlre/(*FAIL) (*F)>', ],
#~ "(*MARK)" => ["(*MARK) Name branches of alternation, target for (*SKIP)",'L<perlre/(*MARK) (*MARK:NAME)>',],
"(*MARK)" => ["(*MARK) Name branches of alternation, target for (*SKIP)",'L<perlre/(*MARK:NAME) (*:NAME)>',],
"(*MARK:NAME)" => ["(*MARK:NAME) Name branches of alternation, target for (*SKIP)",'L<perlre/(*MARK:NAME) (*:NAME)>',],
"(*PRUNE)" => ["(*PRUNE) Prevent backtracking past here on failure",'L<perlre/(*PRUNE) (*PRUNE:NAME)>', ],
"(*PRUNE:NAME)" => ["(*PRUNE:NAME) Prevent backtracking past here on failure",'L<perlre/(*PRUNE) (*PRUNE:NAME)>', ],
"(*SKIP)" => ["(*SKIP) Like (*PRUNE) but also discards match to this point", 'L<perlre/(*SKIP) (*SKIP:NAME)>',],
"(*SKIP:NAME)" => ["(*SKIP:NAME) Like (*PRUNE) but also discards match to this point", 'L<perlre/(*SKIP) (*SKIP:NAME)>',],
"(*THEN)" => ["(*THEN) Forces next alternation on failure", 'L<perlre/(*THEN) (*THEN:NAME)>',],
"(*THEN:NAME)" => ["(*THEN:NAME) Forces next alternation on failure", 'L<perlre/(*THEN) (*THEN:NAME)>',],
#~ "(*UNKNOWN)" => "ERROR warn UNRECOGNIZED VERB (%%%%)",
#~ "(*UNKNOWN:NAME)" => "ERROR warn UNRECOGNIZED VERB:NAME (%%%%:%%%%)",
#~ 2013-08-06-01:47:38
"(*UNKNOWN)" => "ERROR die Unknown verb pattern '%%%%' in regex;",
"(*UNKNOWN:NAME)" => "ERROR die Unknown verb pattern '%%%%:%%%%' in regex;",
'$' => 'match before an optional \n, and the end of the string', ## todo ANCHOR ANCHOR DESC
'$m' => 'match before an optional \n, and the end of a "line"',
'\A' => 'match the beginning of the string',
'^' => 'match the beginning of the string',
'^m' => 'match the beginning of a "line"',
'\z' => 'match the end of the string',
'\Z' => 'match before an optional \n, and the end of the string',
#~ '\G' => 'match where the last m//g left off',
'\G' => 'match where the last m//g left off; \G Match only at pos() (e.g. at the end-of-match position of prior m//g)',
'\b' => 'match the boundary between a word char (\w) and something that is not a word char (\W); OUTSIDE a "word"',
'\bl' => 'match the boundary between a word char (\w) and something that is not a word char (\W); according to use locale;; OUTSIDE a "word"',
'\bu' => 'match the boundary between a word char (\p{Word}) and something that is not a word char (\P{Word}); according to unicode semantics; OUTSIDE a "word"',
'\ba' => 'match the boundary between a word char (\w) and something that is not a word char (\W); according to ASCII-restrict; OUTSIDE a "word"',
#~ 2013-08-11-19:59:39 #~ TODO #~ touching OUTSIDE a "word"
#~ perlre
#~ \b Match a word boundary
#~ \B Match except at a word boundary
#~ perlrebackslash
#~ \b Word/non-word boundary. (Backspace in []).
#~ \B Not a word/non-word boundary. Not in [].
#~ perlrequick and perlretut
#~ The word anchor \b matches a boundary between a word character and a non-word character \w\W or \W\w :
#~ perlretut
#~ Similarly, the word boundary anchor \b matches wherever a character matching \w is next to a character that doesn't, but it doesn't eat up any characters itself.
#~ \b looks both ahead and behind, to see if the characters on either side differ in their "word-ness".
## perldebguts
#~ /\B/u \Bu NBOUNDU Match "" at any word non-boundary using Unicode semantics
#~
#~ '\B' => 'match the boundary between two word chars (\w) or two non-word chars (\W)',
#~ '\Bl' => 'match the boundary between two word chars (\w) or two non-word chars (\W); according to use locale;',
'\B' => 'match the boundary between two word chars (\w); INSIDE a "word"',
'\Bl' => 'match the boundary between two word chars (\w); according to use locale;; INSIDE a "word"',
'\Bu' => 'match the boundary between two word chars (\\p{Word}); according to unicode semantics; INSIDE a "word"',
'\Ba' => 'match the boundary between two word chars (\\p{Word}); according to ASCII-restrict; INSIDE a "word"',
#~ 2013-06-14-17:20:08 doesn't affect capture groups
#~ $ perl -Mre=debug -le " $_=q{12345}; m{(.{4}\K)\K(.)}; print $1,$2 "
"\\K" => [
'A zero-width positive look-behind assertion.',
'"(?<=pattern)" "\\K"',
'L<perlre/Look-Around Assertions>',
'"keep" everything matched prior to the \K and do not include it in $& ',
'match left of \K and discard (not-Keep) from $& ',
],
'?=' => [ '(?=pattern)', 'L<perlre/(?=pattern)>', 'A zero-width positive look-ahead assertion.', 'For example, C</\w+(?=\t)/> matches a word followed by a tab, without including the tab in C<$&>.' ],
'?<=' => [
'(?<=pattern)',
'L<perlre/(?<=pattern)>',
'C<(?<=pattern)> C<\K>',
'A zero-width positive look-behind assertion.',
'For example, C</(?<=\t)\w+/> matches a word that follows a tab, without including the tab in C<$&>.',
'Works only for fixed-width look-behind.',
],
'?!' => [ '(?!pattern)', 'L<perlre/(?!pattern)>',
'A zero-width negative look-ahead assertion.',
#~ q{For example C</foo(?!bar)/> matches any occurrence of "foo" that isn't followed by "bar". },
q{For example C</foo(?!bar)/> matches any occurrence of "foo" that is NOT followed by "bar". },
'Note however that look-ahead and look-behind are NOT the same thing. ',
'You cannot use this for look-behind.',
],
'?<!' => [ '(?<!pattern)', 'L<perlre/(?<!pattern)>',
'A zero-width negative look-behind assertion.',
'For example C</(?<!bar)foo/> matches any occurrence of "foo" that does not follow "bar".',
'Works only for fixed-width look-behind.',
],
#~ 'errn?<!' => 'ERROR die Variable length lookbehind not implemented in regex (** maybe false positive, detection not bulletproof, not hanlde (?i:a) )',
#~ 'errn?<=' => 'ERROR die Variable length lookbehind not implemented in regex (** maybe false positive, detection not bulletproof, not hanlde (?i:a) )',
'errn?<!' => 'ERROR die Variable length lookbehind not implemented in regex; you used variable length alterations like "a|aa" or you used variable-length quantifiers like "*", "+" or "?"',
'errn?<=' => 'ERROR die Variable length lookbehind not implemented in regex; you used variable length alterations like "a|aa" or you used variable-length quantifiers like "*", "+" or "?"',
#~ 2013-06-13-04:15:45
# flags
#~ 2013-07-26-16:38:29
#~ 'mods.i' => '/i case-insensitive',
#~ 'mods.-i' => '?-i: case-sensitive',
#~ 'mods.m' => '/m with ^ and $ matching start and end of line',
#~ 'mods.-m' => '?-m: with ^ and $ matching normally (start and end of string)',
#~ 'mods.s' => '/s with . matching \n',
#~ 'mods.-s' => '?-s: with . not matching \n',
#~ 'mods.x' => '/x disregarding whitespace and comments',
#~ 'mods.-x' => '?-x: matching whitespace and # normally',
#~ 'mods.u' => '/u sets the character set to Unicode.',
'mods.i' => '(?i) case-insensitive',
'mods.-i' => '(?-i) case-sensitive',
'mods.m' => '(?m) with ^ and $ matching start and end of line',
'mods.-m' => '(?-m) with ^ and $ matching normally (start and end of string)',
'mods.s' => '(?s) with . matching \n',
'mods.-s' => '(?-s) with . not matching \n',
'mods.x' => '(?x) disregarding whitespace and comments',
'mods.-x' => '(?-x) matching whitespace and # normally',
'mods/x' => '/x disregarding whitespace and comments',
'match_semantics.u' => '(?u) sets the character set to Unicode.',
#~ http://search.cpan.org/~rjbs/perl-5.18.0/pod/perlre.pod#/a_%28and_/aa%29
'match_semantics.a' => [ ## match_semantics
#~ '/a is ASCII-restrict (or ASCII-safe); https://metacpan.org/module/perlre#a-and-aa',
#~ '/a is ASCII-restrict (or ASCII-safe); http://search.cpan.org/dist/perl/pod/perlre.pod#/a_%28and_/aa%29',
'/a is ASCII-restrict (or ASCII-safe); L<<<http://search.cpan.org/dist/perl/pod/perlre.pod#/a_%28and_/aa%29|/a (and /aa)>>>',
'/a it causes the sequences \d, \s, \w, and the Posix character classes to match only in the ASCII range.',
'/a also sets the character set to Unicode, BUT adds several restrictions for ASCII-safe matching.',
],
#~ 'match_semantics.aa' => '/aa forbids the intermixing of ASCII and non-ASCII; ASCII-restrict-strict ; ASCII-safe-strict;',
#~ 'match_semantics.aa' => '/aa forbids the intermixing of ASCII and non-ASCII; ASCII-restrict-insensitive; Prevents this match "k" =~ /N{KELVIN SIGN}/aia',
#~ 'match_semantics.aa' => '/aa ASCII-restrict-case-insensitive; forbids the intermixing of ASCII and non-ASCII; Prevents this match "kk" =~ /\N{KELVIN SIGN}\N{U+212A}/i',
#~ _aa.pl
#~ 'match_semantics.aa' => '/aa ASCII-restrict-case-insensitive; Prevents ASCII-range from matching non-ASCII-range case-insensitively. Prevents this match "kk" =~ /\N{KELVIN SIGN}\N{U+212A}/i; https://metacpan.org/module/perlre#a-and-aa',
#~ 'match_semantics.aa' => '/aa ASCII-restrict-case-insensitive; Prevents ASCII-range from matching non-ASCII-range case-insensitively. Prevents this match "kk" =~ /\N{KELVIN SIGN}\N{U+212A}/i; http://search.cpan.org/dist/perl/pod/perlre.pod#/a_%28and_/aa%29',
'match_semantics.aa' => '/aa ASCII-restrict-case-insensitive; Prevents ASCII-range from matching non-ASCII-range case-insensitively. Prevents this match "kk" =~ /\N{KELVIN SIGN}\N{U+212A}/i; L<<<http://search.cpan.org/dist/perl/pod/perlre.pod#/a_%28and_/aa%29|/a (and /aa)>>>',
#~ because CORE::fc("\N{KELVIN SIGN}\N{U+212A}") eq "\F\N{KELVIN SIGN}\N{U+212A}\Q" eq "kk"
#~ http://search.cpan.org/~rjbs/perl-5.18.0/pod/perlre.pod#/d
'match_semantics.^' => '(?^) is (?d) is "Depends" or "Dodgy" or "Default"; L<perlunicode/The "Unicode Bug">;',
'match_semantics.d' => [
'(?d) is "Depends" or "Dodgy" or "Default"; L<perlunicode/The "Unicode Bug">;',
'(?d) is the old, problematic, pre-5.14 Default character set behavior. Its only use is to force that old behavior.',
],
'match_semantics.l' => "/l sets the character set to current locale. See L<perllocale>",
'match_semantics.u' => "/u sets the character set to Unicode.",
'mods/l' => [ "/l WARNING NOT RECOMMENDED instead use locale; ", ],
'mods/u' => [ "/u WARNING NOT RECOMMENDED instead use feature 'unicode_strings'", ],
'mods.o' => [ "ERROR warn Useless (?o) - use /o modifier in regex; /o Compile pattern only once.", ],
'mods/o' => [ "/o Compile pattern only once.", ],
'mods.p' => [ '(?p) Preserve the string matched such that ${^PREMATCH}, ${^MATCH}, and ${^POSTMATCH} are available for use after matching. GLOBAL!TRICKY!(RT#117135)',],
'mods/p' => [ '/p Preserve the string matched such that ${^PREMATCH}, ${^MATCH}, and ${^POSTMATCH} are available for use after matching. GLOBAL!TRICKY!(RT#117135)',],
#~ Note also that the p modifier is special in that its presence anywhere in a pattern has a global effect.
'mods.-a' => [ 'ERROR die Regexp modifier "a" may not appear after the "-" in regex;', ],
#~ 2013-08-12-01:50:18
#~ untrippable
#~ TODO REPORT BUG? SHOULD BE RECOGNIZED AS Token::Modifier/GroupType::Modifier
#~ 'qr{(?-^)(?-^:u}', ## #~ xRe::Structure::Capture #~ /1/C1/C58/C0 ; xRe::Token::Unknown
#~ 'mods.-^' => [ 'ERROR die Regexp modifier "^" aka "d" may not appear after the "-" in regex;', ],
'mods.-d' => [ 'ERROR die Regexp modifier "d" may not appear after the "-" in regex;', ],
'mods.-l' => [ 'ERROR die Regexp modifier "l" may not appear after the "-" in regex;', ],
'mods.-u' => [ 'ERROR die Regexp modifier "u" may not appear after the "-" in regex;', ],
'mods.-p' => [ 'THINKO warn Useless use of (?-p) in regex;', ],
'mods.-p' => [ 'THINKO warn Useless use of (?-p) in regex; You cant turn (?p) off; (?p) is global', ],
#~ evaln
'mods/s/e' => '(/e) Evaluate the right side as an expression.',
'mods/s/ee' => '(/ee) Evaluate the right side as a string then eval the result (maybe %%%% times). WARNING DANGEROUS L<perlfunc/eval>',
'mods/s/r' => '(/r) Return substitution and leave the original string untouched; not modify $foo in $foo =~ s///r',
#~ 2013-07-28-20:13:01
'mods/i' => '(/i) case-insensitive',
#~ untrippable #~ 'mods/-i' => '(/-i) case-sensitive',
'mods/m' => '(/m) with ^ and $ matching start and end of line',
#~ untrippable #~ 'mods/-m' => '(/-m) with ^ and $ matching normally (start and end of string)',
'mods/s' => '(/s) with . matching \n',
## 2013-07-26-02:44:01
#~ 'mods.twice' => 'ERROR die Regexp modifier "%%%%" may not appear twice in regex;',
'mods.nottwice' => 'ERROR die Regexp modifier "%%%%" may not appear twice in regex;',
'mods.twicemax' => 'ERROR die Regexp modifier "%%%%" may appear a maximum of twice in regex;',
'mods.exclusive' => 'ERROR die Regexp modifiers "%%%%" and "%%%%" are mutually exclusive',
#~ 'mods.unknown' => 'ERROR UNKNOWN MODIFIER "%%%%" (?%%%%)',
'mods.unknown' => 'ERROR UNKNOWN MODIFIER "%%%%" die Sequence (?%%%%...) not recognized in regex;',
'mods/unknown' => 'ERROR UNKNOWN MODIFIER "%%%%" die Having no space between pattern and following word is deprecated',
#~ 2013-07-26-03:31:02
#~ 'mods/g' => [ "/g Match globally, i.e., find all occurrences.", ],
'mods/g' => [ "/g Match globally, i.e., find all occurrences.", 'in list context (@matches=m//g) return all matches; in scalar context($count=m//g) return number of matches' ],
'mods/c' => [ "/c Do not reset search position on a failed match when /g is in effect.", ],
#~ '|' => ['OR ; Alternation (outside character class)'],
#~ '|' => ['OR ; Alternation (outside character class); match_left OR match_right'],
'|' => ['OR ; Alternation ; match_left OR match_right'],
#~ 2013-06-14-00:39:34
'parsing_failures' => [
"# ERROR WARNING PARSING FAILURE, EXPLANATIONS UNREAL ie IMAGINED ie WRONG ",
"# WARNING PARSING FAILURE, EXPLANATIONS UNREAL ",
"# WARNING PARSING FAILURE, EXPLANATIONS IMAGINED ",
"# WARNING PARSING FAILURE, EXPLANATIONS UNRELIABLE ",
"# WARNING PARSING FAILURE, EXPLANATIONS WRONG ",
"# ",
],
'matches_as_follows' => 'matches as follows:',
'the_regex' => 'The regular expression ',
'm_pat_at_add' => 'match the preceding pattern at address=',
#~ $ perl -Mre=debug -we " qr{*}" ## never used because its Token::Unknown
'quant_f_not' => 'IMPOSSIBLE ERROR die Quantifier follows nothing in regex;',
'm_recur_ata' => 'MATCH RECURSION at address=',
'f_e_r_n_exit' => "FATAL ERROR die Reference to nonexistent group in regex;",
'n_exist_group' => "FATAL ERROR die Reference to nonexistent group in regex;",
#~ 2013-06-14-04:23:22
"(DEFINE)" => [
"L<perlre/(DEFINE)>",
'define subpatterns which will be executed only by the recursion mechanism',
'It is recommended that you put DEFINE block at the end of the pattern,',
'and that you name any subpatterns defined within it.',
'the yes-pattern is never directly executed, and no no-pattern is allowed',
'Similar in spirit to (?{0}) but more efficient.',
],
'(DEFINE)pointless' => 'WARNING a (DEFINE) section without (?<NAMEd>patterns) is pointless; an empty (DEFINE) section is pointless',
#~ 2013-06-14-04:58:49
'regexset.!' => 'complement (everything NOT in following set)',
'regexset.&' => 'intersection',
'regexset.+' => 'union',
'regexset.|' => "another name for '+', hence means union",
'regexset.-' => "subtraction (matched by left operand (above), excluding right operand (below))",
'regexset.^' => [
"symmetric difference (the union minus the intersection);",
"like exclusive or;",
"set of code points that are matched by either, but not both, of the operands.",
],
#~ 2013-08-12-01:20:21 dupes
#~ 'PPIx::Regexp::Structure::RegexSet!' => 'complement (everything NOT in following set)',
#~ 'PPIx::Regexp::Structure::RegexSet&' => 'intersection',
#~ 'PPIx::Regexp::Structure::RegexSet+' => 'union',
#~ 'PPIx::Regexp::Structure::RegexSet|' => "another name for '+', hence means union",
#~ 'PPIx::Regexp::Structure::RegexSet-' => "subtraction (matched by left operand (above), excluding right operand (below))",
#~ 'PPIx::Regexp::Structure::RegexSet^' => [
#~ "symmetric difference (the union minus the intersection);",
#~ "like exclusive or;",
#~ "set of code points that are matched by either, but not both, of the operands.",
#~ ],
"PPIx::Regexp::Structure::CharClass".'^' => "Character class inversion (all characters except the following)",
"PPIx::Regexp::Node::Range".'-' => "'-' is character range operator (a-z means all characters from a to z)",
#~ '-' => "Character range (inside character class)", ## 2013-06-20-07:17:11
#~ 2013-06-14-06:07:00
#~ '{n}' => ['Match exactly n times', 'L<perlre/Quantifiers>', ],
#~ '{n,}' => ['Match at least n times', 'L<perlre/Quantifiers>', ],
#~ '{n,m}' => ['Match at least n but not more than m times', 'L<perlre/Quantifiers>', ],
#~ 2013-06-14-06:24:37
#~ '{n}' => 'Match exactly n times',
#~ '{n,}' => 'Match at least n times',
#~ '{n,m}' => 'Match at least n but not more than m times',
#~ 2013-08-03-17:05:44
'{n}' => '{n} Match exactly (%%%%) times',
'{n,}' => '{n,} Match at least (%%%%) times',
'{n,m}' => '{n,m} Match at least (%%%%) but not more than (%%%%) times',
'{,m}' => 'ERROR:P', ## \Q{,m}\E
'(n)' => [ 'Checks if the numbered capturing group has matched something.', "L<perlre/(1) (2) ...>", ],
'(<NAME>)' => ['Checks if a group with the given name has matched something.', "L<perlre/(<NAME>) ('NAME')>", ],
#~ "('NAME')" => 'Checks if a group with the given name has matched something.',
"(R)" => [
"Checks if the expression has been evaluated inside of recursion.",
"L<perlre/(R)>",
],
"(Rn)" => ["Checks if the expression has been evaluated while executing directly inside of the n-th capture group.",'L<perlre/(R1) (R2) ...>'],
"(R&NAME)" => [
"L<perlre/(R&NAME)>",
"Similar to (R1) , this predicate checks to see if we're executing directly inside of the leftmost group with a given name ",
'(this is the same logic used by (?&NAME) to disambiguate).',
'It does not check the full stack, but only the name of the innermost active recursion.',
],
"posix_inside" => "ERROR warn POSIX syntax [: :] belongs inside character classes in regex; like this [[:word:]]", ## [::][:unknown:]
"\\C" => ['Single octet, even under UTF-8. Not in [].','L<perldebguts/CANY>',],
#~ 2013-06-14-19:04:04
'\l' => ['Lowercase next character. Not in [].', "L<perlfunc/lcfirst>",, ],
'\u' => ['Uppercase next character. Not in [].', "L<perlfunc/ucfirst>" ],
'\L' => ['Lowercase till \E. Not in [].', "L<perlfunc/lc>", ],
'\U' => ['Uppercase till \E. Not in [].', "L<perlfunc/uc>", ],
'\Q' => ['quotemeta till \E. Not in [].', 'Quote (disable) pattern metacharacters till \E.', 'L<perlfunc/quotemeta>', ],
'\E' => ['Turn off \Q, \L and \U processing. Not in [].', '', ],
'\F' => ['Foldcase till \E. Not in [].', '', ],
#~ 2013-06-14-20:55:08
#~ m{\a[\b]\e
'\a' => q('\a' (alarm)),
"\b" => q('\b' (backspace)), ## NOT '\b'
'\e' => q('\e' (escape)),
'\f' => q('\f' (form feed)),
'\n' => q('\n' (newline)),
'\r' => q('\r' (carriage return)),
'\t' => q('\t' (tab)),
#~ '\X' => q{Unicode "eXtended grapheme cluster"; leter and diacritic mark; Not in [].},
'\X' => q{Unicode "eXtended grapheme cluster"; leter and diacritic mark; Multiple code points that add up to a single visual character. L<perldebguts/CLUMP>},
#~ 2013-06-16-02:57:43
'check_prefix' => 'Checks to see if the following has matched. ',
mnext_nth_capture => "MATCH the NEXT nth (%%%%) capture group from this position ",
mprev_nth_capture => "MATCH the PREVIOUS nth (%%%%) capture group from this position ",
#~ match_the_capture => 'MATCH THE (%%%%) capture ; MATCH "\\%%%%" aka (in replacement) "$%%%%" ',
match_the_capture => 'MATCH THE (%%%%) capture ; MATCH "\\%%%%" alias (in replacement) "$%%%%" ',
#~ check_the_capture => 'Checks to see if the following has matched. (%%%%) capture ; MATCH "\\%%%%" aka (in replacement) "$%%%%" ',
#~ check_the_capture => 'Checks to see if the (%%%%) capture has matched; The backreference "\\%%%%" aka (in replacement) "$%%%%" ',
check_the_capture => 'Checks to see if the (%%%%) capture has matched; The backreference "\\%%%%" alias (in replacement) "$%%%%" ',
#~ cnext_nth_capture => "Checks to see if the following has matched. The NEXT nth (%%%%) capture group from this position ",
#~ cprev_nth_capture => "Checks to see if the following has matched. The PREVIOUS nth (%%%%) capture group from this position ",
cnext_nth_capture => "Checks to see if the NEXT nth (%%%%) capture group from this position has matched.",
cprev_nth_capture => "Checks to see if the PREVIOUS nth (%%%%) capture group from this position has matched.",
#~ 'cm_recur_ata' => 'Checks to see if the following has matched RECURSION at address=%%%%',
'cm_recur_ata' => 'Checks to see if we are executing directly inside the capture at address=%%%%',
#~ 2013-06-16-04:06:54
#~ check_n_capture => 'Checks to see if the following has matched. "\g{%%%%}" aka "(?&%%%%)" aka "(?P>%%%%)"',
#~ check_n_capture => 'Checks to see if the following capture has matched: "\g{%%%%}" aka "(?&%%%%)" aka "(?P>%%%%)"',
#~ check_n_capture => 'Checks to see if we are executing directly inside the capture "\g{%%%%}" aka "(?&%%%%)" aka "(?P>%%%%)"',
check_n_capture => 'Checks to see if we are executing directly inside the capture "\g{%%%%}" alias "(?&%%%%)" alias "(?P>%%%%)"',
'\P' => ['L<perlrecharclass/Unicode Properties>','TODO warn REPORT BUG for PPIx::Regexp; \PP is \P{Prop} ; for example \PN is \P{Number}; ' ],
'\p' => ['L<perlrecharclass/Unicode Properties>','TODO warn REPORT BUG for PPIx::Regexp; \pP is \p{Prop} ; for example \pN is \p{Number}; ' ],
'eo_grouping' => 'end of grouping for %%%%',
'dodgy-u-name' => '(/d) dodgy forced to (/u) unicode semantics because \N{} found in pattern',
'dodgy-u-prop' => '(/d) dodgy forced to (/u) unicode semantics because \p{} found in pattern',
'dodgy-u-255' => '(/d) dodgy forced to (/u) unicode semantics because code point above 255 found in pattern',
#~ 'dodgy-u-rset' => '(/d) dodgy forced to (/u) unicode semantics because L<perlre/(?[ ])> found in pattern',
'dodgy-u-rset' => '(/d) dodgy forced to (/u) unicode semantics because (?[ ]) found in pattern',
); ## our %desc
#~ die scalar %desc; ## 201/512 ## 2013-08-11-18:43:53
#!/usr/bin/perl --
use strict;
use warnings;
use Data::Dump qw/ dd pp /;
use charnames (); ## for ord
use HTML::Entities();
use Unicode::UCD();
use Getopt::Long();
use PPI::Document;
use PPIx::Regexp::Dumper;
use vars qw/ $opt_pmshortcut $opt_coverage /;
unless( caller ){
Main( @ARGV );
exit( 0 );
}
sub Main { goto &MainXplain }
sub MainXplain {
my %opt;
Getopt::Long::GetOptionsFromArray(
\@_,
\%opt,
q{text|t!},
q{html!},
q{dumper|ddr!},
q{dumpee|dde!},
q{help|h!},
q{perlmonks|pmshortcut|pm|p!},
q{coverage|c!},
);
$opt{help} and return print Usage();
@_ or return print Usage();
$opt{text} or $opt{html}=1;
$opt{coverage} and $opt_coverage=1;
unshift @_, \%opt;
goto &Mexplain;
}
sub Usage {
"
Usage:
$0
$0 --help
$0 --html qr/\\d\\w/u
$0 --text qr/\\d\\w/u
$0 \\d\\w qr/\\d\\w/u s/\\d\\w/rand/gex m/\\d\\w/aax ...
# --html is default
# bare pattern becomes qr//
# remember to quote args according to your shell rules
"
}
sub Mexplain {
my $args = shift;
local $opt_pmshortcut = $$args{perlmonks};
for my $restr ( @_ ){
my $pdoc = PPI::Document->new( \$restr );
my @res = map { PPIx::Regexp->new( $_ ) } @{
$pdoc->find(
sub {
return 1 if ref($_[1]) =~ m{
PPI::Token::QuoteLike::Regexp
| PPI::Token::Regexp::Match
| PPI::Token::Regexp::Substitute
}ix;
},
)
|| []
};
@res or @res = PPIx::Regexp->new( "qr{$restr}" );
for my $re ( @res ){
if( $$args{html} ){
darnhtml( $re->xplain );
} else {
darntext( $re->xplain );
}
$$args{dumper} and dd( $re );
$$args{dumpee} and dd( $re->xplain );
}
undef @res; undef $pdoc;
}
}
sub darntext {
my( $ref ) = @_;
my @lols;
if( ref $ref eq 'HASH' ){
my $start = $ref->{start};
my $con = $ref->{start_con};
my $hr = $ref->{start_hr};
my $depth = $ref->{depth} || 0;
my $dent = ' ' x $depth;
$_ = "# $dent $_" for grep { defined $_ } @$start;
$_ = "$dent $_" for grep { defined $_ } $con;
my $conhr = [ grep { defined $_ } $con, $hr ];
my $chits = $ref->{chits};
@lols = ( $start, $conhr, $chits );
} else {
@lols = $ref;
}
for my $lol ( @lols ){
for my $l ( @$lol ){
if( ref $l ){
darntext( $l );
} else {
print "$l\n";
}
}
}
return;
}
sub darnhtml{
print "<!DOCTYPE html><html><head><title> title </title></head><body>\n";;;
#~ <base href="http://perlmonks.com/">
&darnhtmltable;
print "</body></html>\n";
}
sub darnhtmltable {
my( $ref ) = @_;
my $depth = 0;
my @lols;
if( ref $ref eq 'HASH' ){
my $start = $ref->{start};
my $con = $ref->{start_con};
my $hr = $ref->{start_hr};
$depth = $ref->{depth} || 0;
darn_table( $depth, $con, $start , $hr );
@lols = $ref->{chits};
} else {
@lols = $ref;
}
for my $lol ( @lols ){
for my $l ( @$lol ){
if( ref $l ){
darnhtmltable( $l );
} else {
darn_table( $depth, " ", [$l] );
}
}
}
return;
}
sub enent {
my $ret = HTML::Entities::encode_entities( $_[0] );
$ret =~ s{\[}{&#91;}g;
$ret =~ s{\]}{&#93;}g;
$ret =~ s{\|}{&#124;}g;
return $ret;
}
sub darn_table {
my( $depth, $con, $desc, $hr ) = @_;
defined $con or $con= ' ';
print "<table>\n";
#~ print "<table border=1>\n";
print "<tr>";
print "<td><pre>", ' ' x ($depth+3) , enent($con),"</pre></td>\n";
print '<td>', '&nbsp;' x ($depth+3) , "</td>\n";
print "<td>";
#~ shift @$desc;;; ### ditch address= TODO make it { address => '' }
#~ shift @$desc;;; ### ditch token xRE:: TODO makeit { token => [ '','', ] }
my( @three ) = splice @$desc, 0, 3; ## make it a tooltip?!??!??!?
#~ unshift @$desc, '# '.join ' ; ', @three;
unshift @$desc, 3==@three ? ( '# '.join ' ; ', @three ) : ( @three );
## TO?DO? http://www.thecssninja.com/css/css-tree-menu# Pure CSS collapsible tree menu | The CSS Ninja - All things CSS, JavaScript & HTML
local$_; for(@$desc){
if( m{\sat\saddress=\s*(\S+)} ){
$_ = sprintf q{<a href="#%s">%s</a>}, enent("$1"), enent($_);
} elsif( m{\baddress=\s*(\S+)} ){
$_ = sprintf q{<a name="%s">%s</a>}, enent("$1"), enent($_);
#~ $_ = sprintf q{<a name="%s"></a>}, enent("$1"), ;
#~ } elsif( m{L<(.*)>} ){
#~ } elsif( m{L<{1,3}(.*)>{1,3}} ){
#~ } elsif( m{L<+\b(.*)\b>+} ){
#~ } elsif( m{L<++(.*?)>+} ){
#~ } elsif( m{L(?:<<<|<)(.*?)(?:>>>|>)}x ){
} elsif( m{L(?:<<<|<)(.*[^>])(?:>>>|>)}x ){
my $odoc = $1;
$odoc =~ s/^\s+|\s+$//g;
my $doc = enent($odoc);
#~ $doc =~ s{^(.+?)/(.+)$}{$1#$2};
if( my( $one, $frag ) = $doc =~ m{^(.+?)/(.+)$} ){
$frag =~ s/\s/-/g;
$frag =~ s{\[}{%5b}g;
$frag =~ s{\]}{%5d}g;
$frag =~ s{\|}{%7c}g;
$doc = "$one#$frag";
}
#~ my $href = "http://perlmonks.com/?node=doc://$doc";
#~ $href = enent($odoc) if $odoc =~ m{^http://}i;
#~ $_ = qq{<a href="$href">}.enent($_).'</a>';
#~
my $href = "http://perlmonks.com/?node=doc://$doc";
if( $odoc =~ m{^http://}i ){
my( $oleft, $oright ) = split /\|/, $odoc;
#~ $href = enent($odoc) ;
$href = enent($oleft) ;
$_ = qq{<a href="$href">}.enent($_).'</a>';
} else {
if( $opt_pmshortcut ){
$_ = "[doc://$doc|".enent($_)."]";
} else {
my $href = "http://perlmonks.com/?node=doc://$doc";
$href = enent($odoc) if $odoc =~ m{^http://}i;
$_ = qq{<a href="$href">}.enent($_).'</a>';
}
}
#~ } elsif( m{^\s*.*?(?:\bdie\b|\bwarn)}i ){
#~ $_ = '<b>'.enent($_).'</b>';
} else {
$_ = enent($_);
}
$_ = "<b>$_</b>" if /\bdie\b|\bwarn/i;
}
if( @$desc>1 and $desc->[1] !~ /^#/ ){ $_="# $_" for @$desc; }
$con and $con=~/\x22,\s*$/ and push @$desc, '#<b>'.enent($con).'</b>';
$hr and push @$desc, $hr;
print map { "$_<br>\n" } @$desc;
print "</td></tr></table>\n";
}
#~ package PPIx::Regexp::Element; ## dumb
sub PPIx::Regexp::Element::xplain_desc {
goto &PPIx::Regexp::Node::xplain_desc
}
#~ package PPIx::Regexp::Node; ## dumb
#~ 2013-07-23-05:37:02 dumb for umlclass.bat
{ package PPIx::Regexp::Element; package PPIx::Regexp::Node; package PPIx::Regexp::Structure::Capture; package PPIx::Regexp::Structure::CharClass; package PPIx::Regexp::Structure::Code; package PPIx::Regexp::Structure::Modifier; package PPIx::Regexp::Structure::Quantifier; package PPIx::Regexp::Structure::Replacement; package PPIx::Regexp::Structure; package PPIx::Regexp::Token::Backreference; package PPIx::Regexp::Token::Backtrack; package PPIx::Regexp::Token::CharClass::POSIX; package PPIx::Regexp::Token::CharClass::Simple; package PPIx::Regexp::Token::Code; package PPIx::Regexp::Token::Comment; package PPIx::Regexp::Token::Condition; package PPIx::Regexp::Token::Delimiter; package PPIx::Regexp::Token::Greediness; package PPIx::Regexp::Token::GroupType::Modifier; package PPIx::Regexp::Token::GroupType::Switch; package PPIx::Regexp::Token::Interpolation; package PPIx::Regexp::Token::Literal; package PPIx::Regexp::Token::Modifier; package PPIx::Regexp::Token::Operator; package PPIx::Regexp::Token::Quantifier; package PPIx::Regexp::Token::Recursion; package PPIx::Regexp::Token::Structure; package PPIx::Regexp::Token::Unknown; package PPIx::Regexp::Token::Whitespace; package PPIx::Regexp::Token; package PPIx::Regexp; package main; }
our %desc; BEGIN { require '_desc.pl'; }
sub PPIx::Regexp::Node::xplain_desc {
my @ret = &PPIx::Regexp::Node::xplain_desc_real;
#~ @ret and warn "# @_ => ",pp(\@ret), "\n";
if( @ret ){ ## successfull descriptions, stuff we actually used
my $self = shift;
#~ warn "# $self\n", pp( \@_ ), " => ", pp(\@ret), ",\n\n";
$opt_coverage and warn "# $self\n", pp( \@_ ), " => ", pp(\@ret), ",\n\n";
}
return wantarray ? @ret : join('', @ret );
}
sub PPIx::Regexp::Node::xplain_desc_real {
#~ sub PPIx::Regexp::Node::xplain_desc {
#~ warn "@_\n"; ## grogelinginadequate
my( $self, $key , @repos ) = @_;
%desc or require '_desc.pl';
if( my $ret = $desc{ $key } ){
if( ref $ret ){
return @$ret;
} else {
my $ix = 0; ## yick
@repos and $ret =~ s/%%%%/my$r=$repos[$ix++]; defined $r?$r:'%%%%'/ge;;
return $ret;
}
}
return;
}
sub PPIx::Regexp::Element::xplain_start {
goto &PPIx::Regexp::Node::xplain_start
}
sub PPIx::Regexp::Node::xplain_start {
my( $self, %args ) = @_;
my $depth = $args{depth} || 0;
my @ret;
push @ret, 'address='. $self->address;
push @ret, $self->xplain_desc( ref $self );;
push @ret, $self->xplain_perl_version;;
#~ push @ret, 'address='. $self->address;
if( defined( my $ord = eval { $self->ordinal } ) ){
#~ my $unicode10 = Unicode::UCD::charinfo( $ord )->{unicode10} ;
my $unicode10 = unicode10( $ord );
#~ $unicode10 or warn pp( {crapola => Unicode::UCD::charinfo( $ord ) } ); ## 2013-08-18-04:12:49
push @ret,
#~ join ' aka ',
join ' alias ',
"ordinal= ord( chr( $ord ) )",
sprintf('\\N{U+%04.4X}', $ord ),
sprintf('\%03o', $ord ),
( $unicode10 ? $unicode10 : () ),
chr( $ord ),
;;;;;;;;;
}
if( not $args{no_mods} ){
if( $self->xmods_susceptible ){
push @ret, xplain_modifiers( $self );
}
push @ret, 'is_case_sensitive' if eval { $self->is_case_sensitive };
}
my $con = eval { $self->{content} };
my $xmods = eval { $self->xmods };
my $semantics = eval { $$xmods{match_semantics} };
#~ msixpodualgcer
my @cons = grep { $$xmods{$_} } qw{ m s i x p o d u a l g c e r };
if( $con and not $args{no_con_desc} ){
@cons = map { $con.$_ } grep { defined $_ } @cons, $semantics, '';;
#~ push @cons, ref($self).$con; ## TODO REFUDGE
#~ warn pp { con => $con, semantics => $semantics, cons => \@cons };
#~ 2013-08-11-19:51:28
#~ warn "\n\n", pp( [ qw/FUDGE REFUDGE /, con => $con, semantics => $semantics, cons => \@cons ],[]),"\n\n";
my @desc;
for my $con ( @cons ){
@desc = $self->xplain_desc( $con );;;
if( @desc ){
last;
}
}
@desc and push @ret, @desc;
}
return \@ret;
}
sub PPIx::Regexp::Element::address { goto &PPIx::Regexp::Node::address }
our $__rootaddr = 0;
sub PPIx::Regexp::Node::address {
my( $self, %args ) = @_;
my $addr = $$self{_xaddr} || '';
return $addr if $addr;
my @pops = $self;
my $prev = $self;
while( $prev = $prev->parent ){
unshift @pops, $prev;
}
my $root = shift @pops;
my $rootaddr = eval { $root->{__rootaddr}||=++$__rootaddr } ;
$rootaddr ||= sprintf(' 0+%x',$root);
#~ return join '',
$addr = join '',
'/'.$rootaddr,
( map {
my ( $method, $inx ) = $_->_my_inx();
$method = uc$1 if $method =~ m{^(.)};
"/$method$inx";
} @pops ), ' ';
#~ $$self{_xaddr}=$addr;
return $addr;
}
sub PPIx::Regexp::xplain {
my( $self, %args ) = @_;
$self->{xfailures}=0;
$self->xmods_propagate;
my $depth = $args{depth} || 0;
my @ret;
my $ret = { depth => $depth, start => [@ret], chits => \@ret };
undef @ret;
for my $child ( eval { $self->children } ){
if( eval{ $child->children } ){ #$haskids
push @ret, $child->xplain( %args, depth => $depth + 3 );
} else {
push @ret, $child->xplain( %args, depth => $depth + 2 ); ## GRRRRRR
}
}
{ ## the_ter_rible xfailures
my @ter = (
"my \$regstr = join '', ",
);;;
my $source = $self->source ;
my $sourceref = ref $source ;
$sourceref = '' if not defined $sourceref;
$source = '' if not defined $source;
if( $source ){
my $rr = $self->xplain_desc("the_regex");
if( $sourceref ){
$rr .= "($sourceref)";
my $con = $source->content;
$con or $con = $self->content;;
$con or $con = "";
$con or die dd $self;
$source = $con;
}
$rr .=":\n\n";
$rr .= $source;
$rr .="\n\n" . $self->xplain_desc("matches_as_follows")."\n";
$rr =~ s{^}{# }gm;
push @ter, split /\n/, $rr;
}
#~ if( my $fail = eval { $self->failures } ){
if( my $totalfail = eval { $self->failures + $self->{xfailures} } ){
push @ter, pp( $self );
#~ push @ret,"# failures=$fail";
my $fail = eval { $self->failures } ||0;
my $xfail = eval { $self->{xfailures} } ||0;
push @ter,"# failures=$totalfail == fail($fail) + xfail($xfail)";
push @ter, $self->xplain_desc("parsing_failures");
}
push @ter, join '', "#r: ", join ' / ', grep { defined $_ } ref( $self ), $sourceref;
push @ter, join '', "#r= ",Data::Dump::quote( $self->content );
push @ter, '# ';
unshift @{ $$ret{start} }, @ter;
}
if( my(@why) = xdodgy_unicode_override( $self ) ){
push @{$ret[-1]{start}}, map { $self->xplain_desc($_) } @why; ## yick
}
push @ret, { depth => $depth, start => [";;;;;;;;;;\n\n"], };
return $ret;;
} ## end of sub PPIx::Regexp::xplain
sub PPIx::Regexp::Node::parent_modifiers {
my( $self ) = @_;
my $root = $self->root;
my $kid = $root->last_element;
return eval { $kid->modifiers };
}
sub PPIx::Regexp::Element::root {
my( $self ) = @_;
my $root = $self;
while( my $newp = $root->_parent ){
$root = $newp ;
}
return $root;
}
sub PPIx::Regexp::Element::xplain_perl_version {
my( $self ) = @_;
my @ret;
if( my $val = eval { $self->perl_version_introduced } ){
#~ if ( $val ne '5.000' ){
if ( $val > 5.006 ){ ## otherwise its waaay too much :)
push @ret, "perl_version_introduced=$val";
}
}
if( my $val = eval { $self->perl_version_removed } ){
push @ret, "warning perl_version_removed =$val";
}
return @ret;
}
sub PPIx::Regexp::Structure::Replacement::xplain {
#~ push @_ , ( in_replacement => 1, hr_length => 6 );
push @_ , ( in_replacement => 1, );
goto &PPIx::Regexp::Structure::xplain;; ## cause it deletes start_con
}
sub PPIx::Regexp::Structure::xplain {
#~ push @_, ( hr_length => 6 );
my $ret = &PPIx::Regexp::Node::xplain;
delete $ret->{start_con};
#~ $$ret{start_hr} = join '', "# ", '-' x 6 ;
$$ret{start_hr} = join '', "# ", '-' x ( 2 * $$ret{depth} ) ;
return $ret;
}
sub PPIx::Regexp::Structure::Quantifier::xn_comma_m { ## n_comma_m {n,m}
my( $self ) = @_;
my( $n, $comma, $m ) = map { eval { $_->{content} } } $self->children;
my @ncm = ('n',',','m'); # {n,m}
defined $n or $ncm[0]='';
defined $comma or $ncm[1]='';
defined $m or $ncm[2]='';
return $n, $comma, $m , @ncm;
}
sub PPIx::Regexp::Structure::Quantifier::xplain {
my( $self ) = @_;
push @_, ( no_con_desc => 1 ); #jic
my $ret = &PPIx::Regexp::Token::Quantifier::xplain;
my( $n, $comma, $m , @ncm ) = $self->xn_comma_m;
push @{$$ret{start}},
$self->xplain_desc( join('', '{',@ncm,'}'), 'n', 'm' ),
$self->xplain_desc( join('', '{',@ncm,'}'), $n, $m ),
'L<perlre/Quantifiers>',
;;;;;;;
return $ret;
}
sub PPIx::Regexp::Token::Quantifier::xplain {
my( $self, %args ) = @_;
my $ret = $self->PPIx::Regexp::Node::xplain( %args, no_elements => 1 );
my $pp = $self->preceding_pattern_address;;
my $ns = $self->next_sibling;
my $nsc = eval { $ns->{content} };
my $nsgreedy = eval { $ns->isa("PPIx::Regexp::Token::Greediness") };
if( not($nsgreedy) or ( $nsgreedy and $nsc ne '?') ){
push @{$$ret{start}}, $self->xplain_desc('most_possible');
}elsif( $nsc eq '?'){
push @{$$ret{start}}, $self->xplain_desc('least_possible');
}
my $ps = $self->previous_sibling;
if( eval { $ps->isa("PPIx::Regexp::Structure::Capture") } ){
my $number = eval { $ps->number } ;
my $name = eval { $ps->name };
my $only = "";
$number and $only .= qq{"\$$number"};
$name and $only .= qq{ or "\$+{$name}"};;
push @{$$ret{start}}, $self->xplain_desc('only_last_n', $only );
}
push @{$$ret{start}},
$pp ? $self->xplain_desc( 'm_pat_at_add' ) . $pp
: $self->xplain_desc( 'quant_f_not' ) ;;;
#~ $$ret{start_hr} = join '', "# ", '-' x ( 2 * $$ret{depth} ) ;
#~ $nsgreedy or $$ret{start_hr} = join '', "# ", '-' x ( 2 * $$ret{depth} ) ;
$$ret{start_hr} = $nsgreedy ? '' : join '', "# ", '-' x ( 2 * $$ret{depth} ) ;
$ret;
}
sub PPIx::Regexp::Token::Greediness::xplain {
my( $self, %args ) = @_;
$args{no_con_desc}=1;
my $ret = $self->PPIx::Regexp::Node::xplain( %args );
push @{$$ret{start}}, $self->xplain_desc( ref($self).$self->{content} ); ## grr
push @{$$ret{start}}, $self->xplain_desc( 'm_pat_at_add' ) . $self->preceding_pattern_address;
$ret;
}
sub PPIx::Regexp::Element::preceding_pattern_address {
my( $self, %args ) = @_;
#~ return eval { $self->previous_sibling->address }; ## its always this isn't it? TODO figure it out
## bug, for greediness previous_sibling is quantifier, so go again
my $ps = $self->previous_sibling;
if( $ps->isa('PPIx::Regexp::Token::Quantifier' ) ){
$ps = $ps->previous_sibling;
}
return eval { $ps->address };
}
sub PPIx::Regexp::Structure::Code::xplain {
my( $self, %args ) = @_;
$args{no_elements}=1;
my $ret = $self->PPIx::Regexp::Node::xplain( %args );
my $type = $self->type->content; ## PPIx::Regexp::Token::GroupType::Code
my $key = "(${type}{ code })";
push @{$$ret{start}}, $self->xplain_desc( $key );
return $ret;
}
sub PPIx::Regexp::Token::Structure::xplain {
push @_, ( no_elements => 1 , no_mods => 1, fudgeme => 0, );
goto &PPIx::Regexp::Node::xplain;
}
sub PPIx::Regexp::Token::Code::xplain {
my( $self, %args ) = @_;
$args{no_elements} = 1 if $self->ancestor_isa('PPIx::Regexp::Structure::Replacement');
return $self->PPIx::Regexp::Node::xplain( %args );
}
#~ 2013-06-14-05:22:25
#~ TO??DO?? unicharproptoregexrange-unicode-regex-range-character-class-ucd.pl
sub PPIx::Regexp::Token::CharClass::Simple::xplain {
my( $self , %args ) = @_;
$args{no_mods}=1;
my $ret = $self->PPIx::Regexp::Node::xplain( %args );
if( my $con = eval{$self->{content}} ){
if( $con =~ /^\\[pP]\{/ ){
push @{$$ret{start}}, "L<perluniprops/$con>";;; ## autogenerated
} else {
push @{$$ret{start}}, "L<perlrecharclass/$con>";;;
push @{$$ret{start}}, "L<perlrebackslash/$con>";;;
}
push @{$$ret{start}}, xplain_modifiers( $self, { GONERS => [qw/ i /] } );
}
$$ret{start_hr} = join '', "# ", '-' x ( 2 * $$ret{depth} ) ;
return $ret;
}
sub PPIx::Regexp::Token::CharClass::POSIX::xplain {
push @_, ( no_con_desc => 0 );
goto &PPIx::Regexp::Token::CharClass::Simple::xplain;
}
sub PPIx::Regexp::Structure::CharClass::xplain {
my( $self ) = @_;
push @_, ( no_con_desc => 1 );
my $ret = &PPIx::Regexp::Structure::xplain;
if( $self->children > 1 ){
my $colons = join '', map { $_->content } $self->schild(0), $self->schild(-1) ;
if( $colons eq '::'){
#~ if( $colons eq '::' and $self->children > 1 ){
push @{$$ret{start}}, $self->xplain_desc('posix_inside');
}
}
return $ret;
}
sub PPIx::Regexp::Token::Interpolation::xplain {
#~ goto &PPIx::Regexp::Token::xplain ## start_content
my( $self ) = @_;
my $ret = &PPIx::Regexp::Token::xplain;
my $key = $self->ancestor_isa('PPIx::Regexp::Structure::Replacement')
? 'PPIx::Regexp::Token::Interpolation-Substitution'
: 'PPIx::Regexp::Token::Interpolation-Regexp' ;
push @{$$ret{start}}, $self->xplain_desc( $key );
return $ret;
}
sub PPIx::Regexp::Token::Literal::xplain { goto &PPIx::Regexp::Token::xplain; }
sub PPIx::Regexp::Token::xplain {
my( $self, %args ) = @_;
my $ret = $self->PPIx::Regexp::Node::xplain( %args, no_elements => 1, no_mods => 0, );
my $con = $self->{content};
if( $con =~ m{\\g\d+} ){
push @{$$ret{start}}, (
"TODO warn REPORT BUG \\g10 UNRECOGNIZED AS A PPIx::Regexp::Token::Backreference misparsed as PPIx::Regexp::Token::Literal (\\g10 is not \\10, can't be treated as octal)",
$self->xplain_desc('n_exist_group')
);
}
{ no warnings 'uninitialized';
if( $self->ancestor_isa('PPIx::Regexp::Structure::CharClass') and $con =~ m{^\\\d+$} and eval { $self->next_sibling->{content} eq '-' } ){
push @{$$ret{start}}, 'ERROR warn TODO REPORTBUG octals NOT PARSED AS PPIx::Regexp::Node::Range';
}
}
return $ret;
}
sub PPIx::Regexp::Token::Recursion::xplain {
#~ my( $self ) = @_;
#~ my $ret = &PPIx::Regexp::Node::xplain;
my( $self , %args ) = @_;
## guessing ## $ perl -le " warn int m{(lc(?i:(?1)|end)+)}smx for qw/ lcend lclcend lclcEND lcLCend /"
#~ $ perl -e " print int m{(lc(?i:(?1)|end)+)}smx for qw/ lcend lclcend lclcEND lcLCend /"
#~ 1110
$args{no_mods}=1;
my $ret = $self->PPIx::Regexp::Node::xplain( %args );
my $absolute = eval { $self->absolute } ;
my $number = eval { $self->number } ;
if( $number ){
my $padd = eval{ $self->find_recursion_number( $absolute )->address };
if( my ( $direction ) = $number =~ /^([\-\+])/ ){
my $desckey = $direction eq '+' ? 'mnext_nth_capture' : 'mprev_nth_capture';
push @{$$ret{start}}, $self->xplain_desc( $desckey, $number );
}
$absolute and push @{$$ret{start}}, $self->xplain_desc('match_the_capture', ($absolute) x 3 );
if( $padd ){
push @{$$ret{start}}, $self->xplain_desc('m_recur_ata').$padd;
} else {
push @{$$ret{start}}, $self->xplain_desc('n_exist_group')
}
}
if( my $name = eval { $self->name } ){
push @{$$ret{start}}, 'name='.$name.join(' alias ','', '"(?&'.$name.')"', '"(?P>'.$name.')"' );
my $padd = eval{ $self->find_recursion_number( $name )->address };
if( $padd ){
push @{$$ret{start}}, $self->xplain_desc('m_recur_ata').$padd;
} else {
push @{$$ret{start}}, $self->xplain_desc('n_exist_group')
}
}
return $ret;
}
sub PPIx::Regexp::Token::Backreference::xplain {
my( $self ) = @_;
push @_, ( no_mods => 0 ); ## susceptible except /x
my $ret = &PPIx::Regexp::Node::xplain;
my $absolute = eval { $self->absolute } ;
my $number = eval { $self->number } ;
if( $number ){
my $padd = eval{ $self->find_recursion_number( $absolute )->address };
if( my ( $direction ) = $number =~ /^([\-\+])/ ){
my $desckey = $direction eq '+' ? 'mnext_nth_capture' : 'mprev_nth_capture';
push @{$$ret{start}}, $self->xplain_desc( $desckey, $number );
}
$absolute and push @{$$ret{start}}, $self->xplain_desc('match_the_capture', ($absolute) x 3 );
if( $padd ){
push @{$$ret{start}}, $self->xplain_desc('m_recur_ata').$padd;
} else {
push @{$$ret{start}}, $self->xplain_desc('n_exist_group');;
}
}
if( my $name = eval { $self->name } ){
push @{$$ret{start}}, 'L<perlre/(?P=NAME)>';
#~ push @{$$ret{start}}, 'L<perldebguts/NREF>'; ## TODO?? NREFF? NREFFL? NREFFU? NREFFA? unlinkable
push @{$$ret{start}}, 'MATCH "\\g{'.$name.'}"'.join(' alias ','', '"\\k<'.$name.'>"', '"(?&'.$name.')"', '"(?P>'.$name.')"' );
if( my $capture = $self->find_recursion_number( $name ) ){
push @{$$ret{start}}, $self->xplain_desc('m_recur_ata').$capture ->address;
} else {
push @{$$ret{start}}, $self->xplain_desc('n_exist_group');
}
}
return $ret;
}
sub PPIx::Regexp::Token::Condition::xplain {
#~ push @_, ( no_con_desc => 0 ); ## want it, its not no doubling
#~ my( $self ) = @_;
#~ my $ret = &PPIx::Regexp::Node::xplain;
my( $self , %args ) = @_;
$args{no_mods}=1; ## 2013-08-05-04:38:50
delete $args{no_con_desc}; ## 2013-08-11-19:19:21
my $ret = $self->PPIx::Regexp::Node::xplain( %args );
my $absolute = eval { $self->absolute } ;
my $number = eval { $self->number } ;
my $name = eval { $self->name } ;
my $con = eval { $self->content } ;
my $check_prefix = $self->xplain_desc('check_prefix');
if( my( $left, $right) = $con =~ m{
^
\(
( # $1
<
|
'
|
R\&?
)? ## first alteration is optional
( # $2
[^\)]*
)
\)
}x ){
my $gotr = defined $left ? ( $left =~ /^R/ ) : 0 ;
my $desc = join '',
( $gotr ? $left : () ),
( $number ? 'n' : () ),
( $name ? 'NAME' : () ),
;;;;;;;;;;;;;
if($name and not $gotr) {
$desc = "(<$desc>)" ;
} else {
$desc = "($desc)";
}
push @{$$ret{start}}, $self->xplain_desc( $desc );
}
if( $number ){
my $padd = eval{ $self->find_recursion_number( $absolute )->address };
#~ 2013-08-11-20:40:29 apparently this will never happen because (?(1)) is legal but (?(-1)) is not legal
if( my ( $direction ) = $number =~ /^([\-\+])/ ){
my $desckey = $direction eq '+' ? 'cnext_nth_capture' : 'cprev_nth_capture';
push @{$$ret{start}}, $self->xplain_desc( $desckey, $number );
}
$absolute and push @{$$ret{start}}, $self->xplain_desc('check_the_capture', ($absolute) x 3 );
if( $padd ){
push @{$$ret{start}}, $self->xplain_desc('cm_recur_ata', $padd );
} else {
push @{$$ret{start}}, $self->xplain_desc('n_exist_group')
}
}
if( $name ){
push @{$$ret{start}}, 'L<perlre/(?(condition)yes-pattern)>';
#~ push @{$$ret{start}}, $check_prefix.'"\\g{'.$name.'}"'.join(' aka ','', '"(?&'.$name.')"', '"(?P>'.$name.')"' );
push @{$$ret{start}}, $self->xplain_desc('check_n_capture', ($name) x 3 );
if( my $capture = $self->find_recursion_number( $name ) ){
push @{$$ret{start}}, $self->xplain_desc('cm_recur_ata', $capture ->address );
} else {
push @{$$ret{start}}, $self->xplain_desc('n_exist_group');
}
}
if( $con eq '(DEFINE)'){
## grr #~ Can't locate object method "find" via package "PPIx::Regexp::Token::Condition"
#~ if( not $self->find(sub{ return 1 if $_[0]->isa('PPIx::Regexp::Structure::NamedCapture'); return 0; }, ) ){
#~ if( not $self->find_first(sub{ return 1 if $_[0]->isa('PPIx::Regexp::Structure::NamedCapture'); return 0; }, ) ){
### belongs in parent?
if( not $self->parent->find_first(sub{ return 1 if $_[0]->isa('PPIx::Regexp::Structure::NamedCapture'); return 0; }, ) ){
push @{$$ret{start}}, $self->xplain_desc('(DEFINE)pointless');
}
}
return $ret;
} ## end of fudgy sub PPIx::Regexp::Token::Condition::xplain
sub PPIx::Regexp::Element::find_recursion_number {
goto &PPIx::Regexp::Node::find_recursion_number
}
sub PPIx::Regexp::Node::find_recursion_number {
my( $self, $n ) = @_;
$self->root->find_first( sub {
my $type = $_[1]->isa('PPIx::Regexp::Structure::NamedCapture')
|| $_[1]->isa('PPIx::Regexp::Structure::Capture')
|| $_[1]->isa('PPIx::Regexp::Token::GroupType::NamedCapture')
;;;
if( $type ){
if( my $number = eval{$_[1]->number} ){
return 1 if $number eq $n ;## found a good one
}
if( my $name = eval{$_[1]->name} ){
return 1 if $name eq $n ;## found a good one
}
}
return 0; ## keep searching
}, );
}
sub PPIx::Regexp::Structure::Capture::xplain {
my( $self ) = @_;
my $ret = &PPIx::Regexp::Node::xplain;
my $number = eval { $self->number } ;
my $name = eval { $self->name };
$name and $name = 'name='.$name.join(' alias ','', '"\\g{'.$name.'}"', '"\\k<'.$name.'>"', '"(?&'.$name.')"', '"(?P>'.$name.')"' , qq{"\$+{$name}"} );;
$number and $number = 'number='.$number.' alias "$'.$number.'" or "\\'.$number.'"';
$number and push @{$$ret{start}}, $number ;
$name and push @{$$ret{start}}, $name;
delete $ret->{start_con};
$$ret{start_hr} = join '', "# ", '-' x ( 2 * $$ret{depth} ) ;
## the fudgyness goes on
$number and push @{$ret->{chits}[-1]{start}}, $self->xplain_desc('eo_grouping', $number );
$name and push @{$ret->{chits}[-1]{start}}, $self->xplain_desc('eo_grouping', $name );
return $ret;
}
sub PPIx::Regexp::Element::xplain { goto &PPIx::Regexp::Node::xplain }
sub PPIx::Regexp::Node::xplain {
my( $self, %args ) = @_;
my $depth = $args{depth} || 0;
my $start = $self->xplain_start( %args );
my @chits;
my $ret = { depth => $depth, start => $start , chits => \@chits };
$$ret{start_hr} = join '', "# ", '-' x ( $args{hr_length} || 66 );
$$ret{start_con} = Data::Dump::pp( $self->content ).',';
if( $args{no_elements} ){
delete $$ret{chits};
return $ret;
}
for my $start ( eval { $self->start } ){
push @chits, $start->xplain( %args, depth => $depth );
}
for my $type ( eval { $self->type } ){
if( my @exp = $type->xplain( %args, depth => $depth + 1 ) ){
push @chits, @exp;
}
}
for my $child ( eval { $self->children } ){
if( eval{ $child->children } ){ #$haskids
push @chits, $child->xplain( %args, depth => $depth + 3 );
} else {
push @chits, $child->xplain( %args, depth => $depth + 2 );
}
}
for my $finish ( eval { $self->finish } ){
push @chits, $finish->xplain( %args, depth => $depth );
}
if( not @$ret{chits} ){
delete $$ret{chits};
}
return $ret;
} ## end of sub PPIx::Regexp::Node::xplain
sub PPIx::Regexp::Structure::Modifier::xplain {
push @_, no_mods => 1;
goto &PPIx::Regexp::Structure::xplain
}
sub PPIx::Regexp::Token::GroupType::Modifier::xplain {
my( $self , %args ) = @_;
my $ret = $self->PPIx::Regexp::Node::xplain( %args, no_elements => 1 , no_mods => 0 );
if( my @mods = eval { $self->modifiers } ){ ## yick
my %mods = @mods; %mods and push@{$$ret{start}}, join(' ', 'mods(', map { " $_ = $mods{$_} "} keys%mods ).')';;
my @exp = xplain_modifiers( $self );
@exp and push @{$$ret{start}}, @exp;
}
##fudgy
return $ret;
}
sub PPIx::Regexp::Token::Modifier::xplain {
my( $self , %args ) = @_;
delete $args{no_mods};
my $ret = $self->PPIx::Regexp::Node::xplain( %args );
push @{$$ret{start}}, xplain_modifiers( $self );
my $con = eval{ $self->{content}};
if( length $con and $self != $self->root->modifier and $con =~ m{^\(.+\)$}sm ){
push @{$$ret{start}}, $self->xplain_desc( "token_modifier_propagates_right" );; ## 2013-07-30-03:02:24 TODO MORE RET KEYS
}
$ret;
}
sub PPIx::Regexp::Token::Operator::xplain {
my( $self , %args ) = @_;
$args{no_elements} = 1;
$args{no_mods} = 1;
$args{no_con_desc} = 1;
my $ret = $self->PPIx::Regexp::Node::xplain( %args );
my @desc;
if( @desc = eval { $self->xplain_desc( ref($self->parent). $self->{content} ) } )
{
push @{$$ret{start}}, @desc ;
my $refpc = ref($self->parent). $self->{content};
my ( $method, $inx ) = $self->_my_inx();
if( $refpc eq "PPIx::Regexp::Structure::CharClass".'^' and $method ne 'type' ){
push @{$$ret{start}}, "ERROR warn TODO REPORT BUG THIS IS LITERAL ^ NOT NEGATION";
}
if( $refpc eq "PPIx::Regexp::Structure::CharClass".'^'
and $method eq 'type'
and eval { $self->parent->child( 0 )->content eq ']' }
){
push @{$$ret{start}}, "ERROR warn TODO REPORT BUG A LONE ^ IN A CHARCLASS IS A IS LITERAL ^ NOT NEGATION";
}
} elsif( $self->ancestor_isa('PPIx::Regexp::Structure::RegexSet') and @desc = eval { $self->xplain_desc( 'regexset.'.$self->{content} ) } )
{
push @{$$ret{start}}, @desc ;
} elsif( @desc = eval { $self->xplain_desc( $self->{content} ) } )
{
push @{$$ret{start}}, @desc ;
}
return $ret;
}
sub PPIx::Regexp::Element::ancestor_isa {
my( $self, $type ) = @_;
my $root = $self;
while( my $newp = $root->_parent ){
return 1 if $type eq ref $newp;
$root = $newp ;
}
return 0;
}
sub PPIx::Regexp::Token::Backtrack::xplain {
my( $self ) = @_;
push @_, ( no_con_desc => 1 ); ## no doubling
my $ret = &PPIx::Regexp::Node::xplain;
my( $con ) = $ret->{start_con} =~ m{ \( ( [^\)]+ ) \) }x;
my( $verb, $arg ) = split /\:/, $con;
if( $verb and $arg ){
if( my @desc = $self->xplain_desc( "($verb:NAME)" ) ){
push @{$$ret{start}}, @desc;
} else {
push @{$$ret{start}}, $self->xplain_desc( "(*UNKNOWN:NAME)", $verb, $arg ) ;
}
}
if( $verb ){
if( my @desc = $self->xplain_desc( "($verb)" ) ){
push @{$$ret{start}}, @desc;
} else {
push @{$$ret{start}}, $self->xplain_desc( "(*UNKNOWN)", $verb ) ;
}
}
$ret;
}
sub PPIx::Regexp::Token::Unknown::xplain {
my( $self ) = @_;
my $ret = &PPIx::Regexp::Node::xplain;
push @{$$ret{start}}, $self->xplain_desc( ref($self).$self->{content} ); ## grr
if( '?' eq $self->{content} ){
push @{$$ret{start}}, $self->xplain_desc( 'quant_f_not' ); ## meh
}
return $ret;
}
sub PPIx::Regexp::Token::GroupType::Switch::xplain {
push @_, ( no_con_desc => 1 ); ## no doubling
goto &PPIx::Regexp::Node::xplain;
}
sub PPIx::Regexp::Token::Delimiter::xplain {
push @_, ( no_con_desc => 1 ); ## s\\\sex \ with parent modifiers becomes \s
goto &PPIx::Regexp::Node::xplain;
}
#~ $ perl -Mre=debug -wle " qr/[a-z]/i"
#~ $ perl lolabe.pl -t -ddr " qr/[a-z]/i"
#~ $ perl lolabe.pl -t " qr/[a-z]/i"
sub unicode10 {
my $it = Unicode::UCD::charinfo( $_[0] );
my $uc10 = $it->{unicode10} || $it->{name};
return $uc10;
}
sub PPIx::Regexp::Node::Range::xplain {
#~ my( $self ) = @_;
#~ my $ret = &PPIx::Regexp::Node::xplain;
my( $self , %args ) = @_;
$args{no_elements} = 1; ## doesn't interfere with wxPPI
my $ret = $self->PPIx::Regexp::Node::xplain( %args );
my( $left, $right ) = map{ $self->schild($_)->ordinal } 0 , -1;
#~ push @{$$ret{start}}, sprintf("code points %d to %d", $left, $right );
push @{$$ret{start}}, sprintf("code points ord(chr( %d )) to ord(chr( %d ))", $left, $right );
#~ push @{$$ret{start}}, sprintf("code points \\%03o to \\%03o", $left, $right );
#~ push @{$$ret{start}}, sprintf("code points '\\N{U+%04.4X}' to '\\N{U+%04.4X}'", $left, $right );
push @{$$ret{start}}, sprintf('code points "\N{U+%04.4X}" to "\N{U+%04.4X}"', $left, $right );
$left = unicode10( $left );
$right = unicode10( $right );
if( $left and $right ){
push @{$$ret{start}}, sprintf('characters "%s" to "%s"', $left, $right );
}
$ret;
}
#~ sub PPIx::Regexp::Token::Comment::xplain { goto &PPIx::Regexp::Token::Whitespace::xplain }
sub PPIx::Regexp::Token::Comment::xplain {
my( $self, %args ) = @_;
$args{no_con_desc}=1;
my $ret = $self->PPIx::Regexp::Token::xplain( %args );
}
sub PPIx::Regexp::Token::Whitespace::xplain {
#~ return Data::Dump::pp( $_[0]->content ).','; ## content
return { start_con => Data::Dump::pp( $_[0]->content ).',' }; ## content
}
sub PPIx::Regexp::Element::xmods { goto &PPIx::Regexp::Node::xmods }
sub PPIx::Regexp::Node::xmods {
my $xmods = $_[0]->{xmods};
return $xmods
? wantarray
? %{ $xmods }
: $xmods
: ();
}
sub PPIx::Regexp::Element::xmods_susceptible { goto &PPIx::Regexp::Node::xmods_susceptible }
sub PPIx::Regexp::Node::xmods_susceptible {
use List::Util();
return List::Util::first {
$_[0]->isa($_)
} qw/ PPIx::Regexp::Token::Literal
PPIx::Regexp::Token::Reference
PPIx::Regexp::Token::CharClass
PPIx::Regexp::Token::Interpolation
PPIx::Regexp::Token::Assertion
/
;;;;
}
#~ 2013-07-26-03:43:03
#~ sub PPIx::Regexp::is_qr { goto &PPIx::Regexp::is_compile }
#~ sub PPIx::Regexp::is_compile { return !! $_[0]->root->child( 0 )->content eq 'qr' }
#~ sub PPIx::Regexp::is_substitute { return !! $_[0]->root->replacement }
#~ sub PPIx::Regexp::is_match { return !( $_[0]->is_substitute && $_[0]->is_compile ) }
#~ #~ is_compile { eval { $_[0]->root->source->isa('PPI::Token::QuoteLike::Regexp') } }
#~ #~ is_match { eval { $_[0]->root->source->isa('PPI::Token::Regexp::Match') } }
#~ #~ is_substitute { eval { $_[0]->root->source->isa('PPI::Token::Regexp::Substitute') } }
#~ sub PPIx::Regexp::is_compile { return !! $_[0]->child( 0 )->content eq 'qr' }
sub PPIx::Regexp::is_compile { return !! $_[0]->type->content eq 'qr' }
sub PPIx::Regexp::is_substitute { return !! $_[0]->replacement }
sub PPIx::Regexp::is_match { return !( $_[0]->is_substitute && $_[0]->is_compile ) }
#~ sub PPIx::Regexp::xtype {
#~ return $_[0]->is_substitute
#~ ? 's'
#~ : $_[0]->is_compile
#~ ? 'qr'
#~ : 'm'
#~ ;;;;;;;;
#~ }
sub PPIx::Regexp::xtype {
return
$_[0]->is_substitute ? 's'
: $_[0]->is_compile ? 'qr'
: 'm';
}
#~ 2013-07-28-16:31:40
#~ (?see-n) would return
#~ my( $modorder, $modcount ) = [ qw/ s e e n / ], { s => 1 , e => 2, n => 0 }
#~
#~ if( exists $modcount->{e} and my $count = $modcount->{e} ){
#~ ## we saw it, and it was on this many times
#~ }
#~
#~
sub PPIx::Regexp::Token::Modifier::xmods_explode {
my $notroot = int eval { $_[0] != $_[0]->root->modifier };
my $con = $_[0]->{content};
my @mods;
my %mods;
$con =~ s{
^
(?:
\Q(?\E
|
\Q?\E
)
|
(?:
\Q:\E
|\Q)\E
|\Q:)\E
) $
}{}gx;
my( $onners, $offers ) = ( split( '-', $con ), '','' );
my @onners = $onners =~ m/(.)/g;
my @offers = $offers =~ m/(.)/g;
$mods{$_}++ for @onners;
#~ delete @mods{@offers} ; ## OFFERS TRUMP ONNERS
$mods{$_}=0 for @offers ; ## OFFERS TRUMP ONNERS
@mods = ( @onners, @offers );
if( $notroot ){
if( my @goners = grep { exists $mods{ $_ } } '?', ':', '(', ')', ){ ## JIC
delete @mods{ @goners } ;
$_[0]->root->{xfailures}+=int@goners ; ## GRR
}
}
if( ( my @two = grep { $mods{$_} } qw/ a d l u / ) > 1 ){
$_[0]->root->{xfailures}+=int@two; ## GRR
}
return \@mods, \%mods;
}
### walks tree, can->modifiers
sub PPIx::Regexp::xmods_propagate {
my( $node , $depth, $xmods ) = @_;
$depth ||= 0;
$xmods ||= {};
if( $node->isa('PPIx::Regexp') ){
my($arraymods, $hashmods ) = $node->modifier->xmods_explode;
%{$xmods} = %{$hashmods};
$node->root->{xmods} = {%{$xmods}}; ## markit
delete $xmods->{o}; ## do not propagate
PPIx::Regexp::xmods_propagate( $node->regular_expression, 0, $xmods );
} else {
my @kids = eval { $node->elements };
if( not @kids ){
@kids = map { eval { $node->$_ } } qw{ start type children finish };
}
for my $kid ( @kids ){
if( $kid->can('modifiers') ){
my( $arraymods, $hashmods ) = $kid->xmods_explode;
my %newmods = $kid->xmerge_mods( $xmods, $hashmods );
if( $kid->isa( 'PPIx::Regexp::Token::GroupType::Modifier' ) ){
$xmods = \%newmods; ### propagate to children (?i:)
} else { ## propagate to siblings (?i)
%{$xmods} = $kid->xmerge_mods( $xmods, $hashmods );
}
#~ if( my $p = delete $xmods->{p} ){ ## TODO NOT WORKING
#~ $node->root->{xmods}{p} = $p; ## cause (?p)/p is global, propagates UP
#~ }
}
if( $kid->xmods_susceptible ){
delete $xmods->{o}; ## do not propagate, mods.o, error but not UNKNOWN , grrr
$kid->{xmods}={%{$xmods}};
}
PPIx::Regexp::xmods_propagate( $kid , $depth + 1, $xmods );
}
}
return;
}
###
sub PPIx::Regexp::Element::xmerge_mods {
my( $self , $old, $new ) = @_;
my %mods = %$old;
while( my( $mod, $on ) = each %$new ){
if( $on ){
$mods{$mod} = $on;
} else {
#~ $mods{$mod} = !!0; #off
$mods{$mod} = 0; #off
}
}
if( my $ms = $mods{match_semantics} ){
$mods{ $ms } ||= 1;
}
### ^ a d l u p match_semantics proper merging propagation
### if new ones, nuke existing ones
if( my @mss = grep { $new->{ $_ } } qw/ ^ a d l u / ){
delete @mods{ qw/ ^ a d l u / };
for my $ms ( @mss ){
if( my $msv = $new->{ $ms } ){ ## and propagate new one (cause ONE)
$mods{ $ms } = $msv;
#~ $mods{ match_semantics } = $ms;
$mods{ match_semantics } = $ms eq '^' ? 'd' : $ms;
last;
}
}
}
return wantarray ? %mods : \%mods;
}
## for ->can('modifiers') explodes {content} and explains all options, even the mistakes
## for non-can-modifiers explains xmods, but not UNKNOWNS (mistakes without descriptions)
## increments xfailures (grr)
sub xplain_modifiers {
my( $self , $OPTS ) = @_;
undef $OPTS if not ref $OPTS;
my @ret;
my $can_modifiers = $self->can('modifiers');
my $specialEEE = 0;
my %seen;
my %mods = eval { $self->xmods };
my @mods = keys %mods;
if( !%mods and $can_modifiers ){
my( $arraymods, $hashmods ) = $self->xmods_explode;
@mods = @$arraymods;
%mods = %$hashmods;
}
#~ (?adlupimsx-imsx) <<< perlre
#~ (?^alupimsx) <<< perlre
#~ (?^msixp..ual) <<< perlre
#~ (?msixp.dual-imsx) <<< perlre
#~ qr/STRING/msixpodual <<< perlop
#~ m/PATTERN/msixpodualgc <<< perlop
#~ m?PATTERN?msixpodualgc <<< perlop
#~ s/PATTERN/REPLACEMENT/msixpodualgcer <<< perlop
my @prefix = ( 'mods.', 'match_semantics.' ); ## for inline, for (?adlupimsx-imsx)
if( $self == eval { $self->root->modifier } ){
if( $self->root->is_substitute ){
@prefix = ( 'mods/s/', 'mods/', 'match_semantics.' , ); # s///eee
$specialEEE ++;
} else {
@prefix = ( 'mods/', 'match_semantics.', ); ## m//x qr//x
}
} else {
#~ delete @mods{qw/ p /}; ## (?p) is global like /p is global in 5.16, so JIC don't propagate this past root
#~ 2013-07-30-03:13:33 mistake is mistake, propagate it, don't explain it except in PPIx::Regexp::Token::Modifier
}
delete $mods{match_semantics}; ## not used by xplain_modifiers
if( grep { $self->isa($_) } qw/ PPIx::Regexp::Token::Literal PPIx::Regexp::Token::CharClass / ){
delete @mods{qw/ p m s x /};
## LITERALS are susceptible to ^ a d l u for case-insensitivity BUT NOT p m s x
}
#~ if( grep { $self->isa($_) } qw/ PPIx::Regexp::Token::CharClass::Simple / ){
#~ delete @mods{qw/ i /}; ## \d \w aren't susceptible to case , never case sensitive
#~ }
#~ \p{Latin} is susceptible, grrr
#~ \p{Latin} is perl_version_introduced=5.006001
#~ BUT \p{Latin} currently doesn't get called modifiers
#~ 2013-07-22-03:29:41 GUESSING!!!!
if( grep { $self->isa($_) } qw/ PPIx::Regexp::Token::Recursion / ){
#~ delete @mods{qw/ p match_semantics /};
#~ 2013-08-03-16:40:48
#~ $ perl -e " print int m{(lc(?i:(?1)|end)+)}smx for qw/ lcend lclcend lclcEND lcLCend /"
#~ 1110
undef %mods;
}
#~ 2013-08-03-16:43:40
## susceptible to /i , not /x, cause its a string-literal not a pattern
#~ $ perl -e " print int m{(lc)(?i:\1)} for qw/ lclc lcLC / "
#~ 11
### might interact somehow with a/aa that needs explaining
if( grep { $self->isa( $_ ) } qw/ PPIx::Regexp::Token::Backreference / ){
delete @mods{qw[ x m s p a d l u ]};
}
delete @mods{ eval{ @{ $OPTS->{GONERS} } } }; ###### m/\w/i
@mods = grep { exists $mods{$_ } } @mods;
### THE LEGITIMATE DOUBLES (/aa, /ee)
if( my $count = $mods{a} ){
if( $count > 1 ){
push @ret, $self->xplain_desc("match_semantics.aa" ); ## TODO more
}
}
if( $specialEEE and $can_modifiers and $mods{e} and 1 != ( my $count = $mods{e} ) ){
push @ret, $self->xplain_desc( 'mods/s/ee', $count-1 );
}
MODSLOOP:
for my $mod ( @mods ){
my $count = $mods{ $mod } ;
my $sufix = $count ? $mod : '-'.$mod;
next if not defined $count; ## exists $mods{ $mod } ## cause /(?-x:i)/x
for my $prefix( @prefix ){
next MODSLOOP if $seen{$mod};
if( my @desc = $self->xplain_desc( "$prefix$sufix" ) ){
push @ret, @desc;
$seen{$mod}++;
next MODSLOOP;
}
}
if( $can_modifiers and !exists$seen{$mod} ){
$self->root->{xfailures} ++;
push @ret, $self->xplain_desc( "$prefix[-2]unknown", $sufix, $sufix ); ## ICK!!!
}
} ## end MODSLOOP
if( $can_modifiers ){
for my $twice ( qw/ d l u / ){
if( exists $mods{$twice} and $mods{$twice} > 1 ){
push @ret, $self->xplain_desc("mods.nottwice", $twice ); ## perlbug ## $ perl -e " m/(?ad)/ "
}
}
if( $mods{a} ){
if( $mods{a} > 2 or int( grep { exists $mods{$_} } qw/ d l u / ) ){
push @ret, $self->xplain_desc("mods.twicemax", "a" ); ## $ perl -e " m/(?aaa)/ "
}
}
if( ( my @two = grep { $mods{$_} } qw/ a d l u / ) > 1 ){
$self->root->{xfailures}++; ## YUCK
while( @two > 1 ){
push @ret, $self->xplain_desc("mods.exclusive", @two[0,1] );
splice @two, 1,1;
}
}
}
return @ret;
} ## end of sub xplain_modifiers
#~ 2013-08-03-16:59:33
#~ todo sub ... xis_variable determines if fixed width pattern or variable
#~ todo sub ... xlength ditches structure/modifiers to return length of literals and charclasses
#~ todo sub ... xsets nodes/groups literals seperated by operators
#~ todo sub ... xis_fixed_width determines if fixed width
#~ todo sub ... xis_variable_width
#~ todo sub ... quantized
#~ todo sub ... quantified group quantified nodes, establish children/finish quantifier relationship,
#~ no extra indentation levels, no address changes???
#~ todo PPIx::Regexp::Structure::Quantized
#~ todo PPIx::Regexp::Structure::String (2+ literal tokens)
#~ todo PPIx::Regexp::Structure::Literals (2+ literal tokens)
#~ 2013-08-05-01:31:56 false positive on /(?<!a|(i:a))/
#~ 2013-08-11-03:08:48 false positive no more
sub PPIx::Regexp::Node::xis_fixed_width {
my( $self ) = @_;
my $is_variable = 0;
#~ $is_variable++ if $self->find_first( sub { return 1 if $_[1]->isa('PPIx::Regexp::Token::Quantifier') ; }, );
my $problems = $self->find( sub {
return 1 if grep { $_[1]->isa( $_ ) } qw/
PPIx::Regexp::Token::Quantifier
PPIx::Regexp::Structure::Quantifier
PPIx::Regexp::Token::Reference
/;
return 0;
},
);
$problems ||= [];
for my $prob ( @$problems ){
if( $prob->can('xn_comma_m') ){
my( $n, $comma, $m , @ncm ) = $prob->xn_comma_m;
if( $n != $m ){
$is_variable++;
}
} else {
$is_variable++;
}
}
$is_variable and return ! $is_variable; ## SAVE SOME WORK
#~ perl lolabe.pl -t -ddr " m/(?<!a|aa)E/" >2
#~ perl lolabe.pl -t -ddr " m/(?<!a?)E/" >2
#~ perl lolabe.pl -t -ddr " m/(?<!a{2,3})E/" >2
#~ perl lolabe.pl -t -ddr " m/(?<!a|aa)E/; m/(?<!a?)E/; m/(?<!a{2,3})E/; m/(?<=a|(?i:a)|a)/" >2
#~ perl lolabe.pl -t -ddr " m/(?<=a|(?i:a)|a)/" >2
#~ perl lolabe.pl -t -ddr " m/\d++\d{1,2}(?<=a|(?i:a)|a)/" >2
#~ perl lolabe.pl -t -ddr " m/(?<!a|(?-i:(?i:a)))(?<=a|(?i:a))/" >2
#~ 2013-08-11-02:29:24
#~ variable PPIx::Regexp::Token::Reference #~ $ perl -Mre=debug -wle "m/(.)(?<=a|\1)/"
#~ 2013-08-11-02:44:30 2013-08-11-03:04:35 gah incremental
#~ perl lolabe.pl -t -ddr " m/(?<!a|(?-i:(?i:a)))(?<=a|(?i:a))/" >2
#~ 2013-08-11-03:19:56
#~ typo #~ perl lolabe.pl -t -ddr " m/(?<!a|(?-i:(?i:(?[a]))))(?<=a|(?i:a))/" >2
#~ #~ perl lolabe.pl -t -ddr " m/(?<!a|(?-i:(?i:(?[[a]]))))(?<=a|(?i:a))/" >2
#~ #~ perl lolabe.pl -t -ddr " m/(?<!a|(?-i:(?i:(?[\w]))))(?<=a|(?i:a))/" >2
#~ #~ perl lolabe.pl -t -ddr " m/(?<!aa|(?[[a]])(?-i:(?i:(?[\w]))))(?<=a|(?i:a))/" >2
#~ 2013-08-11-03:40:23
#~ #~ perl lolabe.pl -t -ddr " m/(?<!aaa|\w(?[[a]])(?-i:(?i:(?[\w]))))(?<=a|(?i:a))/" >2
my @lengths = ( 0 );
#~ for my $kid ( $self->elements ){
for my $kid ( $self->children ){
if( $kid->isa('PPIx::Regexp::Token::Operator') ){ ## assume alteration
push @lengths, 0;
} else {
#~ $lengths[-1] += length( $kid->content );
$lengths[-1] += length( $kid->content ) - xstf_length( $kid );
#~ my $lc = length( $kid->content ) ;
#~ my $xc = xstf_length( $kid );
#~ $kid->{xlc}=$lc;
#~ $kid->{xxc}=$xc;
#~ $lengths[-1] += $lc - $xc;
#~ warn pp { damn =>scalar( $kid->content), lc => $lc , xc => $xc } ;
#~ warn pp( my $c = $kid->content );
#~ $lengths[-1] += length( $c );
}
}
$self->{xxls}=[@lengths];
#~ warn pp\@lengths;
while( @lengths > 1 ){
if( $lengths[-1] != $lengths[-2] ){
$is_variable++;
}
pop @lengths;
}
## grr, too many tokens can_be_quantified
#~ PPIx::Regexp::Token::Structure can_be_quantified even though its structural
#~ PPIx::Regexp::Token::Operator can_be_quantified even though its an OPERATOR
return ! $is_variable;
}
sub PPIx::Regexp::Structure::Assertion::xplain {
my( $self, %args ) = @_;
delete $args{no_elements};
my $ret = $self->PPIx::Regexp::Structure::xplain( %args );
if( not $self->xis_fixed_width ){
push @{$$ret{start}}, $self->xplain_desc('errn'.$self->type->content);
}
return $ret;
}
#~ dodgy , force unicode if: 1) string is utf8 2) pattern is utf8 3) pattern mentions codepoint above 255 4)pat uses unicode name \N{} 5)pat uses unicode property \p{} 6) pat uses RegexSet (?[ ])
#~ force unicode semantics if
#~ #~ 1 the target string is encoded in UTF-8; or
#~ #~ 2 the pattern is encoded in UTF-8; or
#~ #~ 3 the pattern explicitly mentions a code point that is above 255 (say by \x{100} ); or
#~ #~ 4 the pattern uses a Unicode name (\N{...} ); or
#~ #~ 5 the pattern uses a Unicode property (\p{...} ); or
#~ #~ 6 the pattern uses (?[ ])
sub xdodgy_unicode_override {
my( $self ) = @_;
#~ perl lolabe.pl -t -ddr "m/\pN\p{N}\x{100}\N{Kelvin}/d" >2
my $unicode = 0;
my %why;
$self->find( sub {
#~ $unicode += grep { $_[1]->isa( $_ ) } qw/ PPIx::Regexp::Structure::RegexSet / ;
if( $_[1]->isa('PPIx::Regexp::Structure::RegexSet') ){
$unicode++;
$why{'dodgy-u-rset'}++;
}elsif( $_[1]->isa('PPIx::Regexp::Token::Literal') ){
my $ord = $_[1]->ordinal;
#~ if( not defined $ord or $ord > 255 or ( $_[1]->{content} =~ m{^\\[pPN]} ) ){
if( defined $ord and $ord > 255 ){
$unicode++;
#~ push @why, 'dodgy-u-255';
$why{'dodgy-u-255'}++;
}elsif( $_[1]->{content} =~ m{^\\[pP]} ){
$unicode++;
$why{'dodgy-u-prop'}++;
}elsif( $_[1]->{content} =~ m{^\\[N]} ){
$why{'dodgy-u-name'}++;
$unicode++;
}
}elsif( $_[1]->isa('PPIx::Regexp::Token::CharClass::Simple') ){
if( my $con = eval{$self->{content}} ){
if( $con =~ /^\\[pP]\{/ ){
$why{'dodgy-u-prop'}++;
$unicode++;
}
}
}
return 0;
} );
## pattern encoded in UTF-8 check
#~ return $unicode ;
return keys %why;
}
1;
sub PPIx::Regexp::Token::GroupType::NamedCapture::xplain {
my( $self, %args ) = @_;
return {
depth => $args{depth},
start => [
'address='. $self->address,
$self->xplain_desc( ref $self),
],
start_con => Data::Dump::pp( $self->content ).',',
start_hr => join '', "# ", '-' x ( $args{hr_length} || 66 ),
};
}
#~ 2013-08-11-01:50:27
#~ PPIx::Regexp::Token::Structure
#~ PPIx::Regexp::Token::GroupType
#~ sub is_start { $_[0]->address =~ m{/[S]\d+$}i }
#~ sub is_type { $_[0]->address =~ m{/[T]\d+$}i }
#~ sub is_finish { $_[0]->address =~ m{/[F]\d+$}i }
#~ 2013-08-11-01:57:59
## cant use Find because of PPIx::Regexp::Structure::Quantifier \d{2,3} 2,3 are contents
## and don't want to think about that
#~ 2013-08-11-02:11:54
#~ stf PPIx::Regexp::Structure::Assertion
#~ stf PPIx::Regexp::Structure::BranchReset
#~ stf PPIx::Regexp::Structure::Capture
#~ C #~ stf+operators PPIx::Regexp::Structure::CharClass
#~ c PPIx::Regexp::Structure::Code
#~ 0E PPIx::Regexp::Structure::Main
#~ stf PPIx::Regexp::Structure::Modifier
#~ c #~ stf PPIx::Regexp::Structure::Quantifier
#~ stf PPIx::Regexp::Structure::Subexpression
#~ stf PPIx::Regexp::Structure::Switch
#~ stf PPIx::Regexp::Structure::Unknown
#~ 2013-08-11-03:18:57
#~ c0 PPIx::Regexp::Structure::RegexSet
##
#~ sspm PPIx::Regexp::Structure::Assertion PPIx::Regexp::Structure::BranchReset PPIx::Regexp::Structure::Capture PPIx::Regexp::Structure::CharClass PPIx::Regexp::Structure::Code PPIx::Regexp::Structure::Main PPIx::Regexp::Structure::Modifier PPIx::Regexp::Structure::Quantifier PPIx::Regexp::Structure::Subexpression PPIx::Regexp::Structure::Switch PPIx::Regexp::Structure::Unknown
##
#~ 2013-08-11-02:32:41
##
#~ s length of start
#~ t length of type
#~ f length of finish
#~ c length of content
#~ 0 length 0
#~ Q length is VARIABLE (ITS A QUANTIFIER)
#~ E error (shouldn't encounter this)
##
#~ first letter is length to be removed to be subtracted to obtain real-literal-fixed-width-length
#~ second letter is length to be added to obtain real-literal-fixed-width-length
#~ E is an error means its not fixed-width or its impossible situation so forget it
##
#~ c0 PPIx::Regexp::Token::Assertion
#~ c0 PPIx::Regexp::Token::Backtrack
#~ cQ PPIx::Regexp::Token::CharClass
#~ cE PPIx::Regexp::Token::Code
#~ c0 PPIx::Regexp::Token::Comment
#~ c0 PPIx::Regexp::Token::Control
#~ cEQ PPIx::Regexp::Token::Greediness
#~ c0 PPIx::Regexp::Token::GroupType
#~ c1 PPIx::Regexp::Token::Literal
#~ cE PPIx::Regexp::Token::Modifier
#~ c0 PPIx::Regexp::Token::Operator
#~ cE PPIx::Regexp::Token::Quantifier
#~ cE PPIx::Regexp::Token::Reference
#~ c0 PPIx::Regexp::Token::Structure
#~ c0 PPIx::Regexp::Token::Unknown
#~ cE PPIx::Regexp::Token::Unmatched
#~ c0 PPIx::Regexp::Token::Whitespace
#~ 2013-08-11-03:42:02
#~ c1 PPIx::Regexp::Token::CharClass::Simple
##
#~ sspm PPIx::Regexp::Token::Assertion PPIx::Regexp::Token::Backtrack PPIx::Regexp::Token::CharClass PPIx::Regexp::Token::Code PPIx::Regexp::Token::Comment PPIx::Regexp::Token::Control PPIx::Regexp::Token::Greediness PPIx::Regexp::Token::GroupType PPIx::Regexp::Token::Literal PPIx::Regexp::Token::Modifier PPIx::Regexp::Token::Operator PPIx::Regexp::Token::Quantifier PPIx::Regexp::Token::Reference PPIx::Regexp::Token::Structure PPIx::Regexp::Token::Unknown PPIx::Regexp::Token::Unmatched PPIx::Regexp::Token::Whitespace
#~ 2013-08-11-02:39:11
#~ length of structural elements to be removed
#~ to be subtracted from length of content
#~ to obtain real-literal-fixed-width-length
sub xstf_length {
my( $node , $depth ) = @_;
$depth ||= 0;
my $length = 0;
my @kids = eval { $node->elements };
if( not @kids ){
@kids = map { eval { $node->$_ } } qw{ start type children finish };
}
my $count_content = grep {
!! $node->isa($_)
} qw'
PPIx::Regexp::Structure::CharClass
PPIx::Regexp::Structure::Code
PPIx::Regexp::Structure::Quantifier
PPIx::Regexp::Structure::RegexSet
';;;;
my $count_once = grep {
!! $node->isa($_)
} qw'
PPIx::Regexp::Structure::CharClass
PPIx::Regexp::Structure::RegexSet
PPIx::Regexp::Token::CharClass
PPIx::Regexp::Token::Literal
';;;;
if( $count_once and not @kids and not $count_content ){ ## ICK! \w has length 2 but quantifies as 1
return length( $node->content )-1; ## \x{} has length > 2 but quantifies as 1
}
if( $count_content ){
return $length + length( $node->content ) - $count_once; ## cause charclasses/regexsets quantify as 1
}
for my $kid ( @kids ){
if( $kid->address =~ m{/[STF]\d+\s$}i ){
$length += length $kid->content;
#~ } elsif( $kid->children ) { ## inadequate
} elsif( $kid->isa('PPIx::Regexp::Structure') ) { ## should be stf type
$length += xstf_length( $kid, $depth+1 );
}
}
return $length;
} ## end of sub xstf_length
#~ self-fulfilling enlightenment or gratifying ignorance.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment