Last active
August 29, 2015 14:15
-
-
Save ezzatron/ea9a5ae5328c5d5b62dd to your computer and use it in GitHub Desktop.
Port of Git wildmatch to PHP
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
/* Option bits that may be OR-ed together in the $flags argument. */
const WM_CASEFOLD = 1 << 0; /* dowild() lower-cases upper-case ASCII before comparing */
const WM_PATHNAME = 1 << 1; /* '?', a single '*' and bracket classes do not match '/' */

/* Result codes returned by dowild(). */
const WM_MATCH = 0;
const WM_NOMATCH = 1;
const WM_ABORT_MALFORMED = 2;   /* "**" not delimited by slashes under WM_PATHNAME */
const WM_ABORT_ALL = -1;
const WM_ABORT_TO_STARSTAR = -2;

/* Either character, placed right after '[', inverts a bracket class. */
const NEGATE_CLASS = '!';
const NEGATE_CLASS2 = '^';
/**
 * Check whether the first $len bytes of $clazz spell exactly the class name $litmatch.
 *
 * @param string $clazz    Text that starts with the candidate class name.
 * @param int    $len      Number of leading bytes of $clazz that form the candidate name.
 * @param string $litmatch Class name to compare against (e.g. "alnum").
 *
 * @return bool True when $len equals the length of $litmatch and those bytes are identical.
 */
function CC_EQ($clazz, $len, $litmatch)
{
    /* A different length can never be the same name; this also keeps
     * strncmp() from being called with a length that does not fit. */
    if ($len != strlen($litmatch)) {
        return false;
    }

    return strncmp($clazz, $litmatch, $len) === 0;
}
/**
 * Tell whether the byte value reported by ord() for $char is in the 7-bit ASCII range.
 *
 * @param string $char Character to inspect.
 *
 * @return bool True when ord($char) is below 128.
 */
function ISASCII($char)
{
    $code = ord($char);

    return $code <= 0x7F;
}
/**
 * Tell whether $c is a blank, i.e. exactly a space or a horizontal tab.
 *
 * @param mixed $c Character to inspect.
 *
 * @return bool True only for the strings " " and "\t" (compared strictly).
 */
function ISBLANK($c)
{
    return in_array($c, [' ', "\t"], true);
}
/**
 * Tell whether $c is an ASCII character accepted by ctype_graph().
 *
 * @param string $c Character to inspect.
 *
 * @return bool False for anything ISASCII() rejects, otherwise the ctype_graph() result.
 */
function ISGRAPH($c)
{
    if (!ISASCII($c)) {
        return false;
    }

    return ctype_graph($c);
}
/**
 * Tell whether $c is an ASCII character accepted by ctype_print().
 *
 * @param string $c Character to inspect.
 *
 * @return bool False for anything ISASCII() rejects, otherwise the ctype_print() result.
 */
function ISPRINT($c)
{
    if (!ISASCII($c)) {
        return false;
    }

    return ctype_print($c);
}
/**
 * Tell whether $c is an ASCII character accepted by ctype_digit().
 *
 * @param string $c Character to inspect.
 *
 * @return bool False for anything ISASCII() rejects, otherwise the ctype_digit() result.
 */
function ISDIGIT($c)
{
    if (!ISASCII($c)) {
        return false;
    }

    return ctype_digit($c);
}
/**
 * Tell whether $c is an ASCII character accepted by ctype_alnum().
 *
 * @param string $c Character to inspect.
 *
 * @return bool False for anything ISASCII() rejects, otherwise the ctype_alnum() result.
 */
function ISALNUM($c)
{
    if (!ISASCII($c)) {
        return false;
    }

    return ctype_alnum($c);
}
/**
 * Tell whether $c is an ASCII character accepted by ctype_alpha().
 *
 * @param string $c Character to inspect.
 *
 * @return bool False for anything ISASCII() rejects, otherwise the ctype_alpha() result.
 */
function ISALPHA($c)
{
    if (!ISASCII($c)) {
        return false;
    }

    return ctype_alpha($c);
}
/**
 * Tell whether $c is an ASCII character accepted by ctype_cntrl().
 *
 * @param string $c Character to inspect.
 *
 * @return bool False for anything ISASCII() rejects, otherwise the ctype_cntrl() result.
 */
function ISCNTRL($c)
{
    if (!ISASCII($c)) {
        return false;
    }

    return ctype_cntrl($c);
}
/**
 * Tell whether $c is an ASCII character accepted by ctype_lower().
 *
 * @param string $c Character to inspect.
 *
 * @return bool False for anything ISASCII() rejects, otherwise the ctype_lower() result.
 */
function ISLOWER($c)
{
    if (!ISASCII($c)) {
        return false;
    }

    return ctype_lower($c);
}
/**
 * Tell whether $c is an ASCII character accepted by ctype_punct().
 *
 * @param string $c Character to inspect.
 *
 * @return bool False for anything ISASCII() rejects, otherwise the ctype_punct() result.
 */
function ISPUNCT($c)
{
    if (!ISASCII($c)) {
        return false;
    }

    return ctype_punct($c);
}
/**
 * Tell whether $c is an ASCII character accepted by ctype_space().
 *
 * @param string $c Character to inspect.
 *
 * @return bool False for anything ISASCII() rejects, otherwise the ctype_space() result.
 */
function ISSPACE($c)
{
    if (!ISASCII($c)) {
        return false;
    }

    return ctype_space($c);
}
/**
 * Tell whether $c is an ASCII character accepted by ctype_upper().
 *
 * @param string $c Character to inspect.
 *
 * @return bool False for anything ISASCII() rejects, otherwise the ctype_upper() result.
 */
function ISUPPER($c)
{
    if (!ISASCII($c)) {
        return false;
    }

    return ctype_upper($c);
}
/**
 * Tell whether $c is an ASCII character accepted by ctype_xdigit().
 *
 * @param string $c Character to inspect.
 *
 * @return bool False for anything ISASCII() rejects, otherwise the ctype_xdigit() result.
 */
function ISXDIGIT($c)
{
    if (!ISASCII($c)) {
        return false;
    }

    return ctype_xdigit($c);
}
/**
 * Read the character at $index, using null as the end-of-string sentinel.
 *
 * @param int    $index  Offset to read.
 * @param int    $length Length of $string as supplied by the caller.
 * @param string $string String to read from.
 *
 * @return string|null $string[$index] while $index is below $length, otherwise null.
 */
function charAt($index, $length, $string)
{
    if ($index < $length) {
        return $string[$index];
    }

    return null;
}
/**
 * Advance $index by one and read the character at the new position.
 *
 * @param int    $index  Offset, passed by reference; incremented as a side effect.
 * @param int    $length Length of $string as supplied by the caller.
 * @param string $string String to read from.
 *
 * @return string|null Whatever charAt() yields for the incremented offset.
 */
function nextChar(&$index, $length, $string)
{
    $index++;

    return charAt($index, $length, $string);
}
/**
 * Tell whether $char is one of the glob metacharacters '*', '?', '[' or '\'.
 *
 * @param string|null $char Character to inspect.
 *
 * @return bool True for a metacharacter, false otherwise.
 */
function is_glob_special($char)
{
    /* Deliberately a loose (non-strict) search: it compares the same way
     * the case labels of a switch statement do. */
    return in_array($char, ['*', '?', '[', '\\']);
}
/**
 * Evaluate one POSIX bracket class name (the "alpha" in "[:alpha:]") for a text character.
 *
 * @param string $s        Pattern text starting at the first byte of the class name.
 * @param int    $len      Length of the class name inside $s.
 * @param string $t_ch     Text character being tested.
 * @param bool   $casefold Whether WM_CASEFOLD is active; if so "upper" also accepts lower case.
 *
 * @return bool|null True/false for a recognised class, null when the name is unknown.
 */
function dowild_posix_class($s, $len, $t_ch, $casefold)
{
    if (CC_EQ($s, $len, 'alnum')) {
        return ISALNUM($t_ch);
    }
    if (CC_EQ($s, $len, 'alpha')) {
        return ISALPHA($t_ch);
    }
    if (CC_EQ($s, $len, 'blank')) {
        return ISBLANK($t_ch);
    }
    if (CC_EQ($s, $len, 'cntrl')) {
        return ISCNTRL($t_ch);
    }
    if (CC_EQ($s, $len, 'digit')) {
        return ISDIGIT($t_ch);
    }
    if (CC_EQ($s, $len, 'graph')) {
        return ISGRAPH($t_ch);
    }
    if (CC_EQ($s, $len, 'lower')) {
        return ISLOWER($t_ch);
    }
    if (CC_EQ($s, $len, 'print')) {
        return ISPRINT($t_ch);
    }
    if (CC_EQ($s, $len, 'punct')) {
        return ISPUNCT($t_ch);
    }
    if (CC_EQ($s, $len, 'space')) {
        return ISSPACE($t_ch);
    }
    if (CC_EQ($s, $len, 'upper')) {
        /* The text character was already lower-cased when folding is on,
         * so a lower-case letter has to count as "upper" in that mode. */
        return ISUPPER($t_ch) || ($casefold && ISLOWER($t_ch));
    }
    if (CC_EQ($s, $len, 'xdigit')) {
        return ISXDIGIT($t_ch);
    }

    return null;
}

/**
 * Match $text against the glob pattern $p.
 *
 * Supported syntax: '\x' (literal x), '?', '*', '**' and bracket classes with
 * ranges, '!'/'^' negation and POSIX "[:name:]" classes. The function recurses
 * on the remaining pattern/text after an asterisk.
 *
 * @param string $p     Glob pattern.
 * @param string $text  Text to match.
 * @param int    $flags Bitmask of WM_CASEFOLD and/or WM_PATHNAME.
 *
 * @return int WM_MATCH on success, WM_NOMATCH on an ordinary mismatch,
 *             WM_ABORT_ALL when the text ran out or a bracket class is unterminated
 *             or malformed, WM_ABORT_TO_STARSTAR when a single '*' hit a '/' it may
 *             not cross, WM_ABORT_MALFORMED for a '**' that is not delimited by
 *             slashes while WM_PATHNAME is set.
 */
function dowild($p, $text, $flags)
{
    $p_len = strlen($p);
    $p_idx = 0;
    $text_len = strlen($text);
    $text_idx = 0;
    $casefold = (bool) ($flags & WM_CASEFOLD);
    $pathname = (bool) ($flags & WM_PATHNAME);

    for (; null !== ($p_ch = charAt($p_idx, $p_len, $p)); ++$text_idx, ++$p_idx) {
        $t_ch = charAt($text_idx, $text_len, $text);

        /* Only an asterisk may still match once the text is exhausted. */
        if (null === $t_ch && '*' !== $p_ch) {
            return WM_ABORT_ALL;
        }

        if ($casefold) {
            /* $t_ch is null when the text is exhausted; do not hand null
             * to the character classification helpers. */
            if (null !== $t_ch && ISUPPER($t_ch)) {
                $t_ch = strtolower($t_ch);
            }
            if (ISUPPER($p_ch)) {
                $p_ch = strtolower($p_ch);
            }
        }

        /* NOTE: inside a switch, a bare "continue" acts like "break" (and
         * warns on PHP >= 7.3), so the outer loop is targeted explicitly
         * with "continue 2". */
        switch ($p_ch) {
            case '\\':
                /* Literal match with the following character. A trailing
                 * backslash yields null here, which fails the comparison
                 * in "default". */
                $p_ch = nextChar($p_idx, $p_len, $p);
                /* FALLTHROUGH */
            default:
                if ($t_ch !== $p_ch) {
                    return WM_NOMATCH;
                }
                continue 2;

            case '?':
                /* Match anything but '/' (when WM_PATHNAME is set). */
                if ($pathname && '/' === $t_ch) {
                    return WM_NOMATCH;
                }
                continue 2;

            case '*':
                if ('*' === nextChar($p_idx, $p_len, $p)) {
                    /* $p_idx now sits on the second asterisk, so the
                     * character before the first one is two positions back. */
                    $prev_p = $p_idx >= 2 ? $p[$p_idx - 2] : null;
                    while ('*' === nextChar($p_idx, $p_len, $p)) {
                        /* skip any further consecutive asterisks */
                    }
                    $current_p = charAt($p_idx, $p_len, $p);

                    if (!$pathname) {
                        /* without WM_PATHNAME, '*' == '**' */
                        $match_slash = true;
                    } elseif (
                        (null === $prev_p || '/' === $prev_p)
                        && (
                            null === $current_p
                            || '/' === $current_p
                            || ('\\' === $current_p && '/' === charAt($p_idx + 1, $p_len, $p))
                        )
                    ) {
                        /*
                         * Assuming we already matched "foo/" and are at
                         * "**" followed by a slash, first try to let it match
                         * nothing and match the rest of the pattern with
                         * the remaining text. This lets "foo/" + "**" +
                         * "/bar" match both foo/bar and foo/a/bar.
                         */
                        if (
                            '/' === $current_p
                            && WM_MATCH === dowild(
                                substr($p, $p_idx + 1),
                                substr($text, $text_idx),
                                $flags
                            )
                        ) {
                            return WM_MATCH;
                        }
                        $match_slash = true;
                    } else {
                        return WM_ABORT_MALFORMED;
                    }
                } else {
                    /* without WM_PATHNAME, '*' == '**' */
                    $match_slash = !$pathname;
                }

                $current_p = charAt($p_idx, $p_len, $p);
                if (null === $current_p) {
                    /* Trailing "**" matches everything. Trailing "*" matches
                     * only if there are no more slash characters. */
                    if (!$match_slash && false !== strpos($text, '/', $text_idx)) {
                        return WM_NOMATCH;
                    }

                    return WM_MATCH;
                }
                if (!$match_slash && '/' === $current_p) {
                    /* _one_ asterisk followed by a slash with WM_PATHNAME
                     * matches up to the next directory separator. */
                    $slash_idx = strpos($text, '/', $text_idx);
                    if (false === $slash_idx) {
                        return WM_NOMATCH;
                    }
                    $text_idx = $slash_idx;
                    /* the slash itself is consumed by the loop increment */
                    continue 2;
                }

                while (null !== $t_ch) {
                    /*
                     * Try to advance faster when an asterisk is followed
                     * by a literal: everything before that literal must
                     * belong to the asterisk. If $match_slash is false, do
                     * not look past the first slash, as it cannot belong
                     * to '*'.
                     */
                    if (!is_glob_special($current_p)) {
                        $p_ch = $current_p;
                        if ($casefold && ISUPPER($p_ch)) {
                            $p_ch = strtolower($p_ch);
                        }
                        $t_ch = charAt($text_idx, $text_len, $text);
                        while (null !== $t_ch && ($match_slash || '/' !== $t_ch)) {
                            if ($casefold && ISUPPER($t_ch)) {
                                $t_ch = strtolower($t_ch);
                            }
                            if ($t_ch === $p_ch) {
                                break;
                            }
                            $t_ch = nextChar($text_idx, $text_len, $text);
                        }
                        if ($t_ch !== $p_ch) {
                            return WM_NOMATCH;
                        }
                    }

                    $matched = dowild(
                        substr($p, $p_idx),
                        substr($text, $text_idx),
                        $flags
                    );
                    if (WM_NOMATCH !== $matched) {
                        if (!$match_slash || WM_ABORT_TO_STARSTAR !== $matched) {
                            return $matched;
                        }
                    } elseif (!$match_slash && '/' === $t_ch) {
                        return WM_ABORT_TO_STARSTAR;
                    }
                    $t_ch = nextChar($text_idx, $text_len, $text);
                }

                return WM_ABORT_ALL;

            case '[':
                $p_ch = nextChar($p_idx, $p_len, $p);
                $negated = NEGATE_CLASS === $p_ch || NEGATE_CLASS2 === $p_ch;
                if ($negated) {
                    /* Inverted character class. */
                    $p_ch = nextChar($p_idx, $p_len, $p);
                }
                $prev_ch = null;
                $matched = false;

                do {
                    if (null === $p_ch) {
                        /* pattern ended before the closing ']' */
                        return WM_ABORT_ALL;
                    }
                    $next_p_ch = charAt($p_idx + 1, $p_len, $p);

                    if ('\\' === $p_ch) {
                        $p_ch = nextChar($p_idx, $p_len, $p);
                        if (null === $p_ch) {
                            return WM_ABORT_ALL;
                        }
                        if ($t_ch === $p_ch) {
                            $matched = true;
                        }
                    } elseif (
                        '-' === $p_ch
                        /* Explicit null checks: a truthiness test would
                         * reject the string '0' and break "[0-9]". */
                        && null !== $prev_ch
                        && null !== $next_p_ch
                        && ']' !== $next_p_ch
                    ) {
                        $p_ch = nextChar($p_idx, $p_len, $p);
                        if ('\\' === $p_ch) {
                            $p_ch = nextChar($p_idx, $p_len, $p);
                            if (null === $p_ch) {
                                return WM_ABORT_ALL;
                            }
                        }
                        /* Compare range bounds by byte value. */
                        $lo = ord($prev_ch);
                        $hi = ord($p_ch);
                        $t_ord = ord($t_ch);
                        if ($lo <= $t_ord && $t_ord <= $hi) {
                            $matched = true;
                        } elseif ($casefold && ISLOWER($t_ch)) {
                            $t_upper = ord(strtoupper($t_ch));
                            if ($lo <= $t_upper && $t_upper <= $hi) {
                                $matched = true;
                            }
                        }
                        /* A range end must not start another range. */
                        $p_ch = null;
                    } elseif ('[' === $p_ch && ':' === $next_p_ch) {
                        $s_idx = $p_idx += 2;
                        /* Scan to the first ']' or the end of the pattern. */
                        while (
                            null !== ($p_ch = charAt($p_idx, $p_len, $p))
                            && ']' !== $p_ch
                        ) {
                            ++$p_idx;
                        }
                        if (null === $p_ch) {
                            return WM_ABORT_ALL;
                        }
                        $i = $p_idx - $s_idx - 1;
                        if ($i < 0 || ':' !== charAt($p_idx - 1, $p_len, $p)) {
                            /* Didn't find ":]", so rewind and treat the
                             * '[' like a normal set member. */
                            $p_idx = $s_idx - 2;
                            $p_ch = '[';
                            if ($t_ch === $p_ch) {
                                $matched = true;
                            }
                        } else {
                            $in_class = dowild_posix_class(
                                substr($p, $s_idx),
                                $i,
                                $t_ch,
                                $casefold
                            );
                            if (null === $in_class) {
                                /* malformed [:class:] string */
                                return WM_ABORT_ALL;
                            }
                            if ($in_class) {
                                $matched = true;
                            }
                            /* A class must not start a range. */
                            $p_ch = null;
                        }
                    } elseif ($t_ch === $p_ch) {
                        $matched = true;
                    }

                    $prev_ch = $p_ch;
                    $p_ch = nextChar($p_idx, $p_len, $p);
                } while (']' !== $p_ch);

                if ($matched === $negated || ($pathname && '/' === $t_ch)) {
                    return WM_NOMATCH;
                }
                continue 2;
        }
    }

    /* The pattern is used up; it matched only if the text is used up too. */
    return $text_idx < $text_len ? WM_NOMATCH : WM_MATCH;
}
/**
 * Public entry point: match $text against the glob $pattern.
 *
 * @param string $pattern Glob pattern.
 * @param string $text    Text to match.
 * @param int    $flags   Bitmask of WM_* option flags; defaults to 0 (none).
 *
 * @return int The result code produced by dowild().
 */
function wildmatch($pattern, $text, $flags = 0)
{
    $result = dowild($pattern, $text, $flags);

    return $result;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment