Skip to content

Instantly share code, notes, and snippets.

@ezzatron
Last active August 29, 2015 14:15
Show Gist options
  • Save ezzatron/ea9a5ae5328c5d5b62dd to your computer and use it in GitHub Desktop.
Save ezzatron/ea9a5ae5328c5d5b62dd to your computer and use it in GitHub Desktop.
Port of Git wildmatch to PHP
<?php
/*
 * Flag bits for the $flags argument of wildmatch()/dowild(); they are tested
 * with a bitwise AND and may be combined.
 */
/* Lower-case upper-case ASCII letters of both pattern and text before comparing. */
const WM_CASEFOLD = 1;
/* Treat '/' specially: '?', a single '*' and bracket sets never match it. */
const WM_PATHNAME = 2;
/*
 * Result codes returned by dowild(). These are a separate value space from
 * the flag bits above, so WM_ABORT_MALFORMED sharing the value 2 with
 * WM_PATHNAME is not a clash.
 */
/* A "**" was used under WM_PATHNAME without being delimited by '/' or the pattern boundaries. */
const WM_ABORT_MALFORMED = 2;
/* The text does not match the pattern. */
const WM_NOMATCH = 1;
/* The text matches the pattern. */
const WM_MATCH = 0;
/* Matching cannot succeed: the text ran out early or a bracket set is unterminated/malformed. */
const WM_ABORT_ALL = -1;
/* A single '*' failed at a '/'; an enclosing "**" loop in dowild() keeps trying on this code. */
const WM_ABORT_TO_STARSTAR = -2;
/* Characters that negate a bracket set when they directly follow '['. */
const NEGATE_CLASS = '!';
const NEGATE_CLASS2 = '^';
/**
 * Tell whether the first $len bytes of $clazz spell exactly $litmatch.
 *
 * Used to compare the name inside a "[:name:]" bracket expression against a
 * known class name.
 *
 * @param string $clazz    Text that starts with the candidate class name.
 * @param int    $len      Length of the candidate class name.
 * @param string $litmatch Class name to compare with.
 *
 * @return bool True when the lengths agree and the bytes are identical.
 */
function CC_EQ($clazz, $len, $litmatch)
{
    // A different length can never be the same name.
    if (strlen($litmatch) != $len) {
        return false;
    }

    return strncmp($clazz, $litmatch, $len) === 0;
}
/**
 * Tell whether the first byte of $char is a 7-bit ASCII code (0-127).
 *
 * @param string|null $char Character to test; null is treated as code 0.
 *
 * @return bool
 */
function ISASCII($char)
{
    // ord(null) is deprecated as of PHP 8.1. It used to evaluate to 0, so
    // keep that result without calling ord() on null.
    $code = null === $char ? 0 : ord($char);

    return $code < 128;
}
/**
 * Tell whether $c is a blank: a space or a horizontal tab.
 *
 * @param mixed $c Character to test.
 *
 * @return bool
 */
function ISBLANK($c)
{
    // Strict membership test, so only these exact one-character strings count.
    return in_array($c, [' ', "\t"], true);
}
/**
 * Tell whether $c is an ASCII character accepted by ctype_graph().
 *
 * @param string|null $c Character to test; null yields false.
 *
 * @return bool
 */
function ISGRAPH($c)
{
    // Passing null to ord()/ctype_*() is deprecated as of PHP 8.1; the result
    // for null was already false, so answer it directly.
    return null !== $c && ISASCII($c) && ctype_graph($c);
}
/**
 * Tell whether $c is an ASCII character accepted by ctype_print().
 *
 * @param string|null $c Character to test; null yields false.
 *
 * @return bool
 */
function ISPRINT($c)
{
    // Passing null to ord()/ctype_*() is deprecated as of PHP 8.1; the result
    // for null was already false, so answer it directly.
    return null !== $c && ISASCII($c) && ctype_print($c);
}
/**
 * Tell whether $c is an ASCII character accepted by ctype_digit().
 *
 * @param string|null $c Character to test; null yields false.
 *
 * @return bool
 */
function ISDIGIT($c)
{
    // Passing null to ord()/ctype_*() is deprecated as of PHP 8.1; the result
    // for null was already false, so answer it directly.
    return null !== $c && ISASCII($c) && ctype_digit($c);
}
/**
 * Tell whether $c is an ASCII character accepted by ctype_alnum().
 *
 * @param string|null $c Character to test; null yields false.
 *
 * @return bool
 */
function ISALNUM($c)
{
    // Passing null to ord()/ctype_*() is deprecated as of PHP 8.1; the result
    // for null was already false, so answer it directly.
    return null !== $c && ISASCII($c) && ctype_alnum($c);
}
/**
 * Tell whether $c is an ASCII character accepted by ctype_alpha().
 *
 * @param string|null $c Character to test; null yields false.
 *
 * @return bool
 */
function ISALPHA($c)
{
    // Passing null to ord()/ctype_*() is deprecated as of PHP 8.1; the result
    // for null was already false, so answer it directly.
    return null !== $c && ISASCII($c) && ctype_alpha($c);
}
/**
 * Tell whether $c is an ASCII character accepted by ctype_cntrl().
 *
 * @param string|null $c Character to test; null yields false.
 *
 * @return bool
 */
function ISCNTRL($c)
{
    // Passing null to ord()/ctype_*() is deprecated as of PHP 8.1; the result
    // for null was already false, so answer it directly.
    return null !== $c && ISASCII($c) && ctype_cntrl($c);
}
/**
 * Tell whether $c is an ASCII character accepted by ctype_lower().
 *
 * @param string|null $c Character to test; null yields false.
 *
 * @return bool
 */
function ISLOWER($c)
{
    // Passing null to ord()/ctype_*() is deprecated as of PHP 8.1; the result
    // for null was already false, so answer it directly.
    return null !== $c && ISASCII($c) && ctype_lower($c);
}
/**
 * Tell whether $c is an ASCII character accepted by ctype_punct().
 *
 * @param string|null $c Character to test; null yields false.
 *
 * @return bool
 */
function ISPUNCT($c)
{
    // Passing null to ord()/ctype_*() is deprecated as of PHP 8.1; the result
    // for null was already false, so answer it directly.
    return null !== $c && ISASCII($c) && ctype_punct($c);
}
/**
 * Tell whether $c is an ASCII character accepted by ctype_space().
 *
 * @param string|null $c Character to test; null yields false.
 *
 * @return bool
 */
function ISSPACE($c)
{
    // Passing null to ord()/ctype_*() is deprecated as of PHP 8.1; the result
    // for null was already false, so answer it directly.
    return null !== $c && ISASCII($c) && ctype_space($c);
}
/**
 * Tell whether $c is an ASCII character accepted by ctype_upper().
 *
 * dowild() calls this with null when the text is exhausted while the pattern
 * is at a '*' and WM_CASEFOLD is set, so null must be handled quietly.
 *
 * @param string|null $c Character to test; null yields false.
 *
 * @return bool
 */
function ISUPPER($c)
{
    // Passing null to ord()/ctype_*() is deprecated as of PHP 8.1; the result
    // for null was already false, so answer it directly.
    return null !== $c && ISASCII($c) && ctype_upper($c);
}
/**
 * Tell whether $c is an ASCII character accepted by ctype_xdigit().
 *
 * @param string|null $c Character to test; null yields false.
 *
 * @return bool
 */
function ISXDIGIT($c)
{
    // Passing null to ord()/ctype_*() is deprecated as of PHP 8.1; the result
    // for null was already false, so answer it directly.
    return null !== $c && ISASCII($c) && ctype_xdigit($c);
}
/**
 * Read the byte at $index of $string, with null standing in for "past the
 * end" (the role the terminating NUL plays in a C string).
 *
 * @param int    $index  Offset to read.
 * @param int    $length Length of $string, supplied by the caller.
 * @param string $string String to read from.
 *
 * @return string|null The one-byte string at $index, or null when $index is
 *                     not below $length.
 */
function charAt($index, $length, $string)
{
    if ($index < $length) {
        return $string[$index];
    }

    return null;
}
/**
 * Advance $index by one and read the byte at the new position.
 *
 * @param int    $index  Offset, passed by reference and incremented in place.
 * @param int    $length Length of $string, supplied by the caller.
 * @param string $string String to read from.
 *
 * @return string|null Whatever charAt() returns for the advanced offset.
 */
function nextChar(&$index, $length, $string)
{
    // The caller's cursor moves even when the new position is past the end.
    ++$index;

    return charAt($index, $length, $string);
}
/**
 * Tell whether $char is one of the glob metacharacters: '*', '?', '[' or
 * backslash.
 *
 * @param mixed $char Character to test.
 *
 * @return bool
 */
function is_glob_special($char)
{
    // Loose comparisons in this order, the same ones a switch statement makes.
    return $char == '*'
        || $char == '?'
        || $char == '['
        || $char == '\\';
}
/**
 * Core matcher: compare $text against the glob pattern $p byte by byte.
 *
 * Pattern syntax handled below:
 *  - "\x"     matches the character x literally;
 *  - "?"      matches any one character (never '/' under WM_PATHNAME);
 *  - "*"      matches any run of characters (not across '/' under WM_PATHNAME);
 *  - "**"     matches any run of characters including '/'; under WM_PATHNAME
 *             it must be delimited by '/' or the pattern boundaries;
 *  - "[...]"  a character set with ranges, '!' or '^' negation, backslash
 *             escapes and "[:name:]" classes (never '/' under WM_PATHNAME).
 *
 * @param string $p     Pattern.
 * @param string $text  Text to test.
 * @param int    $flags Bitmask of WM_CASEFOLD and WM_PATHNAME.
 *
 * @return int WM_MATCH or WM_NOMATCH, or an abort code: WM_ABORT_ALL when the
 *             text ran out early or a bracket set is unterminated/malformed,
 *             WM_ABORT_MALFORMED for a misplaced "**" under WM_PATHNAME, and
 *             WM_ABORT_TO_STARSTAR when a single '*' failed at a '/'.
 */
function dowild($p, $text, $flags)
{
    $p_len = strlen($p);
    $p_idx = 0;
    $text_len = strlen($text);
    $text_idx = 0;
    $casefold = (bool) ($flags & WM_CASEFOLD);
    $pathname = (bool) ($flags & WM_PATHNAME);

    for (; null !== ($p_ch = charAt($p_idx, $p_len, $p)); ++$text_idx, ++$p_idx) {
        $t_ch = charAt($text_idx, $text_len, $text);

        // Only a '*' may still match once the text is exhausted.
        if (null === $t_ch && '*' !== $p_ch) {
            return WM_ABORT_ALL;
        }

        if ($casefold) {
            // $t_ch is null when the text is exhausted at a '*'; do not hand
            // null to the ctype helpers.
            if (null !== $t_ch && ISUPPER($t_ch)) {
                $t_ch = strtolower($t_ch);
            }
            if (ISUPPER($p_ch)) {
                $p_ch = strtolower($p_ch);
            }
        }

        // Note: inside a switch a bare "continue" acts like "break" and raises
        // a warning on PHP >= 7.3, hence "continue 2" to reach the for loop.
        switch ($p_ch) {
            case '\\':
                /* Literal match with following character. A trailing
                 * backslash leaves $p_ch null, which the comparison in
                 * "default" rejects. */
                $p_ch = nextChar($p_idx, $p_len, $p);
                /* FALLTHROUGH */
            default:
                if ($t_ch !== $p_ch) {
                    return WM_NOMATCH;
                }
                continue 2;

            case '?':
                /* Match anything but '/'. */
                if ($pathname && '/' === $t_ch) {
                    return WM_NOMATCH;
                }
                continue 2;

            case '*':
                if ('*' === nextChar($p_idx, $p_len, $p)) {
                    // $p_idx is on the second star; look at the character in
                    // front of the first one (null at the pattern start).
                    $prev_p_idx = $p_idx - 2;
                    $prev_p = $prev_p_idx < 0 ? null : $p[$prev_p_idx];

                    // Collapse any further stars.
                    while ('*' === nextChar($p_idx, $p_len, $p)) {
                    }
                    $current_p = charAt($p_idx, $p_len, $p);

                    if (!$pathname) {
                        /* without WM_PATHNAME, '*' == '**' */
                        $match_slash = true;
                    } elseif (
                        (null === $prev_p || '/' === $prev_p) &&
                        (
                            null === $current_p ||
                            '/' === $current_p ||
                            (
                                '\\' === $current_p &&
                                '/' === charAt($p_idx + 1, $p_len, $p)
                            )
                        )
                    ) {
                        /*
                         * Assuming we already match 'foo/' and are at
                         * <star star slash>, just assume it matches
                         * nothing and go ahead match the rest of the
                         * pattern with the remaining string. This
                         * helps make foo/<*><*>/bar match both foo/bar
                         * and foo/a/bar.
                         */
                        if (
                            '/' === $current_p &&
                            WM_MATCH === dowild(
                                substr($p, $p_idx + 1),
                                substr($text, $text_idx),
                                $flags
                            )
                        ) {
                            return WM_MATCH;
                        }
                        $match_slash = true;
                    } else {
                        return WM_ABORT_MALFORMED;
                    }
                } else {
                    /* without WM_PATHNAME, '*' == '**' */
                    $match_slash = !$pathname;
                }

                $current_p = charAt($p_idx, $p_len, $p);
                if (null === $current_p) {
                    /* Trailing "**" matches everything. Trailing "*" matches
                     * only if there are no more slash characters. */
                    if (!$match_slash && false !== strpos($text, '/', $text_idx)) {
                        return WM_NOMATCH;
                    }

                    return WM_MATCH;
                }
                if (!$match_slash && '/' === $current_p) {
                    /*
                     * _one_ asterisk followed by a slash with WM_PATHNAME
                     * matches the next directory
                     */
                    $slash_idx = strpos($text, '/', $text_idx);
                    if (false === $slash_idx) {
                        return WM_NOMATCH;
                    }
                    $text_idx = $slash_idx;
                    /* the slash is consumed by the top-level for loop */
                    break;
                }

                while (null !== $t_ch) {
                    /*
                     * Try to advance faster when an asterisk is followed by
                     * a literal. We know in this case that the string before
                     * the literal must belong to "*".
                     * If match_slash is false, do not look past the first
                     * slash as it cannot belong to '*'.
                     */
                    if (!is_glob_special($current_p)) {
                        $p_ch = $current_p;
                        if ($casefold && ISUPPER($p_ch)) {
                            $p_ch = strtolower($p_ch);
                        }
                        $t_ch = charAt($text_idx, $text_len, $text);
                        while (null !== $t_ch && ($match_slash || '/' !== $t_ch)) {
                            if ($casefold && ISUPPER($t_ch)) {
                                $t_ch = strtolower($t_ch);
                            }
                            if ($t_ch === $p_ch) {
                                break;
                            }
                            $t_ch = nextChar($text_idx, $text_len, $text);
                        }
                        if ($t_ch !== $p_ch) {
                            return WM_NOMATCH;
                        }
                    }

                    // Try the rest of the pattern at the current text position.
                    $result = dowild(
                        substr($p, $p_idx),
                        substr($text, $text_idx),
                        $flags
                    );
                    if (WM_NOMATCH !== $result) {
                        // A "**" keeps scanning when the inner match merely
                        // gave up at a slash; everything else is final.
                        if (!$match_slash || WM_ABORT_TO_STARSTAR !== $result) {
                            return $result;
                        }
                    } elseif (!$match_slash && '/' === $t_ch) {
                        return WM_ABORT_TO_STARSTAR;
                    }
                    $t_ch = nextChar($text_idx, $text_len, $text);
                }

                return WM_ABORT_ALL;

            case '[':
                $p_ch = nextChar($p_idx, $p_len, $p);
                if (NEGATE_CLASS2 === $p_ch) {
                    $p_ch = NEGATE_CLASS;
                }
                $negated = NEGATE_CLASS === $p_ch;
                if ($negated) {
                    /* Inverted character class. */
                    $p_ch = nextChar($p_idx, $p_len, $p);
                }
                $prev_ch = null;
                $matched = false;

                do {
                    if (null === $p_ch) {
                        // Unterminated set.
                        return WM_ABORT_ALL;
                    }
                    $next_p_ch = charAt($p_idx + 1, $p_len, $p);

                    if ('\\' === $p_ch) {
                        $p_ch = nextChar($p_idx, $p_len, $p);
                        if (null === $p_ch) {
                            return WM_ABORT_ALL;
                        }
                        if ($t_ch === $p_ch) {
                            $matched = true;
                        }
                    } elseif (
                        '-' === $p_ch &&
                        // Explicit null checks: the string '0' is falsy in
                        // PHP, so a truthiness test would stop ranges such as
                        // [0-9] from being recognised.
                        null !== $prev_ch &&
                        null !== $next_p_ch &&
                        ']' !== $next_p_ch
                    ) {
                        $p_ch = nextChar($p_idx, $p_len, $p);
                        if ('\\' === $p_ch) {
                            $p_ch = nextChar($p_idx, $p_len, $p);
                            if (null === $p_ch) {
                                return WM_ABORT_ALL;
                            }
                        }
                        // Compare byte values, free of numeric-string juggling.
                        $low = ord($prev_ch);
                        $high = ord($p_ch);
                        $t_code = ord($t_ch);
                        if ($t_code <= $high && $t_code >= $low) {
                            $matched = true;
                        } elseif ($casefold && ISLOWER($t_ch)) {
                            $t_upper_code = ord(strtoupper($t_ch));
                            if ($t_upper_code <= $high && $t_upper_code >= $low) {
                                $matched = true;
                            }
                        }
                        /* This makes "prev_ch" get set to null. */
                        $p_ch = null;
                    } elseif ('[' === $p_ch && ':' === $next_p_ch) {
                        // Scan from just past "[:" to the closing ']'.
                        $p_idx += 2;
                        $s_idx = $p_idx;
                        while (true) {
                            $p_ch = charAt($p_idx, $p_len, $p);
                            if (null === $p_ch || ']' === $p_ch) {
                                break;
                            }
                            ++$p_idx;
                        }
                        if (null === $p_ch) {
                            return WM_ABORT_ALL;
                        }
                        // Length of the class name, excluding the ':' before ']'.
                        $i = $p_idx - $s_idx - 1;
                        if ($i < 0 || ':' !== charAt($p_idx - 1, $p_len, $p)) {
                            /* Didn't find ":]", so treat like a normal set. */
                            $p_idx = $s_idx - 2;
                            $p_ch = '[';
                            if ($t_ch === $p_ch) {
                                $matched = true;
                            }
                            // Targets the do-while: its condition advances
                            // to the character after this '['.
                            continue;
                        }

                        $s = substr($p, $s_idx);
                        if (CC_EQ($s, $i, "alnum")) {
                            $in_class = ISALNUM($t_ch);
                        } elseif (CC_EQ($s, $i, "alpha")) {
                            $in_class = ISALPHA($t_ch);
                        } elseif (CC_EQ($s, $i, "blank")) {
                            $in_class = ISBLANK($t_ch);
                        } elseif (CC_EQ($s, $i, "cntrl")) {
                            $in_class = ISCNTRL($t_ch);
                        } elseif (CC_EQ($s, $i, "digit")) {
                            $in_class = ISDIGIT($t_ch);
                        } elseif (CC_EQ($s, $i, "graph")) {
                            $in_class = ISGRAPH($t_ch);
                        } elseif (CC_EQ($s, $i, "lower")) {
                            $in_class = ISLOWER($t_ch);
                        } elseif (CC_EQ($s, $i, "print")) {
                            $in_class = ISPRINT($t_ch);
                        } elseif (CC_EQ($s, $i, "punct")) {
                            $in_class = ISPUNCT($t_ch);
                        } elseif (CC_EQ($s, $i, "space")) {
                            $in_class = ISSPACE($t_ch);
                        } elseif (CC_EQ($s, $i, "upper")) {
                            // With case folding the text was lower-cased
                            // above, so lower-case letters count as upper.
                            $in_class = ISUPPER($t_ch)
                                || ($casefold && ISLOWER($t_ch));
                        } elseif (CC_EQ($s, $i, "xdigit")) {
                            $in_class = ISXDIGIT($t_ch);
                        } else {
                            /* malformed [:class:] string */
                            return WM_ABORT_ALL;
                        }
                        if ($in_class) {
                            $matched = true;
                        }
                        /* This makes "prev_ch" get set to null. */
                        $p_ch = null;
                    } elseif ($t_ch === $p_ch) {
                        $matched = true;
                    }
                } while (
                    // Remember the element just handled (always continue),
                    // then advance; the set ends at the next ']'.
                    (($prev_ch = $p_ch) || true) &&
                    (']' !== ($p_ch = nextChar($p_idx, $p_len, $p)))
                );

                if ($matched === $negated || ($pathname && '/' === $t_ch)) {
                    return WM_NOMATCH;
                }
                continue 2;
        }
    }

    // The pattern is used up; it is a match only if the text is too.
    return $text_idx < $text_len ? WM_NOMATCH : WM_MATCH;
}
/**
 * Public entry point: match $text against the glob $pattern.
 *
 * @param string $pattern Glob pattern.
 * @param string $text    Text to test.
 * @param int    $flags   Bitmask of WM_CASEFOLD and WM_PATHNAME; 0 for none.
 *
 * @return int The result code produced by dowild(), passed through unchanged.
 */
function wildmatch($pattern, $text, $flags = 0)
{
    $outcome = dowild($pattern, $text, $flags);

    return $outcome;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment