Skip to content

Instantly share code, notes, and snippets.

@ayanamist
Created June 25, 2012 16:11
Show Gist options
  • Save ayanamist/2989518 to your computer and use it in GitHub Desktop.
Save ayanamist/2989518 to your computer and use it in GitHub Desktop.
Better implementation of shExpMatch
function shExpMatch(url, pattern) {
var pCharCode;
var isAggressive = false;
var pIndex;
var urlIndex = 0;
var lastIndex;
var patternLength = pattern.length;
var urlLength = url.length;
for (pIndex = 0; pIndex < patternLength; pIndex += 1) {
pCharCode = pattern.charCodeAt(pIndex); // use charCodeAt for performance, see http://jsperf.com/charat-charcodeat-brackets
if (pCharCode === 63) { // use if instead of switch for performance, see http://jsperf.com/switch-if
if (isAggressive) {
urlIndex += 1;
}
isAggressive = false;
urlIndex += 1;
}
else if (pCharCode === 42) {
if (pIndex === patternLength - 1) {
return urlIndex <= urlLength;
}
else {
isAggressive = true;
}
}
else {
if (isAggressive) {
lastIndex = urlIndex;
urlIndex = url.indexOf(String.fromCharCode(pCharCode), lastIndex + 1);
if (urlIndex < 0) {
if (url.charCodeAt(lastIndex) !== pCharCode) {
return false;
}
urlIndex = lastIndex;
}
isAggressive = false;
}
else {
if (urlIndex >= urlLength || url.charCodeAt(urlIndex) !== pCharCode) {
return false;
}
}
urlIndex += 1;
}
}
return urlIndex === urlLength;
}
<html>
<head>
<title>Unit Test</title>
<script src="shExpMatch.js" type="text/javascript"></script>
<body>
<script type="text/javascript">
function shExpMatchOrig(url, pattern) {
pattern = pattern.replace(/\./g, '\\.');
pattern = pattern.replace(/\*/g, '.*');
pattern = pattern.replace(/\?/g, '.');
var newRe = new RegExp('^'+pattern+'$');
return newRe.test(url);
}
function unittest(url, pattern) {
var result = shExpMatch(url, pattern);
var expected = shExpMatchOrig(url, pattern);
var output = "shExpMatch(\"" + url + "\",\"" + pattern + "\")=>" + result + ((result !== expected) ? (" expect " + expected) : "");
document.write(output + "<br />");
console.log(output);
}
function test() {
unittest("ababa", "*aba?");
unittest("?a","a");
unittest("asbasd", "*?*");
unittest("a?","a");
unittest("a","a?");
unittest("a","*a*");
unittest("","*?*");
unittest("1aba", "?*a");
unittest("ababa","*?aba*");
unittest("?a?a*a?a?", "baba|abab");
unittest("http://platform.twitter.com/widgets.js", "*.t?tter.com/*");
unittest("http://platform.twitter.com/widgets.js", "*.tw?tter.com/*");
unittest("http://platform.twitter.com/widgets.js", "*.t.com/*");
unittest("http://platform.twitter.com/widgets.js", "*.twitter.com/*");
}
test();
</script>
</body>
</html>
@ayanamist
Copy link
Author

This is a new implementation of shExpMatch function used in pac script with brandly fast performance
See http://jsperf.com/shexpmatch/4

@benjichaz
Copy link

what are the license of this function implementation ?

@linnaea
Copy link

linnaea commented Jun 7, 2015

This implementation is broken, shExpMatch("aaaa", "a*a") should return true.

@meoow
Copy link

meoow commented Jul 25, 2016

@linnaea
The code design has a flaw, the problem is at line 29: urlIndex = url.indexOf(String.fromCharCode(pCharCode), lastIndex + 1); where it acts kind of like the lazy mode search for regular expression which matches the first occurrence instead of as many as possible.

@meoow
Copy link

meoow commented Jul 25, 2016

Here is my implementation for shExpMatch. I don't know if my version is considered "better implementation", but the code passed @ayanamist's unittest, and also behaves correctly for testing shExpMatch("aaaa", "a*a").
The reason I "reinvented the wheel" is because a piece of software I use supports reading pac file but doesn't implement the shExpMatch function (which is quite bizzarre), I ended up have to implement it by myself.

function shExpMatch(url, pat) {

  var pcharcode0;
  var ucharcode0;
  var pcharcode1;

  if (pat.length === 0) {
    if (url.length === 0) {
      return true;
    } else {
      return false;
    }
  }

  if (pat.length === url.length &&
      pat.indexOf('*') < 0 &&
      pat.indexOf('?') < 0) {
    return pat === url;
  }

  ucharcode0 = url.charCodeAt(0);
  pcharcode0 = pat.charCodeAt(0);

  if (pcharcode0 === 42) { // pat[0] === '*'
    pcharcode1 = pat.charCodeAt(1);
    if (isNaN(pcharcode1)) {
      return true;
    } else if (pcharcode1 === 42) { // pat[1] === '*' skip continuous '*'
      return shExpMatch(url, pat.substr(1));
    } else {
      if (url.length === 0) {
        return false;
      }
      if (pcharcode1 === ucharcode0 || pcharcode1 === 63) {
        if (shExpMatch(url.substr(1), pat)) {
          return true;
        } else {
          return shExpMatch(url.substr(1), pat.substr(2));
        }
      } else {
        return shExpMatch(url.substr(1), pat);
      }
    }
  } else if (pcharcode0 === 63) {
    if (url.length === 0) {
      return false;
    }
    return shExpMatch(url.substr(1),pat.substr(1));
  } else {
    if (url.length === 0) {
      return false;
    }
    if (ucharcode0 === pcharcode0) {
      return shExpMatch(url.substr(1),pat.substr(1));
    } else {
      return false;
    }
  }

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment