Skip to content

Instantly share code, notes, and snippets.

@AVGP
Created September 8, 2021 14:57
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save AVGP/3b7bd11052399f3832684160071ee04f to your computer and use it in GitHub Desktop.
Save AVGP/3b7bd11052399f3832684160071ee04f to your computer and use it in GitHub Desktop.
Experimental code to match robots.txt rules against a given path.
// Rule patterns to evaluate. Only the active entry is tried; the
// commented-out entries are alternative fixtures kept for manual experiments:
//   '/path/yolo.png',
//   '/path',
//   '/something',
//   '/path/hello.png$',
//   '/path/hello$',
//   '/a*bc'
const patterns = [
  '/a*a',
];

// Path that every pattern is tested against. Alternative inputs:
//   '/aabaabc'       -- we need to get this working
//   '/aabaabcebced'  -- and this one too :D
//   '/aabaabxcde'    -- this should not work...
const path = '/ab';
/**
 * Checks whether a robots.txt rule pattern matches a URL path.
 *
 * Matching rules implemented here:
 * - A pattern matches when it matches a prefix of the path, so an empty
 *   pattern matches every path.
 * - `*` matches any run of characters, including none. Any number of
 *   wildcards may appear in one pattern.
 * - `$` as the very last pattern character anchors the match to the end of
 *   the path. Anywhere else it is compared as a literal character.
 *
 * @param {string} pattern - Rule pattern, e.g. `/a*bc` or `/path/hello$`.
 * @param {string} path - Path to test, e.g. `/aabaabc`.
 * @returns {boolean} `true` when the pattern matches the path.
 */
function doesRuleMatch(pattern, path) {
  const isAnchored = pattern.endsWith('$');
  const body = isAnchored ? pattern.slice(0, -1) : pattern;

  // Splitting on '*' leaves the literal runs that must appear in order.
  const [prefix, ...segments] = body.split('*');

  // The text before the first wildcard has to sit at the very start of the path.
  if (!path.startsWith(prefix)) {
    return false;
  }
  let cursor = prefix.length;

  // For an anchored pattern the final literal run must end the path, so it is
  // checked separately below instead of being searched for left to right.
  // `pop()` yields undefined when the pattern contains no wildcard at all.
  const tail = isAnchored ? segments.pop() : undefined;

  // Taking the leftmost occurrence of each run leaves the most room for the
  // runs that follow, so no backtracking is needed.
  for (const segment of segments) {
    const foundAt = path.indexOf(segment, cursor);
    if (foundAt === -1) {
      return false;
    }
    cursor = foundAt + segment.length;
  }

  if (!isAnchored) {
    return true;
  }

  if (tail === undefined) {
    // No wildcard: the literal prefix has to be the entire path.
    return cursor === path.length;
  }

  // The tail must end the path without overlapping what was already consumed.
  return path.length - tail.length >= cursor && path.endsWith(tail);
}
// Print one boolean per configured pattern, in declaration order.
console.log(Array.from(patterns, (rule) => doesRuleMatch(rule, path)));
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment