// Simulating infinite-length leading lookbehind in JavaScript. Uses XRegExp.
// Captures within lookbehind are not included in match results. Lazy
// repetition in lookbehind may lead to unexpected results.
(function (XRegExp) {
    /**
     * Compiles a leading-lookbehind pattern string into a regex that is
     * anchored to the END of whatever string it is tested against.
     *
     * @param {string} lb Pattern of the form `(?<=...)` or `(?<!...)`,
     *     optionally preceded by a leading mode modifier such as `(?i)`.
     * @returns {{lb: RegExp, type: boolean}} `lb` is the compiled regex;
     *     `type` is true for positive lookbehind and false for negative.
     */
    function prepareLb(lb) {
        // Allow mode modifier before lookbehind
        var parts = /^((?:\(\?[\w$]+\))?)\(\?<([=!])([\s\S]*)\)$/.exec(lb);
        if (!parts) {
            // No lookbehind group: compile the pattern unchanged and treat it
            // as a positive condition.
            return {lb: XRegExp(lb), type: true};
        }
        return {
            // $(?!\s) allows use of (?m) in lookbehind
            lb: XRegExp(parts[1] + "(?:" + parts[3] + ")$(?!\\s)"),
            // Positive ("=") or negative ("!") lookbehind
            type: parts[2] === "="
        };
    }

    /**
     * Tells whether a match starting at `index` satisfies the lookbehind.
     * The compiled lookbehind is tested against the text left of `index`.
     */
    function satisfiesLb(compiled, str, index) {
        return compiled.type === compiled.lb.test(str.slice(0, index));
    }

    /**
     * Finds the first match of `regex` in `str` that satisfies lookbehind `lb`.
     *
     * @param {string} str String to search.
     * @param {string} lb Lookbehind pattern (see prepareLb).
     * @param {RegExp} regex Main regex.
     * @returns {Array|null} The match array from XRegExp.exec, or null.
     */
    XRegExp.execLb = function (str, lb, regex) {
        var compiled = prepareLb(lb);
        var pos = 0;
        var match;
        while ((match = XRegExp.exec(str, regex, pos))) {
            if (satisfiesLb(compiled, str, match.index)) {
                return match;
            }
            // Rejected: retry from the next character so that overlapping
            // candidates are still considered.
            pos = match.index + 1;
        }
        return null;
    };

    /**
     * @returns {boolean} Whether XRegExp.execLb finds a match.
     */
    XRegExp.testLb = function (str, lb, regex) {
        return !!XRegExp.execLb(str, lb, regex);
    };

    /**
     * @returns {number} Index of the first match found by XRegExp.execLb,
     *     or -1 when there is none.
     */
    XRegExp.searchLb = function (str, lb, regex) {
        var match = XRegExp.execLb(str, lb, regex);
        return match ? match.index : -1;
    };

    /**
     * Collects the text of every match of `regex` in `str` that satisfies
     * lookbehind `lb`.
     *
     * @returns {string[]} Matched substrings, in order of appearance.
     */
    XRegExp.matchAllLb = function (str, lb, regex) {
        var compiled = prepareLb(lb);
        var matches = [];
        var pos = 0;
        var match;
        while ((match = XRegExp.exec(str, regex, pos))) {
            if (satisfiesLb(compiled, str, match.index)) {
                matches.push(match[0]);
                // Skip past the match; always advance at least one character
                // so an empty match cannot loop forever.
                pos = match.index + (match[0].length || 1);
            } else {
                pos = match.index + 1;
            }
        }
        return matches;
    };

    /**
     * Replaces matches of `regex` in `str` that satisfy lookbehind `lb`.
     * Only the first accepted match is replaced unless `regex` is global.
     *
     * Limitation: each accepted match is rewritten in isolation, so this does
     * not work correctly if a lookahead in `regex` looks outside of the match.
     *
     * @param {string} str String to search.
     * @param {string} lb Lookbehind pattern (see prepareLb).
     * @param {RegExp} regex Main regex.
     * @param {string|Function} replacement Passed through to XRegExp.replace.
     * @returns {string} The string with replacements applied.
     */
    XRegExp.replaceLb = function (str, lb, regex, replacement) {
        var compiled = prepareLb(lb);
        var output = "";
        var pos = 0;
        var lastEnd = 0;
        var match;
        while ((match = XRegExp.exec(str, regex, pos))) {
            if (satisfiesLb(compiled, str, match.index)) {
                // Scope "one" replaces only the first match inside match[0].
                // Without it, a global regex that can match the empty string
                // (e.g. /\w*/g) also matches at the end of match[0] and the
                // replacement is inserted twice.
                output += str.slice(lastEnd, match.index) +
                    XRegExp.replace(match[0], regex, replacement, "one");
                lastEnd = match.index + match[0].length;
                if (!regex.global) {
                    break;
                }
                pos = match.index + (match[0].length || 1);
            } else {
                pos = match.index + 1;
            }
        }
        return output + str.slice(lastEnd);
    };
}(XRegExp));
// Test it...
// Each call below logs its result; the expected output follows as a comment.
console.log(XRegExp.execLb("Fluffy cat", "(?i)(?<=fluffy\\W+)", XRegExp("(?i)(?<first>c)at")));
// -> ["cat", "c"]
// Result has named backref: result.first -> "c"
console.log(XRegExp.execLb("Fluffy cat", "(?i)(?<!fluffy\\W+)", /cat/i));
// -> null
console.log(XRegExp.testLb("Fluffy cat", "(?i)(?<=fluffy\\W+)", /cat/i));
// -> true
console.log(XRegExp.testLb("Fluffy cat", "(?i)(?<!fluffy\\W+)", /cat/i));
// -> false
console.log(XRegExp.searchLb("Catwoman's fluffy cat", "(?i)(?<=fluffy\\W+)", /cat/i));
// -> 18
console.log(XRegExp.searchLb("Catwoman's fluffy cat", "(?i)(?<!fluffy\\W+)", /cat/i));
// -> 0
console.log(XRegExp.matchAllLb("Catwoman's cats are fluffy cats", "(?i)(?<=fluffy\\W+)", /cat\w*/i));
// -> ["cats"]
console.log(XRegExp.matchAllLb("Catwoman's cats are fluffy cats", "(?i)(?<!fluffy\\W+)", /cat\w*/i));
// -> ["Catwoman", "cats"]
console.log(XRegExp.replaceLb("Catwoman's fluffy cat is a cat", "(?i)(?<=fluffy\\W+)", /cat/ig, "dog"));
// -> "Catwoman's fluffy dog is a cat"
console.log(XRegExp.replaceLb("Catwoman's fluffy cat is a cat", "(?i)(?<!fluffy\\W+)", /cat/ig, "dog"));
// -> "dogwoman's fluffy cat is a dog"
console.log(XRegExp.replaceLb("Catwoman's fluffy cat is a cat", "(?i)(?<!fluffy\\W+)", /cat/ig, function ($0) {
    // Preserve the capitalization of the replaced word's first letter
    var first = $0.charAt(0);
    return first === first.toUpperCase() ? "Dog" : "dog";
}));
// -> "Dogwoman's fluffy cat is a dog"
This comment has been minimized.
This comment has been minimized.
I've used this code in my blog post JavaScript Regex Lookbehind Redux. |
This comment has been minimized.
This comment has been minimized.
Nice. Until now I didn't know JS regex didn't support lookbehind. Do you know if it supports lookahead assertions? I was testing some regex on the console just last week and couldn't explain why I got a (false) positive match in JS, but not in other languages. I ended up changing strategies and ditching the assertion. Until now, I'd pretty much assumed that JS had a full regex implementation. |
This comment has been minimized.
This comment has been minimized.
Yes, JavaScript-the-standard supports lookahead, as do all browsers going back to IE 5.5. |
This comment has been minimized.
This comment has been minimized.
I want to match the entire tag that contains some haml databinding script in an attribute. Can this be achieved using matchLb? I don't understand how to match the string starting with < but ending with > and not !>.
This comment has been minimized.
This comment has been minimized.
Sorry, to continue: e.g. <table id="r<!=id!>">. I want to match to the < not preceded by a !. Is this possible?
This comment has been minimized.
This comment has been minimized.
Ha! I should have 'known' it would be you to have an alternative for JS ;-) |
This comment has been minimized.
This comment has been minimized.
Are you able to please provide a method for splitting that supports lookbehind? Thanks! |
This comment has been minimized.
This comment has been minimized.
I made a small node wrapper for these. Thanks for providing them! |
This comment has been minimized.
This comment has been minimized.
@ghost +1 I really need |
This comment has been minimized.
This is an alternate version of my previous lookbehind simulation that accepted leading lookbehind and the main regex as one combined pattern. This version does not require the XRegExp.matchRecursive addon.
This code is released under the MIT License.