Instantly share code, notes, and snippets.

Embed
What would you like to do?
Simulating lookbehind in JavaScript (take 2)
// Simulating infinite-length leading lookbehind in JavaScript. Uses XRegExp.
// Captures within lookbehind are not included in match results. Lazy
// repetition in lookbehind may lead to unexpected results.
(function (XRegExp) {
// Turns a lookbehind spec such as "(?i)(?<=pattern)" into an object holding
// a compiled regex (`lb`) to run against the text left of a candidate match,
// and a boolean (`type`): true for positive lookbehind, false for negative.
function prepareLb(lb) {
    // Group 1: optional leading mode modifier, e.g. (?i)
    // Group 2: "=" (positive) or "!" (negative)
    // Group 3: the pattern inside the lookbehind group
    var pieces = /^((?:\(\?[\w$]+\))?)\(\?<([=!])([\s\S]*)\)$/.exec(lb);
    if (!pieces) {
        // No lookbehind group found: compile the input unchanged and treat
        // it as a positive condition
        return {
            lb: XRegExp(lb),
            type: true
        };
    }
    return {
        // The pattern is anchored to the end of the left context; the
        // trailing (?!\s) lets the anchor be used together with (?m)
        lb: XRegExp(pieces[1] + "(?:" + pieces[3] + ")$(?!\\s)"),
        type: pieces[2] === "="
    };
}
// Returns the first match of `regex` in `str` whose preceding text satisfies
// the lookbehind `lb`, or null when there is none. The returned value is the
// match array produced by XRegExp.exec.
XRegExp.execLb = function (str, lb, regex) {
    var lookbehind = prepareLb(lb),
        from = 0,
        found,
        before;
    for (;;) {
        found = XRegExp.exec(str, regex, from);
        if (!found) {
            return null;
        }
        // Everything to the left of the candidate is what the lookbehind sees
        before = str.slice(0, found.index);
        if (lookbehind.type === lookbehind.lb.test(before)) {
            return found;
        }
        // Candidate rejected: retry one character past where it started
        from = found.index + 1;
    }
};
// Reports whether `regex` matches anywhere in `str` at a position accepted by
// the lookbehind `lb`. Always returns a boolean.
XRegExp.testLb = function (str, lb, regex) {
    var found = XRegExp.execLb(str, lb, regex);
    return Boolean(found);
};
// Returns the index of the first match of `regex` in `str` that is accepted
// by the lookbehind `lb`, or -1 when no such match exists.
XRegExp.searchLb = function (str, lb, regex) {
    var found = XRegExp.execLb(str, lb, regex);
    if (!found) {
        return -1;
    }
    return found.index;
};
// Collects the matched text (match[0] only) of every match of `regex` in
// `str` whose preceding text satisfies the lookbehind `lb`. Returns an array
// of strings, empty when nothing qualifies.
XRegExp.matchAllLb = function (str, lb, regex) {
    var lookbehind = prepareLb(lb),
        results = [],
        cursor = 0,
        found,
        step;
    for (;;) {
        found = XRegExp.exec(str, regex, cursor);
        if (!found) {
            break;
        }
        // A rejected candidate only advances the search by one character
        step = 1;
        if (lookbehind.type === lookbehind.lb.test(str.slice(0, found.index))) {
            results.push(found[0]);
            // Skip past the accepted match; an empty match still advances by
            // one so the loop always makes progress
            step = found[0].length || 1;
        }
        cursor = found.index + step;
    }
    return results;
};
// Replaces matches of `regex` in `str` that are accepted by the lookbehind
// `lb`, and returns the resulting string. Only the first accepted match is
// replaced unless `regex.global` is set. `replacement` is passed through to
// XRegExp.replace.
XRegExp.replaceLb = function (str, lb, regex, replacement) {
    var lookbehind = prepareLb(lb),
        output = "",
        cursor = 0,
        copiedUpTo = 0,
        found,
        text;
    for (;;) {
        found = XRegExp.exec(str, regex, cursor);
        if (!found) {
            break;
        }
        if (lookbehind.type !== lookbehind.lb.test(str.slice(0, found.index))) {
            // Candidate rejected: retry one character past where it started
            cursor = found.index + 1;
            continue;
        }
        text = found[0];
        // The replacement is computed by re-running regex on the matched text
        // alone, so it doesn't work correctly if lookahead in regex looks
        // outside of the match
        output += str.slice(copiedUpTo, found.index) + XRegExp.replace(text, regex, replacement);
        copiedUpTo = found.index + text.length;
        if (!regex.global) {
            break;
        }
        // An empty match still advances by one so the loop makes progress
        cursor = found.index + (text.length || 1);
    }
    // Append whatever follows the last replaced match
    return output + str.slice(copiedUpTo);
};
}(XRegExp));
// Usage examples. Each call logs its result; the expected value is given in
// the comment that follows it.
(function () {
    // The same lookbehind condition in positive and negative form
    var afterFluffy = "(?i)(?<=fluffy\\W+)",
        notAfterFluffy = "(?i)(?<!fluffy\\W+)",
        sentence = "Catwoman's fluffy cat is a cat",
        plural = "Catwoman's cats are fluffy cats";

    console.log(XRegExp.execLb("Fluffy cat", afterFluffy, XRegExp("(?i)(?<first>c)at")));
    // -> ["cat", "c"]
    // Result has named backref: result.first -> "c"

    console.log(XRegExp.execLb("Fluffy cat", notAfterFluffy, /cat/i));
    // -> null

    console.log(XRegExp.testLb("Fluffy cat", afterFluffy, /cat/i));
    // -> true

    console.log(XRegExp.testLb("Fluffy cat", notAfterFluffy, /cat/i));
    // -> false

    console.log(XRegExp.searchLb("Catwoman's fluffy cat", afterFluffy, /cat/i));
    // -> 18

    console.log(XRegExp.searchLb("Catwoman's fluffy cat", notAfterFluffy, /cat/i));
    // -> 0

    console.log(XRegExp.matchAllLb(plural, afterFluffy, /cat\w*/i));
    // -> ["cats"]

    console.log(XRegExp.matchAllLb(plural, notAfterFluffy, /cat\w*/i));
    // -> ["Catwoman", "cats"]

    console.log(XRegExp.replaceLb(sentence, afterFluffy, /cat/ig, "dog"));
    // -> "Catwoman's fluffy dog is a cat"

    console.log(XRegExp.replaceLb(sentence, notAfterFluffy, /cat/ig, "dog"));
    // -> "dogwoman's fluffy cat is a dog"

    // A replacement function that preserves the case of the first letter
    console.log(XRegExp.replaceLb(sentence, notAfterFluffy, /cat/ig, function (matched) {
        var initial = matched.charAt(0);
        if (initial === initial.toUpperCase()) {
            return "Dog";
        }
        return "dog";
    }));
    // -> "Dogwoman's fluffy cat is a dog"
}());
@slevithan

This comment has been minimized.

Show comment
Hide comment
@slevithan

slevithan Apr 14, 2012

This is an alternate version of my previous lookbehind simulation that accepted leading lookbehind and the main regex as one combined pattern. This version does not require the XRegExp.matchRecursive addon.

This code is released under the MIT License.

Owner

slevithan commented Apr 14, 2012

This is an alternate version of my previous lookbehind simulation that accepted leading lookbehind and the main regex as one combined pattern. This version does not require the XRegExp.matchRecursive addon.

This code is released under the MIT License.

@slevithan

This comment has been minimized.

Show comment
Hide comment
@slevithan

slevithan Apr 16, 2012

I've used this code in my blog post JavaScript Regex Lookbehind Redux.

Owner

slevithan commented Apr 16, 2012

I've used this code in my blog post JavaScript Regex Lookbehind Redux.

@BrynM

This comment has been minimized.

Show comment
Hide comment
@BrynM

BrynM Apr 17, 2012

Nice. Until now I didn't know JS regex didn't support lookbehind. Do you know if it supports lookahead assertions? I was testing some regex on the console just last week and couldn't explain why I got a (false) positive match in JS, but not in other languages. I ended up changing strategies and ditching the assertion.

Until now, I'd pretty much assumed that JS had a full regex implementation.

BrynM commented Apr 17, 2012

Nice. Until now I didn't know JS regex didn't support lookbehind. Do you know if it supports lookahead assertions? I was testing some regex on the console just last week and couldn't explain why I got a (false) positive match in JS, but not in other languages. I ended up changing strategies and ditching the assertion.

Until now, I'd pretty much assumed that JS had a full regex implementation.

@slevithan

This comment has been minimized.

Show comment
Hide comment
@slevithan

slevithan Apr 17, 2012

Yes, JavaScript-the-standard supports lookahead, as do all browsers going back to IE 5.5.

Owner

slevithan commented Apr 17, 2012

Yes, JavaScript-the-standard supports lookahead, as do all browsers going back to IE 5.5.

@JoeWarwick

This comment has been minimized.

Show comment
Hide comment
@JoeWarwick

JoeWarwick Aug 15, 2012

I want to match the entire tag that contains some haml databinding script in an attribute
e.g.
'

'

Can this be achieved using matchLb? I don't understand how to match the string starting with < but ending with > and not !>.

JoeWarwick commented Aug 15, 2012

I want to match the entire tag that contains some haml databinding script in an attribute
e.g.
'

'

Can this be achieved using matchLb? I don't understand how to match the string starting with < but ending with > and not !>.

@JoeWarwick

This comment has been minimized.

Show comment
Hide comment
@JoeWarwick

JoeWarwick Aug 15, 2012

sorry. to continue :

e.g.

<table id="r<!=id!>">

I want to match to the < not preceded by a !.

Is this possible?

JoeWarwick commented Aug 15, 2012

sorry. to continue :

e.g.

<table id="r<!=id!>">

I want to match to the < not preceded by a !.

Is this possible?

@edwardbeckett

This comment has been minimized.

Show comment
Hide comment
@edwardbeckett

edwardbeckett Dec 27, 2014

Ha! I should have 'known' it would be you to have an alternative for JS ;-)

edwardbeckett commented Dec 27, 2014

Ha! I should have 'known' it would be you to have an alternative for JS ;-)

@ghost

This comment has been minimized.

Show comment
Hide comment
@ghost

ghost Apr 10, 2015

Are you able to please provide a method for splitting that supports lookbehind? Thanks!

ghost commented Apr 10, 2015

Are you able to please provide a method for splitting that supports lookbehind? Thanks!

@beaugunderson

This comment has been minimized.

Show comment
Hide comment
@beaugunderson

beaugunderson Aug 6, 2015

I made a small node wrapper for these. Thanks for providing them!

beaugunderson commented Aug 6, 2015

I made a small node wrapper for these. Thanks for providing them!

@mpyw

This comment has been minimized.

Show comment
Hide comment
@mpyw

mpyw Sep 27, 2018

@ghost +1

I really need XRegExp.splitLb

mpyw commented Sep 27, 2018

@ghost +1

I really need XRegExp.splitLb

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment