Created
December 12, 2014 01:28
-
-
Save gbirke/2cc2370135b665eee3ef to your computer and use it in GitHub Desktop.
Regex named group capturing in JavaScript
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// These three functions emulate named capture groups written as "(:<name>...)":
// getMap builds a map from group index to group name, cleanRegExp strips the
// names from a regular expression, and mapCaptures assigns captured items to
// their names according to the map.
/**
 * Builds a map from capture-group index (1-based) to group name for every
 * group written with the custom "(:<name>...)" syntax.
 *
 * The pattern is tokenized rather than matched with a "previous character is
 * not a backslash" test, so that:
 *   - escape pairs such as "\(" and "\\" are consumed as a unit,
 *   - parentheses inside character classes ("[(]") are not counted,
 *   - groups that open directly after another "(" are still seen.
 *
 * "(?:", "(?=", "(?!", "(?<=" and "(?<!" do not capture and are skipped.
 * A native "(?<name>" group is counted, so the indices of later groups stay
 * correct, but it is not added to the map.
 *
 * @param {RegExp|string} rx - A RegExp (its `source` is scanned) or a pattern string.
 * @returns {Object} Map of group index to group name; empty if no named groups.
 */
function getMap(rx) {
    // Alternatives, in order: escape pair | character class | group opener.
    // Opener captures: [1] what follows "(" when special, [2] "<x" of a native
    // named group, [3] the custom group name.
    var tokenizer = /\\[\s\S]|\[(?:\\[\s\S]|[^\]\\])*\]|\((\?(<[^=!])?|:<(\w+)>)?/g,
        braceMap = {},
        braceCount = 0,
        source = (rx instanceof RegExp) ? rx.source : String(rx),
        token;

    while ((token = tokenizer.exec(source)) !== null) {
        if (token[0].charAt(0) !== "(") {
            // Escape pair or character class: nothing to count.
            continue;
        }
        if (token[1] !== undefined && token[1].charAt(0) === "?" && token[2] === undefined) {
            // "(?" that is not a native named group: non-capturing or lookaround.
            continue;
        }
        braceCount++;
        if (token[3] !== undefined) {
            braceMap[braceCount] = token[3];
        }
    }
    return braceMap;
}
/**
 * Assigns captured values to group names.
 *
 * @param {Object} map - Map of capture index to group name.
 * @param {?Array} captures - Match result indexed by capture number, or null
 *     when there was no match.
 * @returns {?Object} Object keyed by group name holding `captures[index]` for
 *     each entry of `map`, or null when `captures` is null.
 */
function mapCaptures(map, captures) {
    var idx, result = {};
    if (captures === null) {
        return null;
    }
    for (idx in map) {
        // Only the map's own entries are mappings; skip anything inherited.
        if (Object.prototype.hasOwnProperty.call(map, idx)) {
            result[map[idx]] = captures[idx];
        }
    }
    return result;
}
/**
 * Removes the custom ":<name>" markers from "(:<name>...)" groups, leaving
 * plain capturing groups.
 *
 * The pattern is tokenized so that escape pairs ("\(", "\\") and character
 * classes are copied through untouched and only real group openers are
 * rewritten; this also handles a named group opening directly after another.
 *
 * @param {RegExp|string} rx - A RegExp or a pattern string.
 * @returns {RegExp|string} For a RegExp, a new RegExp with the cleaned source
 *     and the same flags; for a string, the cleaned string.
 */
function cleanRegExp(rx) {
    var isRegex = rx instanceof RegExp,
        source = isRegex ? rx.source : rx,
        // Alternatives, in order: escape pair | character class | named opener.
        cleaned = source.replace(
            /\\[\s\S]|\[(?:\\[\s\S]|[^\]\\])*\]|\(:<\w+>/g,
            function (token) {
                // Only the named-opener alternative can start with "(".
                return token.charAt(0) === "(" ? "(" : token;
            }
        ),
        opts;

    if (!isRegex) {
        return cleaned;
    }
    if (typeof rx.flags === "string") {
        // Carries over every flag the engine knows, not just g/i/m.
        opts = rx.flags;
    } else {
        // Fallback for engines without RegExp.prototype.flags.
        opts = (rx.global ? "g" : "") +
            (rx.ignoreCase ? "i" : "") +
            (rx.multiline ? "m" : "");
    }
    return new RegExp(cleaned, opts);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Mocha unit tests for getMap, mapCaptures and cleanRegExp
// Loading "should" is what makes the `.should` assertions used below available.
var should = require("should");

describe("Named Groups", function() {

    describe("#getMap", function() {
        it("should return empty map for no pattern", function(){
            getMap("").should.be.empty;
        });
        it("should return empty map for unnamed captures", function(){
            getMap("a(b)").should.be.empty;
        });
        it("should return maps for named captures", function(){
            getMap("a (:<foo>b)").should.eql({1:"foo"});
            getMap("a (:<foo>b) (:<bar>c)").should.eql({1:"foo", 2: "bar"});
            getMap("a (:<foo>b) (x) (:<bar>c)").should.eql({1:"foo", 3: "bar"});
        });
        it("should ignore escaped braces", function(){
            getMap("a \\(b) (:<foo>x)").should.eql({1:"foo"});
        });
        it("should ignore noncapturing braces", function(){
            getMap("a (?:b) (:<foo>x)").should.eql({1:"foo"});
        });
        it("recognize mappings at the beginning of the pattern", function(){
            getMap("(:<foo>x)").should.eql({1:"foo"});
        });
        it("should accept RegExp objects", function(){
            getMap(new RegExp("a (:<foo>b)")).should.eql({1:"foo"});
            getMap(new RegExp("")).should.be.empty;
        });
    });

    describe("#mapCaptures", function() {
        // Testing only arrays instead of regex return object
        it("should capture every result", function() {
            mapCaptures({1:"foo"}, ["haha", "ho"]).should.eql({"foo":"ho"});
        });
        it("should return null on null result", function() {
            (mapCaptures({1:"foo"}, null) == null).should.be.ok;
        });
    });

    describe("#cleanRegExp", function() {
        it("should remove group names", function() {
            cleanRegExp("a (:<foo>b)").should.eql("a (b)");
        });
        it("should remove group names at the beginning of the pattern", function() {
            cleanRegExp("(:<foo>b)").should.eql("(b)");
        });
        it("should leave escaped braces followed by group names intact", function() {
            cleanRegExp("a \\(:<foo>b)").should.eql("a \\(:<foo>b)");
        });
        it("should handle RegExp objects", function() {
            cleanRegExp(new RegExp("a (:<foo>b)", "g")).should.eql(new RegExp("a (b)", "g"));
        });
    });

    describe("integration test", function() {
        // Wrapped in it() so the assertion runs as a reported test case rather
        // than while the suite is being defined.
        it("should map named captures from a real match", function() {
            var rx = /(:<foo>...) (:<bar>...) (:<baz>...)/g,
                str = "aaa bbb ccc ddd eee fff",
                map = getMap(rx),
                cleanRx = cleanRegExp(rx),
                result = cleanRx.exec(str),
                capture = mapCaptures(map, result);
            capture.should.eql({"foo":"aaa", "bar":"bbb", "baz":"ccc"});
        });
    });
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment