Skip to content

Instantly share code, notes, and snippets.

@gbirke
Created December 12, 2014 01:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save gbirke/2cc2370135b665eee3ef to your computer and use it in GitHub Desktop.
Save gbirke/2cc2370135b665eee3ef to your computer and use it in GitHub Desktop.
Regex named group capturing in JavaScript
// These three functions create a map from capture-group indices to group names for RegExp patterns,
// strip the group names from regular expressions, and assign the captured items to their names according to the map.
/**
 * Builds a map from capture-group index to group name for a pattern written
 * with the custom "(:<name>...)" named-group syntax.
 *
 * Every capturing "(" is counted (1-based, in order of appearance); groups
 * that start with "(?" are not counted. Escaped characters and parentheses
 * inside a character class ("[...]") are ignored. Nested character classes
 * (as allowed by the RegExp "v" flag) are not tracked.
 *
 * @param {RegExp|string} rx - Pattern as a RegExp object or as a source
 *     string; any other value is converted to a string.
 * @returns {Object} Object keyed by group index whose values are the group
 *     names. Unnamed groups are counted but get no entry.
 */
function getMap(rx) {
    var source = (rx instanceof RegExp) ? rx.source : String(rx),
        namePattern = /^:<(\w+)>/,
        braceMap = {},
        braceCount = 0,
        inCharClass = false,
        pos, ch, nameMatch;
    // Scan character by character instead of using one global regex: a regex
    // that consumes the character before "(" misses groups that open right
    // after another "(" and mistakes "\\(" for an escaped parenthesis.
    for (pos = 0; pos < source.length; pos++) {
        ch = source.charAt(pos);
        if (ch === "\\") {
            // Skip the escaped character, whatever it is.
            pos++;
        } else if (inCharClass) {
            if (ch === "]") {
                inCharClass = false;
            }
        } else if (ch === "[") {
            inCharClass = true;
        } else if (ch === "(" && source.charAt(pos + 1) !== "?") {
            braceCount++;
            nameMatch = namePattern.exec(source.slice(pos + 1));
            if (nameMatch) {
                braceMap[braceCount] = nameMatch[1];
            }
        }
    }
    return braceMap;
}
/**
 * Assigns captured values to group names according to an index-to-name map.
 *
 * @param {Object} map - Object keyed by capture index whose values are the
 *     group names (the shape returned by getMap).
 * @param {?Array} captures - Match result indexed by capture number, e.g. the
 *     return value of RegExp.prototype.exec; may be null or undefined when
 *     there was no match.
 * @returns {?Object} Object keyed by group name holding the captured values,
 *     or null when captures is null or undefined.
 */
function mapCaptures(map, captures) {
    var idx, result = {};
    // Loose comparison on purpose: treats undefined like null.
    if (captures == null) {
        return null;
    }
    for (idx in map) {
        // Only the map's own entries describe groups; skip inherited keys.
        if (Object.prototype.hasOwnProperty.call(map, idx)) {
            result[map[idx]] = captures[idx];
        }
    }
    return result;
}
/**
 * Removes the custom ":<name>" markers from "(:<name>...)" groups so the
 * pattern becomes a plain regular expression with ordinary capturing groups.
 *
 * Escaped parentheses and the contents of character classes ("[...]") are
 * left untouched. Nested character classes (as allowed by the RegExp "v"
 * flag) are not tracked.
 *
 * @param {RegExp|string} rx - Pattern as a RegExp object or as a source
 *     string; any other value is converted to a string.
 * @returns {RegExp|string} A new RegExp with the same flags when rx is a
 *     RegExp, otherwise the cleaned source string.
 */
function cleanRegExp(rx) {
    var isRegex = rx instanceof RegExp,
        source = isRegex ? rx.source : String(rx),
        namePattern = /^:<\w+>/,
        cleaned = "",
        inCharClass = false,
        pos, ch, nameMatch, opts;
    // Scan character by character: a single global replace that consumes the
    // character before "(" misses a name that directly follows another named
    // group's opening and mistakes "\\(" for an escaped parenthesis.
    for (pos = 0; pos < source.length; pos++) {
        ch = source.charAt(pos);
        cleaned += ch;
        if (ch === "\\") {
            // Copy the escaped character verbatim and move past it.
            pos++;
            cleaned += source.charAt(pos);
        } else if (inCharClass) {
            if (ch === "]") {
                inCharClass = false;
            }
        } else if (ch === "[") {
            inCharClass = true;
        } else if (ch === "(") {
            nameMatch = namePattern.exec(source.slice(pos + 1));
            if (nameMatch) {
                // Drop the ":<name>" marker by jumping over it.
                pos += nameMatch[0].length;
            }
        }
    }
    if (!isRegex) {
        return cleaned;
    }
    // Prefer the full flag string so flags other than g/i/m survive; fall
    // back to the individual properties where "flags" is unavailable.
    opts = (typeof rx.flags === "string") ? rx.flags :
        (rx.global ? "g" : "") +
        (rx.ignoreCase ? "i" : "") +
        (rx.multiline ? "m" : "");
    return new RegExp(cleaned, opts);
}
// Mocha unit test for the functions
var should = require("should");
// Mocha suite exercising getMap, mapCaptures and cleanRegExp with should.js assertions.
describe("Named Groups", function() {
    describe("#getMap", function() {
        it("should return empty map for no pattern", function(){
            getMap("").should.be.empty;
        });
        it("should return empty map for unnamed captures", function(){
            getMap("a(b)").should.be.empty;
        });
        it("should return maps for named captures", function(){
            getMap("a (:<foo>b)").should.eql({1:"foo"});
            getMap("a (:<foo>b) (:<bar>c)").should.eql({1:"foo", 2: "bar"});
            getMap("a (:<foo>b) (x) (:<bar>c)").should.eql({1:"foo", 3: "bar"});
        });
        it("should ignore escaped braces", function(){
            getMap("a \\(b) (:<foo>x)").should.eql({1:"foo"});
        });
        it("should ignore noncapturing braces", function(){
            getMap("a (?:b) (:<foo>x)").should.eql({1:"foo"});
        });
        it("recognize mappings at the beginning of the pattern", function(){
            getMap("(:<foo>x)").should.eql({1:"foo"});
        });
        it("should accept RegExp objects", function(){
            getMap(new RegExp("a (:<foo>b)")).should.eql({1:"foo"});
            getMap(new RegExp("")).should.be.empty;
        });
    });
    describe("#mapCaptures", function() {
        // Testing only arrays instead of regex return object
        it("should capture every result", function() {
            mapCaptures({1:"foo"}, ["haha", "ho"]).should.eql({"foo":"ho"});
        });
        it("should return null on null result", function() {
            (mapCaptures({1:"foo"}, null) == null).should.be.ok;
        });
    });
    describe("#cleanRegExp", function() {
        it("should remove group names", function() {
            cleanRegExp("a (:<foo>b)").should.eql("a (b)");
        });
        it("should remove group names at the beginning of the pattern", function() {
            cleanRegExp("(:<foo>b)").should.eql("(b)");
        });
        it("should leave escaped braces followed by group names intact", function() {
            cleanRegExp("a \\(:<foo>b)").should.eql("a \\(:<foo>b)");
        });
        it("should handle RegExp objects", function() {
            cleanRegExp(new RegExp("a (:<foo>b)", "g")).should.eql(new RegExp("a (b)", "g"));
        });
    });
    describe("integration test", function() {
        // The assertion lives inside it() so that a failure is reported as a
        // failing test case instead of an error thrown while the suite is
        // being defined.
        it("should map the captures of a cleaned pattern to their group names", function() {
            var rx = /(:<foo>...) (:<bar>...) (:<baz>...)/g,
                str = "aaa bbb ccc ddd eee fff",
                map = getMap(rx),
                cleanRx = cleanRegExp(rx),
                result = cleanRx.exec(str),
                capture = mapCaptures(map, result);
            capture.should.eql({"foo":"aaa", "bar":"bbb", "baz":"ccc"});
        });
    });
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment