Skip to content

Instantly share code, notes, and snippets.

@mynameisflorian
Created December 22, 2018 23:30
Show Gist options
  • Save mynameisflorian/ff32c93be8b381bf5e913cf58b7a3c2c to your computer and use it in GitHub Desktop.
Save mynameisflorian/ff32c93be8b381bf5e913cf58b7a3c2c to your computer and use it in GitHub Desktop.
RegExp Composer w/ capture data
"use strict";
/*
exp( ...arguments ) --> new Expression( ...arguments )
- each supplied argument can be an Expression, RegExp, capture descriptor (see Capturing), or string
Capturing
- An argument wrapped in a single-property object is captured
exp( { month }, sep, { day }, sep, { year } )
--> { month: 1, day: 23, year: 1945 }
exp({ petType: dog.or( cat ) })
Maybe
.maybe() -- transforms expression so that it is optional
name = exp( firstName, exp( sep, lastName ).maybe() )
Or
.or( exp )
exp( "woof" ).or( "meow" );
Repeating
.repeat( 0 ) -- repeat 0 or more times
.repeat( 1 ) -- repeat 1 or more times
.repeat( n ) -- repeat exactly n times
.repeat( min, max ) -- repeat between min and max times
exp( "m", exp( "e" ).repeat(), "ow" )
month = digit.repeat( 1, 2 );
Compiling
.compile() -- returns "capture array"
The capture array is used by the parser to collect data from RegExp::exec's returned array. The indexes line up with each other.
[0] "root capture object" -- represents the entire match
[n] --> { source, key, repeatCaptures }
To create a regular expression from the capture array, use new RegExp( captures[0].source )
*/
// The whole script lives inside a labeled block so that a later `break main;`
// can stop execution early and skip the remaining example tests.
main: {
// Start every run with an empty console so only this run's output is shown.
console.clear();
/**
 * Shorthand factory: builds an Expression from the given parts and freezes it.
 *
 * @param {...*} args - Expressions, RegExps, capture descriptors or strings,
 *   forwarded unchanged to the Expression constructor.
 * @returns {Expression} The frozen expression.
 */
function exp( ...args ){
  const expression = new Expression( ...args );
  return Object.freeze( expression );
}
/**
 * Returns the RegExp source text for one composer argument, optionally
 * enclosing it in a group.
 *
 * @param {*} exp - Argument to resolve (handed to _getSource).
 * @param {string|false} [wrap=false] - Group opener such as "(" or "(?:".
 *   When truthy, and the resolved source is not already wrapped, the source
 *   is returned as `wrap + source + ")"`.
 * @returns {string} The (possibly wrapped) source text.
 */
function getSource( exp, wrap = false ){
  const [ source, wrapped ] = _getSource( exp );
  const needsWrapping = wrap && !wrapped;
  return needsWrapping
    ? wrap + source + ")"
    : source;
}
/**
 * Resolves one composer argument to its regular-expression source text.
 *
 * - string: matched literally, so every RegExp metacharacter is escaped
 * - RegExp: its `source` is used as-is
 * - Expression: the source stored on its root capture object (index 0)
 * - any other object: treated as a capture descriptor; the value of its first
 *   property is resolved and enclosed in a capturing group
 *
 * @param {*} exp - The argument to resolve.
 * @returns {[string, boolean]} `[ source, wrapped ]`, where `wrapped` is true
 *   when the source is already enclosed in a capturing group.
 * @throws {TypeError} If `exp` is not one of the supported kinds, or is a
 *   capture descriptor without any property.
 */
function _getSource( exp ){
  if( typeof exp === "string" )
    //escape regExp characters
    return [ exp.replace(/[-\/\\^$*+?.()|[\]{}]/g, '\\$&'), false ];
  if( exp instanceof RegExp )
    return [ exp.source, false ];
  //Reject remaining primitives explicitly. Falling through would return
  //undefined and make the caller fail with an unrelated destructuring error.
  const isObjectLike = exp !== null && ( typeof exp === "object" || typeof exp === "function" );
  if( !isObjectLike )
    throw new TypeError(
      "Expected a string, RegExp, Expression or capture descriptor, got " +
      ( exp === null ? "null" : typeof exp )
    );
  if( exp instanceof Expression )
    return [ exp[0].source, false ];
  //Anything else is a capture descriptor: { key: value }
  const entries = Object.entries( exp );
  if( entries.length === 0 )
    throw new TypeError( "A capture descriptor needs one property, e.g. { name: expression }" );
  const value = entries[ 0 ][ 1 ];
  return [ getSource( value, "(" ), true ];
}
/**
 * Collects the capture objects contributed by one composer argument.
 *
 * - Expression: every entry after its root capture object (index 0)
 * - RegExp or string: none
 * - capture descriptor `{ key: value }`: one capture object for the descriptor
 *   itself, followed by the capture objects nested inside `value`
 *
 * @param {*} exp - The argument to inspect.
 * @returns {Array<Object>|undefined} The capture objects in group order, or
 *   undefined for an argument of any other kind.
 */
function getCaptureObjects( exp ){
  if( exp instanceof Expression )
    //Array.from yields a plain array, so slice() never goes through the Expression constructor
    return Array.from( exp ).slice( 1 );
  if( exp instanceof RegExp || typeof exp === "string" )
    return [];
  if( !( exp instanceof Object ) )
    return undefined;

  //Capture descriptor: only the first property is used
  const [ [ key, value ] ] = Object.entries( exp );
  const source = getSource( value, "(" );
  const descriptor = { key, source };
  //A repeated expression carries the capture list of its loop body on its root object
  const repeatCaptures = value instanceof Expression
    ? value[0].repeatCaptures
    : null;
  const nested = getCaptureObjects( value );
  if( repeatCaptures )
    descriptor.repeatCaptures = repeatCaptures;
  return [ Object.freeze( descriptor ), ...nested ];
}
/**
 * A composed regular expression stored as a "capture array".
 *
 * Index 0 is the root capture object `{ source }` describing the whole
 * pattern. Every following entry is a capture object supplied by
 * getCaptureObjects for the arguments, in argument order.
 * Instances are not frozen by the constructor; exp() and the combinator
 * methods below freeze what they return.
 */
class Expression extends Array{
  //Inherited Array methods (map, filter, slice, ...) create their result via
  //the species constructor. Without this override they would call
  //`new Expression( length )`, which treats the number as a pattern part and throws.
  static get [ Symbol.species ](){
    return Array;
  }

  /**
   * @param {...*} args - Expressions, RegExps, capture descriptors or strings;
   *   their sources are concatenated in order.
   */
  constructor( ...args ){
    const captures = args.flatMap( getCaptureObjects );
    const source = args.reduce( ( src, exp ) => src + getSource( exp ), "" );
    super( Object.freeze({ source }), ...captures );
    //note: this is frozen by exp( ... )
  }

  /**
   * @returns {Array<Object>} A plain-array copy of the capture array.
   */
  compile(){
    return Array.from( this );
  }

  /**
   * Alternation: matches either this expression or `exp`.
   *
   * @param {*} exp - Any value accepted by the constructor.
   * @returns {Expression} A new frozen expression holding the captures of both sides.
   */
  or( exp ){
    const newExpression = new Expression( this, exp );
    const alternation = getSource( this, "(?:" ) + "|" + getSource( exp, "(?:" );
    newExpression[ 0 ] = Object.freeze({
      ...newExpression[ 0 ],
      //"|" has the lowest precedence in a pattern, so the alternation must be
      //enclosed; otherwise text concatenated after it would bind to the last
      //alternative only. A non-capturing group keeps capture indexes unchanged.
      source: "(?:" + alternation + ")",
    });
    return Object.freeze( newExpression );
  }

  /**
   * Makes this expression optional.
   *
   * @returns {Expression} A new frozen expression with source `(?:...)?`.
   */
  maybe(){
    const newExpression = new Expression( this );
    newExpression[ 0 ] = Object.freeze({
      ...newExpression[ 0 ],
      source: getSource( this, "(?:" ) + "?",
    });
    return Object.freeze( newExpression );
  }

  /**
   * Repeats this expression.
   *
   * - repeat() or repeat( 1 ): one or more times ("+")
   * - repeat( 0 ): zero or more times ("*")
   * - anything else: `{min,max}`
   *
   * @param {number} [min=1] - Minimum repetitions.
   * @param {number} [max=min] - Maximum repetitions.
   * @returns {Expression} A new frozen expression whose root capture object
   *   carries the original capture list as `repeatCaptures`.
   */
  repeat( min = 1, max = min ){
    const repeatString =
      min === 1 && max === 1
        ? "+"
        : min === 0 && max === 0
          ? "*"
          : `{${min},${max}}`;
    const captures = Array.from( this );
    const newExpression = new Expression( this );
    //remove keys and other data from the newExpression's capture list
    //regexp results from variables inside loops are unreliable.
    //they are captured via repeatCaptures, which can be parsed separately
    captures.forEach( ( { source }, i ) => {
      newExpression[ i ] = Object.freeze({ source });
    });
    //Overwrite the root capture object
    newExpression[ 0 ] = Object.freeze({
      ...newExpression[ 0 ],
      source: getSource( this, "(?:" ) + repeatString,
      repeatCaptures: captures,
    });
    return Object.freeze( newExpression );
  }
}
/**
 * Debugging/example parser: runs the compiled pattern against `string` and
 * zips the RegExp result with the capture array into a keyed object.
 *
 * @param {Array<Object>} captures - Capture array; `captures[0].source` is the
 *   whole pattern and `captures[i]` describes capturing group `i`.
 * @param {string} string - Text to match.
 * @param {RegExp} [re] - Regular expression to use instead of building one
 *   from `captures[0].source` (used for the repeat recursion below).
 * @param {string} [suffix=""] - Pattern text appended when the RegExp is built.
 * @returns {Object|null} null when nothing matches. Otherwise an object with
 *   one property per keyed capture. Keyed captures that carry
 *   `repeatCaptures` yield an array with one parsed object per repetition
 *   (an empty array when the group captured nothing). The RegExp used is
 *   available as `__regExp` on the object's prototype.
 */
function parse( captures, string, re, suffix = "" ){
  const regExp = re || new RegExp( "^" + captures[0].source + suffix );
  const results = regExp.exec( string );
  //if regExp failed, return null
  if( results === null )
    return null;
  return Object.assign(
    //setup base object w/ __regExp hidden in the prototype (used for debugging)
    Object.create({ __regExp: regExp }),
    //zip captures & results into single-property objects and spread into above Object.assign
    ...captures.map( ( { key, repeatCaptures }, i ) => {
      //captures without a key contribute nothing
      if( !key )
        return null;
      const captured = results[ i ];
      if( !repeatCaptures )
        return { [ key ]: captured };
      //Repeated capture: split the captured text into its repetitions.
      const innerRegExp = new RegExp( repeatCaptures[0].source, "g" );
      //match() yields null when there is no repetition at all, and the group
      //itself is undefined when it did not take part in the match
      const matches = ( typeof captured === "string" && captured.match( innerRegExp ) ) || [];
      const items = matches
        //Parse every repetition against the captured text, not the whole input:
        //the "g" flag makes successive exec calls advance through `captured`,
        //mirroring the matches found above.
        .map( () => parse( repeatCaptures, captured, innerRegExp ) )
        //remove failed parses and empty objects
        .filter( ( item ) => item !== null && Object.keys( item ).length );
      return { [ key ]: items };
    })
  );
}
/**
 * Debugging tester: parses `string` with `exp` and logs the outcome as a
 * collapsed console group.
 *
 * @param {Expression} exp - Expression under test; compiled via `exp.compile()`.
 * @param {string} string - Input text handed to parse().
 * @param {*} [message=string] - Label shown in the group header.
 */
function test( exp, string, message = string ){
  const captures = exp.compile();
  const result = parse( captures, string );
  const matched = Boolean( result );
  const regExp = matched ? result.__regExp : null;

  console.groupCollapsed( "TEST", message, matched, result );

  //Debug table: on success pair each raw RegExp group with its capture object,
  //on failure just show the capture array
  let rows = captures;
  if( matched ){
    const groups = regExp.exec( string );
    rows = groups.map( ( value, i ) => Object.assign( { "regExp result": value }, exp[ i ] ) );
  }
  console.table( rows );

  console.log( "- input string:", string );
  console.log( "- RexExp:", regExp );
  console.groupEnd();
}
//////////////////////////////////////////////////////////////////////////
// ---- Example: parsing a URI query string ----
// One or more characters out of letters, digits and - _ . ! ~ * ' ( )
const uriWord = exp( /[A-Za-z0-9\-_.!~*'()]+/ );
// A single parameter: a captured key, an optional "=" followed by a captured
// value, and an optional trailing "&" separator.
const queryParamater = exp(
{ key: uriWord },
exp(
"=",
{ value: uriWord },
).maybe(),
exp( "&" ).maybe(),
);
// A literal "?" followed by zero or more parameters, captured as a list
// under the key "queryParamaters".
const query = exp(
"?",
exp({ queryParamaters: queryParamater.repeat( 0 ) })
);
//query.compile();
//--> [ { source: "..." }, { source: "...", key: "queryParamaters", repeatCaptures: [ ... ] }, { source: "..." }, { source: "..." } ]
// repeatCaptures: [ { source: "..." }, { source: "...", key: "key" }, { source: "...", key: "value" } ]
//parse( query.compile(), "?a=1&b=2" );
//--> { queryParamaters: [ { key: "a", value: "1" }, { key: "b", value: "2" } ] }
// "b" has no "=value" part, so this input also exercises the optional group.
test( query, "?a=1&b&c=3" );
//More tests below...
// Leaves the enclosing `main:` block here, so none of the statements below run.
// Remove or move this line to run the additional tests.
break main;
// Single-letter building blocks for the remaining tests.
const a = exp( "a" );
const b = exp( "b" );
const c = exp( "c" );
// Alternation in which only the right-hand side is a capture descriptor,
// repeated one or more times.
test(
exp({ value: a.or( { b } ).repeat() }),
"aababq",
"or-repeat w/ partial inner capture",
)
// break main;
// Composing an expression that already contains captures with a further capture.
const ab = exp( { a }, { b } );
const abc = exp( ab, { c } );
test( abc, "abcd", "{a}{b}{c}" );
// One capture around a sequence of three uncaptured parts.
test(
exp({ abc: exp( a, b, c ) }),
"abcd",
"abc: {a,b,c}"
);
// repeat( 2 ) uses the `{min,max}` form with min = max = 2.
test(
a.repeat( 2 ),
"aaa",
"a .r(2)"
)
// A captured alternation inside a repeat, collected as a list under "values".
test(
exp({
values: exp({
value: a.or( b )
})
.repeat()
}),
"abbca",
"{a|b} .r()",
);
// Closes the `main:` labeled block.
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment