Created
December 22, 2018 23:30
-
-
Save mynameisflorian/ff32c93be8b381bf5e913cf58b7a3c2c to your computer and use it in GitHub Desktop.
RegExp Composer w/ capture data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"use strict"; | |
/*
exp( ...arguments ) --> new Expression( ...arguments )
  - each supplied argument can be an Expression, RegExp, capture descriptor (see Capturing), or string
Capturing
  - An argument wrapped in a single-property object is captured
    exp( { month }, sep, { day }, sep, { year } )
    --> { month: 1, day: 23, year: 1945 }
    exp({ petType: dog.or( cat ) })
Maybe
  .maybe() -- transforms expression so that it is optional
    name = exp( firstName, exp( sep, lastName ).maybe() )
Or
  .or( exp )
    exp( "woof" ).or( "meow" );
Repeating
  .repeat( 0 ) -- repeat 0 or more times
  .repeat( 1 ) -- repeat 1 or more times
  .repeat( n ) -- repeat exactly n times
  .repeat( min, max ) -- repeat between min and max times
    exp( "m", exp( "e" ).repeat(), "ow" )
    month = digit.repeat( 1, 2 );
Compiling
  .compile() -- returns "capture array"
  The capture array is used by the parser to collect data from RegExp::exec's returned array. The indexes line up with each other.
  [0] "root capture object" -- represents the entire match
  [n] --> { source, key, repeatCaptures }
  To create a regular expression from the capture array, use new RegExp( captures[0].source )
*/
main: { | |
console.clear(); | |
/**
 * Convenience factory: builds an Expression from the given parts and
 * freezes it before handing it back.
 *
 * @param {...*} args - parts forwarded unchanged to the Expression constructor
 * @returns {Expression} the frozen expression
 */
function exp( ...args ){
  const expression = new Expression( ...args );
  return Object.freeze( expression );
}
/**
 * Returns the regular-expression source for one expression part, optionally
 * wrapped in a group.
 *
 * @param {*} exp - the part; it is resolved by _getSource
 * @param {string|false} [wrap=false] - group opener to put in front of the
 *   source ("(" for a capturing group, "(?:" for a non-capturing one);
 *   false leaves the source unwrapped
 * @returns {string} the (possibly wrapped) source
 */
function getSource( exp, wrap = false ){
  const [ source, wrapped ] = _getSource( exp );
  if( !wrap )
    return source;
  // A source that already is a single group does not need another
  // non-capturing group around it. A capturing wrap is different: every
  // capture descriptor contributes one capture object, so it must also
  // contribute exactly one group of its own -- even when its value is
  // itself a capture descriptor -- or the capture list and the
  // RegExp::exec result array stop lining up.
  if( wrapped && wrap !== "(" )
    return source;
  return wrap + source + ")";
}
/**
 * Resolves one expression part to its regular-expression source.
 *
 * @param {*} exp - a RegExp, an Expression, a capture descriptor (an object
 *   whose first property holds the captured part) or a string
 * @returns {Array} a pair [ source<string>, wrapped<boolean> ]; wrapped is
 *   true when the source is already enclosed in a capturing group
 * @throws {TypeError} when exp is none of the supported kinds, or is a
 *   capture descriptor without any property
 */
function _getSource( exp ){
  if( exp instanceof RegExp )
    return [ exp.source, false ];
  if( exp instanceof Expression )
    return [ exp[0].source, false ];
  if( exp instanceof Object ){
    const entries = Object.entries( exp );
    if( entries.length === 0 )
      throw new TypeError( "Capture descriptor needs a property holding the captured expression" );
    const value = entries[ 0 ][ 1 ];
    return [ getSource( value, "(" ), true ];
  }
  if( typeof exp === "string" )
    //escape regExp characters so the string matches literally
    return [ exp.replace( /[-\/\\^$*+?.()|[\]{}]/g, '\\$&' ), false ];
  // Fail here with a clear message instead of returning undefined and
  // letting the caller fail while destructuring the result.
  throw new TypeError( "Expected an Expression, RegExp, capture descriptor or string, got: " + String( exp ) );
}
/**
 * Lists the capture objects that one expression part contributes.
 *
 * @param {*} exp - an Expression, RegExp, capture descriptor or string
 * @returns {Array} the capture objects in group order; an empty array when
 *   the part captures nothing
 */
function getCaptureObjects( exp ){
  //plain strings and regular expressions never contribute captures
  if( typeof exp === "string" || exp instanceof RegExp )
    return [];
  //an Expression contributes everything after its root capture object
  if( exp instanceof Expression )
    return Array.from( exp ).slice( 1 );
  if( exp instanceof Object ){
    //capture descriptor: the first property is { key: capturedPart }
    const [ key, value ] = Object.entries( exp )[ 0 ];
    const descriptor = { key, source: getSource( value, "(" ) };
    //carry the repeat data of a repeated Expression along with the capture
    const repeatCaptures = value instanceof Expression ? value[0].repeatCaptures : null;
    if( repeatCaptures )
      descriptor.repeatCaptures = repeatCaptures;
    //the descriptor's own group comes first, then the groups nested inside it
    return [ Object.freeze( descriptor ), ...getCaptureObjects( value ) ];
  }
}
/**
 * A composed regular expression, stored as its "capture array":
 *   [0]  root capture object { source } describing the whole expression
 *   [n]  one capture object per capturing group, in group order
 *
 * Instances are built from Expressions, RegExps, capture descriptors and
 * strings. The combinators (or / maybe / repeat) never modify `this`; each
 * returns a new, frozen Expression.
 */
class Expression extends Array{
  /**
   * Concatenates the sources of all parts and collects their capture objects.
   *
   * @param {...*} args - the parts, in match order
   */
  constructor( ...args ){
    const captures = args.flatMap( getCaptureObjects );
    const source = args.reduce( ( src, exp ) => src + getSource( exp ), "" );
    super( Object.freeze({ source }), ...captures );
    //note: the instance itself is frozen by exp( ... ) or by the combinators below
  }
  /**
   * @returns {Array} a plain-array copy of the capture array
   */
  compile(){
    return Array.from( this );
  }
  /**
   * Matches either this expression or the given alternative.
   *
   * @param {*} exp - the alternative part
   * @returns {Expression} new frozen expression
   */
  or( exp ){
    //building from both sides collects the captures of both sides
    const alternation = new Expression( this, exp );
    // "|" binds weaker than concatenation, so the alternation has to sit in
    // a group of its own. Without it, exp( "x", a.or( b ), "y" ) would
    // compile to x(?:a)|(?:b)y and match "xa" or "by" instead of "xay" or "xby".
    alternation[ 0 ] = Object.freeze({
      source: `(?:${ getSource( this ) }|${ getSource( exp ) })`,
    });
    return Object.freeze( alternation );
  }
  /**
   * Makes this expression optional.
   *
   * @returns {Expression} new frozen expression
   */
  maybe(){
    const optional = new Expression( this );
    optional[ 0 ] = Object.freeze({
      source: getSource( this, "(?:" ) + "?",
    });
    return Object.freeze( optional );
  }
  /**
   * Repeats this expression.
   *   repeat() / repeat( 1 )  -- one or more times ("+")
   *   repeat( 0 )             -- zero or more times ("*")
   *   repeat( n )             -- {n,n}
   *   repeat( min, max )      -- {min,max}
   *
   * @param {number} [min=1]
   * @param {number} [max=min]
   * @returns {Expression} new frozen expression
   */
  repeat( min = 1, max = min ){
    let quantifier = `{${min},${max}}`;
    if( min === 1 && max === 1 )
      quantifier = "+";
    else if( min === 0 && max === 0 )
      quantifier = "*";
    //Array.from gives a plain array; Array methods that build a derived
    //array (map, slice, ...) would go through this class's constructor instead
    const captures = Array.from( this );
    const repeated = new Expression( this );
    //remove keys and other data from the new expression's capture list:
    //regexp results from groups inside loops are unreliable.
    //they are captured via repeatCaptures, which can be parsed separately
    captures.forEach( ( { source }, i ) => {
      repeated[ i ] = Object.freeze({ source });
    });
    //overwrite the root capture object
    repeated[ 0 ] = Object.freeze({
      source: getSource( this, "(?:" ) + quantifier,
      repeatCaptures: captures,
    });
    return Object.freeze( repeated );
  }
}
//debugging/example parser | |
/**
 * Debugging/example parser: runs a compiled capture array against a string
 * and collects the keyed captures into an object.
 *
 * @param {Array} captures - capture array (see Expression::compile); its
 *   indexes line up with the array returned by RegExp::exec
 * @param {string} string - the text to match
 * @param {RegExp} [re] - regular expression to use instead of building one
 *   from captures[0].source; when given, suffix is ignored
 * @param {string} [suffix=""] - source appended to the generated, "^"-anchored
 *   regular expression
 * @returns {Object|null} object holding one property per keyed capture, or
 *   null when the string does not match. The RegExp used is reachable as
 *   __regExp through the prototype (for debugging). A keyed capture of a
 *   repeated expression yields an array with one parsed object per
 *   repetition; the array is empty when the capture is empty or did not
 *   take part in the match.
 */
function parse( captures, string, re, suffix = "" ){
  const regExp = re || new RegExp( "^" + captures[0].source + suffix );
  const results = regExp.exec( string );
  //if regExp failed, return null
  if( results === null )
    return null;

  //parses the text captured by a repeated expression into one object per repetition
  const parseRepeats = ( captured, repeatCaptures ) => {
    //the group did not take part in the match, or matched the empty string
    if( !captured )
      return [];
    const innerRegExp = new RegExp( repeatCaptures[0].source, "g" );
    //match() only tells how many repetitions there are (null when there are none)
    const repetitions = captured.match( innerRegExp ) || [];
    return repetitions
      //with the "g" flag, successive exec calls advance through the text.
      //They have to run against the captured text, not the whole input:
      //text in front of the capture could match the inner pattern as well.
      .map( () => parse( repeatCaptures, captured, innerRegExp ) )
      //remove failed parses and empty objects
      .filter( ( item ) => item !== null && Object.keys( item ).length > 0 );
  };

  //zip captures & results into single-property objects; captures without a key give null
  const entries = captures.map( ( { key, repeatCaptures }, i ) => {
    if( !key )
      return null;
    const value = repeatCaptures
      ? parseRepeats( results[i], repeatCaptures )
      : results[i];
    return { [ key ]: value };
  });

  return Object.assign(
    //base object w/ __regExp hidden in the prototype (used for debugging)
    Object.create({ __regExp: regExp }),
    //Object.assign skips the null entries
    ...entries
  );
}
//debugging tester | |
/**
 * Debugging tester: parses `string` with the given expression and logs the
 * outcome as a collapsed console group.
 *
 * @param {Expression} exp - expression to compile and run
 * @param {string} string - input text
 * @param {string} [message=string] - label shown in the group header
 */
function test( exp, string, message = string ){
  const captures = exp.compile();
  const result = parse( captures, string );
  const matched = Boolean( result );
  const regExp = matched ? result.__regExp : null;

  console.groupCollapsed( "TEST", message, matched, result );
  //Debug table: raw regExp results next to their capture objects on success,
  //the bare capture array otherwise
  if( matched ){
    const rows = regExp
      .exec( string )
      .map( ( value, i ) => ({ "regExp result": value, ...exp[i] }) );
    console.table( rows );
  } else {
    console.table( captures );
  }
  console.log( "- input string:", string );
  console.log( "- RexExp:", regExp );
  console.groupEnd();
}
////////////////////////////////////////////////////////////////////////// | |
const uriWord = exp( /[A-Za-z0-9\-_.!~*'()]+/ ); | |
const queryParamater = exp( | |
{ key: uriWord }, | |
exp( | |
"=", | |
{ value: uriWord }, | |
).maybe(), | |
exp( "&" ).maybe(), | |
); | |
const query = exp( | |
"?", | |
exp({ queryParamaters: queryParamater.repeat( 0 ) }) | |
); | |
//query.compile(); | |
//--> [ { source: "..." }, { source: "...", key: "queryParamaters", repeatCaptures: [ ... ] }, { source: "..." }, { source: "..." } ] | |
// repeatCaptures: [ { source: "..." }, { source: "...", key: "key" }, { source: "...", key: "value" } ] | |
//parse( query.compile(), "?a=1&b=2" ); | |
//--> { queryParamaters: [ { key: "a", value: "1" }, { key: "b", value: "2" } ] } | |
test( query, "?a=1&b&c=3" ); | |
//More tests below... | |
break main; | |
//Additional tests; each expression is named right before it is run
const a = exp( "a" );
const b = exp( "b" );
const c = exp( "c" );

const orRepeat = exp({ value: a.or( { b } ).repeat() });
test( orRepeat, "aababq", "or-repeat w/ partial inner capture" );
// break main;

const ab = exp( { a }, { b } );
const abc = exp( ab, { c } );
test( abc, "abcd", "{a}{b}{c}" );

const abcGroup = exp({ abc: exp( a, b, c ) });
test( abcGroup, "abcd", "abc: {a,b,c}" );

const twoAs = a.repeat( 2 );
test( twoAs, "aaa", "a .r(2)" );

const repeatedValue = exp({ value: a.or( b ) }).repeat();
const values = exp({ values: repeatedValue });
test( values, "abbca", "{a|b} .r()" );
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment