Created
August 7, 2017 12:21
-
-
Save bennadel/80e466eefba45f072fe32b24a80a48fd to your computer and use it in GitHub Desktop.
Creating A ColdFusion Closure-Based Wrapper To Java's Pattern Matcher For Regular Expression String Replacement
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<cfscript> | |
// Create a poem with majority lower-case characters.
content = "
roses are red,
violets are blue,
ColdFusion is the bee's knees,
and so are you!
";

// Let's transform the poem into one in which the first character of each line is
// upper-cased. In this case, we are taking into account that each line may contain
// leading white-space.
// --
// NOTE: In the following pattern, I could have made the [a-z] group mandatory and
// then omitted the isNull() check. However, I wanted to demonstrate that the captured
// group could be optional and would be passed-in as undefined.
replacement = jreReplaceEach(
	content,
	"(?m)^(\s*)([a-z])?",
	function ( $0, leadingSpaces, firstCharacter ) {

		// The line does not start with a lower-case letter, so there is nothing to
		// upper-case: hand the match back untouched.
		if ( isNull( firstCharacter ) ) {

			return( $0 );

		}

		// The match ($0) spans BOTH the leading white-space and the first character.
		// Since our return value replaces the entire match, the white-space has to be
		// emitted again; otherwise, it would be stripped from the resultant poem.
		return( leadingSpaces & ucase( firstCharacter ) );

	}
);

// NOTE: Using "text" format so we can see the white-space.
writeDump( var = replacement, format = "text" );
// ------------------------------------------------------------------------------- // | |
// ------------------------------------------------------------------------------- // | |
/**
* I scan the target text with a Java Regular Expression and let the given operator
* decide what each match gets replaced with. The operator is invoked with positional
* arguments that mirror the JavaScript String.replace() callback: the full match,
* one argument per capturing group, the 1-based offset of the match, and then the
* original target text. Whatever the operator returns is spliced into the result as
* a literal string (a missing return value is treated as an empty string).
*
* @targetText I am the text being scanned.
* @patternText I am the Java Regular Expression pattern used to locate matches.
* @operator I am the Function or Closure that is called for each match and returns its replacement.
* @output false
*/
public string function jreReplaceEach(
	required string targetText,
	required string patternText,
	required function operator
	) {

	var pattern = createObject( "java", "java.util.regex.Pattern" )
		.compile( javaCast( "string", patternText ) )
	;
	var matcher = pattern.matcher( javaCast( "string", targetText ) );
	var result = createObject( "java", "java.lang.StringBuffer" ).init();

	// Walk the target text one match at a time.
	while ( matcher.find() ) {

		// The operator receives its arguments positionally, which we model as an
		// argumentCollection struct keyed by 1-based argument position. We track
		// the next free position as we go so that the trailing arguments land
		// right after however many groups the pattern happens to define.
		var callbackArgs = {};
		var nextPosition = 1;
		var lastGroup = matcher.groupCount();
		var groupIndex = 0;

		// Group 0 is the entire match; groups 1..N are the capturing groups.
		while ( groupIndex <= lastGroup ) {

			callbackArgs[ nextPosition ] = matcher.group( javaCast( "int", groupIndex ) );
			nextPosition++;
			groupIndex++;

		}

		// Java reports a 0-based match offset; ColdFusion strings are 1-based, so
		// we shift it before handing it to the operator.
		callbackArgs[ nextPosition ] = ( matcher.start() + 1 );
		nextPosition++;

		// The last argument is always the original, unmodified input.
		callbackArgs[ nextPosition ] = targetText;
		nextPosition++;

		var replacementText = operator( argumentCollection = callbackArgs );

		// An operator that returns nothing is asking for the match to be removed.
		if ( isNull( replacementText ) ) {

			replacementText = "";

		}

		// The operator already had access to every captured group, so anything in
		// its return value that looks like a group reference (ex, "$1") is treated
		// as plain text rather than being expanded by the Matcher.
		var literalReplacement = matcher.quoteReplacement( javaCast( "string", replacementText ) );

		matcher.appendReplacement( result, literalReplacement );

	}

	// Copy over whatever text follows the final match.
	matcher.appendTail( result );

	return( result.toString() );

}
</cfscript> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment