Created
February 5, 2022 13:46
-
-
Save bennadel/83e0b5caaf4a4d7df9577bb4a9f29f53 to your computer and use it in GitHub Desktop.
Normalizing 0xA0 (No-Break Space) And Other Special Characters Within ColdFusion Form Posts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
component {

	/**
	* I run at the top of every incoming ColdFusion request and pass each simple
	* (string-like) value in the form scope through the text normalizer, writing the
	* cleaned value back into the form scope under the same key.
	*
	* NOTE(review): "textNormalization" is not defined in this excerpt - presumably it
	* is a text-normalization component instance made available to this component
	* elsewhere; confirm where it gets instantiated.
	*/
	public void function onRequestStart() {

		for ( var fieldName in form ) {

			var fieldValue = form[ fieldName ];

			// Complex values (anything that is not a simple value) are left untouched.
			if ( ! isSimpleValue( fieldValue ) ) {

				continue;

			}

			form[ fieldName ] = textNormalization.normalizeText( fieldValue );

		}

	}

}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
component {

	/**
	* I apply all the normalization methods to the given value and return the result.
	*
	* The individual normalize*() methods called below are not defined in this excerpt.
	*
	* @value I am the text being normalized.
	*/
	public string function normalizeText( required string value ) {

		var result = normalizeLineEndings( value );
		result = normalizeSpaces( result );
		result = normalizeDoubleQuotes( result );
		result = normalizeSingleQuotes( result );
		result = normalizeDashes( result );
		result = normalizeBullets( result );

		// Trim LAST, not first: trim() only strips regular whitespace / control
		// characters, so special spaces (such as the No-Break Space) sitting at the
		// edges of the input only become trimmable once normalizeSpaces() has turned
		// them into regular spaces. Trimming up-front would leave those edge spaces in
		// the final result.
		return( trim( result ) );

	}

}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* The site - https://unicode-table.com/ - is great for looking up Unicode values. | |
*/ | |
component
	output = false
	hint = "I provide methods for normalizing special characters within text values."
	{

	// A note on the patterns used throughout this component: each one starts with
	// (?x), which turns on Java's "comments" mode. In that mode, the whitespace and the
	// pound-prefixed notes inside the character class are ignored by the regex engine.
	// The pound signs are doubled because that is how CFML escapes a literal pound sign
	// inside of a string.

	// --
	// PUBLIC METHODS.
	// --

	/**
	* I replace special bullets with the standard asterisk.
	*
	* @value I am the text being normalized.
	*/
	public string function normalizeBullets( required string value ) {

		return(
			jreReplace(
				value,
				"(?x)[
					\u2022 ## Bullet.
					\u2023 ## Triangular Bullet.
					\u2043 ## Hyphen Bullet.
					\u2219 ## Bullet Operator.
					\u25aa ## Black Small Square Emoji.
					\u25cb ## White Circle.
					\u25cf ## Black Circle.
					\u25e6 ## White Bullet.
				]",
				"*"
			)
		);

	}

	/**
	* I replace like-sized dashes with standard dashes.
	*
	* @value I am the text being normalized.
	*/
	public string function normalizeDashes( required string value ) {

		return(
			jreReplace(
				value,
				"(?x)[
					\u2013 ## En Dash.
					\u2212 ## Minus Sign.
				]",
				"-"
			)
		);

	}

	/**
	* I replace "smart double quotes" with standard double quotes.
	*
	* @value I am the text being normalized.
	*/
	public string function normalizeDoubleQuotes( required string value ) {

		return(
			jreReplace(
				value,
				"(?x)[
					\u201c ## Left Double Quotation Mark.
					\u201d ## Right Double Quotation Mark.
					\u201e ## Double Low-9 Quotation Mark.
					\u201f ## Double High-Reversed-9 Quotation Mark.
					\u275d ## Heavy Double Turned Comma Quotation Mark Ornament.
					\u275e ## Heavy Double Comma Quotation Mark Ornament.
					\u2e42 ## Double Low-Reversed-9 Quotation Mark.
					\u301d ## Reversed Double Prime Quotation Mark.
					\u301e ## Double Prime Quotation Mark.
					\u301f ## Low Double Prime Quotation Mark.
					\uff02 ## Fullwidth Quotation Mark.
				]",
				""""
			)
		);

	}

	/**
	* I convert all the line-breaks to NewLine characters. Both the CRLF pair and a lone
	* Carriage Return are replaced with a single chr( 10 ).
	*
	* @value I am the text being normalized.
	*/
	public string function normalizeLineEndings( required string value ) {

		return( jreReplace( value, "\r\n?", chr( 10 ) ) );

	}

	/**
	* I replace "smart single quotes" with standard single quotes.
	*
	* @value I am the text being normalized.
	*/
	public string function normalizeSingleQuotes( required string value ) {

		return(
			jreReplace(
				value,
				"(?x)[
					\u2018 ## Left Single Quotation Mark.
					\u2019 ## Right Single Quotation Mark.
					\u201a ## Single Low-9 Quotation Mark.
					\u201b ## Single High-Reversed-9 Quotation Mark.
					\u275b ## Heavy Single Turned Comma Quotation Mark Ornament.
					\u275c ## Heavy Single Comma Quotation Mark Ornament.
					\u275f ## Heavy Low Single Comma Quotation Mark Ornament.
				]",
				"'"
			)
		);

	}

	/**
	* I convert any special spaces to regular spaces.
	*
	* NOTE(review): the zero-width characters (u200b, ufeff) are also turned into a
	* visible regular space rather than being removed - confirm that this is intended.
	*
	* @value I am the text being normalized.
	*/
	public string function normalizeSpaces( required string value ) {

		return(
			jreReplace(
				value,
				"(?x)[
					\u00a0 ## No-Break Space.
					\u2000 ## En Quad (space that is one en wide).
					\u2001 ## Em Quad (space that is one em wide).
					\u2002 ## En Space.
					\u2003 ## Em Space.
					\u2004 ## Thick Space.
					\u2005 ## Mid Space.
					\u2006 ## Six-Per-Em Space.
					\u2007 ## Figure Space.
					\u2008 ## Punctuation Space.
					\u2009 ## Thin Space.
					\u200a ## Hair Space.
					\u200b ## Zero Width Space.
					\u2028 ## Line Separator.
					\u2029 ## Paragraph Separator.
					\u202f ## Narrow No-Break Space.
					\ufeff ## Zero Width No-Break Space.
				]",
				" "
			)
		);

	}

	/**
	* I apply all the normalization methods to the given value and return the result.
	*
	* @value I am the text being normalized.
	*/
	public string function normalizeText( required string value ) {

		var result = normalizeLineEndings( value );
		result = normalizeSpaces( result );
		result = normalizeDoubleQuotes( result );
		result = normalizeSingleQuotes( result );
		result = normalizeDashes( result );
		result = normalizeBullets( result );

		// Trim LAST, not first: trim() only strips regular whitespace / control
		// characters, so special spaces (such as the No-Break Space) sitting at the
		// edges of the input only become trimmable once normalizeSpaces() has turned
		// them into regular spaces. Trimming up-front would leave those edge spaces in
		// the final result.
		return( trim( result ) );

	}

	// --
	// PRIVATE METHODS.
	// --

	/**
	* I use Java's Pattern engine to perform a RegEx replace on the given input. All
	* matches of the pattern are replaced.
	*
	* CAUTION: The replacement is handed to Java's String.replaceAll(), which gives
	* special meaning to the dollar-sign and the back-slash within the replacement
	* text. The replacements used in this component are all plain literals.
	*
	* @input I am the text being searched.
	* @pattern I am the Java Regular Expression pattern being matched.
	* @replacement I am the text that replaces each match (defaults to empty string).
	*/
	private string function jreReplace(
		required string input,
		required string pattern,
		string replacement = ""
		) {

		var result = javaCast( "string", input ).replaceAll(
			javaCast( "string", pattern ),
			javaCast( "string", replacement )
		);

		return( result );

	}

}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment