Skip to content

Instantly share code, notes, and snippets.

@bennadel
Created February 5, 2022 13:46
Show Gist options
  • Save bennadel/83e0b5caaf4a4d7df9577bb4a9f29f53 to your computer and use it in GitHub Desktop.
Save bennadel/83e0b5caaf4a4d7df9577bb4a9f29f53 to your computer and use it in GitHub Desktop.
Normalizing 0xA0 (No-Break Space) And Other Special Characters Within ColdFusion Form Posts
component {

	/**
	* I run at the start of every incoming ColdFusion request. I walk the form scope
	* and pass each simple-valued entry through the text normalizer, storing the
	* normalized value back under the same key. Non-simple entries are left as-is.
	*
	* NOTE(review): "textNormalization" is referenced unscoped and is not defined in
	* this snippet - it must be resolvable here (and expose normalizeText()) - confirm
	* how it is provided.
	*/
	public void function onRequestStart() {

		// Work from a snapshot of the keys so that we are not walking the scope
		// while writing values back into it.
		for ( var fieldName in structKeyArray( form ) ) {

			var fieldValue = form[ fieldName ];

			// Only simple values can be handed to the normalizer; skip the rest.
			if ( ! isSimpleValue( fieldValue ) ) {

				continue;

			}

			form[ fieldName ] = textNormalization.normalizeText( fieldValue );

		}

	}

}
component {

	/**
	* I apply all the normalization methods to the given value and return the
	* trimmed result.
	*
	* The individual normalize*() methods are called unscoped, so they are expected
	* to be defined on this same component (they are not shown in this excerpt).
	*
	* @value The raw text to normalize.
	* @return The normalized text with leading / trailing whitespace removed.
	*/
	public string function normalizeText( required string value ) {

		var result = normalizeLineEndings( value );
		result = normalizeSpaces( result );
		result = normalizeDoubleQuotes( result );
		result = normalizeSingleQuotes( result );
		result = normalizeDashes( result );
		result = normalizeBullets( result );

		// Trim AFTER the normalization. The trim() function does not strip special
		// spaces such as the No-Break Space (0xA0); so, if we trimmed first, a leading
		// or trailing special space would be converted into a regular space and then
		// left dangling on the end of the result.
		return( trim( result ) );

	}

}
/**
* The site - https://unicode-table.com/ - is great for looking up Unicode values.
*
* All of the patterns in this component are executed by Java's regular expression
* engine (see jreReplace()). They use the "(?x)" COMMENTS flag so that each code-point
* can sit on its own line next to a description. Within a ColdFusion string literal,
* "##" is the escape sequence for a single "#", which is what starts a comment in the
* resulting Java pattern.
*/
component
	output = false
	hint = "I provide methods for normalizing special characters within text values."
	{

	// --
	// PUBLIC METHODS.
	// --

	/**
	* I replace special bullets with the standard asterisk.
	*
	* @value The text in which to replace bullet characters.
	* @return The text with each special bullet replaced by "*".
	*/
	public string function normalizeBullets( required string value ) {

		return(
			jreReplace(
				value,
				"(?x)[
					\u2022 ## Bullet.
					\u2023 ## Triangular Bullet.
					\u2043 ## Hyphen Bullet.
					\u2219 ## Bullet Operator.
					\u25aa ## Black Small Square Emoji.
					\u25cb ## White Circle.
					\u25cf ## Black Circle.
					\u25e6 ## White Bullet.
				]",
				"*"
			)
		);

	}

	/**
	* I replace like-sized dashes with standard dashes.
	*
	* @value The text in which to replace dash characters.
	* @return The text with each matched dash replaced by "-".
	*/
	public string function normalizeDashes( required string value ) {

		return(
			jreReplace(
				value,
				"(?x)[
					\u2013 ## En Dash.
					\u2212 ## Minus Sign.
				]",
				"-"
			)
		);

	}

	/**
	* I replace "smart double quotes" with standard double quotes.
	*
	* @value The text in which to replace double-quote characters.
	* @return The text with each matched quote replaced by a standard double quote.
	*/
	public string function normalizeDoubleQuotes( required string value ) {

		// NOTE: The replacement literal below is a ColdFusion string that contains a
		// single, escaped double-quote character.
		return(
			jreReplace(
				value,
				"(?x)[
					\u201c ## Left Double Quotation Mark.
					\u201d ## Right Double Quotation Mark.
					\u201e ## Double Low-9 Quotation Mark.
					\u201f ## Double High-Reversed-9 Quotation Mark.
					\u275d ## Heavy Double Turned Comma Quotation Mark Ornament.
					\u275e ## Heavy Double Comma Quotation Mark Ornament.
					\u2e42 ## Double Low-Reversed-9 Quotation Mark.
					\u301d ## Reversed Double Prime Quotation Mark.
					\u301e ## Double Prime Quotation Mark.
					\u301f ## Low Double Prime Quotation Mark.
					\uff02 ## Fullwidth Quotation Mark.
				]",
				""""
			)
		);

	}

	/**
	* I convert all the line-breaks to NewLine characters. Both the CRLF pair and a
	* lone CR are replaced with a single LF.
	*
	* @value The text in which to replace line-breaks.
	* @return The text using only chr( 10 ) as the line-break.
	*/
	public string function normalizeLineEndings( required string value ) {

		return( jreReplace( value, "\r\n?", chr( 10 ) ) );

	}

	/**
	* I replace "smart single quotes" with standard single quotes.
	*
	* @value The text in which to replace single-quote characters.
	* @return The text with each matched quote replaced by a standard single quote.
	*/
	public string function normalizeSingleQuotes( required string value ) {

		return(
			jreReplace(
				value,
				"(?x)[
					\u2018 ## Left Single Quotation Mark.
					\u2019 ## Right Single Quotation Mark.
					\u201a ## Single Low-9 Quotation Mark.
					\u201b ## Single High-Reversed-9 Quotation Mark.
					\u275b ## Heavy Single Turned Comma Quotation Mark Ornament.
					\u275c ## Heavy Single Comma Quotation Mark Ornament.
					\u275f ## Heavy Low Single Comma Quotation Mark Ornament.
				]",
				"'"
			)
		);

	}

	/**
	* I convert any special spaces to regular spaces. Each matched character is
	* replaced one-for-one; runs of spaces are not collapsed.
	*
	* @value The text in which to replace space characters.
	* @return The text with each matched space replaced by a regular space.
	*/
	public string function normalizeSpaces( required string value ) {

		return(
			jreReplace(
				value,
				"(?x)[
					\u00a0 ## No-Break Space.
					\u2000 ## En Quad (space that is one en wide).
					\u2001 ## Em Quad (space that is one em wide).
					\u2002 ## En Space.
					\u2003 ## Em Space.
					\u2004 ## Thick Space.
					\u2005 ## Mid Space.
					\u2006 ## Six-Per-Em Space.
					\u2007 ## Figure Space.
					\u2008 ## Punctuation Space.
					\u2009 ## Thin Space.
					\u200a ## Hair Space.
					\u200b ## Zero Width Space.
					\u2028 ## Line Separator.
					\u2029 ## Paragraph Separator.
					\u202f ## Narrow No-Break Space.
					\ufeff ## Zero Width No-Break Space.
				]",
				" "
			)
		);

	}

	/**
	* I apply all the normalization methods to the given value and return the
	* trimmed result.
	*
	* @value The raw text to normalize.
	* @return The normalized text with leading / trailing whitespace removed.
	*/
	public string function normalizeText( required string value ) {

		var result = normalizeLineEndings( value );
		result = normalizeSpaces( result );
		result = normalizeDoubleQuotes( result );
		result = normalizeSingleQuotes( result );
		result = normalizeDashes( result );
		result = normalizeBullets( result );

		// Trim AFTER the normalization. The trim() function does not strip special
		// spaces such as the No-Break Space (0xA0); so, if we trimmed first, a leading
		// or trailing special space would be converted into a regular space and then
		// left dangling on the end of the result.
		return( trim( result ) );

	}

	// --
	// PRIVATE METHODS.
	// --

	/**
	* I use Java's Pattern engine to perform a RegEx replace on the given input.
	*
	* CAUTION: The replacement is handed to Java's String.replaceAll() as-is, which
	* means that "$" and "\" carry special meaning within it (group references and
	* escapes). All of the replacements used in this component are plain literals.
	*
	* @input The text being searched.
	* @pattern The Java regular expression to match.
	* @replacement The replacement text for every match (defaults to empty string).
	* @return The input with every match of the pattern replaced.
	*/
	private string function jreReplace(
		required string input,
		required string pattern,
		string replacement = ""
		) {

		// The values are explicitly cast to Java strings so that the underlying
		// java.lang.String method is invoked with string arguments.
		var result = javaCast( "string", input ).replaceAll(
			javaCast( "string", pattern ),
			javaCast( "string", replacement )
		);

		return( result );

	}

}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment