Skip to content

Instantly share code, notes, and snippets.

@SweptThrone
Last active February 24, 2022 23:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save SweptThrone/8095062ce402bc7bc11d646167afd27b to your computer and use it in GitHub Desktop.
Save SweptThrone/8095062ce402bc7bc11d646167afd27b to your computer and use it in GitHub Desktop.
Replace common non-typeable characters with typeable versions.
/*
Name: StringCleanse
Author: SweptThrone (sweptthr.one)
Description: Replaces non-typeable characters in a provided string with typeable versions.
    Handled families: mathematical alphanumeric letters and digits, fullwidth forms,
    circled letters and digits, squared / negative circled / negative squared letters,
    and a hand-maintained table of letterlike symbols in the 0x21## range.
Returns: The cleansed string, or an unchanged string if no valid non-typeable characters were found.
Argument: The string to cleanse.
UPDATE January 17, 2022: Added some outliers in the 0x21## range. Manually. Also changed some decimals to hex.
*/
function StringCleanse( str ) {
    // Every code point this function knows how to translate. The u flag makes the
    // class match whole code points, so astral characters are never split in half.
    const dirtyRegex = /[\u{1D400}-\u{1D6A3}\u{1D7CE}-\u{1D7FF}\u{FF21}-\u{FF5A}\u{FF10}-\u{FF19}\u{24B6}-\u{24E9}\u{2460}-\u{2468}\u{24EA}\u{2102}\u{210A}-\u{210E}\u{2110}-\u{2113}\u{2115}\u{2118}-\u{211D}\u{2124}\u{2128}\u{212A}\u{212C}-\u{2131}\u{2133}\u{2134}\u{2139}\u{2145}-\u{2149}\u{1F130}-\u{1F149}\u{1F150}-\u{1F169}\u{1F170}-\u{1F189}]/gu

    // Letterlike symbols that follow no arithmetic pattern, keyed by code point.
    const outliers = {
        0x2102: "C",
        0x210A: "g",
        0x210B: "H",
        0x210C: "H",
        0x210D: "H",
        0x210E: "h",
        0x2110: "I",
        0x2111: "I",
        0x2112: "L", // U+2112 is SCRIPT CAPITAL L, not an I
        0x2113: "l",
        0x2115: "N",
        0x2118: "P",
        0x2119: "P",
        0x211A: "Q",
        0x211B: "R",
        0x211C: "R",
        0x211D: "R",
        0x2124: "Z",
        0x2128: "Z",
        0x212A: "K",
        0x212C: "B",
        0x212D: "C",
        0x212E: "e",
        0x212F: "e",
        0x2130: "E",
        0x2131: "F",
        0x2133: "M",
        0x2134: "o",
        0x2139: "i",
        0x2145: "D",
        0x2146: "d",
        0x2147: "e",
        0x2148: "i",
        0x2149: "j"
    }

    // A single pass with a replacer callback: each matched character is translated
    // where it stands, so nothing is rescanned and no loop counters are needed.
    return str.replace( dirtyRegex, ( ch ) => {
        const code = ch.codePointAt( 0 )

        if ( code >= 0x1D400 && code <= 0x1D6A3 ) { // mathematical letters
            // the range is 13 back-to-back alphabets of 52 code points each:
            // 26 capitals followed by 26 lowercase letters
            const pos = ( code - 0x1D400 ) % 52
            return String.fromCharCode( pos < 26 ? 0x41 + pos : 0x61 + pos - 26 )
        }
        if ( code >= 0x1D7CE && code <= 0x1D7FF ) { // mathematical digits
            // 5 back-to-back runs of the digits 0-9
            return String.fromCharCode( 0x30 + ( code - 0x1D7CE ) % 10 )
        }
        if ( ( code >= 0xFF21 && code <= 0xFF5A ) || ( code >= 0xFF10 && code <= 0xFF19 ) ) { // fullwidth letters and digits
            // fullwidth forms sit at a fixed distance from ASCII; the six fullwidth
            // punctuation marks between Z and a (0xFF3B-0xFF40) come along for the ride
            return String.fromCharCode( code - 0xFEE0 )
        }
        if ( code >= 0x24B6 && code <= 0x24E9 ) { // circled letters, capitals first, then lowercase
            return String.fromCharCode( code - ( code > 0x24CF ? 0x246F : 0x2475 ) )
        }
        if ( code === 0x24EA ) { // circled zero lives apart from circled 1-9
            return "0"
        }
        if ( code >= 0x2460 && code <= 0x2468 ) { // circled 1-9
            return String.fromCharCode( code - 0x242F )
        }
        if ( code >= 0x1F130 && code <= 0x1F149 ) { // letters in boxes
            return String.fromCharCode( code - 0x1F0EF )
        }
        if ( code >= 0x1F150 && code <= 0x1F169 ) { // letters in circles
            return String.fromCharCode( code - 0x1F10F )
        }
        if ( code >= 0x1F170 && code <= 0x1F189 ) { // letters in black boxes
            return String.fromCharCode( code - 0x1F12F )
        }

        // miscellaneous outliers; anything without a table entry is left untouched
        return outliers[ code ] || ch
    } )
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment