SweptThrone/string_cleanse.js

## string_cleanse.js
/*
Name: StringCleanse
Author: SweptThrone (sweptthr.one)
Description: Replaces non-typeable characters in a provided string with typeable versions.
Returns: The cleansed string, or the original string if no valid non-typeable characters were found.
Argument: The string to cleanse.
UPDATE January 17, 2022: Added some outliers in the 0x21## range.  Manually.  Also changed some decimals to hex.
*/

/**
 * Replaces stylised Unicode look-alikes of ASCII letters and digits with the plain characters.
 *
 * Handled ranges:
 *  - U+1D400-U+1D6A3  mathematical letters (13 styles, 52 code points each)
 *  - U+1D7CE-U+1D7FF  mathematical digits (5 styles, 10 code points each)
 *  - U+FF10-U+FF19, U+FF21-U+FF5A  fullwidth digits and letters (the six fullwidth
 *    punctuation marks between Z and a come along and map to [ \ ] ^ _ `)
 *  - U+24B6-U+24E9, U+2460-U+2468, U+24EA  circled letters and digits
 *  - U+1F130-U+1F149, U+1F150-U+1F169, U+1F170-U+1F189  letters in boxes / circles / black boxes
 *  - a hand-written table of outliers in the U+21xx range
 *
 * @param {string} str - The string to cleanse.
 * @returns {string} The cleansed string; identical to the input when nothing matched.
 */
function StringCleanse( str ) {
	// Every code point we know how to translate. The u flag enables \u{...} escapes and makes
	// characters outside the BMP match as one unit instead of two surrogate halves.
	const dirtyRegex = /[\u{1D400}-\u{1D6A3}\u{1D7CE}-\u{1D7FF}\u{FF21}-\u{FF5A}\u{FF10}-\u{FF19}\u{24B6}-\u{24E9}\u{2460}-\u{2468}\u{24EA}\u{2102}\u{210A}-\u{210E}\u{2110}-\u{2113}\u{2115}\u{2118}-\u{211D}\u{2124}\u{2128}\u{212A}\u{212C}-\u{2131}\u{2133}\u{2134}\u{2139}\u{2145}-\u{2149}\u{1F130}-\u{1F149}\u{1F150}-\u{1F169}\u{1F170}-\u{1F189}]/gu

	// Characters that do not sit in a contiguous A-Z / a-z run, so they need an explicit table.
	const outliers = { // I HATE UNICODE I HATE UNICODE I HATE UNICODE
		0x2102: "C",
		0x210A: "g",
		0x210B: "H",
		0x210C: "H",
		0x210D: "H",
		0x210E: "h",
		0x2110: "I",
		0x2111: "I",
		0x2112: "L", // SCRIPT CAPITAL L (was wrongly mapped to "I")
		0x2113: "l",
		0x2115: "N",
		0x2118: "P",
		0x2119: "P",
		0x211A: "Q",
		0x211B: "R",
		0x211C: "R",
		0x211D: "R",
		0x2124: "Z",
		0x2128: "Z",
		0x212A: "K",
		0x212C: "B",
		0x212D: "C",
		0x212E: "e",
		0x212F: "e",
		0x2130: "E",
		0x2131: "F",
		0x2133: "M",
		0x2134: "o",
		0x2139: "i",
		0x2145: "D",
		0x2146: "d",
		0x2147: "e",
		0x2148: "i",
		0x2149: "j"
	}

	// One pass over the string: the callback receives each dirty character and returns its
	// typeable replacement. A function replacement also avoids any "$" pattern expansion.
	return str.replace( dirtyRegex, ( ch ) => {
		const code = ch.codePointAt( 0 )

		if ( code >= 0x1D400 && code <= 0x1D6A3 ) { // letters
			// each style is laid out as 26 capitals followed by 26 lowercase letters
			const pos = ( code - 0x1D400 ) % 52
			return String.fromCodePoint( pos < 26 ? 0x41 + pos : 0x61 + pos - 26 )
		}

		if ( code >= 0x1D7CE && code <= 0x1D7FF ) { // numbers
			// each style is a run of the ten digits 0-9
			return String.fromCodePoint( 0x30 + ( code - 0x1D7CE ) % 10 )
		}

		// these are more consistent because they're only in one range
		if ( ( code >= 0xFF21 && code <= 0xFF5A ) || ( code >= 0xFF10 && code <= 0xFF19 ) ) { // fullwidth letters and numbers
			// fullwidth forms mirror ASCII at a fixed distance
			return String.fromCodePoint( code - 0xFEE0 )
		}

		if ( code >= 0x24B6 && code <= 0x24E9 ) { // circled letters: capitals first, then lowercase
			return String.fromCodePoint( code > 0x24CF ? 0x61 + code - 0x24D0 : 0x41 + code - 0x24B6 )
		}

		if ( code === 0x24EA ) { // circled 0 lives apart from circled 1-9
			return "0"
		}

		if ( code >= 0x2460 && code <= 0x2468 ) { // circled 1-9
			return String.fromCodePoint( 0x31 + code - 0x2460 )
		}

		if ( code >= 0x1F130 && code <= 0x1F149 ) { // letters in boxes
			return String.fromCodePoint( 0x41 + code - 0x1F130 )
		}

		if ( code >= 0x1F150 && code <= 0x1F169 ) { // letters in circles
			return String.fromCodePoint( 0x41 + code - 0x1F150 )
		}

		if ( code >= 0x1F170 && code <= 0x1F189 ) { // letters in black boxes
			return String.fromCodePoint( 0x41 + code - 0x1F170 )
		}

		// miscellaneous outliers; fall back to the character itself if the table has no entry
		return outliers[ code ] || ch
	} )
}
	/*
	Name: StringCleanse
	Author: SweptThrone (sweptthr.one)
	Description: Replaces non-typeable characters in a provided string with typeable versions.
	Returns: The cleansed string, or the original string if no valid non-typeable characters were found.
	Argument: The string to cleanse.
	UPDATE January 17, 2022: Added some outliers in the 0x21## range. Manually. Also changed some decimals to hex.
	*/

	/**
	 * Replaces stylised Unicode look-alikes of ASCII letters and digits with the plain characters.
	 *
	 * Handled ranges:
	 *  - U+1D400-U+1D6A3  mathematical letters (13 styles, 52 code points each)
	 *  - U+1D7CE-U+1D7FF  mathematical digits (5 styles, 10 code points each)
	 *  - U+FF10-U+FF19, U+FF21-U+FF5A  fullwidth digits and letters (the six fullwidth
	 *    punctuation marks between Z and a come along and map to [ \ ] ^ _ `)
	 *  - U+24B6-U+24E9, U+2460-U+2468, U+24EA  circled letters and digits
	 *  - U+1F130-U+1F149, U+1F150-U+1F169, U+1F170-U+1F189  letters in boxes / circles / black boxes
	 *  - a hand-written table of outliers in the U+21xx range
	 *
	 * @param {string} str - The string to cleanse.
	 * @returns {string} The cleansed string; identical to the input when nothing matched.
	 */
	function StringCleanse( str ) {
		// Every code point we know how to translate. The u flag enables \u{...} escapes and makes
		// characters outside the BMP match as one unit instead of two surrogate halves.
		const dirtyRegex = /[\u{1D400}-\u{1D6A3}\u{1D7CE}-\u{1D7FF}\u{FF21}-\u{FF5A}\u{FF10}-\u{FF19}\u{24B6}-\u{24E9}\u{2460}-\u{2468}\u{24EA}\u{2102}\u{210A}-\u{210E}\u{2110}-\u{2113}\u{2115}\u{2118}-\u{211D}\u{2124}\u{2128}\u{212A}\u{212C}-\u{2131}\u{2133}\u{2134}\u{2139}\u{2145}-\u{2149}\u{1F130}-\u{1F149}\u{1F150}-\u{1F169}\u{1F170}-\u{1F189}]/gu

		// Characters that do not sit in a contiguous A-Z / a-z run, so they need an explicit table.
		const outliers = { // I HATE UNICODE I HATE UNICODE I HATE UNICODE
			0x2102: "C",
			0x210A: "g",
			0x210B: "H",
			0x210C: "H",
			0x210D: "H",
			0x210E: "h",
			0x2110: "I",
			0x2111: "I",
			0x2112: "L", // SCRIPT CAPITAL L (was wrongly mapped to "I")
			0x2113: "l",
			0x2115: "N",
			0x2118: "P",
			0x2119: "P",
			0x211A: "Q",
			0x211B: "R",
			0x211C: "R",
			0x211D: "R",
			0x2124: "Z",
			0x2128: "Z",
			0x212A: "K",
			0x212C: "B",
			0x212D: "C",
			0x212E: "e",
			0x212F: "e",
			0x2130: "E",
			0x2131: "F",
			0x2133: "M",
			0x2134: "o",
			0x2139: "i",
			0x2145: "D",
			0x2146: "d",
			0x2147: "e",
			0x2148: "i",
			0x2149: "j"
		}

		// One pass over the string: the callback receives each dirty character and returns its
		// typeable replacement. A function replacement also avoids any "$" pattern expansion.
		return str.replace( dirtyRegex, ( ch ) => {
			const code = ch.codePointAt( 0 )

			if ( code >= 0x1D400 && code <= 0x1D6A3 ) { // letters
				// each style is laid out as 26 capitals followed by 26 lowercase letters
				const pos = ( code - 0x1D400 ) % 52
				return String.fromCodePoint( pos < 26 ? 0x41 + pos : 0x61 + pos - 26 )
			}

			if ( code >= 0x1D7CE && code <= 0x1D7FF ) { // numbers
				// each style is a run of the ten digits 0-9
				return String.fromCodePoint( 0x30 + ( code - 0x1D7CE ) % 10 )
			}

			// these are more consistent because they're only in one range
			if ( ( code >= 0xFF21 && code <= 0xFF5A ) || ( code >= 0xFF10 && code <= 0xFF19 ) ) { // fullwidth letters and numbers
				// fullwidth forms mirror ASCII at a fixed distance
				return String.fromCodePoint( code - 0xFEE0 )
			}

			if ( code >= 0x24B6 && code <= 0x24E9 ) { // circled letters: capitals first, then lowercase
				return String.fromCodePoint( code > 0x24CF ? 0x61 + code - 0x24D0 : 0x41 + code - 0x24B6 )
			}

			if ( code === 0x24EA ) { // circled 0 lives apart from circled 1-9
				return "0"
			}

			if ( code >= 0x2460 && code <= 0x2468 ) { // circled 1-9
				return String.fromCodePoint( 0x31 + code - 0x2460 )
			}

			if ( code >= 0x1F130 && code <= 0x1F149 ) { // letters in boxes
				return String.fromCodePoint( 0x41 + code - 0x1F130 )
			}

			if ( code >= 0x1F150 && code <= 0x1F169 ) { // letters in circles
				return String.fromCodePoint( 0x41 + code - 0x1F150 )
			}

			if ( code >= 0x1F170 && code <= 0x1F189 ) { // letters in black boxes
				return String.fromCodePoint( 0x41 + code - 0x1F170 )
			}

			// miscellaneous outliers; fall back to the character itself if the table has no entry
			return outliers[ code ] || ch
		} )
	}