Skip to content

Instantly share code, notes, and snippets.

@dotproto
Last active January 20, 2017 02:38
Show Gist options
  • Save dotproto/87f64736815add079a6b602d99a85227 to your computer and use it in GitHub Desktop.
Save dotproto/87f64736815add079a6b602d99a85227 to your computer and use it in GitHub Desktop.
Examining raw Unicode values and their normalized forms. TL;DR: compare Unicode strings using `.normalize()` and `.localeCompare()`.
// References
//
// - https://tc39.github.io/ecma262/#sec-ecmascript-language-types-string-type
// - http://unicode.org/reports/tr15/#Norm_Forms
// - http://unicode.org/faq/normalization.html#7 (What is the difference between W3C normalization and Unicode normalization?)
// - https://developer.mozilla.org/en-US/docs/Web/API/DOMParser
//
// Resources
//
// - http://stackoverflow.com/questions/8936984/uint8array-to-string-in-javascript
// - https://encoding.spec.whatwg.org/
{
// Canonical composition: 'NFC' is the form String.prototype.normalize() uses
// when called without an argument, spelled out here for clarity.
const norm = (text) => text.normalize('NFC')
// Boolean equality built on String.prototype.localeCompare(): true exactly
// when the two strings compare as equal (a result of 0).
const localeCompare = (left, right) => left.localeCompare(right) === 0
// ===========================================================================
// Three test values written as JavaScript string literals:
//   a - precomposed U+00E1 (LATIN SMALL LETTER A WITH ACUTE)
//   b - 'a' followed by U+0301 (COMBINING ACUTE ACCENT)
//   c - plain 'a', a different letter used as a negative control
const a = '\u00E1'
const b = 'a\u0301'
const c = 'a'
// Strict equality compares the raw code units, so a and b only match once
// both sides have been brought to the same normalization form via norm().
console.info('JS Strings: .normalize() comparison')
console.log(`a === b, ${a === b}`) // false
console.log(`norm(a) === b, ${norm(a) === b}`) // false
console.log(`a === norm(b), ${a === norm(b)}`) // true
console.log(`norm(a) === norm(b), ${norm(a) === norm(b)}`) // true
console.log(`norm(a) === norm(c), ${norm(a) === norm(c)}`) // false
console.log(`norm(b) === norm(c), ${norm(b) === norm(c)}`) // false
// localeCompare() returns 0 when the strings compare as equal; the result is
// negated so that the printed boolean reads as "are these equal?".
console.info('JS Strings: .localeCompare()')
console.log(`!a.localeCompare(b), ${!a.localeCompare(b)}`) // true
console.log(`!b.localeCompare(a), ${!b.localeCompare(a)}`) // true
console.log(`!a.localeCompare(c), ${!a.localeCompare(c)}`) // false
console.log(`!b.localeCompare(c), ${!b.localeCompare(c)}`) // false
// ===========================================================================
// Repeat the experiment with strings produced by the HTML parser: the same
// three values are written as character references in data-* attributes and
// read back through the element's dataset.
const frag = document.createElement('div')
frag.innerHTML = '<div data-a="&#x00E1;" data-b="a&#x0301;" data-c="a">hi</div>'
// A: precomposed form, B: base letter + combining accent, C: plain 'a'.
const { a: A, b: B, c: C } = frag.firstChild.dataset
console.info('DOM Strings: .normalize() comparison')
console.log(`A === B, ${A === B}`) // false
console.log(`norm(A) === B, ${norm(A) === B}`) // false
console.log(`A === norm(B), ${A === norm(B)}`) // true
console.log(`norm(A) === norm(B), ${norm(A) === norm(B)}`) // true
console.log(`norm(A) === norm(C), ${norm(A) === norm(C)}`) // false
console.log(`norm(B) === norm(C), ${norm(B) === norm(C)}`) // false
// The localeCompare() helper returns true when the two strings compare as equal.
console.info('DOM Strings: .localeCompare()')
console.log(`localeCompare(A, B), ${localeCompare(A, B)}`) // true
console.log(`localeCompare(B, A), ${localeCompare(B, A)}`) // true
console.log(`localeCompare(A, C), ${localeCompare(A, C)}`) // false
console.log(`localeCompare(B, C), ${localeCompare(B, C)}`) // false
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment