Skip to content

Instantly share code, notes, and snippets.

@leidegre
Created June 15, 2016 08:20
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save leidegre/8c117f7302d97ef2d05f5782d65bc09a to your computer and use it in GitHub Desktop.
Save leidegre/8c117f7302d97ef2d05f5782d65bc09a to your computer and use it in GitHub Desktop.
Figure out what casing various Unicode code points are mapped to by javascript
// This program only maps the Basic Latin and Latin-1 Supplement code ranges, but it can be extended to cover any other range.
/**
 * Builds the record used to describe the case mapping of one code value.
 *
 * @param {number[]} a - Pair whose first element is the source code value and
 *   whose second element is the code value it maps to.
 * @returns {number[]} A new triple `[source, target, target - source]`.
 */
function mkInterval(a) {
  const source = a[0]
  const target = a[1]
  const delta = target - source
  return [source, target, delta]
}
/**
 * Tells whether mapping `b` directly continues mapping `a`, i.e. whether both
 * the source and the target advance by exactly one and the delta is the same.
 *
 * Comparisons are strict, so a numeric field never matches a numeric-looking
 * string.
 *
 * @param {number[]} a - Triple `[source, target, delta]`.
 * @param {number[]} b - Triple `[source, target, delta]` to compare against `a`.
 * @returns {boolean} `true` when `b` immediately follows `a`.
 */
function adj(a, b) {
  return a[0] + 1 === b[0]
    && a[1] + 1 === b[1]
    && a[2] === b[2]
}
// Collect one [code point, lowercase code point, delta] entry for every code
// point in the scanned range whose lowercase form differs from the code point
// itself. The binding is never reassigned (entries are only pushed/removed).
const map = []
for (let i = 0x20; i <= 0xFF; i++) { // basic latin and latin-1 supplement
  // fromCodePoint/codePointAt work on whole code points rather than UTF-16
  // code units, so the loop bounds can be widened past U+FFFF without change.
  const a = String.fromCodePoint(i)
  // Only the first code point of the lowercase form is recorded; a lowercase
  // form that expands to several code points is truncated to its first one.
  const b = a.toLowerCase().codePointAt(0)
  if (i !== b) {
    map.push(mkInterval([i, b]))
  }
}
// Find all continuous intervals: collapse runs of consecutive code points that
// share the same delta into [first code point, last code point, delta] entries
// (both ends inclusive). A run of length one is stored the same way, with the
// first and last code point being equal.
const cont = []
// Raw entry consumed on the previous iteration. It stays undefined until the
// first entry is seen, so an empty `map` yields an empty `cont`.
let prev
// splice(0) hands over every entry in order and leaves `map` empty.
for (const entry of map.splice(0)) {
  if (prev && adj(prev, entry)) {
    // Same run: only its inclusive end moves forward.
    cont[cont.length - 1][1] = entry[0]
  } else {
    // New run: store a fresh array so the raw entries are never mutated.
    cont.push([entry[0], entry[0], entry[2]])
  }
  prev = entry
}
// DEBUG: uncomment to print every individual mapping covered by each merged
// run, one output line per run. For a merged run, c[0] and c[1] are its first
// and last code points (both inclusive) and c[2] is the offset that is added
// to a code point to reach its lowercase form.
// cont.forEach((c) => {
// const xs = []
// for (let i = c[0]; i <= c[1]; i++) {
// xs.push(`${String.fromCharCode(i)} -> ${String.fromCharCode(i + c[2])}`)
// }
// console.log(xs.join(', '))
// })
// Print the resulting interval table.
console.log(cont)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment