ASCII - !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~��������������������������������� ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ
It looks like GitHub auto IDs:
- Proceed top to bottom and use `sprintf("%s-%d", id, n)` when `id` has been previously auto-assigned, where `n` is the smallest integer >= 1 such that that expression has not been previously seen.
- Codepoints < '0' are dropped except for spaces and `-`.
- Punctuation and non-graphical codepoints >= '0' are dropped.
- Adjacent `-`s are not collapsed.
- HTML character references are decoded and encoded letters are not replaced with `-`.
- `{#...}` extension syntax is neither recognized nor exempted.
- IDs may start with ASCII numerals.
- Non-ASCII letters are not lower cased.
- No Unicode normalization is done. See below.
// Dev-console snippet: shows what the auto-id assigner does to the
// Normalization examples from unicode.org. For every element id on the page it
// lists the normal forms that leave the id unchanged, i.e. the forms the id is
// already in.
let ids = Array.from(document.querySelectorAll('*[id]'), (element) => element.id);

// [label, normalizer] pairs; 'identity' is a no-op and therefore always matches.
let normalForms = [
  ['identity', (text) => text],
  ...['NFC', 'NFD', 'NFKC', 'NFKD'].map((form) => [form, (text) => text.normalize(form)]),
];

// Maps each id to the labels of the normalizers under which it is a fixed point.
let idToIdentityNormalForms = {};
ids.forEach((id) => {
  idToIdentityNormalForms[id] = normalForms
    .filter(([, normalize]) => normalize(id) === id)
    .map(([label]) => label);
});

console.log(JSON.stringify(idToIdentityNormalForms, null, 2));
/*
produces
...
"user-content-nfc-Å---Å": [
"identity"
],
"user-content-nfkc-Äffin---Äffin": [
"identity",
"NFC"
],
"user-content-nfd-Å---å": [
"identity"
],
"user-content-nfkd-Äffin---äffin": [
"identity"
],
...
*/