public
Created

testing namespace collisions for 2-byte abbreviated keys for the document object

  • Download Gist
README.md
Markdown

experimenting with a more general way of hashing a namespace, for future javascript golf. the idea is that existing browser objects can be hashed so that wasteful strings like createDocumentFragment can be avoided entirely, and keys from several objects (such as document, window, String, etc.) can be hashed into one namespace.

this specific approach takes every key on the document, and turns it into a number that is repeatedly modded by 3456 (54 * 64), which is the maximum number of items representable with a two-byte javascript identifier:

/[a-z_$][0-9a-z_$]/i

this yields decent results. in an empty chrome page, there were only 3 collisions. the idea is that the upper bound can be lowered as necessary, until the collisions are acceptable.

namespaceHash.js
JavaScript
1 2 3 4 5 6 7 8 9 10 11 12
var namespace = function(a,b,c,d,e) {
for(c in document){
for(d=e=c.length;d;e%=3456)e+=d*c.charCodeAt(--d)
e = a.slice(10)[e>>6] + a[e%64]
 
b[e] || (b[e] = [])
b[e].push(c)
}
 
return b
 
}("0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_$",{})
results.json
JSON
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146
{
"Dp": ["attribution-img"],
"IP": ["images"],
"jS": ["uid"],
"pF": ["head"],
"k0": ["all", "createNodeIterator"],
"W2": ["doctype"],
"f6": ["nodeType"],
"uk": ["linkColor"],
"rk": ["body"],
"IB": ["embeds"],
"hw": ["URL"],
"PY": ["parentElement"],
"Ug": ["bgColor"],
"bO": ["styleSheets"],
"qc": ["localName"],
"RZ": ["ownerDocument"],
"Ai": ["forms"],
"h3": ["referrer"],
"kY": ["defaultCharset"],
"sO": ["nodeValue"],
"S$": ["documentURI"],
"Jf": ["height"],
"Hl": ["designMode"],
"K2": ["readyState"],
"qV": ["lastModified"],
"ez": ["webkitVisibilityState"],
"LJ": ["preferredStylesheetSet"],
"J_": ["prefix"],
"zi": ["width", "close"],
"$0": ["xmlEncoding"],
"sY": ["characterSet"],
"W9": ["anchors"],
"K9": ["previousSibling"],
"Wg": ["plugins"],
"Uk": ["fgColor"],
"g6": ["namespaceURI", "createTextNode"],
"Pl": ["activeElement"],
"ry": ["lastChild"],
"Pa": ["xmlStandalone"],
"dx": ["textContent"],
"a8": ["nextSibling"],
"IQ": ["domain"],
"Wu": ["applets"],
"VK": ["charset"],
"c3": ["nodeName"],
"IH": ["cookie"],
"IA": ["childNodes"],
"MI": ["baseURI"],
"OD": ["inputEncoding"],
"pU": ["implementation"],
"HU": ["compatMode"],
"zU": ["links"],
"zg": ["title"],
"Ie": ["firstChild"],
"OK": ["attributes"],
"bs": ["defaultView"],
"KU": ["vlinkColor"],
"MZ": ["xmlVersion"],
"aH": ["selectedStylesheetSet"],
"Kz": ["alinkColor"],
"Ik": ["parentNode"],
"qf": ["webkitHidden"],
"gO": ["location"],
"X7": ["scripts"],
"JU": ["documentElement"],
"kf": ["dir"],
"qW": ["open"],
"zA": ["write"],
"VM": ["writeln"],
"yJ": ["clear"],
"Tb": ["captureEvents"],
"QY": ["releaseEvents"],
"fH": ["hasFocus"],
"Ou": ["createElement"],
"Je": ["createDocumentFragment"],
"PM": ["createComment"],
"NB": ["createCDATASection"],
"PG": ["createProcessingInstruction"],
"LF": ["createAttribute"],
"YL": ["createEntityReference"],
"co": ["getElementsByTagName"],
"x1": ["createElementNS"],
"ze": ["createAttributeNS"],
"cy": ["getElementsByTagNameNS"],
"$x": ["getElementById"],
"af": ["createEvent"],
"YE": ["createRange"],
"gf": ["evaluate"],
"ZE": ["execCommand"],
"Dk": ["queryCommandEnabled"],
"rY": ["queryCommandIndeterm"],
"F$": ["queryCommandState"],
"gk": ["queryCommandSupported"],
"Fh": ["queryCommandValue"],
"zr": ["getElementsByName"],
"hP": ["elementFromPoint"],
"Lu": ["caretRangeFromPoint"],
"vs": ["getSelection"],
"Mf": ["getCSSCanvasContext"],
"uI": ["getElementsByClassName"],
"SS": ["querySelector"],
"dr": ["querySelectorAll"],
"Jr": ["importNode"],
"sd": ["adoptNode"],
"cN": ["createTreeWalker"],
"iJ": ["getOverrideStyle"],
"nA": ["createExpression"],
"fq": ["createNSResolver"],
"sQ": ["insertBefore"],
"nM": ["replaceChild"],
"Z0": ["removeChild"],
"XU": ["appendChild"],
"Kx": ["hasChildNodes"],
"rc": ["cloneNode"],
"w8": ["normalize"],
"dy": ["isSupported"],
"U9": ["hasAttributes"],
"wA": ["lookupPrefix"],
"eT": ["isDefaultNamespace"],
"Q6": ["lookupNamespaceURI"],
"iu": ["addEventListener"],
"Sf": ["removeEventListener"],
"FB": ["isSameNode"],
"Y6": ["isEqualNode"],
"C5": ["compareDocumentPosition"],
"Pq": ["dispatchEvent"],
"Np": ["ELEMENT_NODE"],
"si": ["ATTRIBUTE_NODE"],
"aA": ["TEXT_NODE"],
"Ru": ["CDATA_SECTION_NODE"],
"ek": ["ENTITY_REFERENCE_NODE"],
"AR": ["ENTITY_NODE"],
"EG": ["PROCESSING_INSTRUCTION_NODE"],
"NR": ["COMMENT_NODE"],
"bC": ["DOCUMENT_NODE"],
"WT": ["DOCUMENT_TYPE_NODE"],
"Ih": ["DOCUMENT_FRAGMENT_NODE"],
"bB": ["NOTATION_NODE"],
"o2": ["DOCUMENT_POSITION_DISCONNECTED"],
"tX": ["DOCUMENT_POSITION_PRECEDING"],
"G8": ["DOCUMENT_POSITION_FOLLOWING"],
"_o": ["DOCUMENT_POSITION_CONTAINS"],
"AE": ["DOCUMENT_POSITION_CONTAINED_BY"],
"dq": ["DOCUMENT_POSITION_IMPLEMENTATION_SPECIFIC"]
}
// while this is more efficient:
b[e]||(b[e]=[])
// this is shorter:
b[e]=b[e]||[]

sure sure, and i can modify the base64 code to get rid of the map. the question is whether the approach (not the implementation) is viable at all, or if there's a better way to do it.

This approach is definitely viable. No doubt about it.
And as a general purpose hashing technique, this is pretty good: Fast, simple and quite compact already. Of course the implementation can be shortened.

Better in which way ? It depends on the use case. For a js1k prod, I would go for something more specific and therefore more compact.

Also, I believe the shortened example would actually be shortened to ( if the hashing is also done on the body AKA rk )

d.rk.XU(d.Ou("script")).src="URL"

then we would have to hash on every node. would make more sense to use the strings for any DOM node, even though it takes one more character.

i'm also thinking that .toString(36) may be a better way to generate keys as long as collisions aren't a problem, since it requires much less code.

We could hash the methods/properties name of HTMLElement under one namespace, making the example:

d.rk[N.XU](d.Ou("script")).src="URL" // N holds the hashed keys of HTMLElement

However, I'm not sure what is the use case for this: a new Uglify JS ?

Alas, .toString(36) has a pretty high chance of producing invalid identifiers ( starting by a digit ), but who knows, we might get lucky.

well, i think we could avoid invalid identifiers by limiting the integers to everything greater than '9z'.

(and i'm considering this for my own js1k project based on 140byt.es snippets.)

okay, so a big flaw in this approach is that in a significantly saturated namespace, there are bound to be keywords that overlap with existing javascript keywords like do and in. blarg.

How likely is that to happen ? Surely it's possible to fiddle with the formula e+=d*c.charCodeAt(--d) formula, or if that's really only these 2 keys ( 'do' and 'in' ), we could skip them altogether:

for(c in document){
    for(d=e=c.length;d;e%=3454)e+=d*c.charCodeAt(--d)

    e+=25711<e;  // skip the 'do' key
    e+=26990<e;  // skip the 'in' key

    e = a.slice(10)[e>>6] + a[e%64]

    b[e] || (b[e] = [])
    b[e].push(c)
  }

  return b

}("0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_$",{})

it's happening in the code i'm using, giving me errors like Uncaught SyntaxError: Unexpected token if. i guess i'll try fiddling the modulus until it works.

e+=26982<e // skip the "if" keyword :p
Actually, if you wanna go the "skip keywords" road, the modulo should be applied after the "skips", or the "skips" should be applied inside the loop computing e which would allow us to keep the modulo at 3456

by the way, i've rolled these ideas into namedrop.

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.