-
-
Save lahmatiy/e124293c2f2b98e847d0f2bb54c2738e to your computer and use it in GitHub Desktop.
CSSO speed up talk – tokenizer optimisation example tests
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Character codes (UTF-16 code units) that the tokenizer variants below
// compare against when they work on `charCodeAt()` results.
const TAB = 9; // '\t'
const N = 10; // '\n'
const F = 12; // '\f'
const R = 13; // '\r'
const SPACE = 32; // ' '
const DOUBLE_QUOTE = 34; // '"'
const QUOTE = 39; // '\''
const STAR = 42; // '*'
const SLASH = 47; // '/'
const ZERO = 48; // '0'
const NINE = 57; // '9'
// Punctuation token names keyed by the one-character string itself.
// Used by the string-comparing variant `test`; holds the same 34 entries as
// the code-keyed PUNCTUATION table. Both '\n' and '\r' map to 'Newline'.
const PUNCTUATION_CHAR = {
    '\t': 'Tab',
    '\n': 'Newline',
    '\r': 'Newline',
    ' ': 'Space',
    '!': 'ExclamationMark',
    '"': 'QuotationMark',
    '#': 'NumberSign',
    '$': 'DollarSign',
    '%': 'PercentSign',
    '&': 'Ampersand',
    '\'': 'Apostrophe',
    '(': 'LeftParenthesis',
    ')': 'RightParenthesis',
    '*': 'Asterisk',
    '+': 'PlusSign',
    ',': 'Comma',
    '-': 'HyphenMinus',
    '.': 'FullStop',
    '/': 'Solidus',
    ':': 'Colon',
    ';': 'Semicolon',
    '<': 'LessThanSign',
    '=': 'EqualsSign',
    '>': 'GreaterThanSign',
    '?': 'QuestionMark',
    '@': 'CommercialAt',
    '[': 'LeftSquareBracket',
    ']': 'RightSquareBracket',
    '^': 'CircumflexAccent',
    '_': 'LowLine',
    '{': 'LeftCurlyBracket',
    '|': 'VerticalLine',
    '}': 'RightCurlyBracket',
    '~': 'Tilde'
};
// Punctuation token names keyed by char code.
// Used by the code-comparing variants (`test2` .. `test5`); the largest key
// also determines the size of the CATEGORY lookup table.
const PUNCTUATION = {
    9: 'Tab', // '\t'
    10: 'Newline', // '\n'
    13: 'Newline', // '\r'
    32: 'Space', // ' '
    33: 'ExclamationMark', // '!'
    34: 'QuotationMark', // '"'
    35: 'NumberSign', // '#'
    36: 'DollarSign', // '$'
    37: 'PercentSign', // '%'
    38: 'Ampersand', // '&'
    39: 'Apostrophe', // '\''
    40: 'LeftParenthesis', // '('
    41: 'RightParenthesis', // ')'
    42: 'Asterisk', // '*'
    43: 'PlusSign', // '+'
    44: 'Comma', // ','
    45: 'HyphenMinus', // '-'
    46: 'FullStop', // '.'
    47: 'Solidus', // '/'
    58: 'Colon', // ':'
    59: 'Semicolon', // ';'
    60: 'LessThanSign', // '<'
    61: 'EqualsSign', // '='
    62: 'GreaterThanSign', // '>'
    63: 'QuestionMark', // '?'
    64: 'CommercialAt', // '@'
    91: 'LeftSquareBracket', // '['
    93: 'RightSquareBracket', // ']'
    94: 'CircumflexAccent', // '^'
    95: 'LowLine', // '_'
    123: 'LeftCurlyBracket', // '{'
    124: 'VerticalLine', // '|'
    125: 'RightCurlyBracket', // '}'
    126: 'Tilde' // '~'
};
// Lookup table from char code to token category, so a character can be
// classified with a single typed-array read (see `test5`).
// It spans codes 0 .. (largest punctuation code); callers must range-check
// a code against CATEGORY_LENGTH before indexing.
const CATEGORY_LENGTH = Math.max(...Object.keys(PUNCTUATION).map(Number)) + 1;

// A typed array is zero-initialised on creation, so every code already starts
// in category 0 ("anything else") and no explicit fill loop is needed.
const CATEGORY = new Uint32Array(CATEGORY_LENGTH);

// Mark every punctuation code first. The whitespace and string categories
// assigned below deliberately overwrite some of these entries.
// `test5` matches this category with the literal 6.
Object.keys(PUNCTUATION).forEach(function(key) {
    CATEGORY[Number(key)] = 6;
});

const NUMBER = 2;
for (let code = ZERO; code <= NINE; code++) {
    CATEGORY[code] = NUMBER;
}

const WHITESPACE = 4;
CATEGORY[SPACE] = WHITESPACE;
CATEGORY[TAB] = WHITESPACE;
CATEGORY[N] = WHITESPACE;
CATEGORY[R] = WHITESPACE;
CATEGORY[F] = WHITESPACE;

const STRING = 3;
CATEGORY[QUOTE] = STRING;
CATEGORY[DOUBLE_QUOTE] = STRING;
/**
 * Variant 1 (baseline): classifies every character by comparing
 * one-character strings obtained with `charAt()`.
 *
 * Result entries, one per consumed token:
 *   1 - the two-character sequence "/*" (both characters are consumed)
 *   2 - digit '0'..'9'
 *   3 - single or double quote
 *   4 - space, '\n', '\r', '\t' or '\f'
 *   punctuation name (string) - any other key of PUNCTUATION_CHAR
 *   5 - anything else
 *
 * @param {string} str - text to classify
 * @returns {Array<number|string>} category codes and punctuation names
 */
function test(str) {
    var result = [];

    for (var i = 0; i < str.length; i++) {
        var ch = str.charAt(i);
        // Read on every iteration; yields '' past the end of the string.
        var chNext = str.charAt(i + 1);

        if (ch === '/' && chNext === '*') {
            result.push(1);
            i++; // skip the '*' that was just consumed
        } else if (ch >= '0' && ch <= '9') {
            result.push(2);
        } else if (ch === '"' || ch === '\'') {
            result.push(3);
        } else if (ch === ' ' || ch === '\n' || ch === '\r' || ch === '\t' || ch === '\f') {
            result.push(4);
        } else if (ch in PUNCTUATION_CHAR) {
            result.push(PUNCTUATION_CHAR[ch]);
        } else {
            result.push(5);
        }
    }

    return result;
}
/**
 * Variant 2: same classification as `test`, but compares numeric char codes
 * from `charCodeAt()` instead of one-character strings.
 *
 * Result entries, one per consumed token:
 *   1 - the two-character sequence "/*" (both characters are consumed)
 *   2 - digit, 3 - quote, 4 - whitespace
 *   punctuation name (string) - any other key of PUNCTUATION
 *   5 - anything else
 *
 * @param {string} str - text to classify
 * @returns {Array<number|string>} category codes and punctuation names
 */
function test2(str) {
    var result = [];

    for (var i = 0; i < str.length; i++) {
        var code = str.charCodeAt(i);
        // Read on every iteration; charCodeAt() returns NaN past the end,
        // which never equals STAR.
        var codeNext = str.charCodeAt(i + 1);

        if (code === SLASH && codeNext === STAR) {
            result.push(1);
            i++; // skip the '*' that was just consumed
        } else if (code >= ZERO && code <= NINE) {
            result.push(2);
        } else if (code === DOUBLE_QUOTE || code === QUOTE) {
            result.push(3);
        } else if (code === SPACE || code === N || code === R || code === TAB || code === F) {
            result.push(4);
        } else if (code in PUNCTUATION) {
            result.push(PUNCTUATION[code]);
        } else {
            result.push(5);
        }
    }

    return result;
}
/**
 * Variant 3: like `test2`, but the following char code is read only when
 * the current character is a slash, instead of on every iteration.
 *
 * Result entries, one per consumed token:
 *   1 - the two-character sequence "/*" (both characters are consumed)
 *   2 - digit, 3 - quote, 4 - whitespace
 *   punctuation name (string) - any other key of PUNCTUATION
 *   5 - anything else
 *
 * @param {string} str - text to classify
 * @returns {Array<number|string>} category codes and punctuation names
 */
function test3(str) {
    var result = [];

    for (var i = 0; i < str.length; i++) {
        var code = str.charCodeAt(i);

        if (code === SLASH) {
            // For a trailing slash this reads past the end and yields NaN,
            // which never equals STAR.
            var codeNext = str.charCodeAt(i + 1);

            if (codeNext === STAR) {
                result.push(1);
                i++; // skip the '*' that was just consumed
                continue;
            }
        }

        // A slash not followed by '*' falls through and is classified as
        // ordinary punctuation below.
        if (code >= ZERO && code <= NINE) {
            result.push(2);
        } else if (code === DOUBLE_QUOTE || code === QUOTE) {
            result.push(3);
        } else if (code === SPACE || code === N || code === R || code === TAB || code === F) {
            result.push(4);
        } else if (code in PUNCTUATION) {
            result.push(PUNCTUATION[code]);
        } else {
            result.push(5);
        }
    }

    return result;
}
/**
 * Variant 4: like `test3`, but guards the look-ahead with a bounds check so
 * `charCodeAt()` is never called with an index past the end of the string
 * (0 is used as the "no next character" value instead of NaN).
 *
 * Result entries, one per consumed token:
 *   1 - the two-character sequence "/*" (both characters are consumed)
 *   2 - digit, 3 - quote, 4 - whitespace
 *   punctuation name (string) - any other key of PUNCTUATION
 *   5 - anything else
 *
 * @param {string} str - text to classify
 * @returns {Array<number|string>} category codes and punctuation names
 */
function test4(str) {
    var result = [];

    for (var i = 0; i < str.length; i++) {
        var code = str.charCodeAt(i);

        if (code === SLASH) {
            var codeNext = i + 1 < str.length ? str.charCodeAt(i + 1) : 0;

            if (codeNext === STAR) {
                result.push(1);
                i++; // skip the '*' that was just consumed
                continue;
            }
        }

        // A slash not followed by '*' falls through and is classified as
        // ordinary punctuation below.
        if (code >= ZERO && code <= NINE) {
            result.push(2);
        } else if (code === DOUBLE_QUOTE || code === QUOTE) {
            result.push(3);
        } else if (code === SPACE || code === N || code === R || code === TAB || code === F) {
            result.push(4);
        } else if (code in PUNCTUATION) {
            result.push(PUNCTUATION[code]);
        } else {
            result.push(5);
        }
    }

    return result;
}
/**
 * Variant 5: classifies each character with one read from the CATEGORY
 * lookup table and a `switch`, instead of a chain of comparisons.
 *
 * Result entries, one per consumed token:
 *   1 - the two-character sequence "/*" (both characters are consumed)
 *   2 - digit, 3 - quote, 4 - whitespace
 *   punctuation name (string) - remaining codes in the punctuation category
 *   5 - anything else, including every code >= CATEGORY_LENGTH
 *
 * @param {string} str - text to classify
 * @returns {Array<number|string>} category codes and punctuation names
 */
function test5(str) {
    var result = [];

    for (var i = 0; i < str.length; i++) {
        var code = str.charCodeAt(i);

        // Codes beyond the table are treated as category 0 (the default
        // branch) rather than indexing the typed array out of bounds.
        switch (code < CATEGORY_LENGTH ? CATEGORY[code] : 0) {
            case NUMBER:
                result.push(2);
                break;

            case STRING:
                result.push(3);
                break;

            case WHITESPACE:
                result.push(4);
                break;

            // 6 is the value the table setup stores for punctuation codes.
            case 6:
                if (code === SLASH) {
                    var codeNext = i + 1 < str.length ? str.charCodeAt(i + 1) : 0;

                    if (codeNext === STAR) {
                        result.push(1);
                        i++; // skip the '*' that was just consumed
                        // `continue` targets the enclosing for loop.
                        continue;
                    }
                }

                result.push(PUNCTUATION[code]);
                break;

            default:
                result.push(5);
        }
    }

    return result;
}
// Tokenizer variants under comparison; they are reported by 1-based position.
const fn = [test, test2, test3, test4, test5];

// Sample input shared by all runs: the fragments are joined without a
// separator into one string that mixes letters, digits, quotes, whitespace,
// punctuation and a "/*" sequence.
const string = [
    'a{}sd "sf"',
    'j5yw/*rf8380 u~0u \n\r s ',
    '""""sdf/348',
    '\'asda 8 8878 87 87d\'',
    '3n3b oy893yr89 yq398ry130 y4/'
].join('');

// Print every variant's result for the sample input so the outputs can be
// compared against each other before timing them.
for (let f = 0; f < fn.length; f++) {
    const testFn = fn[f];

    console.log(f + 1);
    console.log(' ', testFn(string).join(', '));
}
// Time 10000 calls of each variant on the sample input and print
// "<variant number> <elapsed whole milliseconds>".
for (let f = 0; f < fn.length; f++) {
    const testFn = fn[f];
    const time = process.hrtime();

    for (let i = 0; i < 10000; i++) {
        testFn(string);
    }

    // process.hrtime(time) returns the elapsed [seconds, nanoseconds].
    const timeDiff = process.hrtime(time);

    // Math.trunc drops the fraction numerically; parseInt() would first
    // convert the number to a string and parse that text.
    console.log(f + 1, Math.trunc(timeDiff[0] * 1e3 + timeDiff[1] / 1e6));
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment