Skip to content

Instantly share code, notes, and snippets.

@rossj
Created May 6, 2018 17:36
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rossj/6e31df6933636c2c7fb1bcc6ac97c72e to your computer and use it in GitHub Desktop.
Save rossj/6e31df6933636c2c7fb1bcc6ac97c72e to your computer and use it in GitHub Desktop.
Some Node utility functions for aggregating byte information about Buffers
/**
 * The two states of the UTF-8 validation state machine that the code in this
 * file tests for by name. The transition table (`utf8d`) also produces other,
 * intermediate state numbers (mid-sequence states) that are not named here.
 */
const enum UTF8_STATUS {
// Start state, and the state reached after every complete, valid sequence.
// A buffer is reported as valid UTF-8 only if the machine ends here.
ACCEPT = 0,
// An invalid byte was encountered; checkStringBuf stops feeding the machine
// once this state is reached, so it is never left.
REJECT = 1
}
/**
 * Aggregate byte-level facts about one buffer (or a list of buffers), as
 * produced by `checkStringBuf` / `checkStringBufs`. Every flag is `true` for
 * empty input.
 */
export interface MyBufAggs {
// True when no byte equals 0x00.
allNonNull: boolean;
// True when every byte lies in 32..126, i.e. no NUL, no other byte below 32,
// no 127, and no byte >= 128.
allAsciiChar: boolean;
// True when no byte is >= 128.
all7Bit: boolean;
// True when the UTF-8 state machine finishes in the ACCEPT state, i.e. the
// bytes form complete, valid sequences with nothing left unfinished at the end.
validUtf8: boolean;
}
// Lookup table driving the UTF-8 validation state machine.
// From https://stackoverflow.com/questions/1031645/how-to-detect-utf-8-in-plain-c
//
// Layout, as consumed by checkStringBuf:
//   - indices 0..255   : maps each possible byte value to a character class
//                        (values 0..11);
//   - indices 256..399 : state-transition table with 16 slots per state, read
//                        at `256 + state * 16 + class` to get the next state.
// State 0 is "accept" and state 1 is "reject" (see UTF8_STATUS); the remaining
// state numbers are intermediate, mid-sequence states.
const utf8d = Buffer.from([
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 00..1f
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 20..3f
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 40..5f
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 60..7f
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, // 80..9f
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // a0..bf
8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // c0..df
0xa, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x4, 0x3, 0x3, // e0..ef
0xb, 0x6, 0x6, 0x6, 0x5, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, // f0..ff
// Transition rows start here (index 256): 16 entries per state.
0x0, 0x1, 0x2, 0x3, 0x5, 0x8, 0x7, 0x1, 0x1, 0x1, 0x4, 0x6, 0x1, 0x1, 0x1, 0x1, // s0..s0
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, // s1..s2
1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, // s3..s4
1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 3, 1, 1, 1, 1, 1, 1, // s5..s6
1, 3, 1, 1, 1, 1, 1, 3, 1, 3, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // s7..s8
]);
/**
 * Walks a buffer once and reports byte-level properties of its contents.
 *
 * For every byte the UTF-8 state machine is advanced (until it rejects) and
 * the byte is classified for the NUL / printable-ASCII / 7-bit flags.
 *
 * @param b - The bytes to inspect.
 * @returns Flags describing the buffer; all of them are `true` for an empty
 *   buffer. `validUtf8` is `true` only when the state machine ends in the
 *   ACCEPT state, so a sequence cut off at the end of the buffer is invalid.
 */
export function checkStringBuf(b: Buffer): MyBufAggs {
  let dfaState = UTF8_STATUS.ACCEPT;
  let nulSeen = false;
  let highBitSeen = false;
  let nonPrintableSeen = false;

  const total = b.length;
  for (let pos = 0; pos < total; pos += 1) {
    const octet = b[pos];

    // Only the validity of the sequence matters, not the decoded code point,
    // so the decoder is reduced to its state transition. Once rejected, the
    // machine is left alone.
    if (dfaState !== UTF8_STATUS.REJECT) {
      dfaState = utf8d[256 + dfaState * 16 + utf8d[octet]];
    }

    // Printable ASCII is 0x20..0x7e; anything outside that range (NUL, other
    // control bytes, DEL, and bytes with the high bit set) disqualifies it.
    if (octet < 0x20 || octet > 0x7e) {
      nonPrintableSeen = true;
    }
    if (octet === 0) {
      nulSeen = true;
    }
    if (octet > 0x7f) {
      highBitSeen = true;
    }
  }

  return {
    allNonNull: !nulSeen,
    allAsciiChar: !nonPrintableSeen,
    all7Bit: !highBitSeen,
    validUtf8: dfaState === UTF8_STATUS.ACCEPT
  };
}
/**
 * Runs `checkStringBuf` on every buffer in the list and combines the results
 * with a logical AND per flag.
 *
 * Each buffer is checked on its own: the UTF-8 state is not carried from one
 * buffer to the next, so a multi-byte sequence split across two buffers makes
 * `validUtf8` false.
 *
 * @param bufs - The buffers to inspect.
 * @returns The combined flags; every flag is `true` for an empty list.
 */
export function checkStringBufs(bufs: Buffer[]): MyBufAggs {
  let nonNull = true;
  let asciiChar = true;
  let sevenBit = true;
  let utf8Ok = true;

  for (const chunk of bufs) {
    const { allNonNull, allAsciiChar, all7Bit, validUtf8 } = checkStringBuf(chunk);
    // A flag, once cleared by any buffer, stays cleared.
    if (!allNonNull) {
      nonNull = false;
    }
    if (!allAsciiChar) {
      asciiChar = false;
    }
    if (!all7Bit) {
      sevenBit = false;
    }
    if (!validUtf8) {
      utf8Ok = false;
    }
  }

  return {
    allNonNull: nonNull,
    allAsciiChar: asciiChar,
    all7Bit: sevenBit,
    validUtf8: utf8Ok
  };
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment