Skip to content

Instantly share code, notes, and snippets.

@spion
Created May 20, 2020 02:09
Show Gist options
  • Save spion/dc7458287758421c99a19df569d4f45a to your computer and use it in GitHub Desktop.
Save spion/dc7458287758421c99a19df569d4f45a to your computer and use it in GitHub Desktop.
/**
 * Tests whether `a` lies in the closed interval [`min`, `max`].
 *
 * @param a - Value to test.
 * @param min - Inclusive lower bound.
 * @param max - Inclusive upper bound.
 * @returns `true` when `min <= a <= max`; `false` otherwise (including when
 *   any comparison involves `NaN`).
 */
function inRange(a: number, min: number, max: number) {
  const atLeastMin = a >= min;
  return atLeastMin && max >= a;
}
/**
 * Encodes one code point at a time into UTF-8.
 *
 * The encoder writes into a reusable 4-byte scratch buffer instead of
 * allocating per call: after `toBytes()` returns, the first `len` entries of
 * `bytes` hold the encoded sequence. Entries past `len` are stale leftovers
 * from earlier calls and must be ignored.
 */
class UTF8Encoder {
  /** Scratch buffer holding the most recently encoded sequence. */
  bytes = new Uint8Array(4).fill(0);
  /** Number of meaningful bytes in `bytes` after the last `toBytes()` call. */
  len = 0;

  /**
   * Encodes `codePoint` into `bytes` and records the sequence length in `len`.
   *
   * Surrogate code points (U+D800 to U+DFFF) are not Unicode scalar values and
   * have no valid UTF-8 form, so they are encoded as U+FFFD (EF BF BD), the
   * replacement character, rather than as an ill-formed 3-byte sequence.
   *
   * @param codePoint - Code point in the range 0x0 to 0x10FFFF.
   * @throws TypeError when `codePoint` falls outside every encodable range
   *   (negative, above 0x10FFFF, `NaN`, or a fraction between two ranges).
   */
  toBytes(codePoint: number): void {
    // Unpaired surrogates reach here when the input string is ill-formed
    // UTF-16; substitute the replacement character so output stays valid.
    const isSurrogate = codePoint >= 0xd800 && codePoint <= 0xdfff;
    const cp = isSurrogate ? 0xfffd : codePoint;

    // ASCII maps to a single identical byte.
    if (cp >= 0x00 && cp <= 0x7f) {
      this.len = 1;
      this.bytes[0] = cp;
      return;
    }

    // Pick the sequence length and the marker bits of the lead byte.
    let leadMarker: number;
    if (cp >= 0x0080 && cp <= 0x07ff) {
      this.len = 2;
      leadMarker = 0xc0;
    } else if (cp >= 0x0800 && cp <= 0xffff) {
      this.len = 3;
      leadMarker = 0xe0;
    } else if (cp >= 0x10000 && cp <= 0x10ffff) {
      this.len = 4;
      leadMarker = 0xf0;
    } else {
      throw new TypeError(`Code point out of range: \\x${codePoint.toString(16)}`);
    }

    // The lead byte carries the bits above the continuation payload.
    const continuationCount = this.len - 1;
    this.bytes[0] = (cp >> (6 * continuationCount)) + leadMarker;

    // Each continuation byte is 10xxxxxx with the next 6 bits, high to low.
    for (let remaining = continuationCount; remaining > 0; remaining--) {
      const shifted = cp >> (6 * (remaining - 1));
      this.bytes[this.len - remaining] = 0x80 | (shifted & 0x3f);
    }
  }
}
/**
 * Walks a string one code point at a time.
 *
 * To avoid allocating per step, `next()` always returns the same
 * `currentValue` object, mutated in place; callers must read `value` before
 * calling `next()` again.
 */
class CPIterator {
  constructor(private s: string) {}

  /** Shared result record that every `next()` call updates and returns. */
  currentValue = { value: 0, done: false };
  /** Index, in UTF-16 code units, of the next code point to read from the string. */
  offset = 0;

  /**
   * Reads the code point at `offset` and advances past it.
   *
   * @returns The shared result record. While input remains, `value` is the
   *   code point just read; once the string is exhausted, `done` is set to
   *   `true` and `value` keeps whatever it last held.
   */
  next() {
    const result = this.currentValue;
    if (this.offset >= this.s.length) {
      result.done = true;
      return result;
    }

    const codePoint = this.s.codePointAt(this.offset)!;
    result.value = codePoint;
    // Code points at or above 0x10000 occupy two UTF-16 code units.
    this.offset += codePoint >= 0x10000 ? 2 : 1;
    return result;
  }
}
/**
 * Makes a `CPIterator` usable in `for...of` by exposing it through
 * `Symbol.iterator`.
 *
 * The same underlying iterator is handed out on every request, so a wrapper
 * can only be traversed once.
 */
class CPIteratorWrapper {
  private iter: CPIterator;

  constructor(iter: CPIterator) {
    this.iter = iter;
  }

  /** @returns The wrapped iterator itself (not a fresh copy). */
  [Symbol.iterator]() {
    return this.iter;
  }
}
/**
 * Builds a single-use iterable over the code points of `s`.
 *
 * @param s - String to walk.
 * @returns A `CPIteratorWrapper` around a new `CPIterator` for `s`, suitable
 *   for use in `for...of`.
 */
function codepointIterator(s: string) {
  return new CPIteratorWrapper(new CPIterator(s));
}
/**
 * Allocates a larger copy of `array`.
 *
 * The new capacity is 1.5 times the old length plus 4, truncated to an
 * integer, so the result always has room for at least 4 more bytes.
 *
 * @param array - Buffer whose contents should be carried over.
 * @returns A new `Uint8Array` starting with the bytes of `array`; the added
 *   tail is zero-filled.
 */
function autoGrow(array: Uint8Array) {
  const capacity = (array.length * 1.5 + 4) | 0;
  const grown = new Uint8Array(capacity);
  grown.set(array, 0);
  return grown;
}
/**
 * Copies the first `len` entries of `bytes` into `array`, starting at
 * `offset`.
 *
 * Elements are assigned one by one in ascending order.
 *
 * @param array - Destination buffer, modified in place.
 * @param offset - Index in `array` that receives `bytes[0]`.
 * @param bytes - Source buffer.
 * @param len - Number of entries to copy from the start of `bytes`.
 */
function copyTo(array: Uint8Array, offset: number, bytes: Uint8Array, len: number) {
  let index = 0;
  while (index < len) {
    array[offset + index] = bytes[index];
    index += 1;
  }
}
/**
 * Minimal UTF-8 text encoder exposing an `encoding` label and an `encode`
 * method.
 */
export class TextEncoder {
  /** Returns "utf-8". */
  readonly encoding = "utf-8";

  /**
   * Encodes `input` as UTF-8.
   *
   * @param input - String to encode; defaults to the empty string.
   * @returns A new `Uint8Array` sized exactly to the encoded output.
   */
  encode(input = ""): Uint8Array {
    const utf8 = new UTF8Encoder();
    // Optimistic sizing: one byte per UTF-16 code unit is exactly enough when
    // the input is pure ASCII; anything else enlarges the buffer on demand.
    let buffer = new Uint8Array(input.length);
    let written = 0;

    for (const codePoint of codepointIterator(input)) {
      utf8.toBytes(codePoint);
      const end = written + utf8.len;
      if (end > buffer.length) {
        buffer = autoGrow(buffer);
      }
      copyTo(buffer, written, utf8.bytes, utf8.len);
      written = end;
    }

    // Drop the unused tail of the working buffer.
    return buffer.slice(0, written);
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment