Created
May 20, 2020 02:09
-
-
Save spion/dc7458287758421c99a19df569d4f45a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Reports whether `a` lies within the closed interval [`min`, `max`].
 *
 * @param a - Value to test.
 * @param min - Inclusive lower bound.
 * @param max - Inclusive upper bound.
 * @returns `true` when `min <= a <= max`; `false` otherwise, including when
 *   any argument is `NaN`.
 */
function inRange(a: number, min: number, max: number): boolean {
  return min <= a && a <= max;
}

/**
 * Encodes one code point at a time into a reusable 4-byte scratch buffer.
 *
 * After a successful `toBytes` call, the first `len` entries of `bytes` hold
 * the UTF-8 sequence. Entries at index `len` and beyond are left over from
 * earlier calls and must be ignored.
 */
class UTF8Encoder {
  /** Scratch buffer; typed arrays start zero-filled, so no explicit fill is needed. */
  bytes = new Uint8Array(4);
  /** Number of valid bytes in `bytes` after the most recent `toBytes` call. */
  len = 0;

  /**
   * Writes the UTF-8 encoding of `codePoint` into `bytes` and sets `len`.
   *
   * Surrogate code points (U+D800–U+DFFF) have no valid UTF-8 form; they are
   * encoded as U+FFFD REPLACEMENT CHARACTER instead of producing an
   * ill-formed three-byte sequence.
   *
   * @param codePoint - Code point in the range 0..0x10FFFF.
   * @throws TypeError when `codePoint` falls outside every encodable range
   *   (negative, `NaN`, or above 0x10FFFF).
   */
  toBytes(codePoint: number): void {
    // Single-byte fast path: ASCII maps to itself.
    if (inRange(codePoint, 0x00, 0x7f)) {
      this.len = 1;
      this.bytes[0] = codePoint;
      return;
    }

    // UTF-8 can only carry Unicode scalar values, so substitute U+FFFD for surrogates.
    const scalar = inRange(codePoint, 0xd800, 0xdfff) ? 0xfffd : codePoint;

    // Marker bits of the lead byte: 110xxxxx, 1110xxxx or 11110xxx.
    let leadMarker: number;
    if (inRange(scalar, 0x0080, 0x07ff)) {
      this.len = 2;
      leadMarker = 0xc0;
    } else if (inRange(scalar, 0x0800, 0xffff)) {
      this.len = 3;
      leadMarker = 0xe0;
    } else if (inRange(scalar, 0x10000, 0x10ffff)) {
      this.len = 4;
      leadMarker = 0xf0;
    } else {
      throw new TypeError(`Code point out of range: \\x${codePoint.toString(16)}`);
    }

    // The lead byte carries the bits above the continuation payload.
    let count = this.len - 1;
    this.bytes[0] = (scalar >> (6 * count)) + leadMarker;

    // Each continuation byte is 10xxxxxx, carrying six bits, most significant group first.
    while (count > 0) {
      const temp = scalar >> (6 * (count - 1));
      this.bytes[this.len - count] = 0x80 | (temp & 0x3f);
      count--;
    }
  }
}
/**
 * Walks a string code point by code point.
 *
 * `next()` always returns the same `currentValue` object, mutated in place,
 * so callers must read `value`/`done` before calling `next()` again.
 */
class CPIterator {
  constructor(private s: string) {}

  /** Result object reused for every `next()` call. */
  currentValue = { value: 0, done: false };
  /** Index, in UTF-16 code units, of the next code point to read. */
  offset = 0;

  /**
   * Advances to the next code point.
   *
   * @returns `currentValue` with `value` set to the code point read, or with
   *   `done` set to `true` once the end of the string has been reached
   *   (`value` then still holds the last code point read).
   */
  next() {
    if (this.offset < this.s.length) {
      // offset < length, so codePointAt cannot return undefined here.
      const cP = (this.currentValue.value = this.s.codePointAt(this.offset)!);
      // Code points above the BMP occupy two UTF-16 code units (a surrogate pair).
      this.offset += cP >= 0x10000 ? 2 : 1;
      return this.currentValue;
    }
    this.currentValue.done = true;
    return this.currentValue;
  }
}
/**
 * Adapts a `CPIterator` so it can be consumed with `for...of`.
 *
 * `[Symbol.iterator]()` hands back the same underlying iterator every time,
 * so a wrapper instance can only be traversed once.
 */
class CPIteratorWrapper {
  constructor(private readonly iter: CPIterator) {}

  /** Returns the wrapped iterator itself (not a fresh copy). */
  [Symbol.iterator]() {
    return this.iter;
  }
}
/**
 * Builds a single-use iterable over the code points of `s`.
 *
 * @param s - String to iterate.
 * @returns A `CPIteratorWrapper` around a fresh `CPIterator` for `s`.
 */
function codepointIterator(s: string) {
  const cpIterator = new CPIterator(s);
  return new CPIteratorWrapper(cpIterator);
}
/**
 * Allocates a larger buffer and copies `array` into its start.
 *
 * The new length is `4 + 1.5 * array.length`, truncated to an integer, so the
 * result always has at least four more bytes than the input.
 *
 * @param array - Buffer whose contents should be preserved.
 * @returns A new `Uint8Array`; `array` itself is not modified.
 */
function autoGrow(array: Uint8Array) {
  // `| 0` truncates the fractional length; parentheses make the precedence explicit.
  const grownLength = (4 + array.length * 1.5) | 0;
  const replacement = new Uint8Array(grownLength);
  replacement.set(array);
  return replacement;
}
/**
 * Copies the first `len` entries of `bytes` into `array` starting at `offset`.
 *
 * Uses plain indexed writes, so positions past the end of `array` are
 * silently ignored rather than throwing.
 *
 * @param array - Destination buffer, modified in place.
 * @param offset - Index in `array` where the first byte is written.
 * @param bytes - Source buffer.
 * @param len - Number of bytes to copy from the start of `bytes`.
 */
function copyTo(array: Uint8Array, offset: number, bytes: Uint8Array, len: number): void {
  for (let byte = 0; byte < len; ++byte) {
    array[offset + byte] = bytes[byte];
  }
}
/**
 * UTF-8 string encoder exposing an `encoding` label and an `encode` method.
 */
export class TextEncoder {
  /** Returns "utf-8". */
  readonly encoding = "utf-8";

  /**
   * Returns the result of running UTF-8's encoder.
   *
   * @param input - String to encode; defaults to the empty string.
   * @returns A new `Uint8Array` holding exactly the encoded bytes.
   */
  encode(input = ""): Uint8Array {
    const encoder = new UTF8Encoder();
    // Fast path: assume input.length bytes are enough, which holds when
    // every character is ASCII. The buffer is grown on demand otherwise.
    let array = new Uint8Array(input.length);
    let offset = 0;
    for (const cP of codepointIterator(input)) {
      encoder.toBytes(cP);
      // Grow before writing if the pending sequence would overflow the buffer.
      if (offset + encoder.len > array.length) {
        array = autoGrow(array);
      }
      copyTo(array, offset, encoder.bytes, encoder.len);
      offset += encoder.len;
    }
    // Drop the unused tail of the working buffer.
    return array.slice(0, offset);
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment