Last active
March 6, 2024 23:45
-
-
Save enepomnyaschih/72c423f727d395eeaa09697058238727 to your computer and use it in GitHub Desktop.
https://www.npmjs.com/package/byte-base64 - Encode JS Uint8Array, simple array of bytes or native JS string to base64 and back
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
MIT License | |
Copyright (c) 2020 Egor Nepomnyaschih | |
Permission is hereby granted, free of charge, to any person obtaining a copy | |
of this software and associated documentation files (the "Software"), to deal | |
in the Software without restriction, including without limitation the rights | |
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
copies of the Software, and to permit persons to whom the Software is | |
furnished to do so, subject to the following conditions: | |
The above copyright notice and this permission notice shall be included in all | |
copies or substantial portions of the Software. | |
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |
SOFTWARE. | |
*/ | |
/*
// This constant can also be computed with the following algorithm:
const base64abc = [],
	A = "A".charCodeAt(0),
	a = "a".charCodeAt(0),
	n = "0".charCodeAt(0);
for (let i = 0; i < 26; ++i) {
	base64abc.push(String.fromCharCode(A + i));
}
for (let i = 0; i < 26; ++i) {
	base64abc.push(String.fromCharCode(a + i));
}
for (let i = 0; i < 10; ++i) {
	base64abc.push(String.fromCharCode(n + i));
}
base64abc.push("+");
base64abc.push("/");
*/
// Base64 alphabet as an array of 64 one-character strings; the array index
// is the 6-bit value that the character stands for.
const base64abc = [
	..."ABCDEFGHIJKLMNOPQRSTUVWXYZ",
	..."abcdefghijklmnopqrstuvwxyz",
	..."0123456789",
	"+",
	"/"
];
/*
// This constant can also be computed with the following algorithm.
// Note: the algorithm yields a 256-entry Uint8Array, whereas the table
// defined below only covers char codes 0..122 (up to "z"); getBase64Code
// accounts for that with an explicit length check.
const l = 256, base64codes = new Uint8Array(l);
for (let i = 0; i < l; ++i) {
	base64codes[i] = 255; // invalid character
}
base64abc.forEach((char, index) => {
	base64codes[char.charCodeAt(0)] = index;
});
base64codes["=".charCodeAt(0)] = 0; // ignored anyway, so we just need to prevent an error
*/
// Reverse lookup table: char code -> 6-bit base64 value.
// 255 marks a char code that is not a base64 character. The table is a plain
// array that stops at "z" (123 entries), so larger char codes fall outside it.
const base64codes = (() => {
	const alphabet =
		"ABCDEFGHIJKLMNOPQRSTUVWXYZ" +
		"abcdefghijklmnopqrstuvwxyz" +
		"0123456789+/";
	const tableSize = "z".charCodeAt(0) + 1;
	const table = new Array(tableSize).fill(255);
	for (let value = 0; value < alphabet.length; ++value) {
		table[alphabet.charCodeAt(value)] = value;
	}
	// The padding character gets the value 0 so that it is not reported as invalid.
	table["=".charCodeAt(0)] = 0;
	return table;
})();
/**
 * Looks up the 6-bit value of a base64 character.
 *
 * @param {number} charCode - UTF-16 code unit of the character.
 * @returns {number} The entry of `base64codes` at `charCode`.
 * @throws {Error} If `charCode` lies beyond the table or its entry is the
 *   invalid-character marker 255.
 */
function getBase64Code(charCode) {
	const beyondTable = charCode >= base64codes.length;
	if (beyondTable || base64codes[charCode] === 255) {
		throw new Error("Unable to parse base64 string.");
	}
	return base64codes[charCode];
}
/**
 * Encodes a sequence of bytes as a padded base64 string.
 *
 * @param {Uint8Array|number[]} bytes - Indexable collection with a `length`;
 *   each element is looked up in `base64abc` after shifting/masking.
 * @returns {string} Base64 text, padded with "=" to a multiple of 4 characters.
 */
export function bytesToBase64(bytes) {
	const total = bytes.length;
	let encoded = "";
	let pos = 0;

	// Every complete group of 3 bytes becomes 4 characters.
	while (pos + 2 < total) {
		const first = bytes[pos];
		const second = bytes[pos + 1];
		const third = bytes[pos + 2];
		encoded += `${base64abc[first >> 2]}`;
		encoded += `${base64abc[((first & 0x03) << 4) | (second >> 4)]}`;
		encoded += `${base64abc[((second & 0x0F) << 2) | (third >> 6)]}`;
		encoded += `${base64abc[third & 0x3F]}`;
		pos += 3;
	}

	const remaining = total - pos;
	if (remaining === 1) {
		// One trailing byte: 2 characters followed by two padding signs.
		const first = bytes[pos];
		encoded += `${base64abc[first >> 2]}`;
		encoded += `${base64abc[(first & 0x03) << 4]}`;
		encoded += "==";
	} else if (remaining === 2) {
		// Two trailing bytes: 3 characters followed by one padding sign.
		const first = bytes[pos];
		const second = bytes[pos + 1];
		encoded += `${base64abc[first >> 2]}`;
		encoded += `${base64abc[((first & 0x03) << 4) | (second >> 4)]}`;
		encoded += `${base64abc[(second & 0x0F) << 2]}`;
		encoded += "=";
	}
	return encoded;
}
/**
 * Decodes a padded base64 string into bytes.
 *
 * The input must have a length that is a multiple of 4, and "=" may only
 * appear as one or two padding characters at the very end of the string.
 *
 * @param {string} str - Base64 text to decode.
 * @returns {Uint8Array} The decoded bytes (a view that excludes the octets
 *   accounted for by padding).
 * @throws {Error} If the length is not a multiple of 4, if "=" appears
 *   anywhere other than as trailing padding, or if a character is not part of
 *   the base64 alphabet (reported by `getBase64Code`).
 */
export function base64ToBytes(str) {
	const n = str.length;
	if (n % 4 !== 0) {
		throw new Error("Unable to parse base64 string.");
	}

	// Everything from the first "=" onwards must be padding, i.e. exactly "="
	// or "==". Checking only the position of the first "=" is not enough:
	// "AA=A" would otherwise be accepted, because "=" decodes to the value 0.
	const padStart = str.indexOf("=");
	const padding = padStart === -1 ? "" : str.slice(padStart);
	if (padding !== "" && padding !== "=" && padding !== "==") {
		throw new Error("Unable to parse base64 string.");
	}
	const missingOctets = padding.length;

	const result = new Uint8Array(3 * (n / 4));
	for (let i = 0, j = 0; i < n; i += 4, j += 3) {
		// Pack four 6-bit values into one 24-bit group, then split it into 3 bytes.
		const buffer =
			(getBase64Code(str.charCodeAt(i)) << 18) |
			(getBase64Code(str.charCodeAt(i + 1)) << 12) |
			(getBase64Code(str.charCodeAt(i + 2)) << 6) |
			getBase64Code(str.charCodeAt(i + 3));
		result[j] = buffer >> 16;
		result[j + 1] = (buffer >> 8) & 0xFF;
		result[j + 2] = buffer & 0xFF;
	}
	// Drop the octets that only exist because of the padding characters.
	return result.subarray(0, result.length - missingOctets);
}
/**
 * Encodes text as base64: the text is first turned into bytes by `encoder`,
 * and those bytes are then base64-encoded.
 *
 * @param {string} str - Text to encode.
 * @param {TextEncoder} [encoder] - Object whose `encode(str)` yields the bytes;
 *   a fresh `TextEncoder` is used when omitted.
 * @returns {string} Base64 representation of the encoded bytes.
 */
export function base64encode(str, encoder = new TextEncoder()) {
	const encodedBytes = encoder.encode(str);
	return bytesToBase64(encodedBytes);
}
/**
 * Decodes base64 into text: the base64 string is first decoded into bytes,
 * and those bytes are then turned into a string by `decoder`.
 *
 * @param {string} str - Base64 text to decode.
 * @param {TextDecoder} [decoder] - Object whose `decode(bytes)` yields the
 *   text; a fresh `TextDecoder` is used when omitted.
 * @returns {string} The decoded text.
 * @throws {Error} Whatever `base64ToBytes` throws for malformed base64 input.
 */
export function base64decode(str, decoder = new TextDecoder()) {
	const bytes = base64ToBytes(str);
	return decoder.decode(bytes);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I think it's important to understand the differences and limitations of this implementation vs `btoa` and `atob`.
The first comment gives an example that `btoa` cannot handle.
However, some may be misled to believe that this is a superior implementation or a drop-in replacement for `btoa`/`atob`. In reality, they are just two different implementations of Base64 encoding/decoding with different features and limitations.
The NPM page for this implementation states:
However, this isn't entirely true as explained below.
Input String Handling
The first main difference is how these two implementations handle input strings. Given the same string, each implementation may encode it differently.
Which one is correct? Well, they both are. The difference is that `base64encode` treats strings as text and encodes them to UTF-8 before encoding that UTF-8 to Base64. However, `btoa` expects a binary string, which is a sequence of UTF-16 code units in the range `0x00` to `0xFF` inclusive, essentially bytes, which it then encodes to Base64. The string `"aeiou"` as a binary string should really be thought of as `"\x61\x65\x69\x6F\x75"` rather than letters.
Take the character `a` for example. It has the Unicode code point value `0x0061`. Since this value is within the byte range, `btoa` will treat it as the byte `0x61`. In UTF-8, `a` is also represented as the single byte `0x61`, so both `btoa` and `base64encode` encode it the same. Note that you could also represent the string `"a"` as `"\x61"` to illustrate this point.
However, the character `ä` has the Unicode code point value `0x00E4`. Since this value is within the byte range, `btoa` will treat it as the byte `0xE4`. However, in UTF-8 `ä` is represented as two bytes `0xC3`, `0xA4`, so `base64encode` encodes those two bytes instead. Even though the identical strings `"ä"` and `"\xE4"` are equivalent for the purpose of `btoa`, they should not be thought of as the same when using `base64encode`.
This doesn't mean that `base64encode` is incapable of encoding the byte `0xE4`. It just means that you can't pass it the string `"\xE4"` and expect it to encode the byte `0xE4` to Base64. Instead, you have to use `bytesToBase64` and pass it the array `[0xE4]` or an equivalent `Uint8Array`.
Since `atob` (the reverse of `btoa`) returns a binary string, you can't reliably pass that string to `base64encode`.
None of this is to say that either `btoa` or `base64encode` is more correct. You just have to know what strings they each expect to receive. Binary in the case of `btoa` and text in the case of `base64encode`.
The critical takeaway is not that `btoa` can only handle ASCII characters (technically it can handle ASCII and Latin-1 Supplement characters). It's that `btoa` does not expect you to give it text. It expects a string of bytes.
Base64 Decoding
The second main difference is that `atob` implements a "forgiving-base64 decode" algorithm while `base64decode` and `base64ToBytes` do not.
The forgiving-base64 decode algorithm:
/[\t\n\f\r ]/
).=
characters.Alternatives
You don't need a library to handle Base64 encoding/decoding. You can use `btoa` and `atob`. Here are a couple of short alternatives that use `btoa` and `atob` under the hood. And as a bonus, you get the forgiving-base64 decode algorithm.
You can find a TypeScript version of this alternative including base64url support in both JS and TS in this gist.