Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Javascript Base64 UTF8 for the Browser / Server. Base64 UTF-8 Encoding and Decoding Libraries / Modules for AMD, CommonJS, Nodejs and Browsers. Cross-browser compatible.
// UTF8 Module
//
// Cleaner and modularized utf-8 encoding and decoding library for javascript.
//
// copyright: MIT
// author: Nijiko Yonskai, @nijikokun, nijikokun@gmail.com
(function (name, definition, context, dependencies) {
if (typeof context['module'] !== 'undefined' && context['module']['exports']) { if (dependencies && context['require']) { for (var i = 0; i < dependencies.length; i++) context[dependencies[i]] = context['require'](dependencies[i]); } context['module']['exports'] = definition.apply(context); }
else if (typeof context['define'] !== 'undefined' && context['define'] === 'function' && context['define']['amd']) { define(name, (dependencies || []), definition); }
else { context[name] = definition.apply(context); }
})('utf8', function () {
return {
encode: function (string) {
if (typeof string !== 'string') return string;
else string = string.replace(/\r\n/g, "\n");
var output = "", i = 0, charCode;
for (i; i < string.length; i++) {
charCode = string.charCodeAt(i);
if (charCode < 128)
output += String.fromCharCode(charCode);
else if ((charCode > 127) && (charCode < 2048))
output += String.fromCharCode((charCode >> 6) | 192),
output += String.fromCharCode((charCode & 63) | 128);
else
output += String.fromCharCode((charCode >> 12) | 224),
output += String.fromCharCode(((charCode >> 6) & 63) | 128),
output += String.fromCharCode((charCode & 63) | 128);
}
return output;
},
decode: function (string) {
if (typeof string !== 'string') return string;
var output = "", i = 0, charCode = 0;
while (i < string.length) {
charCode = string.charCodeAt(i);
if (charCode < 128)
output += String.fromCharCode(charCode),
i++;
else if ((charCode > 191) && (charCode < 224))
output += String.fromCharCode(((charCode & 31) << 6) | (string.charCodeAt(i + 1) & 63)),
i += 2;
else
output += String.fromCharCode(((charCode & 15) << 12) | ((string.charCodeAt(i + 1) & 63) << 6) | (string.charCodeAt(i + 2) & 63)),
i += 3;
}
return output;
}
};
}, this);
// Base64 Module
//
// Cleaner, modularized and properly scoped base64 encoding and decoding module for strings.
//
// copyright: MIT
// author: Nijiko Yonskai, @nijikokun, nijikokun@gmail.com
(function (name, definition, context, dependencies) {
if (typeof context['module'] !== 'undefined' && context['module']['exports']) { if (dependencies && context['require']) { for (var i = 0; i < dependencies.length; i++) context[dependencies[i]] = context['require'](dependencies[i]); } context['module']['exports'] = definition.apply(context); }
else if (typeof context['define'] !== 'undefined' && context['define'] === 'function' && context['define']['amd']) { define(name, (dependencies || []), definition); }
else { context[name] = definition(); }
})('base64', function (utf8) {
var $this = this;
var $utf8 = utf8 || this.utf8;
var map = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=";
return {
encode: function (input) {
if (typeof $utf8 === 'undefined') throw { error: "MissingMethod", message: "UTF8 Module is missing." };
if (typeof input !== 'string') return input;
else input = $utf8.encode(input);
var output = "", a, b, c, d, e, f, g, i = 0;
while (i < input.length) {
a = input.charCodeAt(i++);
b = input.charCodeAt(i++);
c = input.charCodeAt(i++);
d = a >> 2;
e = ((a & 3) << 4) | (b >> 4);
f = ((b & 15) << 2) | (c >> 6);
g = c & 63;
if (isNaN(b)) f = g = 64;
else if (isNaN(c)) g = 64;
output += map.charAt(d) + map.charAt(e) + map.charAt(f) + map.charAt(g);
}
return output;
},
decode: function (input) {
if (typeof $utf8 === 'undefined') throw { error: "MissingMethod", message: "UTF8 Module is missing." };
if (typeof input !== 'string') return input;
else input = input.replace(/[^A-Za-z0-9\+\/\=]/g, "");
var output = "", a, b, c, d, e, f, g, i = 0;
while (i < input.length) {
d = map.indexOf(input.charAt(i++));
e = map.indexOf(input.charAt(i++));
f = map.indexOf(input.charAt(i++));
g = map.indexOf(input.charAt(i++));
a = (d << 2) | (e >> 4);
b = ((e & 15) << 4) | (f >> 2);
c = ((f & 3) << 6) | g;
output += String.fromCharCode(a);
if (f != 64) output += String.fromCharCode(b);
if (g != 64) output += String.fromCharCode(c);
}
return $utf8.decode(output);
}
}
}, this, [ "utf8" ]);
// Base64 Module
//
// Cleaner, modularized and properly scoped base64 encoding and decoding module for strings.
//
// copyright: MIT
// author: Nijiko Yonskai, @nijikokun, nijikokun@gmail.com
(function (name, definition, context, dependencies) {
if (typeof context['module'] !== 'undefined' && context['module']['exports']) { if (dependencies && context['require']) { for (var i = 0; i < dependencies.length; i++) context[dependencies[i]] = context['require'](dependencies[i]); } context['module']['exports'] = definition.apply(context); }
else if (typeof context['define'] !== 'undefined' && context['define'] === 'function' && context['define']['amd']) { define(name, (dependencies || []), definition); }
else { context[name] = definition(); }
})('base64', function (utf8) {
var $this = this;
var $utf8 = utf8 || this.utf8;
var map = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=";
return {
encode: function (input) {
if (typeof $utf8 === 'undefined') throw { error: "MissingMethod", message: "UTF8 Module is missing." };
if (typeof input !== 'string') return input;
else input = $utf8.encode(input);
var output = "", a, b, c, d, e, f, g, i = 0;
while (i < input.length) {
a = input.charCodeAt(i++);
b = input.charCodeAt(i++);
c = input.charCodeAt(i++);
d = a >> 2;
e = ((a & 3) << 4) | (b >> 4);
f = ((b & 15) << 2) | (c >> 6);
g = c & 63;
if (isNaN(b)) f = g = 64;
else if (isNaN(c)) g = 64;
output += map.charAt(d) + map.charAt(e) + map.charAt(f) + map.charAt(g);
}
return output;
},
decode: function (input) {
if (typeof $utf8 === 'undefined') throw { error: "MissingMethod", message: "UTF8 Module is missing." };
if (typeof input !== 'string') return input;
else input = input.replace(/[^A-Za-z0-9\+\/\=]/g, "");
var output = "", a, b, c, d, e, f, g, i = 0;
while (i < input.length) {
d = map.indexOf(input.charAt(i++));
e = map.indexOf(input.charAt(i++));
f = map.indexOf(input.charAt(i++));
g = map.indexOf(input.charAt(i++));
a = (d << 2) | (e >> 4);
b = ((e & 15) << 4) | (f >> 2);
c = ((f & 3) << 6) | g;
output += String.fromCharCode(a);
if (f != 64) output += String.fromCharCode(b);
if (g != 64) output += String.fromCharCode(c);
}
return $utf8.decode(output);
}
}
}, this, [ "utf8" ]);
// UTF8 Module
//
// Cleaner and modularized utf-8 encoding and decoding library for javascript.
//
// copyright: MIT
// author: Nijiko Yonskai, @nijikokun, nijikokun@gmail.com
(function (name, definition, context, dependencies) {
if (typeof context['module'] !== 'undefined' && context['module']['exports']) { if (dependencies && context['require']) { for (var i = 0; i < dependencies.length; i++) context[dependencies[i]] = context['require'](dependencies[i]); } context['module']['exports'] = definition.apply(context); }
else if (typeof context['define'] !== 'undefined' && context['define'] === 'function' && context['define']['amd']) { define(name, (dependencies || []), definition); }
else { context[name] = definition.apply(context); }
})('utf8', function () {
return {
encode: function (string) {
if (typeof string !== 'string') return string;
else string = string.replace(/\r\n/g, "\n");
var output = "", i = 0, charCode;
for (i; i < string.length; i++) {
charCode = string.charCodeAt(i);
if (charCode < 128)
output += String.fromCharCode(charCode);
else if ((charCode > 127) && (charCode < 2048))
output += String.fromCharCode((charCode >> 6) | 192),
output += String.fromCharCode((charCode & 63) | 128);
else
output += String.fromCharCode((charCode >> 12) | 224),
output += String.fromCharCode(((charCode >> 6) & 63) | 128),
output += String.fromCharCode((charCode & 63) | 128);
}
return output;
},
decode: function (string) {
if (typeof string !== 'string') return string;
var output = "", i = 0, charCode = 0;
while (i < string.length) {
charCode = string.charCodeAt(i);
if (charCode < 128)
output += String.fromCharCode(charCode),
i++;
else if ((charCode > 191) && (charCode < 224))
output += String.fromCharCode(((charCode & 31) << 6) | (string.charCodeAt(i + 1) & 63)),
i += 2;
else
output += String.fromCharCode(((charCode & 15) << 12) | ((string.charCodeAt(i + 1) & 63) << 6) | (string.charCodeAt(i + 2) & 63)),
i += 3;
}
return output;
}
};
}, this);
@tadaa9

This comment has been minimized.

Copy link

@tadaa9 tadaa9 commented May 18, 2013

There is an error in the "decode" function of "base64". You must use the "window.atob" function and not the "window.btoa" function.

@nijikokun

This comment has been minimized.

Copy link
Owner Author

@nijikokun nijikokun commented May 24, 2013

Thanks! I've updated it to use atob @tadaa9

@delbertooo

This comment has been minimized.

Copy link

@delbertooo delbertooo commented Jul 31, 2013

Thanks for that code. In some Browsers (i.e. my Chrome28 and FF22) the native btoa function has problems with multi byte characters (throws InvalidCharacterError).

So, wouldn't it be better to utf8 de/encode those strings, too? Something like:

encode: function (input) {
      if (typeof $utf8 === 'undefined') throw { error: "MissingMethod", message: "UTF8 Module is missing." };
      if (typeof input !== 'string') return input;
      else input = $utf8.encode(input);
      if (typeof $this.btoa !== 'undefined') return $this.btoa(input); // moved
      var output = "", a, b, c, d, e, f, g, i = 0;
      // ...

and

decode: function (input) {
      if (typeof $utf8 === 'undefined') throw { error: "MissingMethod", message: "UTF8 Module is missing." };
      if (typeof input !== 'string') return input;
      else input.replace(/[^A-Za-z0-9\+\/\=]/g, "");
      if (typeof $this.atob !== 'undefined') return $utf8.decode($this.atob(input)); // moved and decoded
      // ...

With these changes i get no problems with lines like:

base64.decode(base64.encode('Hello World 秦始皇帝'))
@nijikokun

This comment has been minimized.

Copy link
Owner Author

@nijikokun nijikokun commented Sep 27, 2013

@delbertooo Actually this makes more sense. Updating to reflect changes

@modusinternet

This comment has been minimized.

Copy link

@modusinternet modusinternet commented Nov 15, 2013

I'm having a lot of trouble getting this library to work because I can find no documentation or examples of how to integrate this library and I'm unsure if it's the content I'm attempting to decode or the way I'm attempting to use it. Could we get a link to a couple examples demonstrating it's use and any details concerning Content Delivery Network (CDN) access?

@nijikokun

This comment has been minimized.

Copy link
Owner Author

@nijikokun nijikokun commented Dec 9, 2013

@modusinternet simply copy the raw data from the first file on this gist (it includes both base64 and utf8)

then you simply base64.decode(base64.encode('Hello World 秦始皇帝'))

@vote539

This comment has been minimized.

Copy link

@vote539 vote539 commented Jun 25, 2014

Where you have context['define'] === 'function' on line 9, I think you meant to put typeof context['define'] === 'function'.

@vote539

This comment has been minimized.

Copy link

@vote539 vote539 commented Jun 25, 2014

Also, I'm not having much luck with the Gist. The following example seems to fail.

var data = "ZGlzcCgiSGVsbG8gV8OzcmxkIDMiKTsKJSBow6lsbMOzIHfDp3JsZAo=";
console.log(require("base64").decode(data));

/* * * Expected Value * * *\
disp("Hello Wórld 3");
% hélló wçrld
*/

/* * * Actual Value * * *\
disp("Hello Wórld 3");
% hélló wçrld
*/
@nijikokun

This comment has been minimized.

Copy link
Owner Author

@nijikokun nijikokun commented Sep 11, 2014

@vote539 I've removed atob and btoa as they are the culprit here, they do not do well with utf8 and I don't really like the workaround for it: https://developer.mozilla.org/en-US/docs/Web/API/WindowBase64/Base64_encoding_and_decoding#The_.22Unicode_Problem.22

@nijikokun

This comment has been minimized.

Copy link
Owner Author

@nijikokun nijikokun commented Sep 11, 2014

The code you provided with the updated gist works as expected now. Sorry for the late reply.

@friedemannsommer

This comment has been minimized.

Copy link

@friedemannsommer friedemannsommer commented Aug 26, 2016

@nijikokun you should replace your "input.replace" (base64.module.js#L44) with following code. Strings are immutable in JavaScript.

https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/replace
http://stackoverflow.com/a/4717855

// @component base64.module.js
input = input.replace(/[^A-Za-z0-9\+\/\=]/g, "");
@nijikokun

This comment has been minimized.

Copy link
Owner Author

@nijikokun nijikokun commented Nov 11, 2016

@friedemannsommer you are correct, gist has been updated 👍 nice catch

@shubo

This comment has been minimized.

Copy link

@shubo shubo commented Feb 21, 2017

@uetkaje

This comment has been minimized.

Copy link

@uetkaje uetkaje commented Jul 24, 2018

On line 67, I think it should be similar like UTF8 module.

context[name] = definition();

should change with

context[name] = definition.apply(context);
@rmunn

This comment has been minimized.

Copy link

@rmunn rmunn commented Aug 13, 2020

This fails for emojis like 📚 (U+1F4DA BOOKS) which are represented by a surrogate pair (0xD83D 0xDCDA, in the case of 📚) in Javascript's UTF-16 encoding. It will encode U+1F4DA as if it were two codepoints U+D83D and U+DCDA, which aren't actually valid Unicode codepoints, and produce the UTF-8 encoding "\xed\xa0\xbd\xed\xb3\x9a", which is invalid UTF-8 and will either throw an error (on strict UTF-8 parsers) or turn into either two or six � characters, depending on the parser. The correct UTF-8 encoding of U+1F4DA is "\xf0\x9f\x93\x9a", which will be encoded and decoded correctly by the code below:

(function (name, definition, context, dependencies) {
  if (typeof context['module'] !== 'undefined' && context['module']['exports']) { if (dependencies && context['require']) { for (var i = 0; i < dependencies.length; i++) context[dependencies[i]] = context['require'](dependencies[i]); } context['module']['exports'] = definition.apply(context); }
  else if (typeof context['define'] !== 'undefined' && context['define'] === 'function' && context['define']['amd']) { define(name, (dependencies || []), definition); }
  else { context[name] = definition.apply(context); }
})('utf8', function () {
  return {
    encode: function (string) {
      if (typeof string !== 'string') return string;
      else string = string.replace(/\r\n/g, "\n");
      var output = "", i = 0, charCode;

      for (i; i < string.length; i++) {
        charCode = string.charCodeAt(i);

        if (charCode < 128) {
          output += String.fromCharCode(charCode);
        } else if ((charCode > 127) && (charCode < 2048)) {
          output += String.fromCharCode((charCode >> 6) | 192);
          output += String.fromCharCode((charCode & 63) | 128);
        } else if ((charCode > 55295) && (charCode < 57344) && string.length > i+1) {
          // Surrogate pair
          var hiSurrogate = charCode;
          var loSurrogate = string.charCodeAt(i+1);
          i++;  // Skip the low surrogate on the next loop pass
          var codePoint = (((hiSurrogate - 55296) << 10) | (loSurrogate - 56320)) + 65536;
          output += String.fromCharCode((codePoint >> 18) | 240);
          output += String.fromCharCode(((codePoint >> 12) & 63) | 128);
          output += String.fromCharCode(((codePoint >> 6) & 63) | 128);
          output += String.fromCharCode((codePoint & 63) | 128);
        } else {
          // Not a surrogate pair, or a dangling surrogate without its partner that we'll just encode as-is
          output += String.fromCharCode((charCode >> 12) | 224);
          output += String.fromCharCode(((charCode >> 6) & 63) | 128);
          output += String.fromCharCode((charCode & 63) | 128);
        }
      }

      return output;
    },

    decode: function (string) {
      if (typeof string !== 'string') return string;
      var output = "", i = 0, charCode = 0;

      while (i < string.length) {
        charCode = string.charCodeAt(i);

        if (charCode < 128) {
          output += String.fromCharCode(charCode),
          i++;
        } else if ((charCode > 191) && (charCode < 224)) {
          output += String.fromCharCode(((charCode & 31) << 6) | (string.charCodeAt(i + 1) & 63));
          i += 2;
        } else if ((charCode > 223) && (charCode < 240)) {
          output += String.fromCharCode(((charCode & 15) << 12) | ((string.charCodeAt(i + 1) & 63) << 6) | (string.charCodeAt(i + 2) & 63));
          i += 3;
        } else {
          var codePoint = ((charCode & 7) << 18) | ((string.charCodeAt(i + 1) & 63) << 12) | ((string.charCodeAt(i + 2) & 63) << 6) | (string.charCodeAt(i + 3) & 63);
          // If you don't have to support Internet Explorer:
          output += String.fromCodePoint(codePoint);
          // If you can't use String.fromCodePoint here because you still need to support Internet Explorer:
          // output += String.fromCharCode(((codePoint - 65536) >> 10) + 55296) + String.fromCharCode(((codePoint - 65536) & 1023) + 56320);
          i += 4;
        }
      }

      return output;
    }
  };
}, this);

This is still not 100% right, as it will produce a garbled string if the input contains a single low surrogate character on its own followed by more non-surrogate text. In my use case, I don't care whether such an invalid string gets garbled beyond recognition, so I haven't bothered writing the extra code that would be necessary to handle that rare corner case. If you need to handle that case, you probably already know enough about surrogate pairs to be able to correctly implement it yourself.

As this is based on @nijikokun's original code, I hereby license all my contributions in this comment under the same MIT license as @nijikokun's original.

@rmunn

This comment has been minimized.

Copy link

@rmunn rmunn commented Aug 13, 2020

See also Jermolene/TiddlyWiki5#4685, which was caused by this code not handling surrogate pairs properly, and was fixed by the code I posted in https://gist.github.com/Nijikokun/5192472#gistcomment-3416186 just above.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.