hugowetterberg/utf8buffer.js

## utf8buffer.js
// https://github.com/beatgammit/base64-js
// Standard base64 alphabet. The encoder below turns a 6-bit value into a
// character with lookup.charAt(value), so a character's index is its value.
var lookup = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'

;(function (exports) {
	'use strict'

	// Container used for decoded bytes: a typed array when the environment
	// provides one, a plain Array otherwise.
	var Arr = (typeof Uint8Array !== 'undefined')
		? Uint8Array
		: Array

	// Character codes marking the start of each range of the base64 alphabet.
	var PLUS = '+'.charCodeAt(0)
	var SLASH = '/'.charCodeAt(0)
	var NUMBER = '0'.charCodeAt(0)
	var LOWER = 'a'.charCodeAt(0)
	var UPPER = 'A'.charCodeAt(0)
	// URL-safe variants: '-' is accepted for '+', '_' for '/'.
	var PLUS_URL_SAFE = '-'.charCodeAt(0)
	var SLASH_URL_SAFE = '_'.charCodeAt(0)

	/**
	 * Maps a single base64 character to its 6-bit value.
	 *
	 * Both the standard ('+', '/') and the URL-safe ('-', '_') characters are
	 * accepted for the values 62 and 63.
	 *
	 * @param {string} elt String whose first character is decoded.
	 * @return {number} A value in 0..63, or -1 when the character is not part
	 *     of the base64 alphabet (this includes '=' and the empty string).
	 */
	function decode (elt) {
		var code = elt.charCodeAt(0)
		if (code === PLUS || code === PLUS_URL_SAFE) return 62 // '+' or '-'
		if (code === SLASH || code === SLASH_URL_SAFE) return 63 // '/' or '_'
		// Each range is checked on both ends so that characters lying between
		// the ranges (':', '=', '[', ...) are not mapped to bogus values.
		if (code >= NUMBER && code < NUMBER + 10) return code - NUMBER + 26 + 26
		if (code >= UPPER && code < UPPER + 26) return code - UPPER
		if (code >= LOWER && code < LOWER + 26) return code - LOWER + 26
		return -1 // no match
	}

	/**
	 * Decodes a base64 string into a byte container (`Arr`).
	 *
	 * Characters are handed to decode() as-is; its result is not validated here.
	 *
	 * @param {string} b64 Base64 text; its length must be a multiple of 4 and
	 *     it may end in one or two '=' padding characters.
	 * @return {Uint8Array|Array<number>} The decoded bytes.
	 * @throws {Error} When the input length is not a multiple of 4.
	 */
	function b64ToByteArray (b64) {
		var total = b64.length

		if (total % 4 > 0) {
			throw new Error('Invalid string. Length must be a multiple of 4')
		}

		// Two '=' mean the last group carries one byte, a single '=' means it
		// carries two bytes. Looking at the second-to-last character first
		// settles both cases without searching the string.
		var padding = 0
		if (b64.charAt(total - 2) === '=') {
			padding = 2
		} else if (b64.charAt(total - 1) === '=') {
			padding = 1
		}

		// Every 4 characters yield 3 bytes, minus one byte per padding character.
		var bytes = new Arr(total * 3 / 4 - padding)

		// A padded final group is handled separately after the main loop.
		var fullEnd = padding > 0 ? total - 4 : total

		var written = 0
		var pos = 0
		var bits

		while (pos < fullEnd) {
			bits = (decode(b64.charAt(pos)) << 18) |
				(decode(b64.charAt(pos + 1)) << 12) |
				(decode(b64.charAt(pos + 2)) << 6) |
				decode(b64.charAt(pos + 3))
			bytes[written++] = (bits >> 16) & 0xFF
			bytes[written++] = (bits >> 8) & 0xFF
			bytes[written++] = bits & 0xFF
			pos += 4
		}

		if (padding === 2) {
			// 2 characters = 12 bits, of which the top 8 form the byte.
			bits = (decode(b64.charAt(pos)) << 2) | (decode(b64.charAt(pos + 1)) >> 4)
			bytes[written++] = bits & 0xFF
		} else if (padding === 1) {
			// 3 characters = 18 bits, of which the top 16 form two bytes.
			bits = (decode(b64.charAt(pos)) << 10) |
				(decode(b64.charAt(pos + 1)) << 4) |
				(decode(b64.charAt(pos + 2)) >> 2)
			bytes[written++] = (bits >> 8) & 0xFF
			bytes[written++] = bits & 0xFF
		}

		return bytes
	}

	/**
	 * Encodes an array-like of byte values as a padded base64 string.
	 *
	 * @param {Uint8Array|Array<number>} uint8 Bytes to encode.
	 * @return {string} Base64 text using the `lookup` alphabet, padded with
	 *     '=' so that its length is a multiple of 4.
	 */
	function uint8ToBase64 (uint8) {
		// Bytes left over after the last complete group of three (0, 1 or 2).
		var remainder = uint8.length % 3
		var wholeEnd = uint8.length - remainder
		var chunks = []
		var bits

		// Turns one 6-bit value into its alphabet character.
		function sextet (value) {
			return lookup.charAt(value)
		}

		// Complete groups: 3 bytes -> 24 bits -> 4 characters.
		for (var pos = 0; pos < wholeEnd; pos += 3) {
			bits = (uint8[pos] << 16) + (uint8[pos + 1] << 8) + (uint8[pos + 2])
			chunks.push(
				sextet(bits >> 18 & 0x3F) +
				sextet(bits >> 12 & 0x3F) +
				sextet(bits >> 6 & 0x3F) +
				sextet(bits & 0x3F)
			)
		}

		// Trailing bytes: the missing low bits are zero and '=' fills the group.
		if (remainder === 1) {
			bits = uint8[uint8.length - 1]
			chunks.push(sextet(bits >> 2) + sextet((bits << 4) & 0x3F) + '==')
		} else if (remainder === 2) {
			bits = (uint8[uint8.length - 2] << 8) + (uint8[uint8.length - 1])
			chunks.push(
				sextet(bits >> 10) +
				sextet((bits >> 4) & 0x3F) +
				sextet((bits << 2) & 0x3F) +
				'='
			)
		}

		return chunks.join('')
	}

	exports.toByteArray = b64ToByteArray
	exports.fromByteArray = uint8ToBase64
}(typeof exports === 'undefined' ? (this.base64js = {}) : exports))

// Copied from http://xahlee.info/js/js_unicode_code_point.html
// returns a char's Unicode codepoint, of the char at index idx of string str
// 2013-07-16 from https://developer.mozilla.org/en-US/docs/JavaScript/Reference/Global_Objects/String/charCodeAt
/**
 * Returns the Unicode code point of the character that starts at UTF-16
 * index `idx` of `str`, combining a surrogate pair into a single code point.
 *
 * @param {string} str String to read from.
 * @param {number} [idx=0] Index of the UTF-16 code unit to inspect.
 * @return {number|boolean} The code point. `false` when `idx` points at a
 *     low surrogate, so that loops can skip it (a pair is reported at the
 *     index of its high surrogate). NaN when `idx` is outside the string.
 * @throws {string} When a high surrogate is not followed by a low surrogate.
 */
function fixedCharCodeAt (str, idx) {
	// ex. fixedCharCodeAt ('\uD800\uDC00', 0); // 65536
	// ex. fixedCharCodeAt ('\uD800\uDC00', 1); // false
	idx = idx || 0;
	var code = str.charCodeAt(idx);
	if (0xD800 <= code && code <= 0xDBFF) { // High surrogate (could change last hex to 0xDB7F to treat high private surrogates as single characters)
		var low = str.charCodeAt(idx + 1);
		// The negated range test also rejects NaN, i.e. a high surrogate that
		// is the last code unit of the string.
		if (!(0xDC00 <= low && low <= 0xDFFF)) {
			// A string (not an Error) is thrown to stay compatible with
			// callers that already handle this value.
			throw 'High surrogate not followed by low surrogate in fixedCharCodeAt()';
		}
		return ((code - 0xD800) * 0x400) + (low - 0xDC00) + 0x10000;
	}
	if (0xDC00 <= code && code <= 0xDFFF) { // Low surrogate
		// We return false to allow loops to skip this iteration since the
		// high surrogate should already have been handled in the previous one
		return false;
	}
	return code;
}

// Google closure, string to byte modified to handle code points up to U+7FFFFFFF (full utf8)
// https://github.com/google/closure-library/blob/28d9db61f5dc639c010be74e4d61682121d2dbd7/closure/goog/crypt/crypt.js#L110
/**
 * Converts a JS string to a UTF-8 "byte" array.
 * @param {string} str 16-bit unicode string.
 * @return {!Array<number>} UTF-8 byte array.
 */
var stringToUtf8ByteArray = function(str) {
	// TODO(user): Use native implementations if/when available
	var bytes = [];
	for (var pos = 0; pos < str.length; pos++) {
		var cp = fixedCharCodeAt(str, pos);
		// `false` means pos sits on a low surrogate; nothing is emitted for it.
		if (cp === false) continue;

		// Single-byte (ASCII) case.
		if (cp < 128) {
			bytes.push(cp);
			continue;
		}

		// Pick the lead-byte marker and the number of continuation bytes
		// from the magnitude of the code point.
		var marker, tail;
		if (cp < 2048) {
			marker = 192; tail = 1;
		} else if (cp < 65536) {
			marker = 224; tail = 2;
		} else if (cp < 2097152) {
			marker = 240; tail = 3;
		} else if (cp < 67108864) {
			marker = 248; tail = 4;
		} else if (cp < 2147483648) {
			marker = 252; tail = 5;
		} else {
			// Outside every range: emit nothing for this position.
			continue;
		}

		// Lead byte carries the topmost bits; each continuation byte carries
		// the next 6 bits, most significant group first.
		bytes.push((cp >> (6 * tail)) | marker);
		while (tail-- > 0) {
			bytes.push(((cp >> (6 * tail)) & 63) | 128);
		}
	}
	return bytes;
};


// Self-check: encode a non-ASCII string with the pure-JS implementation and
// compare the result with Node's native Buffer.

// Using the js-implementation
var utfyPass = "Huöut væ 💥💖 Iñtërnâtiônàlizætiøn";
var data = stringToUtf8ByteArray(utfyPass);
var base = exports.fromByteArray(data);

// Using native Buffer. Buffer.from() replaces the deprecated `new Buffer()`
// constructor.
var buf = Buffer.from(utfyPass, 'utf8');

console.log(base);
if (buf.toString('base64') === base) {
	console.log('Yay, we passed comparison with native base64-encoded utf8 buffer!')
	console.log(Buffer.from(base, 'base64').toString('utf8'));
} else {
	console.log("Noo! Fail!")
}
	// https://github.com/beatgammit/base64-js
	// Standard base64 alphabet. The encoder below turns a 6-bit value into a
	// character with lookup.charAt(value), so a character's index is its value.
	var lookup = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'

	;(function (exports) {
	'use strict'

	// Container used for decoded bytes: a typed array when the environment
	// provides one, a plain Array otherwise.
	var Arr = (typeof Uint8Array !== 'undefined')
		? Uint8Array
		: Array

	// Character codes marking the start of each range of the base64 alphabet.
	var PLUS = '+'.charCodeAt(0)
	var SLASH = '/'.charCodeAt(0)
	var NUMBER = '0'.charCodeAt(0)
	var LOWER = 'a'.charCodeAt(0)
	var UPPER = 'A'.charCodeAt(0)
	// URL-safe variants: '-' is accepted for '+', '_' for '/'.
	var PLUS_URL_SAFE = '-'.charCodeAt(0)
	var SLASH_URL_SAFE = '_'.charCodeAt(0)

	/**
	 * Maps a single base64 character to its 6-bit value.
	 *
	 * Both the standard ('+', '/') and the URL-safe ('-', '_') characters are
	 * accepted for the values 62 and 63.
	 *
	 * @param {string} elt String whose first character is decoded.
	 * @return {number} A value in 0..63, or -1 when the character is not part
	 *     of the base64 alphabet (this includes '=' and the empty string).
	 */
	function decode (elt) {
		var code = elt.charCodeAt(0)
		if (code === PLUS || code === PLUS_URL_SAFE) return 62 // '+' or '-'
		if (code === SLASH || code === SLASH_URL_SAFE) return 63 // '/' or '_'
		// Each range is checked on both ends so that characters lying between
		// the ranges (':', '=', '[', ...) are not mapped to bogus values.
		if (code >= NUMBER && code < NUMBER + 10) return code - NUMBER + 26 + 26
		if (code >= UPPER && code < UPPER + 26) return code - UPPER
		if (code >= LOWER && code < LOWER + 26) return code - LOWER + 26
		return -1 // no match
	}

	/**
	 * Decodes a base64 string into a byte container (`Arr`).
	 *
	 * Characters are handed to decode() as-is; its result is not validated here.
	 *
	 * @param {string} b64 Base64 text; its length must be a multiple of 4 and
	 *     it may end in one or two '=' padding characters.
	 * @return {Uint8Array|Array<number>} The decoded bytes.
	 * @throws {Error} When the input length is not a multiple of 4.
	 */
	function b64ToByteArray (b64) {
		var total = b64.length

		if (total % 4 > 0) {
			throw new Error('Invalid string. Length must be a multiple of 4')
		}

		// Two '=' mean the last group carries one byte, a single '=' means it
		// carries two bytes. Looking at the second-to-last character first
		// settles both cases without searching the string.
		var padding = 0
		if (b64.charAt(total - 2) === '=') {
			padding = 2
		} else if (b64.charAt(total - 1) === '=') {
			padding = 1
		}

		// Every 4 characters yield 3 bytes, minus one byte per padding character.
		var bytes = new Arr(total * 3 / 4 - padding)

		// A padded final group is handled separately after the main loop.
		var fullEnd = padding > 0 ? total - 4 : total

		var written = 0
		var pos = 0
		var bits

		while (pos < fullEnd) {
			bits = (decode(b64.charAt(pos)) << 18) |
				(decode(b64.charAt(pos + 1)) << 12) |
				(decode(b64.charAt(pos + 2)) << 6) |
				decode(b64.charAt(pos + 3))
			bytes[written++] = (bits >> 16) & 0xFF
			bytes[written++] = (bits >> 8) & 0xFF
			bytes[written++] = bits & 0xFF
			pos += 4
		}

		if (padding === 2) {
			// 2 characters = 12 bits, of which the top 8 form the byte.
			bits = (decode(b64.charAt(pos)) << 2) | (decode(b64.charAt(pos + 1)) >> 4)
			bytes[written++] = bits & 0xFF
		} else if (padding === 1) {
			// 3 characters = 18 bits, of which the top 16 form two bytes.
			bits = (decode(b64.charAt(pos)) << 10) |
				(decode(b64.charAt(pos + 1)) << 4) |
				(decode(b64.charAt(pos + 2)) >> 2)
			bytes[written++] = (bits >> 8) & 0xFF
			bytes[written++] = bits & 0xFF
		}

		return bytes
	}

	/**
	 * Encodes an array-like of byte values as a padded base64 string.
	 *
	 * @param {Uint8Array|Array<number>} uint8 Bytes to encode.
	 * @return {string} Base64 text using the `lookup` alphabet, padded with
	 *     '=' so that its length is a multiple of 4.
	 */
	function uint8ToBase64 (uint8) {
		// Bytes left over after the last complete group of three (0, 1 or 2).
		var remainder = uint8.length % 3
		var wholeEnd = uint8.length - remainder
		var chunks = []
		var bits

		// Turns one 6-bit value into its alphabet character.
		function sextet (value) {
			return lookup.charAt(value)
		}

		// Complete groups: 3 bytes -> 24 bits -> 4 characters.
		for (var pos = 0; pos < wholeEnd; pos += 3) {
			bits = (uint8[pos] << 16) + (uint8[pos + 1] << 8) + (uint8[pos + 2])
			chunks.push(
				sextet(bits >> 18 & 0x3F) +
				sextet(bits >> 12 & 0x3F) +
				sextet(bits >> 6 & 0x3F) +
				sextet(bits & 0x3F)
			)
		}

		// Trailing bytes: the missing low bits are zero and '=' fills the group.
		if (remainder === 1) {
			bits = uint8[uint8.length - 1]
			chunks.push(sextet(bits >> 2) + sextet((bits << 4) & 0x3F) + '==')
		} else if (remainder === 2) {
			bits = (uint8[uint8.length - 2] << 8) + (uint8[uint8.length - 1])
			chunks.push(
				sextet(bits >> 10) +
				sextet((bits >> 4) & 0x3F) +
				sextet((bits << 2) & 0x3F) +
				'='
			)
		}

		return chunks.join('')
	}

	exports.toByteArray = b64ToByteArray
	exports.fromByteArray = uint8ToBase64
	}(typeof exports === 'undefined' ? (this.base64js = {}) : exports))

	// Copied from http://xahlee.info/js/js_unicode_code_point.html
	// returns a char's Unicode codepoint, of the char at index idx of string str
	// 2013-07-16 from https://developer.mozilla.org/en-US/docs/JavaScript/Reference/Global_Objects/String/charCodeAt
	/**
	 * Returns the Unicode code point of the character that starts at UTF-16
	 * index `idx` of `str`, combining a surrogate pair into a single code point.
	 *
	 * @param {string} str String to read from.
	 * @param {number} [idx=0] Index of the UTF-16 code unit to inspect.
	 * @return {number|boolean} The code point. `false` when `idx` points at a
	 *     low surrogate, so that loops can skip it (a pair is reported at the
	 *     index of its high surrogate). NaN when `idx` is outside the string.
	 * @throws {string} When a high surrogate is not followed by a low surrogate.
	 */
	function fixedCharCodeAt (str, idx) {
		// ex. fixedCharCodeAt ('\uD800\uDC00', 0); // 65536
		// ex. fixedCharCodeAt ('\uD800\uDC00', 1); // false
		idx = idx || 0;
		var code = str.charCodeAt(idx);
		if (0xD800 <= code && code <= 0xDBFF) { // High surrogate (could change last hex to 0xDB7F to treat high private surrogates as single characters)
			var low = str.charCodeAt(idx + 1);
			// The negated range test also rejects NaN, i.e. a high surrogate
			// that is the last code unit of the string.
			if (!(0xDC00 <= low && low <= 0xDFFF)) {
				// A string (not an Error) is thrown to stay compatible with
				// callers that already handle this value.
				throw 'High surrogate not followed by low surrogate in fixedCharCodeAt()';
			}
			return ((code - 0xD800) * 0x400) + (low - 0xDC00) + 0x10000;
		}
		if (0xDC00 <= code && code <= 0xDFFF) { // Low surrogate
			// We return false to allow loops to skip this iteration since the
			// high surrogate should already have been handled in the previous one
			return false;
		}
		return code;
	}

	// Google closure, string to byte modified to handle code points up to U+7FFFFFFF (full utf8)
	// https://github.com/google/closure-library/blob/28d9db61f5dc639c010be74e4d61682121d2dbd7/closure/goog/crypt/crypt.js#L110
	/**
	* Converts a JS string to a UTF-8 "byte" array.
	* @param {string} str 16-bit unicode string.
	* @return {!Array<number>} UTF-8 byte array.
	*/
	var stringToUtf8ByteArray = function(str) {
		// TODO(user): Use native implementations if/when available
		var bytes = [];
		for (var pos = 0; pos < str.length; pos++) {
			var cp = fixedCharCodeAt(str, pos);
			// `false` means pos sits on a low surrogate; nothing is emitted for it.
			if (cp === false) continue;

			// Single-byte (ASCII) case.
			if (cp < 128) {
				bytes.push(cp);
				continue;
			}

			// Pick the lead-byte marker and the number of continuation bytes
			// from the magnitude of the code point.
			var marker, tail;
			if (cp < 2048) {
				marker = 192; tail = 1;
			} else if (cp < 65536) {
				marker = 224; tail = 2;
			} else if (cp < 2097152) {
				marker = 240; tail = 3;
			} else if (cp < 67108864) {
				marker = 248; tail = 4;
			} else if (cp < 2147483648) {
				marker = 252; tail = 5;
			} else {
				// Outside every range: emit nothing for this position.
				continue;
			}

			// Lead byte carries the topmost bits; each continuation byte
			// carries the next 6 bits, most significant group first.
			bytes.push((cp >> (6 * tail)) | marker);
			while (tail-- > 0) {
				bytes.push(((cp >> (6 * tail)) & 63) | 128);
			}
		}
		return bytes;
	};


	// Self-check: encode a non-ASCII string with the pure-JS implementation and
	// compare the result with Node's native Buffer.

	// Using the js-implementation
	var utfyPass = "Huöut væ 💥💖 Iñtërnâtiônàlizætiøn";
	var data = stringToUtf8ByteArray(utfyPass);
	var base = exports.fromByteArray(data);

	// Using native Buffer. Buffer.from() replaces the deprecated `new Buffer()`
	// constructor.
	var buf = Buffer.from(utfyPass, 'utf8');

	console.log(base);
	if (buf.toString('base64') === base) {
		console.log('Yay, we passed comparison with native base64-encoded utf8 buffer!')
		console.log(Buffer.from(base, 'base64').toString('utf8'));
	} else {
		console.log("Noo! Fail!")
	}