public
Last active

ES6 Unicode Shims for ES3+

  • Download Gist
es6-unicode-shims.js
JavaScript
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64
/*!
* ES6 Unicode Shims 0.1
* (c) 2012 Steven Levithan <http://slevithan.com/>
* MIT License
*/
 
/**
 * Returns a string created from the specified sequence of Unicode code points. Each argument is
 * converted to a number and must be an integer between 0 and 0x10FFFF. Code points above 0xFFFF
 * are encoded as surrogate pairs. If a provided integer is in the surrogate range
 * (0xD800-0xDFFF), it produces an unpaired surrogate. Shim for the ES6 `String.fromCodePoint`
 * method; it is only installed when the environment does not already provide one.
 * @memberOf String
 * @param {...Number} codePoints Sequence of Unicode code points.
 * @returns {String} String created from the specified code points. Empty string if no code
 *   points are provided.
 * @throws {RangeError} If an argument is not an integer in the range 0 to 0x10FFFF (this
 *   includes `NaN`, `Infinity`, negative numbers, and fractional numbers).
 * @example
 *
 * // Basic use
 * String.fromCodePoint(0x41); // -> 'A'
 *
 * // Multiple code points; returns astral characters as surrogate pairs
 * String.fromCodePoint(0x20B20, 0x28B4E, 0x29DF6);
 * // Unlike String.fromCharCode, this correctly handles code points above 0xFFFF
 *
 * // Invalid code points are rejected rather than silently truncated
 * String.fromCodePoint(0x110000); // -> throws RangeError
 */
if (!String.fromCodePoint) {
  // The single named parameter is unused; it only exists so that the function's `length` is 1,
  // matching the native method.
  String.fromCodePoint = function (_) {
    var chars = [],
      point,
      offset,
      i;
    for (i = 0; i < arguments.length; ++i) {
      point = Number(arguments[i]);
      // Reject anything that is not an integral code point in the Unicode range. Without this
      // check, String.fromCharCode would silently wrap the value modulo 0x10000.
      if (!isFinite(point) || point < 0 || point > 0x10FFFF || Math.floor(point) !== point) {
        throw new RangeError("Invalid code point " + arguments[i]);
      }
      if (point > 0xFFFF) {
        // Astral code point: split the 20-bit offset into a high and a low surrogate
        offset = point - 0x10000;
        chars.push(String.fromCharCode(0xD800 + (offset >> 10), 0xDC00 + (offset & 0x3FF)));
      } else {
        chars.push(String.fromCharCode(point));
      }
    }
    return chars.join("");
  };
}
 
/**
 * Returns the numeric Unicode code point that starts at the given position. Here `pos` is a
 * code *unit* (UTF-16) position, not a code point index. If the code unit at `pos` is a high
 * surrogate that is immediately followed by a low surrogate, the code point encoded by that
 * surrogate pair is returned. Otherwise (an ordinary BMP character, the second surrogate of a
 * pair, or an unpaired surrogate) the value of the code unit itself is returned. Shim for the
 * ES6 `String.prototype.codePointAt` method; it is only installed when the environment does not
 * already provide one.
 * @memberOf String.prototype
 * @param {Number} [pos=0] Code unit position in the string. Converted to a number and truncated
 *   toward zero; treated as `0` if the result is `NaN`.
 * @returns {Number|undefined} Code point starting at the specified position, or `undefined` if
 *   the position is less than `0` or not less than the string length.
 * @throws {TypeError} If called with `null` or `undefined` as the `this` value.
 * @example
 *
 * var str = String.fromCodePoint(166734);
 * str.codePointAt(0); // -> 166734
 * // Unlike the charCodeAt method, this correctly handles code points above 0xFFFF
 */
if (!String.prototype.codePointAt) {
  String.prototype.codePointAt = function (pos) {
    if (this == null) {
      throw new TypeError("String.prototype.codePointAt called on null or undefined");
    }
    var str = String(this),
      size = str.length,
      // Coerce up front so that `index + 1` below is always numeric addition; a string `pos`
      // such as "1" would otherwise turn it into the concatenation "11"
      index = Number(pos),
      first,
      second;
    if (index !== index) {
      // NaN (e.g. `pos` omitted or not numeric) is treated as position 0
      index = 0;
    }
    // Truncate toward zero
    index = index < 0 ? Math.ceil(index) : Math.floor(index);
    if (index < 0 || index >= size) {
      return undefined;
    }
    first = str.charCodeAt(index);
    // A high surrogate only forms a pair if a low surrogate directly follows it
    if (first >= 0xD800 && first <= 0xDBFF && index + 1 < size) {
      second = str.charCodeAt(index + 1);
      if (second >= 0xDC00 && second <= 0xDFFF) {
        return (first - 0xD800) * 0x400 + (second - 0xDC00) + 0x10000;
      }
    }
    return first;
  };
}

Heads up: the String.prototype.codePointAt shim has a few bugs that cause it to fail some of these tests: <https://github.com/mathiasbynens/String.prototype.codePointAt/blob/master/tests/tests.js>. It made me wonder why you didn’t just use the polyfill in the spec proposal, but then I noticed that one was buggy too (in a different way, though). I’ve now written a polyfill of my own along with a test suite: <https://github.com/mathiasbynens/String.prototype.codePointAt>

The same thing goes for your String.fromCodePoint polyfill: it fails some of these tests: <https://github.com/mathiasbynens/String.fromCodePoint/blob/master/tests/tests.js>. The one in Norbert’s spec proposal works fine, though.

The issues can be fixed pretty easily — feel free to use the tests I provided to ensure correct behavior.

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.