Skip to content

Instantly share code, notes, and snippets.

@masakielastic
Created July 12, 2013 15:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save masakielastic/5985383 to your computer and use it in GitHub Desktop.
Save masakielastic/5985383 to your computer and use it in GitHub Desktop.
diff --git a/ext/json/JSON_parser.c b/ext/json/JSON_parser.c
index dd832a7..3749998 100644
--- a/ext/json/JSON_parser.c
+++ b/ext/json/JSON_parser.c
@@ -353,40 +353,69 @@ use_string:
+ /*
+ * Append one UTF-16 code unit to buf, encoded as UTF-8.
+ *
+ * A high surrogate (0xd800-0xdbff) is first written in its 3-byte form
+ * (0xed, 0xa0-0xaf, 0x80-0xbf). The next call recognizes it at the tail of
+ * buf: a low surrogate (0xdc00-0xdfff) is merged with it into one 4-byte
+ * sequence, any other unit causes it to be replaced by U+FFFD. A low
+ * surrogate with no pending high surrogate is emitted as U+FFFD without
+ * touching the bytes already in buf.
+ */
static void utf16_to_utf8(smart_str *buf, unsigned short utf16)
{
- if (utf16 < 0x80)
- {
- smart_str_appendc(buf, (unsigned char) utf16);
- }
- else if (utf16 < 0x800)
- {
- smart_str_appendc(buf, 0xc0 | (utf16 >> 6));
- smart_str_appendc(buf, 0x80 | (utf16 & 0x3f));
- }
- else if ((utf16 & 0xfc00) == 0xdc00
- && buf->len >= 3
- && ((unsigned char) buf->c[buf->len - 3]) == 0xed
- && ((unsigned char) buf->c[buf->len - 2] & 0xf0) == 0xa0
- && ((unsigned char) buf->c[buf->len - 1] & 0xc0) == 0x80)
- {
- /* found surrogate pair */
- unsigned long utf32;
-
- utf32 = (((buf->c[buf->len - 2] & 0xf) << 16)
- | ((buf->c[buf->len - 1] & 0x3f) << 10)
- | (utf16 & 0x3ff)) + 0x10000;
- buf->len -= 3;
-
- smart_str_appendc(buf, (unsigned char) (0xf0 | (utf32 >> 18)));
- smart_str_appendc(buf, 0x80 | ((utf32 >> 12) & 0x3f));
- smart_str_appendc(buf, 0x80 | ((utf32 >> 6) & 0x3f));
- smart_str_appendc(buf, 0x80 | (utf32 & 0x3f));
- }
- else
- {
- smart_str_appendc(buf, 0xe0 | (utf16 >> 12));
- smart_str_appendc(buf, 0x80 | ((utf16 >> 6) & 0x3f));
- smart_str_appendc(buf, 0x80 | (utf16 & 0x3f));
+ if ((utf16 & 0xfc00) == 0xdc00) {
+ /* low surrogate: merge it with a pending high surrogate, if there is one */
+ if (buf->len >= 3
+ && ((unsigned char) buf->c[buf->len - 3] == 0xed)
+ && ((unsigned char) buf->c[buf->len - 2] & 0xf0) == 0xa0
+ && ((unsigned char) buf->c[buf->len - 1] & 0xc0) == 0x80
+ ) {
+ /* found surrogate pair */
+ unsigned long utf32;
+
+ utf32 = (((buf->c[buf->len - 2] & 0xf) << 16)
+ | ((buf->c[buf->len - 1] & 0x3f) << 10)
+ | (utf16 & 0x3ff)) + 0x10000;
+ buf->len -= 3;
+
+ smart_str_appendc(buf, (unsigned char) (0xf0 | (utf32 >> 18)));
+ smart_str_appendc(buf, 0x80 | ((utf32 >> 12) & 0x3f));
+ smart_str_appendc(buf, 0x80 | ((utf32 >> 6) & 0x3f));
+ smart_str_appendc(buf, 0x80 | (utf32 & 0x3f));
+ } else {
+ /* unpaired low surrogate: no pending bytes to discard (buf may even hold < 3 bytes), so only append U+FFFD */
+ smart_str_appendl(buf, "\xef\xbf\xbd", 3);
+ }
+
+ } else {
+ /* any other code unit: a pending high surrogate is unpaired, so replace it with U+FFFD first */
+ if (buf->len >= 3
+ && ((unsigned char) buf->c[buf->len - 3] == 0xed)
+ && ((unsigned char) buf->c[buf->len - 2] & 0xf0) == 0xa0
+ && ((unsigned char) buf->c[buf->len - 1] & 0xc0) == 0x80
+ ) {
+ buf->len -= 3;
+ smart_str_appendl(buf, "\xef\xbf\xbd", 3);
+ }
+
+ if (utf16 < 0x80)
+ {
+ smart_str_appendc(buf, (unsigned char) utf16);
+ }
+ else if (utf16 < 0x800)
+ {
+ smart_str_appendc(buf, 0xc0 | (utf16 >> 6));
+ smart_str_appendc(buf, 0x80 | (utf16 & 0x3f));
+ }
+ else
+ {
+ smart_str_appendc(buf, 0xe0 | (utf16 >> 12));
+ smart_str_appendc(buf, 0x80 | ((utf16 >> 6) & 0x3f));
+ smart_str_appendc(buf, 0x80 | (utf16 & 0x3f));
+ }
}
+
+
}
static void attach_zval(JSON_parser jp, int up, int cur, smart_str *key, int assoc TSRMLS_DC)
<?php
// Checks for how json_decode() renders unpaired UTF-16 surrogate escapes.
// Each comparison is intended to dump bool(true) on a build that includes
// the utf16_to_utf8() change; confirm against the build under test.
$replacement = "\xef\xbf\xbd";   // UTF-8 bytes of U+FFFD
$highSurrogate = "\xed\xa0\x80"; // 3-byte form of the code unit 0xD800

var_dump(
    // a lone low surrogate
    json_decode('"\udc00"') === $replacement,
    // two high surrogates in a row: the first is replaced, the second is kept
    json_decode('"\ud800\ud800"') === $replacement . $highSurrogate,
    // a lone high surrogate at the end of the string
    json_decode('"\ud800"') === $highSurrogate
);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment