Last active
August 29, 2015 13:56
-
-
Save LelouchHe/8951627 to your computer and use it in GitHub Desktop.
Patch fixing the UTF-8 bug in libjson 7.6.1
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Common subdirectories: libjson_7.6.1/_internal/Source/JSONDefs and libjson_7.6.1.mine/_internal/Source/JSONDefs | |
diff -uN libjson_7.6.1/_internal/Source/JSONWorker.cpp libjson_7.6.1.mine/_internal/Source/JSONWorker.cpp | |
--- libjson_7.6.1/_internal/Source/JSONWorker.cpp 2012-05-30 17:14:36.000000000 +0800 | |
+++ libjson_7.6.1.mine/_internal/Source/JSONWorker.cpp 2013-04-19 07:05:06.000000000 +0800 | |
@@ -275,12 +275,12 @@ | |
void JSONWorker::UTF(const json_char * & pos, json_string & result, const json_char * const end) json_nothrow { | |
JSON_ASSERT_SAFE(((long)end - (long)pos) > 4, JSON_TEXT("UTF will go out of bounds"), return;); | |
- json_uchar first = UTF8(pos, end); | |
+ json_uchar first = UTF8(pos, result, end); | |
if (json_unlikely((first > 0xD800) && (first < 0xDBFF) && | |
(*(pos + 1) == '\\') && (*(pos + 2) == 'u'))){ | |
const json_char * original_pos = pos; //if the 2nd character is not correct I need to roll back the iterator | |
pos += 2; | |
- json_uchar second = UTF8(pos, end); | |
+ json_uchar second = UTF8(pos, result, end); | |
//surrogate pair, not two characters | |
if (json_unlikely((second > 0xDC00) && (second < 0xDFFF))){ | |
result += SurrogatePair(first, second); | |
@@ -293,7 +293,10 @@ | |
} | |
#endif | |
-json_uchar JSONWorker::UTF8(const json_char * & pos, const json_char * const end) json_nothrow { | |
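+// A \uXXXX escape currently decodes to at most 4 bytes, so the decoded bytes fit within the 5 characters the escape originally occupied (including the 'u'). | |
+// With JSON_UNICODE defined, the json_uchar is still returned (to stay compatible) and result is not modified. | |
+// Otherwise '\0' is returned, and that return value is meant to be discarded. | |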
+json_uchar JSONWorker::UTF8(const json_char * & pos, json_string & result, const json_char * const end) json_nothrow { | |
JSON_ASSERT_SAFE(((long)end - (long)pos) > 4, JSON_TEXT("UTF will go out of bounds"), return JSON_TEXT('\0');); | |
#ifdef JSON_UNICODE | |
++pos; | |
@@ -301,10 +304,29 @@ | |
++pos; | |
return temp | Hex(pos); | |
#else | |
+ /* | |
JSON_ASSERT(*(pos + 1) == JSON_TEXT('0'), JSON_TEXT("wide utf character (hihi)")); | |
JSON_ASSERT(*(pos + 2) == JSON_TEXT('0'), JSON_TEXT("wide utf character (hilo)")); | |
pos += 3; | |
return Hex(pos); | |
+ */ | |
+ pos++; // presumably steps from the 'u' onto the first of the four hex digits | |
+ int len = 1; // number of UTF-8 bytes to emit for this code unit | |
+ if (json_likely(pos[0] > '0' || pos[1] >= '8')) { // code >= 0x0800: three bytes | |
+ len = 3; | |
+ } else if (pos[1] > '0' || pos[2] >= '8') { // code >= 0x0080: two bytes (compare with the character '0', not the integer 0) | |
+ len = 2; | |
+ } | |
+ json_uchar first = Hex(pos); | |
+ pos++; | |
+ json_uchar second = Hex(pos); | |
+ uint16_t code = ((uint16_t)first << 8) | second; // pos now sits on the last character of the escape; the next loop iteration's increment moves past it | |
+ result += (len == 1) ? (json_uchar)code : (json_uchar)((0xFF << (8 - len)) | (code >> ((len - 1) * 6))); // ASCII is emitted unchanged; otherwise the lead byte carries the length prefix plus the top bits | |
+ while (--len > 0) { | |
+ result += (json_uchar)(0x80 | ((code >> ((len - 1) * 6) & 0x3F))); | |
+ } | |
+ | |
+ return '\0'; // the return value is meant to be discarded | |
#endif | |
} | |
@@ -381,7 +403,7 @@ | |
#ifdef JSON_UNICODE | |
UTF(pos, res, end); | |
#else | |
- res += UTF8(pos, end); | |
+ UTF8(pos, res, end); | |
#endif | |
break; | |
#ifndef JSON_STRICT | |
@@ -491,6 +513,7 @@ | |
END_MEM_SCOPE | |
json_uchar hi = ((p & 0x00F0) >> 4) + 48; | |
#else | |
+ // This still assumes the two leading hex digits are 00 | |
res += JSON_TEXT("00"); | |
json_uchar hi = (p >> 4) + 48; | |
#endif | |
diff -uN libjson_7.6.1/_internal/Source/JSONWorker.h libjson_7.6.1.mine/_internal/Source/JSONWorker.h | |
--- libjson_7.6.1/_internal/Source/JSONWorker.h 2012-03-20 23:18:54.000000000 +0800 | |
+++ libjson_7.6.1.mine/_internal/Source/JSONWorker.h 2013-04-19 06:05:41.000000000 +0800 | |
@@ -43,7 +43,7 @@ | |
JSON_PRIVATE | |
#ifdef JSON_READ_PRIORITY | |
static json_char Hex(const json_char * & pos) json_nothrow; | |
- static json_uchar UTF8(const json_char * & pos, const json_char * const end) json_nothrow; | |
+ static json_uchar UTF8(const json_char * & pos, json_string & result, const json_char * const end) json_nothrow; | |
#endif | |
#ifdef JSON_ESCAPE_WRITES | |
static json_string toUTF8(json_uchar p) json_nothrow; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
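This patch only fixes UTF-8 sequences of up to 3 bytes. The other cases are rare in Chinese text, so I did not handle them, although they really should be added.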