Skip to content

Instantly share code, notes, and snippets.

@LelouchHe
Last active August 29, 2015 13:56
Show Gist options
  • Save LelouchHe/8951627 to your computer and use it in GitHub Desktop.
Save LelouchHe/8951627 to your computer and use it in GitHub Desktop.
libjson 7.6.1 utf8 bug修复的patch
Common subdirectories: libjson_7.6.1/_internal/Source/JSONDefs and libjson_7.6.1.mine/_internal/Source/JSONDefs
diff -uN libjson_7.6.1/_internal/Source/JSONWorker.cpp libjson_7.6.1.mine/_internal/Source/JSONWorker.cpp
--- libjson_7.6.1/_internal/Source/JSONWorker.cpp 2012-05-30 17:14:36.000000000 +0800
+++ libjson_7.6.1.mine/_internal/Source/JSONWorker.cpp 2013-04-19 07:05:06.000000000 +0800
@@ -275,12 +275,12 @@
void JSONWorker::UTF(const json_char * & pos, json_string & result, const json_char * const end) json_nothrow {
JSON_ASSERT_SAFE(((long)end - (long)pos) > 4, JSON_TEXT("UTF will go out of bounds"), return;);
- json_uchar first = UTF8(pos, end);
+ json_uchar first = UTF8(pos, result, end);
if (json_unlikely((first > 0xD800) && (first < 0xDBFF) &&
(*(pos + 1) == '\\') && (*(pos + 2) == 'u'))){
const json_char * original_pos = pos; //if the 2nd character is not correct I need to roll back the iterator
pos += 2;
- json_uchar second = UTF8(pos, end);
+ json_uchar second = UTF8(pos, result, end);
//surrogate pair, not two characters
if (json_unlikely((second > 0xDC00) && (second < 0xDFFF))){
result += SurrogatePair(first, second);
@@ -293,7 +293,10 @@
}
#endif
-json_uchar JSONWorker::UTF8(const json_char * & pos, const json_char * const end) json_nothrow {
+// Decodes one \uxxxx escape; pos points at the 'u' on entry.
+// With JSON_UNICODE the value is still returned as a json_uchar (kept for compatibility) and result is not modified.
+// Otherwise the code point is appended to result as 1-3 bytes of UTF-8 and '\0' is returned for the caller to discard.
+json_uchar JSONWorker::UTF8(const json_char * & pos, json_string & result, const json_char * const end) json_nothrow {
JSON_ASSERT_SAFE(((long)end - (long)pos) > 4, JSON_TEXT("UTF will go out of bounds"), return JSON_TEXT('\0'););
#ifdef JSON_UNICODE
++pos;
@@ -301,10 +304,29 @@
++pos;
return temp | Hex(pos);
#else
+ /*
JSON_ASSERT(*(pos + 1) == JSON_TEXT('0'), JSON_TEXT("wide utf character (hihi)"));
JSON_ASSERT(*(pos + 2) == JSON_TEXT('0'), JSON_TEXT("wide utf character (hilo)"));
pos += 3;
return Hex(pos);
+ */
+ // Pick the UTF-8 length from the decoded value, not from the raw hex digits; surrogate pairs are NOT combined here.
+ pos++; // skip the 'u'
+ json_uchar first = Hex(pos);
+ pos++;
+ json_uchar second = Hex(pos);
+ uint16_t code = ((uint16_t)first << 8) | second; // pos is now on the last hex digit; the caller's loop increment steps past it
+ if (code < 0x80) {
+   result += (json_char)code; // ASCII: single byte with no marker bits (avoids overlong encodings)
+ } else if (code < 0x800) {
+   result += (json_char)(0xC0 | (code >> 6));
+   result += (json_char)(0x80 | (code & 0x3F));
+ } else {
+   result += (json_char)(0xE0 | (code >> 12));
+   result += (json_char)(0x80 | ((code >> 6) & 0x3F));
+   result += (json_char)(0x80 | (code & 0x3F));
+ }
+ return '\0'; // placeholder; the caller discards it
#endif
}
@@ -381,7 +403,7 @@
#ifdef JSON_UNICODE
UTF(pos, res, end);
#else
- res += UTF8(pos, end);
+ UTF8(pos, res, end);
#endif
break;
#ifndef JSON_STRICT
@@ -491,6 +513,7 @@
END_MEM_SCOPE
json_uchar hi = ((p & 0x00F0) >> 4) + 48;
#else
+ // This still assumes the two high hex digits are 00
res += JSON_TEXT("00");
json_uchar hi = (p >> 4) + 48;
#endif
diff -uN libjson_7.6.1/_internal/Source/JSONWorker.h libjson_7.6.1.mine/_internal/Source/JSONWorker.h
--- libjson_7.6.1/_internal/Source/JSONWorker.h 2012-03-20 23:18:54.000000000 +0800
+++ libjson_7.6.1.mine/_internal/Source/JSONWorker.h 2013-04-19 06:05:41.000000000 +0800
@@ -43,7 +43,7 @@
JSON_PRIVATE
#ifdef JSON_READ_PRIORITY
static json_char Hex(const json_char * & pos) json_nothrow;
- static json_uchar UTF8(const json_char * & pos, const json_char * const end) json_nothrow;
+ static json_uchar UTF8(const json_char * & pos, json_string & result, const json_char * const end) json_nothrow;
#endif
#ifdef JSON_ESCAPE_WRITES
static json_string toUTF8(json_uchar p) json_nothrow;
@LelouchHe
Copy link
Author

这个 patch 只修复了最多 3 字节 UTF-8 的情况。超过 3 字节的情况在中文文本中比较少见,所以没有处理,其实应该补上。

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment