Created
April 22, 2019 13:53
-
-
Save lexborisov/78ac7d8c3ede83c4a2f2345ea878618c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# HG changeset patch | |
# User Alexander Borisov <alexander.borisov@nginx.com> | |
# Date 1555939407 -10800 | |
# Mon Apr 22 16:23:27 2019 +0300 | |
# Node ID 8585f3f8959538a8b8602166a8c5c3f1193d2822 | |
# Parent 3869b6e1f29672571cfdf6f4e8a7f63f58cb7621 | |
Added allocation size check for njs_string_alloc() function. | |
diff -r 3869b6e1f296 -r 8585f3f89595 njs/njs_string.c | |
--- a/njs/njs_string.c Sun Apr 21 17:36:25 2019 +0800 | |
+++ b/njs/njs_string.c Mon Apr 22 16:23:27 2019 +0300 | |
@@ -181,12 +181,16 @@ njs_string_new(njs_vm_t *vm, njs_value_t | |
nxt_noinline u_char * | |
-njs_string_alloc(njs_vm_t *vm, njs_value_t *value, uint32_t size, | |
- uint32_t length) | |
+njs_string_alloc(njs_vm_t *vm, njs_value_t *value, uint64_t size, | |
+ uint64_t length) | |
{ | |
- uint32_t total, map_offset, *map; | |
+ uint64_t total, map_offset, *map; | |
njs_string_t *string; | |
+ if (nxt_slow_path(size >= NJS_STRING_MAX_LENGTH)) { | |
+ goto memory_error; | |
+ } | |
+ | |
value->type = NJS_STRING; | |
njs_string_truth(value, size); | |
@@ -225,13 +229,15 @@ njs_string_alloc(njs_vm_t *vm, njs_value | |
string->retain = 1; | |
if (map_offset != 0) { | |
- map = (uint32_t *) (string->start + map_offset); | |
+ map = (uint64_t *) (string->start + map_offset); | |
map[0] = 0; | |
} | |
return string->start; | |
} | |
+memory_error: | |
+ | |
njs_memory_error(vm); | |
return NULL; | |
diff -r 3869b6e1f296 -r 8585f3f89595 njs/njs_string.h | |
--- a/njs/njs_string.h Sun Apr 21 17:36:25 2019 +0800 | |
+++ b/njs/njs_string.h Mon Apr 22 16:23:27 2019 +0300 | |
@@ -141,8 +141,8 @@ njs_string_length(njs_value_t *string) | |
njs_ret_t njs_string_set(njs_vm_t *vm, njs_value_t *value, const u_char *start, | |
uint32_t size); | |
-u_char *njs_string_alloc(njs_vm_t *vm, njs_value_t *value, uint32_t size, | |
- uint32_t length); | |
+u_char *njs_string_alloc(njs_vm_t *vm, njs_value_t *value, uint64_t size, | |
+ uint64_t length); | |
njs_ret_t njs_string_new(njs_vm_t *vm, njs_value_t *value, const u_char *start, | |
uint32_t size, uint32_t length); | |
njs_ret_t njs_string_hex(njs_vm_t *vm, njs_value_t *value, | |
# HG changeset patch | |
# User Alexander Borisov <alexander.borisov@nginx.com> | |
# Date 1555939423 -10800 | |
# Mon Apr 22 16:23:43 2019 +0300 | |
# Node ID 72c82b61bc149b41599ed2570fefd03e55fb1761 | |
# Parent 8585f3f8959538a8b8602166a8c5c3f1193d2822 | |
Fixed length calculation for UTF-8 strings with escape characters. | |
This closes #133 issue on GitHub. | |
diff -r 8585f3f89595 -r 72c82b61bc14 njs/njs_parser_terminal.c | |
--- a/njs/njs_parser_terminal.c Mon Apr 22 16:23:27 2019 +0300 | |
+++ b/njs/njs_parser_terminal.c Mon Apr 22 16:23:43 2019 +0300 | |
@@ -24,6 +24,8 @@ static njs_token_t njs_parser_array(njs_ | |
njs_parser_node_t *array); | |
static nxt_int_t njs_parser_array_item(njs_vm_t *vm, njs_parser_t *parser, | |
njs_parser_node_t *array, njs_parser_node_t *value); | |
+static njs_ret_t njs_parser_calc_escape_string(njs_vm_t *vm, | |
+ njs_parser_t *parser, size_t *out_size, size_t *out_length); | |
static njs_token_t njs_parser_escape_string_create(njs_vm_t *vm, | |
njs_parser_t *parser, njs_value_t *value); | |
@@ -724,176 +726,264 @@ njs_parser_escape_string_create(njs_vm_t | |
njs_value_t *value) | |
{ | |
u_char c, *start, *dst; | |
- size_t size,length, hex_length; | |
- uint64_t u; | |
+ size_t size, length, hex_length; | |
+ uint64_t cp; | |
+ njs_ret_t ret; | |
nxt_str_t *string; | |
- const u_char *p, *src, *end, *hex_end; | |
+ const u_char *src, *end, *hex_end; | |
- start = NULL; | |
- dst = NULL; | |
+ ret = njs_parser_calc_escape_string(vm, parser, &size, &length); | |
+ if (nxt_slow_path(ret != NXT_OK)) { | |
+ return NJS_TOKEN_ILLEGAL; | |
+ } | |
+ | |
+ start = njs_string_alloc(vm, value, size, length); | |
+ if (nxt_slow_path(start == NULL)) { | |
+ return NJS_TOKEN_ERROR; | |
+ } | |
- for ( ;; ) { | |
- /* | |
- * The loop runs twice: at the first step string size and | |
- * UTF-8 length are evaluated. Then the string is allocated | |
- * and at the second step string content is copied. | |
- */ | |
- size = 0; | |
- length = 0; | |
+ dst = start; | |
+ | |
+ string = njs_parser_text(parser); | |
+ src = string->start; | |
+ end = src + string->length; | |
- string = njs_parser_text(parser); | |
- src = string->start; | |
- end = src + string->length; | |
+ while (src < end) { | |
+ c = *src++; | |
- while (src < end) { | |
+ if (c == '\\') { | |
+ /* | |
+ * Testing "src == end" is not required here | |
+ * since this has been already tested by lexer. | |
+ */ | |
+ | |
c = *src++; | |
- if (c == '\\') { | |
+ switch (c) { | |
+ case 'u': | |
/* | |
- * Testing "src == end" is not required here | |
- * since this has been already tested by lexer. | |
+ * A character after "u" can be safely tested here | |
+ * because there is always a closing quote at the | |
+ * end of string: ...\u". | |
*/ | |
- c = *src++; | |
+ | |
+ if (*src != '{') { | |
+ hex_length = 4; | |
+ goto hex_length; | |
+ } | |
+ | |
+ src++; | |
+ hex_length = 0; | |
+ hex_end = end; | |
- switch (c) { | |
+ goto hex; | |
+ | |
+ case 'x': | |
+ hex_length = 2; | |
+ goto hex_length; | |
+ | |
+ case '0': | |
+ c = '\0'; | |
+ break; | |
- case 'u': | |
- hex_length = 4; | |
- /* | |
- * A character after "u" can be safely tested here | |
- * because there is always a closing quote at the | |
- * end of string: ...\u". | |
- */ | |
- if (*src != '{') { | |
- goto hex_length_test; | |
- } | |
+ case 'b': | |
+ c = '\b'; | |
+ break; | |
+ | |
+ case 'f': | |
+ c = '\f'; | |
+ break; | |
+ | |
+ case 'n': | |
+ c = '\n'; | |
+ break; | |
+ | |
+ case 'r': | |
+ c = '\r'; | |
+ break; | |
+ case 't': | |
+ c = '\t'; | |
+ break; | |
+ | |
+ case 'v': | |
+ c = '\v'; | |
+ break; | |
+ | |
+ case '\r': | |
+ /* | |
+ * A character after "\r" can be safely tested here | |
+ * because there is always a closing quote at the | |
+ * end of string: ...\\r". | |
+ */ | |
+ | |
+ if (*src == '\n') { | |
src++; | |
- hex_length = 0; | |
- hex_end = end; | |
+ } | |
- goto hex; | |
+ continue; | |
- case 'x': | |
- hex_length = 2; | |
- goto hex_length_test; | |
+ case '\n': | |
+ continue; | |
- case '0': | |
- c = '\0'; | |
- break; | |
+ default: | |
+ break; | |
+ } | |
+ } | |
- case 'b': | |
- c = '\b'; | |
- break; | |
+ *dst++ = c; | |
- case 'f': | |
- c = '\f'; | |
- break; | |
+ continue; | |
+ | |
+ hex_length: | |
- case 'n': | |
- c = '\n'; | |
- break; | |
+ hex_end = src + hex_length; | |
+ | |
+ hex: | |
+ cp = njs_number_hex_parse(&src, hex_end); | |
- case 'r': | |
- c = '\r'; | |
- break; | |
+ dst = nxt_utf8_encode(dst, (uint32_t) cp); | |
+ if (nxt_slow_path(dst == NULL)) { | |
+ njs_parser_syntax_error(vm, parser, | |
+ "Invalid Unicode code point \"%V\"", | |
+ njs_parser_text(parser)); | |
+ | |
+ return NJS_TOKEN_ILLEGAL; | |
+ } | |
- case 't': | |
- c = '\t'; | |
- break; | |
+ /* Skip '}' character */ | |
+ if (hex_length == 0) { | |
+ src++; | |
+ } | |
+ } | |
+ | |
+ if (length > NJS_STRING_MAP_STRIDE && length != size) { | |
+ njs_string_offset_map_init(start, size); | |
+ } | |
- case 'v': | |
- c = '\v'; | |
- break; | |
+ return NJS_TOKEN_STRING; | |
+} | |
+ | |
- case '\r': | |
- /* | |
- * A character after "\r" can be safely tested here | |
- * because there is always a closing quote at the | |
- * end of string: ...\\r". | |
- */ | |
- if (*src == '\n') { | |
- src++; | |
- } | |
+static njs_ret_t | |
+njs_parser_calc_escape_string(njs_vm_t *vm, njs_parser_t *parser, | |
+ size_t *out_size, size_t *out_length) | |
+{ | |
+ size_t size, length, hex_length; | |
+ uint64_t cp; | |
+ nxt_str_t *string; | |
+ const u_char *ptr, *src, *end, *hex_end; | |
+ | |
+ size = 0; | |
+ length = 0; | |
+ | |
+ string = njs_parser_text(parser); | |
+ src = string->start; | |
+ end = src + string->length; | |
+ | |
+ while (src < end) { | |
- continue; | |
+ if (*src == '\\') { | |
+ src++; | |
- case '\n': | |
- continue; | |
+ switch (*src) { | |
+ case 'u': | |
+ src++; | |
- default: | |
- break; | |
+ if (*src != '{') { | |
+ hex_length = 4; | |
+ goto hex_length; | |
} | |
- } | |
+ | |
+ src++; | |
+ hex_length = 0; | |
+ hex_end = end; | |
+ | |
+ goto hex; | |
+ | |
+ case 'x': | |
+ src++; | |
+ hex_length = 2; | |
+ goto hex_length; | |
- size++; | |
- length++; | |
+ case '\r': | |
+ src++; | |
- if (dst != NULL) { | |
- *dst++ = c; | |
- } | |
+ if (*src == '\n') { | |
+ src++; | |
+ } | |
+ | |
+ continue; | |
- continue; | |
- | |
- hex_length_test: | |
+ case '\n': | |
+ src++; | |
+ continue; | |
- hex_end = src + hex_length; | |
+ default: | |
+ break; | |
+ } | |
+ } | |
- if (hex_end > end) { | |
+ if (*src >= 0x80) { | |
+ ptr = src; | |
+ | |
+ if (nxt_slow_path(nxt_utf8_decode(&src, end) == 0xffffffff)) { | |
goto invalid; | |
} | |
- hex: | |
+ size += src - ptr; | |
+ length++; | |
+ | |
+ continue; | |
+ } | |
- p = src; | |
- u = njs_number_hex_parse(&src, hex_end); | |
+ src++; | |
+ size++; | |
+ length++; | |
+ | |
+ continue; | |
+ | |
+ hex_length: | |
- if (hex_length != 0) { | |
- if (src != hex_end) { | |
- goto invalid; | |
- } | |
+ hex_end = src + hex_length; | |
+ | |
+ if (nxt_slow_path(hex_end > end)) { | |
+ goto invalid; | |
+ } | |
- } else { | |
- if (src == p || (src - p) > 6) { | |
- goto invalid; | |
- } | |
+ hex: | |
- if (src == end || *src++ != '}') { | |
- goto invalid; | |
- } | |
+ ptr = src; | |
+ cp = njs_number_hex_parse(&src, hex_end); | |
+ | |
+ if (hex_length != 0) { | |
+ if (src != hex_end) { | |
+ goto invalid; | |
} | |
- size += nxt_utf8_size(u); | |
- length++; | |
+ } else { | |
+ if (src == ptr || (src - ptr) > 6) { | |
+ goto invalid; | |
+ } | |
- if (dst != NULL) { | |
- dst = nxt_utf8_encode(dst, (uint32_t) u); | |
- if (dst == NULL) { | |
- goto invalid; | |
- } | |
+ if (src == end || *src++ != '}') { | |
+ goto invalid; | |
} | |
} | |
- if (start != NULL) { | |
- if (length > NJS_STRING_MAP_STRIDE && length != size) { | |
- njs_string_offset_map_init(start, size); | |
- } | |
- | |
- return NJS_TOKEN_STRING; | |
- } | |
+ size += nxt_utf8_size(cp); | |
+ length++; | |
+ } | |
- start = njs_string_alloc(vm, value, size, length); | |
- if (nxt_slow_path(start == NULL)) { | |
- return NJS_TOKEN_ERROR; | |
- } | |
+ *out_size = size; | |
+ *out_length = length; | |
- dst = start; | |
- } | |
+ return NXT_OK; | |
invalid: | |
njs_parser_syntax_error(vm, parser, "Invalid Unicode code point \"%V\"", | |
njs_parser_text(parser)); | |
- return NJS_TOKEN_ILLEGAL; | |
+ return NJS_ERROR; | |
} | |
diff -r 8585f3f89595 -r 72c82b61bc14 njs/test/njs_unit_test.c | |
--- a/njs/test/njs_unit_test.c Mon Apr 22 16:23:27 2019 +0300 | |
+++ b/njs/test/njs_unit_test.c Mon Apr 22 16:23:43 2019 +0300 | |
@@ -4318,6 +4318,12 @@ static njs_unit_test_t njs_test[] = | |
{ nxt_string("'abc'.length"), | |
nxt_string("3") }, | |
+ { nxt_string("'привет\\n'.length"), | |
+ nxt_string("7") }, | |
+ | |
+ { nxt_string("'привет\\n\\u{61}\\u{3B1}\\u{20AC}'.length"), | |
+ nxt_string("10") }, | |
+ | |
{ nxt_string("''.hasOwnProperty('length')"), | |
nxt_string("true") }, | |
# HG changeset patch | |
# User Alexander Borisov <alexander.borisov@nginx.com> | |
# Date 1555939430 -10800 | |
# Mon Apr 22 16:23:50 2019 +0300 | |
# Node ID d266822aa1311a0b8079a009d9e2789fbc5478e8 | |
# Parent 72c82b61bc149b41599ed2570fefd03e55fb1761 | |
Fixed parsing of surrogate pairs presented as UTF-16 escape sequences. | |
This closes #96 issue on GitHub. | |
diff -r 72c82b61bc14 -r d266822aa131 njs/njs_parser_terminal.c | |
--- a/njs/njs_parser_terminal.c Mon Apr 22 16:23:43 2019 +0300 | |
+++ b/njs/njs_parser_terminal.c Mon Apr 22 16:23:50 2019 +0300 | |
@@ -727,7 +727,7 @@ njs_parser_escape_string_create(njs_vm_t | |
{ | |
u_char c, *start, *dst; | |
size_t size, length, hex_length; | |
- uint64_t cp; | |
+ uint64_t cp, cp_pair; | |
njs_ret_t ret; | |
nxt_str_t *string; | |
const u_char *src, *end, *hex_end; | |
@@ -743,6 +743,7 @@ njs_parser_escape_string_create(njs_vm_t | |
} | |
dst = start; | |
+ cp_pair = 0; | |
string = njs_parser_text(parser); | |
src = string->start; | |
@@ -842,6 +843,21 @@ njs_parser_escape_string_create(njs_vm_t | |
hex: | |
cp = njs_number_hex_parse(&src, hex_end); | |
+ /* Skip '}' character. */ | |
+ if (hex_length == 0) { | |
+ src++; | |
+ } | |
+ | |
+ /* Surrogate pair. */ | |
+ if (cp_pair != 0) { | |
+ cp = 0x10000 + ((cp_pair - 0xd800) << 10) + (cp - 0xdc00); | |
+ cp_pair = 0; | |
+ | |
+ } else if (cp >= 0xd800 && cp <= 0xdfff) { | |
+ cp_pair = cp; | |
+ continue; | |
+ } | |
+ | |
dst = nxt_utf8_encode(dst, (uint32_t) cp); | |
if (nxt_slow_path(dst == NULL)) { | |
njs_parser_syntax_error(vm, parser, | |
@@ -850,11 +866,6 @@ njs_parser_escape_string_create(njs_vm_t | |
return NJS_TOKEN_ILLEGAL; | |
} | |
- | |
- /* Skip '}' character */ | |
- if (hex_length == 0) { | |
- src++; | |
- } | |
} | |
if (length > NJS_STRING_MAP_STRIDE && length != size) { | |
@@ -870,12 +881,13 @@ njs_parser_calc_escape_string(njs_vm_t * | |
size_t *out_size, size_t *out_length) | |
{ | |
size_t size, length, hex_length; | |
- uint64_t cp; | |
+ uint64_t cp, cp_pair; | |
nxt_str_t *string; | |
const u_char *ptr, *src, *end, *hex_end; | |
size = 0; | |
length = 0; | |
+ cp_pair = 0; | |
string = njs_parser_text(parser); | |
src = string->start; | |
@@ -971,6 +983,24 @@ njs_parser_calc_escape_string(njs_vm_t * | |
} | |
} | |
+ /* Surrogate pair. */ | |
+ if (cp_pair != 0) { | |
+ if (nxt_slow_path(cp < 0xdc00 || cp > 0xdfff)) { | |
+ goto invalid_pair; | |
+ } | |
+ | |
+ cp = 0x10000 + ((cp_pair - 0xd800) << 10) + (cp - 0xdc00); | |
+ cp_pair = 0; | |
+ | |
+ } else if (cp >= 0xd800 && cp <= 0xdfff) { | |
+ if (nxt_slow_path(cp > 0xdbff || src[0] != '\\' || src[1] != 'u')) { | |
+ goto invalid_pair; | |
+ } | |
+ | |
+ cp_pair = cp; | |
+ continue; | |
+ } | |
+ | |
size += nxt_utf8_size(cp); | |
length++; | |
} | |
@@ -986,4 +1016,11 @@ invalid: | |
njs_parser_text(parser)); | |
return NJS_ERROR; | |
+ | |
+invalid_pair: | |
+ | |
+ njs_parser_syntax_error(vm, parser, "Invalid surrogate pair \"%V\"", | |
+ njs_parser_text(parser)); | |
+ | |
+ return NJS_ERROR; | |
} | |
diff -r 72c82b61bc14 -r d266822aa131 njs/test/njs_unit_test.c | |
--- a/njs/test/njs_unit_test.c Mon Apr 22 16:23:43 2019 +0300 | |
+++ b/njs/test/njs_unit_test.c Mon Apr 22 16:23:50 2019 +0300 | |
@@ -4324,6 +4324,23 @@ static njs_unit_test_t njs_test[] = | |
{ nxt_string("'привет\\n\\u{61}\\u{3B1}\\u{20AC}'.length"), | |
nxt_string("10") }, | |
+ { nxt_string("'\\ud83d\\udc4d'"), | |
+ nxt_string("\xf0\x9f\x91\x8d") }, | |
+ | |
+ { nxt_string("'\\ud83d\\udc4d'.length"), | |
+ nxt_string("1") }, | |
+ | |
+ { nxt_string("'\\ud83d abc \\udc4d'"), | |
+ nxt_string("SyntaxError: Invalid surrogate pair " | |
+ "\"\\ud83d abc \\udc4d\" in 1") }, | |
+ | |
+ { nxt_string("'\\ud83d'"), | |
+ nxt_string("SyntaxError: Invalid surrogate pair \"\\ud83d\" in 1") }, | |
+ | |
+ { nxt_string("'\\ud83d\\uabcd'"), | |
+ nxt_string("SyntaxError: Invalid surrogate pair " | |
+ "\"\\ud83d\\uabcd\" in 1") }, | |
+ | |
{ nxt_string("''.hasOwnProperty('length')"), | |
nxt_string("true") }, | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment