Skip to content

Instantly share code, notes, and snippets.

@lexborisov
Created April 22, 2019 13:53
Show Gist options
  • Save lexborisov/78ac7d8c3ede83c4a2f2345ea878618c to your computer and use it in GitHub Desktop.
Save lexborisov/78ac7d8c3ede83c4a2f2345ea878618c to your computer and use it in GitHub Desktop.
# HG changeset patch
# User Alexander Borisov <alexander.borisov@nginx.com>
# Date 1555939407 -10800
# Mon Apr 22 16:23:27 2019 +0300
# Node ID 8585f3f8959538a8b8602166a8c5c3f1193d2822
# Parent 3869b6e1f29672571cfdf6f4e8a7f63f58cb7621
Added allocation size check for njs_string_alloc() function.
diff -r 3869b6e1f296 -r 8585f3f89595 njs/njs_string.c
--- a/njs/njs_string.c Sun Apr 21 17:36:25 2019 +0800
+++ b/njs/njs_string.c Mon Apr 22 16:23:27 2019 +0300
@@ -181,12 +181,16 @@ njs_string_new(njs_vm_t *vm, njs_value_t
nxt_noinline u_char *
-njs_string_alloc(njs_vm_t *vm, njs_value_t *value, uint32_t size,
- uint32_t length)
+njs_string_alloc(njs_vm_t *vm, njs_value_t *value, uint64_t size,
+ uint64_t length)
{
- uint32_t total, map_offset, *map;
+ uint64_t total, map_offset, *map;
njs_string_t *string;
+ if (nxt_slow_path(size >= NJS_STRING_MAX_LENGTH)) {
+ goto memory_error;
+ }
+
value->type = NJS_STRING;
njs_string_truth(value, size);
@@ -225,13 +229,15 @@ njs_string_alloc(njs_vm_t *vm, njs_value
string->retain = 1;
if (map_offset != 0) {
- map = (uint32_t *) (string->start + map_offset);
+ map = (uint64_t *) (string->start + map_offset);
map[0] = 0;
}
return string->start;
}
+memory_error:
+
njs_memory_error(vm);
return NULL;
diff -r 3869b6e1f296 -r 8585f3f89595 njs/njs_string.h
--- a/njs/njs_string.h Sun Apr 21 17:36:25 2019 +0800
+++ b/njs/njs_string.h Mon Apr 22 16:23:27 2019 +0300
@@ -141,8 +141,8 @@ njs_string_length(njs_value_t *string)
njs_ret_t njs_string_set(njs_vm_t *vm, njs_value_t *value, const u_char *start,
uint32_t size);
-u_char *njs_string_alloc(njs_vm_t *vm, njs_value_t *value, uint32_t size,
- uint32_t length);
+u_char *njs_string_alloc(njs_vm_t *vm, njs_value_t *value, uint64_t size,
+ uint64_t length);
njs_ret_t njs_string_new(njs_vm_t *vm, njs_value_t *value, const u_char *start,
uint32_t size, uint32_t length);
njs_ret_t njs_string_hex(njs_vm_t *vm, njs_value_t *value,
# HG changeset patch
# User Alexander Borisov <alexander.borisov@nginx.com>
# Date 1555939423 -10800
# Mon Apr 22 16:23:43 2019 +0300
# Node ID 72c82b61bc149b41599ed2570fefd03e55fb1761
# Parent 8585f3f8959538a8b8602166a8c5c3f1193d2822
Fixed length calculation for UTF-8 strings with escape characters.
This closes #133 issue on GitHub.
diff -r 8585f3f89595 -r 72c82b61bc14 njs/njs_parser_terminal.c
--- a/njs/njs_parser_terminal.c Mon Apr 22 16:23:27 2019 +0300
+++ b/njs/njs_parser_terminal.c Mon Apr 22 16:23:43 2019 +0300
@@ -24,6 +24,8 @@ static njs_token_t njs_parser_array(njs_
njs_parser_node_t *array);
static nxt_int_t njs_parser_array_item(njs_vm_t *vm, njs_parser_t *parser,
njs_parser_node_t *array, njs_parser_node_t *value);
+static njs_ret_t njs_parser_calc_escape_string(njs_vm_t *vm,
+ njs_parser_t *parser, size_t *out_size, size_t *out_length);
static njs_token_t njs_parser_escape_string_create(njs_vm_t *vm,
njs_parser_t *parser, njs_value_t *value);
@@ -724,176 +726,264 @@ njs_parser_escape_string_create(njs_vm_t
njs_value_t *value)
{
u_char c, *start, *dst;
- size_t size,length, hex_length;
- uint64_t u;
+ size_t size, length, hex_length;
+ uint64_t cp;
+ njs_ret_t ret;
nxt_str_t *string;
- const u_char *p, *src, *end, *hex_end;
+ const u_char *src, *end, *hex_end;
- start = NULL;
- dst = NULL;
+ ret = njs_parser_calc_escape_string(vm, parser, &size, &length);
+ if (nxt_slow_path(ret != NXT_OK)) {
+ return NJS_TOKEN_ILLEGAL;
+ }
+
+ start = njs_string_alloc(vm, value, size, length);
+ if (nxt_slow_path(start == NULL)) {
+ return NJS_TOKEN_ERROR;
+ }
- for ( ;; ) {
- /*
- * The loop runs twice: at the first step string size and
- * UTF-8 length are evaluated. Then the string is allocated
- * and at the second step string content is copied.
- */
- size = 0;
- length = 0;
+ dst = start;
+
+ string = njs_parser_text(parser);
+ src = string->start;
+ end = src + string->length;
- string = njs_parser_text(parser);
- src = string->start;
- end = src + string->length;
+ while (src < end) {
+ c = *src++;
- while (src < end) {
+ if (c == '\\') {
+ /*
+ * Testing "src == end" is not required here
+ * since this has been already tested by lexer.
+ */
+
c = *src++;
- if (c == '\\') {
+ switch (c) {
+ case 'u':
/*
- * Testing "src == end" is not required here
- * since this has been already tested by lexer.
+ * A character after "u" can be safely tested here
+ * because there is always a closing quote at the
+ * end of string: ...\u".
*/
- c = *src++;
+
+ if (*src != '{') {
+ hex_length = 4;
+ goto hex_length;
+ }
+
+ src++;
+ hex_length = 0;
+ hex_end = end;
- switch (c) {
+ goto hex;
+
+ case 'x':
+ hex_length = 2;
+ goto hex_length;
+
+ case '0':
+ c = '\0';
+ break;
- case 'u':
- hex_length = 4;
- /*
- * A character after "u" can be safely tested here
- * because there is always a closing quote at the
- * end of string: ...\u".
- */
- if (*src != '{') {
- goto hex_length_test;
- }
+ case 'b':
+ c = '\b';
+ break;
+
+ case 'f':
+ c = '\f';
+ break;
+
+ case 'n':
+ c = '\n';
+ break;
+
+ case 'r':
+ c = '\r';
+ break;
+ case 't':
+ c = '\t';
+ break;
+
+ case 'v':
+ c = '\v';
+ break;
+
+ case '\r':
+ /*
+ * A character after "\r" can be safely tested here
+ * because there is always a closing quote at the
+ * end of string: ...\\r".
+ */
+
+ if (*src == '\n') {
src++;
- hex_length = 0;
- hex_end = end;
+ }
- goto hex;
+ continue;
- case 'x':
- hex_length = 2;
- goto hex_length_test;
+ case '\n':
+ continue;
- case '0':
- c = '\0';
- break;
+ default:
+ break;
+ }
+ }
- case 'b':
- c = '\b';
- break;
+ *dst++ = c;
- case 'f':
- c = '\f';
- break;
+ continue;
+
+ hex_length:
- case 'n':
- c = '\n';
- break;
+ hex_end = src + hex_length;
+
+ hex:
+ cp = njs_number_hex_parse(&src, hex_end);
- case 'r':
- c = '\r';
- break;
+ dst = nxt_utf8_encode(dst, (uint32_t) cp);
+ if (nxt_slow_path(dst == NULL)) {
+ njs_parser_syntax_error(vm, parser,
+ "Invalid Unicode code point \"%V\"",
+ njs_parser_text(parser));
+
+ return NJS_TOKEN_ILLEGAL;
+ }
- case 't':
- c = '\t';
- break;
+ /* Skip '}' character */
+ if (hex_length == 0) {
+ src++;
+ }
+ }
+
+ if (length > NJS_STRING_MAP_STRIDE && length != size) {
+ njs_string_offset_map_init(start, size);
+ }
- case 'v':
- c = '\v';
- break;
+ return NJS_TOKEN_STRING;
+}
+
- case '\r':
- /*
- * A character after "\r" can be safely tested here
- * because there is always a closing quote at the
- * end of string: ...\\r".
- */
- if (*src == '\n') {
- src++;
- }
+static njs_ret_t
+njs_parser_calc_escape_string(njs_vm_t *vm, njs_parser_t *parser,
+ size_t *out_size, size_t *out_length)
+{
+ size_t size, length, hex_length;
+ uint64_t cp;
+ nxt_str_t *string;
+ const u_char *ptr, *src, *end, *hex_end;
+
+ size = 0;
+ length = 0;
+
+ string = njs_parser_text(parser);
+ src = string->start;
+ end = src + string->length;
+
+ while (src < end) {
- continue;
+ if (*src == '\\') {
+ src++;
- case '\n':
- continue;
+ switch (*src) {
+ case 'u':
+ src++;
- default:
- break;
+ if (*src != '{') {
+ hex_length = 4;
+ goto hex_length;
}
- }
+
+ src++;
+ hex_length = 0;
+ hex_end = end;
+
+ goto hex;
+
+ case 'x':
+ src++;
+ hex_length = 2;
+ goto hex_length;
- size++;
- length++;
+ case '\r':
+ src++;
- if (dst != NULL) {
- *dst++ = c;
- }
+ if (*src == '\n') {
+ src++;
+ }
+
+ continue;
- continue;
-
- hex_length_test:
+ case '\n':
+ src++;
+ continue;
- hex_end = src + hex_length;
+ default:
+ break;
+ }
+ }
- if (hex_end > end) {
+ if (*src >= 0x80) {
+ ptr = src;
+
+ if (nxt_slow_path(nxt_utf8_decode(&src, end) == 0xffffffff)) {
goto invalid;
}
- hex:
+ size += src - ptr;
+ length++;
+
+ continue;
+ }
- p = src;
- u = njs_number_hex_parse(&src, hex_end);
+ src++;
+ size++;
+ length++;
+
+ continue;
+
+ hex_length:
- if (hex_length != 0) {
- if (src != hex_end) {
- goto invalid;
- }
+ hex_end = src + hex_length;
+
+ if (nxt_slow_path(hex_end > end)) {
+ goto invalid;
+ }
- } else {
- if (src == p || (src - p) > 6) {
- goto invalid;
- }
+ hex:
- if (src == end || *src++ != '}') {
- goto invalid;
- }
+ ptr = src;
+ cp = njs_number_hex_parse(&src, hex_end);
+
+ if (hex_length != 0) {
+ if (src != hex_end) {
+ goto invalid;
}
- size += nxt_utf8_size(u);
- length++;
+ } else {
+ if (src == ptr || (src - ptr) > 6) {
+ goto invalid;
+ }
- if (dst != NULL) {
- dst = nxt_utf8_encode(dst, (uint32_t) u);
- if (dst == NULL) {
- goto invalid;
- }
+ if (src == end || *src++ != '}') {
+ goto invalid;
}
}
- if (start != NULL) {
- if (length > NJS_STRING_MAP_STRIDE && length != size) {
- njs_string_offset_map_init(start, size);
- }
-
- return NJS_TOKEN_STRING;
- }
+ size += nxt_utf8_size(cp);
+ length++;
+ }
- start = njs_string_alloc(vm, value, size, length);
- if (nxt_slow_path(start == NULL)) {
- return NJS_TOKEN_ERROR;
- }
+ *out_size = size;
+ *out_length = length;
- dst = start;
- }
+ return NXT_OK;
invalid:
njs_parser_syntax_error(vm, parser, "Invalid Unicode code point \"%V\"",
njs_parser_text(parser));
- return NJS_TOKEN_ILLEGAL;
+ return NJS_ERROR;
}
diff -r 8585f3f89595 -r 72c82b61bc14 njs/test/njs_unit_test.c
--- a/njs/test/njs_unit_test.c Mon Apr 22 16:23:27 2019 +0300
+++ b/njs/test/njs_unit_test.c Mon Apr 22 16:23:43 2019 +0300
@@ -4318,6 +4318,12 @@ static njs_unit_test_t njs_test[] =
{ nxt_string("'abc'.length"),
nxt_string("3") },
+ { nxt_string("'привет\\n'.length"),
+ nxt_string("7") },
+
+ { nxt_string("'привет\\n\\u{61}\\u{3B1}\\u{20AC}'.length"),
+ nxt_string("10") },
+
{ nxt_string("''.hasOwnProperty('length')"),
nxt_string("true") },
# HG changeset patch
# User Alexander Borisov <alexander.borisov@nginx.com>
# Date 1555939430 -10800
# Mon Apr 22 16:23:50 2019 +0300
# Node ID d266822aa1311a0b8079a009d9e2789fbc5478e8
# Parent 72c82b61bc149b41599ed2570fefd03e55fb1761
Fixed parsing of surrogate pairs presented as UTF-16 escape sequences.
This closes #96 issue on GitHub.
diff -r 72c82b61bc14 -r d266822aa131 njs/njs_parser_terminal.c
--- a/njs/njs_parser_terminal.c Mon Apr 22 16:23:43 2019 +0300
+++ b/njs/njs_parser_terminal.c Mon Apr 22 16:23:50 2019 +0300
@@ -727,7 +727,7 @@ njs_parser_escape_string_create(njs_vm_t
{
u_char c, *start, *dst;
size_t size, length, hex_length;
- uint64_t cp;
+ uint64_t cp, cp_pair;
njs_ret_t ret;
nxt_str_t *string;
const u_char *src, *end, *hex_end;
@@ -743,6 +743,7 @@ njs_parser_escape_string_create(njs_vm_t
}
dst = start;
+ cp_pair = 0;
string = njs_parser_text(parser);
src = string->start;
@@ -842,6 +843,21 @@ njs_parser_escape_string_create(njs_vm_t
hex:
cp = njs_number_hex_parse(&src, hex_end);
+ /* Skip '}' character. */
+ if (hex_length == 0) {
+ src++;
+ }
+
+ /* Surrogate pair. */
+ if (cp_pair != 0) {
+ cp = 0x10000 + ((cp_pair - 0xd800) << 10) + (cp - 0xdc00);
+ cp_pair = 0;
+
+ } else if (cp >= 0xd800 && cp <= 0xdfff) {
+ cp_pair = cp;
+ continue;
+ }
+
dst = nxt_utf8_encode(dst, (uint32_t) cp);
if (nxt_slow_path(dst == NULL)) {
njs_parser_syntax_error(vm, parser,
@@ -850,11 +866,6 @@ njs_parser_escape_string_create(njs_vm_t
return NJS_TOKEN_ILLEGAL;
}
-
- /* Skip '}' character */
- if (hex_length == 0) {
- src++;
- }
}
if (length > NJS_STRING_MAP_STRIDE && length != size) {
@@ -870,12 +881,13 @@ njs_parser_calc_escape_string(njs_vm_t *
size_t *out_size, size_t *out_length)
{
size_t size, length, hex_length;
- uint64_t cp;
+ uint64_t cp, cp_pair;
nxt_str_t *string;
const u_char *ptr, *src, *end, *hex_end;
size = 0;
length = 0;
+ cp_pair = 0;
string = njs_parser_text(parser);
src = string->start;
@@ -971,6 +983,24 @@ njs_parser_calc_escape_string(njs_vm_t *
}
}
+ /* Surrogate pair. */
+ if (cp_pair != 0) {
+ if (nxt_slow_path(cp < 0xdc00 || cp > 0xdfff)) {
+ goto invalid_pair;
+ }
+
+ cp = 0x10000 + ((cp_pair - 0xd800) << 10) + (cp - 0xdc00);
+ cp_pair = 0;
+
+ } else if (cp >= 0xd800 && cp <= 0xdfff) {
+ if (nxt_slow_path(cp > 0xdbff || src[0] != '\\' || src[1] != 'u')) {
+ goto invalid_pair;
+ }
+
+ cp_pair = cp;
+ continue;
+ }
+
size += nxt_utf8_size(cp);
length++;
}
@@ -986,4 +1016,11 @@ invalid:
njs_parser_text(parser));
return NJS_ERROR;
+
+invalid_pair:
+
+ njs_parser_syntax_error(vm, parser, "Invalid surrogate pair \"%V\"",
+ njs_parser_text(parser));
+
+ return NJS_ERROR;
}
diff -r 72c82b61bc14 -r d266822aa131 njs/test/njs_unit_test.c
--- a/njs/test/njs_unit_test.c Mon Apr 22 16:23:43 2019 +0300
+++ b/njs/test/njs_unit_test.c Mon Apr 22 16:23:50 2019 +0300
@@ -4324,6 +4324,23 @@ static njs_unit_test_t njs_test[] =
{ nxt_string("'привет\\n\\u{61}\\u{3B1}\\u{20AC}'.length"),
nxt_string("10") },
+ { nxt_string("'\\ud83d\\udc4d'"),
+ nxt_string("\xf0\x9f\x91\x8d") },
+
+ { nxt_string("'\\ud83d\\udc4d'.length"),
+ nxt_string("1") },
+
+ { nxt_string("'\\ud83d abc \\udc4d'"),
+ nxt_string("SyntaxError: Invalid surrogate pair "
+ "\"\\ud83d abc \\udc4d\" in 1") },
+
+ { nxt_string("'\\ud83d'"),
+ nxt_string("SyntaxError: Invalid surrogate pair \"\\ud83d\" in 1") },
+
+ { nxt_string("'\\ud83d\\uabcd'"),
+ nxt_string("SyntaxError: Invalid surrogate pair "
+ "\"\\ud83d\\uabcd\" in 1") },
+
{ nxt_string("''.hasOwnProperty('length')"),
nxt_string("true") },
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment