Created
May 8, 2019 14:12
-
-
Save lexborisov/2bc6b696f122c2f118168b0c3d1a77c3 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# HG changeset patch | |
# User Alexander Borisov <alexander.borisov@nginx.com> | |
# Date 1557324305 -10800 | |
# Wed May 08 17:05:05 2019 +0300 | |
# Node ID 31caf5d422b907ccc38f471ff98872cc5363a713 | |
# Parent 96c78f35a8092d1f2d7f439230623051bffcb5a2 | |
Added processing of the asterisk quantifier for String.prototype.replace. | |
diff -r 96c78f35a809 -r 31caf5d422b9 njs/njs_string.c | |
--- a/njs/njs_string.c Mon May 06 21:24:31 2019 +0300 | |
+++ b/njs/njs_string.c Wed May 08 17:05:05 2019 +0300 | |
@@ -3041,17 +3041,23 @@ njs_string_replace_regexp(njs_vm_t *vm, | |
njs_string_replace_t *r) | |
{ | |
int *captures; | |
+ u_char *p; | |
njs_ret_t ret; | |
+ const u_char *end; | |
njs_regexp_pattern_t *pattern; | |
- njs_string_replace_part_t *part; | |
+ njs_string_replace_part_t *part, rep_part; | |
pattern = args[1].data.u.regexp->pattern; | |
+ part = r->parts.start; | |
+ rep_part = part[1]; | |
+ | |
+ end = r->part[0].start + r->part[0].size; | |
+ | |
do { | |
ret = njs_regexp_match(vm, &pattern->regex[r->type], | |
r->part[0].start, r->part[0].size, | |
r->match_data); | |
- | |
if (ret >= 0) { | |
captures = nxt_regex_captures(r->match_data); | |
@@ -3082,25 +3088,42 @@ njs_string_replace_regexp(njs_vm_t *vm, | |
r->part -= 2; | |
} | |
- r->part[2].start = r->part[0].start + captures[1]; | |
- r->part[2].size = r->part[0].size - captures[1]; | |
- njs_set_invalid(&r->part[2].value); | |
- | |
- if (r->function != NULL) { | |
- return njs_string_replace_regexp_function(vm, args, r, | |
- captures, ret); | |
+ if (captures[1] == 0) { | |
+ p = (u_char *) nxt_utf8_next(r->part[0].start, end); | |
+ | |
+ r->part[1].start = r->part[0].start; | |
+ r->part[2].start = p; | |
+ | |
+ if (r->part[0].start < end) { | |
+ r->part[1].size = p - r->part[0].start; | |
+ r->part[2].size = end - p; | |
+ | |
+ } else { | |
+ r->part[1].size = 0; | |
+ r->part[2].size = 0; | |
+ } | |
+ | |
+ r->part[0] = rep_part; | |
+ | |
+ } else { | |
+ r->part[2].start = r->part[0].start + captures[1]; | |
+ r->part[2].size = r->part[0].size - captures[1]; | |
+ njs_set_invalid(&r->part[2].value); | |
+ | |
+ if (r->function != NULL) { | |
+ return njs_string_replace_regexp_function(vm, args, r, | |
+ captures, ret); | |
+ } | |
+ | |
+ r->part[0].size = captures[0]; | |
+ | |
+ r->part[1] = rep_part; | |
} | |
- r->part[0].size = captures[0]; | |
- | |
if (!pattern->global) { | |
return njs_string_replace_regexp_join(vm, r); | |
} | |
- /* A literal replacement is stored in the second part. */ | |
- part = r->parts.start; | |
- r->part[1] = part[1]; | |
- | |
r->part += 2; | |
} | |
@@ -3111,7 +3134,7 @@ njs_string_replace_regexp(njs_vm_t *vm, | |
return NXT_ERROR; | |
} | |
- } while (r->part[0].size > 0); | |
+ } while (end >= r->part[0].start); | |
if (r->part != r->parts.start) { | |
return njs_string_replace_regexp_join(vm, r); | |
@@ -3428,10 +3451,15 @@ static njs_ret_t | |
njs_string_replace_substitute(njs_vm_t *vm, njs_string_replace_t *r, | |
int *captures) | |
{ | |
+ int *cur_capture; | |
uint32_t i, n, last; | |
+ const u_char *end; | |
njs_string_subst_t *s; | |
njs_string_replace_part_t *part, *subject; | |
+ cur_capture = NULL; | |
+ | |
+ end = r->part[0].start + r->part[0].size; | |
last = r->substitutions->items; | |
part = nxt_array_add_multiple(&r->parts, &njs_array_mem_proto, vm->mem_pool, | |
@@ -3442,8 +3470,23 @@ njs_string_replace_substitute(njs_vm_t * | |
r->part = &part[-1]; | |
- part[last].start = r->part[0].start + captures[1]; | |
- part[last].size = r->part[0].size - captures[1]; | |
+ if (captures[1] == 0) { | |
+ captures[1] = 1; | |
+ | |
+ part[last].start = r->part[0].start + 1; | |
+ | |
+ if (r->part[0].size != 0) { | |
+ part[last].size = r->part[0].size - 1; | |
+ | |
+ } else { | |
+ part[last].size = 0; | |
+ } | |
+ | |
+ } else { | |
+ part[last].start = r->part[0].start + captures[1]; | |
+ part[last].size = r->part[0].size - captures[1]; | |
+ } | |
+ | |
njs_set_invalid(&part[last].value); | |
r->part[0].size = captures[0]; | |
@@ -3479,6 +3522,37 @@ njs_string_replace_substitute(njs_vm_t * | |
* "$&" is the same as "$0", the "$0" however is not supported. | |
*/ | |
default: | |
+ if (captures[n] == captures[n + 1]) { | |
+ | |
+ if (captures[n - 1] == captures[n]) { | |
+ part->start = r->part[0].start; | |
+ part->size = 0; | |
+ break; | |
+ } | |
+ | |
+ if (cur_capture != NULL) { | |
+ goto next_part; | |
+ } | |
+ | |
+ cur_capture = &captures[n]; | |
+ continue; | |
+ } | |
+ | |
+ if (cur_capture != NULL) { | |
+next_part: | |
+ part->start = r->part[0].start + *cur_capture; | |
+ | |
+ if (part->start < end) { | |
+ part->size = nxt_utf8_next(part->start, end) - part->start; | |
+ | |
+ } else { | |
+ part->size = 0; | |
+ } | |
+ | |
+ cur_capture = NULL; | |
+ break; | |
+ } | |
+ | |
part->start = r->part[0].start + captures[n]; | |
part->size = captures[n + 1] - captures[n]; | |
break; | |
@@ -3488,6 +3562,20 @@ njs_string_replace_substitute(njs_vm_t * | |
part++; | |
} | |
+ if (cur_capture != NULL) { | |
+ part->start = r->part[0].start + *cur_capture; | |
+ | |
+ if (part->start < end) { | |
+ part->size = nxt_utf8_next(part->start, end) - part->start; | |
+ | |
+ } else { | |
+ part->size = 0; | |
+ } | |
+ | |
+ njs_set_invalid(&part->value); | |
+ part++; | |
+ } | |
+ | |
r->part = part; | |
return NXT_OK; | |
diff -r 96c78f35a809 -r 31caf5d422b9 njs/test/njs_unit_test.c | |
--- a/njs/test/njs_unit_test.c Mon May 06 21:24:31 2019 +0300 | |
+++ b/njs/test/njs_unit_test.c Wed May 08 17:05:05 2019 +0300 | |
@@ -5404,6 +5404,18 @@ static njs_unit_test_t njs_test[] = | |
{ nxt_string("('β' + 'α'.repeat(33)+'β').replace(/(α+)(β+)/, function(m, p1) { return p1[32]; })"), | |
nxt_string("βα") }, | |
+ { nxt_string("'abc'.replace(/(h*)(z*)(g*)/g, '$1nn$2zz$3')"), | |
+ nxt_string("nnzzannzzbnnzzcnnzz") }, | |
+ | |
+ { nxt_string("'abc'.replace(/(h*)(z*)/g, '$1nn$2zz$3yy')"), | |
+ nxt_string("nnzz$3yyannzz$3yybnnzz$3yycnnzz$3yy") }, | |
+ | |
+ { nxt_string("'aabbccaa'.replace(/a*/g, '')"), | |
+ nxt_string("bbcc") }, | |
+ | |
+ { nxt_string("''.replace(/a*/g, '')"), | |
+ nxt_string("") }, | |
+ | |
{ nxt_string("'abc'.match(/a*/g)"), | |
nxt_string("a,,,") }, | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment