Skip to content

Instantly share code, notes, and snippets.

@lexborisov
Created May 8, 2019 14:12
Show Gist options
  • Save lexborisov/2bc6b696f122c2f118168b0c3d1a77c3 to your computer and use it in GitHub Desktop.
Save lexborisov/2bc6b696f122c2f118168b0c3d1a77c3 to your computer and use it in GitHub Desktop.
# HG changeset patch
# User Alexander Borisov <alexander.borisov@nginx.com>
# Date 1557324305 -10800
# Wed May 08 17:05:05 2019 +0300
# Node ID 31caf5d422b907ccc38f471ff98872cc5363a713
# Parent 96c78f35a8092d1f2d7f439230623051bffcb5a2
Added processing of the asterisk quantifier in String.prototype.replace().
diff -r 96c78f35a809 -r 31caf5d422b9 njs/njs_string.c
--- a/njs/njs_string.c Mon May 06 21:24:31 2019 +0300
+++ b/njs/njs_string.c Wed May 08 17:05:05 2019 +0300
@@ -3041,17 +3041,23 @@ njs_string_replace_regexp(njs_vm_t *vm,
njs_string_replace_t *r)
{
int *captures;
+ u_char *p;
njs_ret_t ret;
+ const u_char *end;
njs_regexp_pattern_t *pattern;
- njs_string_replace_part_t *part;
+ njs_string_replace_part_t *part, rep_part;
pattern = args[1].data.u.regexp->pattern;
+ part = r->parts.start;
+ rep_part = part[1];
+
+ end = r->part[0].start + r->part[0].size;
+
do {
ret = njs_regexp_match(vm, &pattern->regex[r->type],
r->part[0].start, r->part[0].size,
r->match_data);
-
if (ret >= 0) {
captures = nxt_regex_captures(r->match_data);
@@ -3082,25 +3088,42 @@ njs_string_replace_regexp(njs_vm_t *vm,
r->part -= 2;
}
- r->part[2].start = r->part[0].start + captures[1];
- r->part[2].size = r->part[0].size - captures[1];
- njs_set_invalid(&r->part[2].value);
-
- if (r->function != NULL) {
- return njs_string_replace_regexp_function(vm, args, r,
- captures, ret);
+ if (captures[1] == 0) {
+ p = (u_char *) nxt_utf8_next(r->part[0].start, end);
+
+ r->part[1].start = r->part[0].start;
+ r->part[2].start = p;
+
+ if (r->part[0].start < end) {
+ r->part[1].size = p - r->part[0].start;
+ r->part[2].size = end - p;
+
+ } else {
+ r->part[1].size = 0;
+ r->part[2].size = 0;
+ }
+
+ r->part[0] = rep_part;
+
+ } else {
+ r->part[2].start = r->part[0].start + captures[1];
+ r->part[2].size = r->part[0].size - captures[1];
+ njs_set_invalid(&r->part[2].value);
+
+ if (r->function != NULL) {
+ return njs_string_replace_regexp_function(vm, args, r,
+ captures, ret);
+ }
+
+ r->part[0].size = captures[0];
+
+ r->part[1] = rep_part;
}
- r->part[0].size = captures[0];
-
if (!pattern->global) {
return njs_string_replace_regexp_join(vm, r);
}
- /* A literal replacement is stored in the second part. */
- part = r->parts.start;
- r->part[1] = part[1];
-
r->part += 2;
}
@@ -3111,7 +3134,7 @@ njs_string_replace_regexp(njs_vm_t *vm,
return NXT_ERROR;
}
- } while (r->part[0].size > 0);
+ } while (end >= r->part[0].start);
if (r->part != r->parts.start) {
return njs_string_replace_regexp_join(vm, r);
@@ -3428,10 +3451,15 @@ static njs_ret_t
njs_string_replace_substitute(njs_vm_t *vm, njs_string_replace_t *r,
int *captures)
{
+ int *cur_capture;
uint32_t i, n, last;
+ const u_char *end;
njs_string_subst_t *s;
njs_string_replace_part_t *part, *subject;
+ cur_capture = NULL;
+
+ end = r->part[0].start + r->part[0].size;
last = r->substitutions->items;
part = nxt_array_add_multiple(&r->parts, &njs_array_mem_proto, vm->mem_pool,
@@ -3442,8 +3470,23 @@ njs_string_replace_substitute(njs_vm_t *
r->part = &part[-1];
- part[last].start = r->part[0].start + captures[1];
- part[last].size = r->part[0].size - captures[1];
+ if (captures[1] == 0) {
+ captures[1] = 1;
+
+ part[last].start = r->part[0].start + 1;
+
+ if (r->part[0].size != 0) {
+ part[last].size = r->part[0].size - 1;
+
+ } else {
+ part[last].size = 0;
+ }
+
+ } else {
+ part[last].start = r->part[0].start + captures[1];
+ part[last].size = r->part[0].size - captures[1];
+ }
+
njs_set_invalid(&part[last].value);
r->part[0].size = captures[0];
@@ -3479,6 +3522,37 @@ njs_string_replace_substitute(njs_vm_t *
* "$&" is the same as "$0", the "$0" however is not supported.
*/
default:
+ if (captures[n] == captures[n + 1]) {
+
+ if (captures[n - 1] == captures[n]) {
+ part->start = r->part[0].start;
+ part->size = 0;
+ break;
+ }
+
+ if (cur_capture != NULL) {
+ goto next_part;
+ }
+
+ cur_capture = &captures[n];
+ continue;
+ }
+
+ if (cur_capture != NULL) {
+next_part:
+ part->start = r->part[0].start + *cur_capture;
+
+ if (part->start < end) {
+ part->size = nxt_utf8_next(part->start, end) - part->start;
+
+ } else {
+ part->size = 0;
+ }
+
+ cur_capture = NULL;
+ break;
+ }
+
part->start = r->part[0].start + captures[n];
part->size = captures[n + 1] - captures[n];
break;
@@ -3488,6 +3562,20 @@ njs_string_replace_substitute(njs_vm_t *
part++;
}
+ if (cur_capture != NULL) {
+ part->start = r->part[0].start + *cur_capture;
+
+ if (part->start < end) {
+ part->size = nxt_utf8_next(part->start, end) - part->start;
+
+ } else {
+ part->size = 0;
+ }
+
+ njs_set_invalid(&part->value);
+ part++;
+ }
+
r->part = part;
return NXT_OK;
diff -r 96c78f35a809 -r 31caf5d422b9 njs/test/njs_unit_test.c
--- a/njs/test/njs_unit_test.c Mon May 06 21:24:31 2019 +0300
+++ b/njs/test/njs_unit_test.c Wed May 08 17:05:05 2019 +0300
@@ -5404,6 +5404,18 @@ static njs_unit_test_t njs_test[] =
{ nxt_string("('β' + 'α'.repeat(33)+'β').replace(/(α+)(β+)/, function(m, p1) { return p1[32]; })"),
nxt_string("βα") },
+ { nxt_string("'abc'.replace(/(h*)(z*)(g*)/g, '$1nn$2zz$3')"),
+ nxt_string("nnzzannzzbnnzzcnnzz") },
+
+ { nxt_string("'abc'.replace(/(h*)(z*)/g, '$1nn$2zz$3yy')"),
+ nxt_string("nnzz$3yyannzz$3yybnnzz$3yycnnzz$3yy") },
+
+ { nxt_string("'aabbccaa'.replace(/a*/g, '')"),
+ nxt_string("bbcc") },
+
+ { nxt_string("''.replace(/a*/g, '')"),
+ nxt_string("") },
+
{ nxt_string("'abc'.match(/a*/g)"),
nxt_string("a,,,") },
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment