Last active
December 21, 2015 16:59
-
-
Save dobrokot/6337339 to your computer and use it in GitHub Desktop.
faster state sets merge in grep utility, dfa.c (avoid O(N^2) algorithms)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Index: dfa.c | |
=================================================================== | |
--- dfa.c (revision grep-2.14-faster-dfa) | |
+++ dfa.c (revision grep-2.14) | |
@@ -1992,6 +1992,133 @@ | |
s->elems[i] = s->elems[i + 1]; | |
} | |
+/* Merge implementation of sorted sets, then number of sets smore than 2. | |
+ Priority queue aka 'heap' data structure used to select maximum of | |
+ many elements */ | |
+ | |
+typedef struct { | |
+ position p; | |
+ size_t set_index; /* Remember which set this element was taken from */ | |
+} heap_elem; | |
+ | |
+ | |
+static void | |
+heap_add (heap_elem *heap, size_t *heap_size, heap_elem e) | |
+{ | |
+ size_t i = *heap_size; | |
+ while (i) | |
+ { | |
+ size_t parent = (i-1) / 2; | |
+ if (e.p.index < heap[parent].p.index) | |
+ break; | |
+ | |
+ heap[i] = heap[parent]; | |
+ i = parent; | |
+ } | |
+ heap[i] = e; | |
+ *heap_size += 1; | |
+} | |
+ | |
+static void | |
+heap_remove_top_and_add (heap_elem *heap, size_t heap_size, heap_elem e) | |
+{ | |
+ size_t i = 0; | |
+ size_t lchild = 2*i + 1; | |
+ size_t n = heap_size; | |
+ while (lchild < n) | |
+ { | |
+ size_t rchild = lchild + 1; | |
+ size_t nextchild = lchild; | |
+ if (rchild < n && heap[lchild].p.index < heap[rchild].p.index) | |
+ nextchild = rchild; | |
+ if (e.p.index >= heap[nextchild].p.index) | |
+ break; | |
+ heap[i] = heap[nextchild]; | |
+ i = nextchild; | |
+ lchild = 2*i + 1; | |
+ } | |
+ heap[i] = e; | |
+} | |
+ | |
+ | |
+static void | |
+heap_remove_top (heap_elem *heap, size_t *heap_size) | |
+{ | |
+ *heap_size -= 1; | |
+ if (*heap_size == 0) | |
+ return; | |
+ heap_remove_top_and_add (heap, *heap_size, heap[*heap_size]); | |
+} | |
+ | |
+static void | |
+heap_merge_many_sets (position_set *sets, size_t n, position_set *result) | |
+{ | |
+ size_t heap_size = 0; | |
+ position_set r; /* faster access to local variables */ | |
+ heap_elem *heap = XNMALLOC (n, heap_elem); | |
+ size_t *ids = XNMALLOC (n, size_t); | |
+ size_t sum_of_sizes = 0; | |
+ size_t i; | |
+ | |
+ for (i = 0; i < n; ++i) | |
+ { | |
+ sum_of_sizes += sets[i].nelem; | |
+ /* special value for "end of set", to reduce | |
+ dependent memory reads */ | |
+ ids[i] = ~(size_t)0; | |
+ if (sets[i].nelem) | |
+ { | |
+ heap_elem e = { sets[i].elems[0], i }; | |
+ heap_add (heap, &heap_size, e); | |
+ if (sets[i].nelem > 1) | |
+ ids[i] = 1; | |
+ } | |
+ } | |
+ | |
+ REALLOC_IF_NECESSARY(result->elems, result->alloc, sum_of_sizes); | |
+ r = *result; | |
+ r.nelem = 0; | |
+ | |
+ while (heap_size) | |
+ { | |
+ heap_elem e = heap[0]; | |
+ if (r.nelem && r.elems[r.nelem-1].index == e.p.index) | |
+ r.elems[r.nelem-1].constraint |= e.p.constraint; | |
+ else | |
+ r.elems[r.nelem++] = e.p; | |
+ i = e.set_index; | |
+ if (ids[i] != ~(size_t)0) | |
+ { | |
+ heap_elem e2 = { sets[i].elems[ids[i]], i }; | |
+ heap_remove_top_and_add (heap, heap_size, e2); | |
+ ++ids[i]; | |
+ if (ids[i] == sets[i].nelem) | |
+ ids[i] = ~(size_t)0; | |
+ } | |
+ else | |
+ { | |
+ heap_remove_top (heap, &heap_size); | |
+ } | |
+ } | |
+ *result = r; | |
+ free (ids); | |
+ free (heap); | |
+} | |
+ | |
+static void | |
+merge_many_sets (position_set *sets, size_t n, position_set *result) | |
+{ | |
+ /* optimized solutions for special/trivial cases */ | |
+ if (n == 0) | |
+ result->nelem = 0; | |
+ else if (n == 1) | |
+ copy (&sets[0], result); | |
+ else if (n == 2) | |
+ merge (&sets[0], &sets[1], result); | |
+ else | |
+ heap_merge_many_sets(sets, n, result); | |
+} | |
+ | |
/* Find the index of the state corresponding to the given position set with | |
the given preceding context, or create a new state if there is no such | |
state. Context tells whether we got here on a newline or letter. */ | |
@@ -2482,13 +2609,16 @@ | |
charclass leftovers; /* Stuff in the label that didn't match. */ | |
int leftoversf; /* True if leftovers is nonempty. */ | |
position_set follows; /* Union of the follows of some group. */ | |
- position_set tmp; /* Temporary space for merging sets. */ | |
+ position_set follows_tmp; /* Temporary space for merging sets. */ | |
int possible_contexts; /* Contexts that this group can match. */ | |
int separate_contexts; /* Context that new state wants to know. */ | |
state_num state; /* New state. */ | |
state_num state_newline; /* New state on a newline transition. */ | |
state_num state_letter; /* New state on a letter transition. */ | |
int next_isnt_1st_byte = 0; /* Flag if we can't add state0. */ | |
+ position_set *sets = 0; /* Temporary array of sets, to pass many sets to merge function */ | |
+ size_t sets_n = 0; /* Sets array size */ | |
+ size_t sets_alloc = 0; /* Sets array capacity */ | |
size_t i, j, k; | |
MALLOC (grps, NOTCHAR); | |
@@ -2607,7 +2737,7 @@ | |
} | |
alloc_position_set (&follows, d->nleaves); | |
- alloc_position_set (&tmp, d->nleaves); | |
+ alloc_position_set (&follows_tmp, d->nleaves); | |
/* If we are a searching matcher, the default transition is to a state | |
containing the positions of state 0, otherwise the default transition | |
@@ -2637,13 +2767,12 @@ | |
for (i = 0; i < ngrps; ++i) | |
{ | |
- follows.nelem = 0; | |
- | |
- /* Find the union of the follows of the positions of the group. | |
- This is a hideously inefficient loop. Fix it someday. */ | |
+ /* Find the union of the follows of the positions of the group. */ | |
+ sets_n = grps[i].nelem; | |
+ REALLOC_IF_NECESSARY(sets, sets_alloc, sets_n); | |
for (j = 0; j < grps[i].nelem; ++j) | |
- for (k = 0; k < d->follows[grps[i].elems[j]].nelem; ++k) | |
- insert (d->follows[grps[i].elems[j]].elems[k], &follows); | |
+ sets[j] = d->follows[grps[i].elems[j]]; | |
+ merge_many_sets (sets, sets_n, &follows_tmp); | |
if (d->mb_cur_max > 1) | |
{ | |
@@ -2666,9 +2795,9 @@ | |
<mb A>, so we cannot add state[0]. */ | |
next_isnt_1st_byte = 0; | |
- for (j = 0; j < follows.nelem; ++j) | |
+ for (j = 0; j < follows_tmp.nelem; ++j) | |
{ | |
- if (!(d->multibyte_prop[follows.elems[j].index] & 1)) | |
+ if (!(d->multibyte_prop[follows_tmp.elems[j].index] & 1)) | |
{ | |
next_isnt_1st_byte = 1; | |
break; | |
@@ -2680,8 +2809,7 @@ | |
of state 0 as well. */ | |
if (d->searchflag | |
&& (!MBS_SUPPORT || (d->mb_cur_max == 1 || !next_isnt_1st_byte))) | |
- for (j = 0; j < d->states[0].elems.nelem; ++j) | |
- insert (d->states[0].elems.elems[j], &follows); | |
+ merge (&follows_tmp, &d->states[0].elems, &follows); | |
/* Find out if the new state will want any context information. */ | |
possible_contexts = charclass_context (labels[i]); | |
@@ -2720,7 +2848,8 @@ | |
for (i = 0; i < ngrps; ++i) | |
free (grps[i].elems); | |
free (follows.elems); | |
- free (tmp.elems); | |
+ free (sets); | |
+ free (follows_tmp.elems); | |
free (grps); | |
free (labels); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment