Created
August 25, 2013 00:56
-
-
Save dobrokot/6331243 to your computer and use it in GitHub Desktop.
faster state sets merge in grep utility, dfa.c (avoid O(N^2) algorithms)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--- dfa.c.back 2013-08-05 16:53:47.824514544 +0400 | |
+++ dfa.c 2013-08-25 04:23:18.783885918 +0400 | |
@@ -1992,6 +1992,115 @@ | |
s->elems[i] = s->elems[i + 1]; | |
} | |
+/* merge implementation of sorted sets, then number of sets smore than 2 | |
+ priority queue aka 'heap' data structure used to select maximum of | |
+ many elements */ | |
+ | |
+typedef struct { | |
+ position p; | |
+ size_t set_index; /* remember which set this element was taken from */ | |
+} heap_elem; | |
+ | |
+ | |
+static void | |
+heap_add (heap_elem *heap, size_t *heap_size, heap_elem e) | |
+{ | |
+ size_t i = *heap_size; | |
+ while (i) { | |
+ size_t parent = (i-1) / 2; | |
+ if (e.p.index < heap[parent].p.index) | |
+ break; | |
+ | |
+ heap[i] = heap[parent]; | |
+ i = parent; | |
+ } | |
+ heap[i] = e; | |
+ *heap_size += 1; | |
+} | |
+ | |
+static void | |
+heap_remove_top_and_add (heap_elem *heap, size_t heap_size, heap_elem e) | |
+{ | |
+ size_t i = 0; | |
+ size_t lchild = 2*i + 1; | |
+ size_t n = heap_size; | |
+ while (lchild < n) { | |
+ size_t rchild = lchild + 1; | |
+ size_t nextchild = lchild; | |
+ if (rchild < n && heap[lchild].p.index < heap[rchild].p.index) | |
+ nextchild = rchild; | |
+ if (e.p.index >= heap[nextchild].p.index) | |
+ break; | |
+ heap[i] = heap[nextchild]; | |
+ i = nextchild; | |
+ lchild = 2*i + 1; | |
+ } | |
+ heap[i] = e; | |
+} | |
+ | |
+ | |
+static void | |
+heap_remove_top (heap_elem *heap, size_t *heap_size) | |
+{ | |
+ *heap_size -= 1; | |
+ if (*heap_size == 0) | |
+ return; | |
+ heap_remove_top_and_add(heap, *heap_size, heap[*heap_size]); | |
+} | |
+ | |
+ | |
+static void | |
+merge_many_sets (position_set *sets, size_t n, position_set *result) | |
+{ | |
+ //TODO: special cases for n == 1, and n == 2 | |
+ //heap_elem *heap = (heap_elem*)malloc(n * sizeof(heap_elem)); | |
+ size_t heap_size = 0; | |
+ heap_elem *heap = XNMALLOC(n, heap_elem); | |
+ size_t *ids = XNMALLOC(n, size_t); | |
+ size_t sum_of_sizes = 0; | |
+ size_t i; | |
+ | |
+ for (i = 0; i < n; ++i) { | |
+ sum_of_sizes += sets[i].nelem; | |
+ /* special value for "end of set", to reduce | |
+ dependent memory reads */ | |
+ ids[i] = ~(size_t)0; | |
+ if (sets[i].nelem) { | |
+ heap_elem e = { sets[i].elems[0], i }; | |
+ heap_add(heap, &heap_size, e); | |
+ if (sets[i].nelem > 1) | |
+ ids[i] = 1; | |
+ } | |
+ } | |
+ | |
+ position_set r; | |
+ REALLOC_IF_NECESSARY(r.elems, r.alloc, sum_of_sizes); | |
+ r.nelem = 0; | |
+ | |
+ while (heap_size) { | |
+ heap_elem e = heap[0]; | |
+ if (r.nelem && r.elems[r.nelem-1].index == e.p.index) | |
+ r.elems[r.nelem-1].constraint |= e.p.constraint; | |
+ else { | |
+ r.elems[r.nelem++] = e.p; | |
+ } | |
+ i = e.set_index; | |
+ if (ids[i] != ~(size_t)0) { | |
+ heap_elem e2 = { sets[i].elems[ids[i]], i }; | |
+ heap_remove_top_and_add(heap, heap_size, e2); | |
+ ++ids[i]; | |
+ if (ids[i] == sets[i].nelem) | |
+ ids[i] = ~(size_t)0; | |
+ } | |
+ else { | |
+ heap_remove_top(heap, &heap_size); | |
+ } | |
+ } | |
+ *result = r; | |
+ free(ids); | |
+ free(heap); | |
+} | |
+ | |
/* Find the index of the state corresponding to the given position set with | |
the given preceding context, or create a new state if there is no such | |
state. Context tells whether we got here on a newline or letter. */ | |
@@ -2482,7 +2591,7 @@ | |
charclass leftovers; /* Stuff in the label that didn't match. */ | |
int leftoversf; /* True if leftovers is nonempty. */ | |
position_set follows; /* Union of the follows of some group. */ | |
- position_set tmp; /* Temporary space for merging sets. */ | |
+ position_set follows_tmp; /* Temporary space for merging sets. */ | |
int possible_contexts; /* Contexts that this group can match. */ | |
int separate_contexts; /* Context that new state wants to know. */ | |
state_num state; /* New state. */ | |
@@ -2607,7 +2716,11 @@ | |
} | |
alloc_position_set (&follows, d->nleaves); | |
- alloc_position_set (&tmp, d->nleaves); | |
+ alloc_position_set (&follows_tmp, d->nleaves); | |
+ | |
+ position_set *sets = 0; | |
+ size_t sets_n = 0, sets_alloc = 0; | |
+ | |
/* If we are a searching matcher, the default transition is to a state | |
containing the positions of state 0, otherwise the default transition | |
@@ -2637,13 +2750,12 @@ | |
for (i = 0; i < ngrps; ++i) | |
{ | |
- follows.nelem = 0; | |
- | |
- /* Find the union of the follows of the positions of the group. | |
- This is a hideously inefficient loop. Fix it someday. */ | |
+ /* Find the union of the follows of the positions of the group. */ | |
+ sets_n = grps[i].nelem; | |
+ REALLOC_IF_NECESSARY(sets, sets_alloc, sets_n); | |
for (j = 0; j < grps[i].nelem; ++j) | |
- for (k = 0; k < d->follows[grps[i].elems[j]].nelem; ++k) | |
- insert (d->follows[grps[i].elems[j]].elems[k], &follows); | |
+ sets[j] = d->follows[grps[i].elems[j]]; | |
+ merge_many_sets(sets, sets_n, &follows_tmp); | |
if (d->mb_cur_max > 1) | |
{ | |
@@ -2666,9 +2778,9 @@ | |
<mb A>, so we cannot add state[0]. */ | |
next_isnt_1st_byte = 0; | |
- for (j = 0; j < follows.nelem; ++j) | |
+ for (j = 0; j < follows_tmp.nelem; ++j) | |
{ | |
- if (!(d->multibyte_prop[follows.elems[j].index] & 1)) | |
+ if (!(d->multibyte_prop[follows_tmp.elems[j].index] & 1)) | |
{ | |
next_isnt_1st_byte = 1; | |
break; | |
@@ -2679,11 +2791,8 @@ | |
/* If we are building a searching matcher, throw in the positions | |
of state 0 as well. */ | |
if (d->searchflag | |
- && (!MBS_SUPPORT || (d->mb_cur_max == 1 || !next_isnt_1st_byte))) { | |
- for (j = 0; j < d->states[0].elems.nelem; ++j) { | |
- insert(d->states[0].elems.elems[j], &follows); | |
- } | |
- } | |
+ && (!MBS_SUPPORT || (d->mb_cur_max == 1 || !next_isnt_1st_byte))) | |
+ merge(&follows_tmp, &d->states[0].elems, &follows); | |
/* Find out if the new state will want any context information. */ | |
possible_contexts = charclass_context (labels[i]); | |
@@ -2722,7 +2831,8 @@ | |
for (i = 0; i < ngrps; ++i) | |
free (grps[i].elems); | |
free (follows.elems); | |
- free (tmp.elems); | |
+ free (sets); | |
+ free (follows_tmp.elems); | |
free (grps); | |
free (labels); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment