From b2d0bd0da4ab023bb5f9b59bfccebd2e09b14f07 Mon Sep 17 00:00:00 2001 From: Simon Tatham Date: Wed, 19 Sep 2018 12:58:54 +0100 Subject: [PATCH] tree234.c: new search234() system. This is a thing I've been meaning to set up for a while: it's a pull-based search system (that is, the caller takes each step of the search manually, rather than providing a callback), which lets the caller inspect every step of the search, including the index of each candidate element in the tree. This allows flexible kinds of query that play the element and its index off against each other. I've also rewritten the existing findrelpos234() search function using the new one as a primitive, because that simplifies it a lot! --- tree234.c | 300 ++++++++++++++++++++++++++++++++++++++---------------- tree234.h | 35 +++++++ 2 files changed, 246 insertions(+), 89 deletions(-) diff --git a/tree234.c b/tree234.c index 6c826309..ea8ff9ee 100644 --- a/tree234.c +++ b/tree234.c @@ -107,6 +107,18 @@ static int countnode234(node234 * n) return count; } +/* + * Internal function to return the number of elements in a node. + */ +static int elements234(node234 *n) +{ + int i; + for (i = 0; i < 3; i++) + if (!n->elems[i]) + break; + return i; +} + /* * Count the elements in a tree. */ @@ -514,99 +526,66 @@ void *index234(tree234 * t, int index) void *findrelpos234(tree234 * t, void *e, cmpfn234 cmp, int relation, int *index) { - node234 *n; - void *ret; - int c; - int idx, ecount, kcount, cmpret; + search234_state ss; + int reldir = (relation == REL234_LT || relation == REL234_LE ? -1 : + relation == REL234_GT || relation == REL234_GE ? +1 : 0); + int equal_permitted = (relation != REL234_LT && relation != REL234_GT); + void *toret; - if (t->root == NULL) - return NULL; + /* Only LT / GT relations are permitted with a null query element. */ + assert(!(equal_permitted && !e)); if (cmp == NULL) cmp = t->cmp; - n = t->root; - /* - * Attempt to find the element itself. 
- */ - idx = 0; - ecount = -1; - /* - * Prepare a fake `cmp' result if e is NULL. - */ - cmpret = 0; - if (e == NULL) { - assert(relation == REL234_LT || relation == REL234_GT); - if (relation == REL234_LT) - cmpret = +1; /* e is a max: always greater */ - else if (relation == REL234_GT) - cmpret = -1; /* e is a min: always smaller */ - } - while (1) { - for (kcount = 0; kcount < 4; kcount++) { - if (kcount >= 3 || n->elems[kcount] == NULL || - (c = cmpret ? cmpret : cmp(e, n->elems[kcount])) < 0) { - break; - } - if (n->kids[kcount]) - idx += n->counts[kcount]; - if (c == 0) { - ecount = kcount; - break; - } - idx++; - } - if (ecount >= 0) - break; - if (n->kids[kcount]) - n = n->kids[kcount]; - else - break; + search234_start(&ss, t); + while (ss.element) { + int cmpret; + + if (e) { + cmpret = cmp(e, ss.element); + } else { + cmpret = -reldir; /* invent a fixed compare result */ + } + + if (cmpret == 0) { + /* + * We've found an element that compares exactly equal to + * the query element. + */ + if (equal_permitted) { + /* If our search relation permits equality, we've + * finished already. */ + if (index) + *index = ss.index; + return ss.element; + } else { + /* Otherwise, pretend this element was slightly too + * big/small, according to the direction of search. */ + cmpret = reldir; + } + } + + search234_step(&ss, cmpret); } - if (ecount >= 0) { - /* - * We have found the element we're looking for. It's - * n->elems[ecount], at tree index idx. If our search - * relation is EQ, LE or GE we can now go home. - */ - if (relation != REL234_LT && relation != REL234_GT) { - if (index) - *index = idx; - return n->elems[ecount]; - } + /* + * No element compares equal to the one we were after, but + * ss.index indicates the index that element would have if it were + * inserted. + * + * So if our search relation is EQ, we must simply return failure. 
+ */ + if (relation == REL234_EQ) + return NULL; - /* - * Otherwise, we'll do an indexed lookup for the previous - * or next element. (It would be perfectly possible to - * implement these search types in a non-counted tree by - * going back up from where we are, but far more fiddly.) - */ - if (relation == REL234_LT) - idx--; - else - idx++; - } else { - /* - * We've found our way to the bottom of the tree and we - * know where we would insert this node if we wanted to: - * we'd put it in in place of the (empty) subtree - * n->kids[kcount], and it would have index idx - * - * But the actual element isn't there. So if our search - * relation is EQ, we're doomed. - */ - if (relation == REL234_EQ) - return NULL; - - /* - * Otherwise, we must do an index lookup for index idx-1 - * (if we're going left - LE or LT) or index idx (if we're - * going right - GE or GT). - */ - if (relation == REL234_LT || relation == REL234_LE) { - idx--; - } + /* + * Otherwise, we must do an index lookup for the previous index + * (if we're going left - LE or LT) or this index (if we're going + * right - GE or GT). + */ + if (relation == REL234_LT || relation == REL234_LE) { + ss.index--; } /* @@ -614,10 +593,10 @@ void *findrelpos234(tree234 * t, void *e, cmpfn234 cmp, * to do the rest. This will return NULL if the index is out of * bounds, which is exactly what we want. 
*/ - ret = index234(t, idx); - if (ret && index) - *index = idx; - return ret; + toret = index234(t, ss.index); + if (toret && index) + *index = ss.index; + return toret; } void *find234(tree234 * t, void *e, cmpfn234 cmp) { @@ -632,6 +611,80 @@ void *findpos234(tree234 * t, void *e, cmpfn234 cmp, int *index) return findrelpos234(t, e, cmp, REL234_EQ, index); } +void search234_start(search234_state *state, tree234 *t) +{ + state->_node = t->root; + state->_base = 0; /* index of first element in this node's subtree */ + state->_last = -1; /* indicate that this node is not previously visited */ + search234_step(state, 0); +} +void search234_step(search234_state *state, int direction) +{ + node234 *node = state->_node; + int i; + + if (!node) { + state->element = NULL; + state->index = 0; + return; + } + + if (state->_last != -1) { + /* + * We're already pointing at some element of a node, so we + * should restrict to the elements left or right of it, + * depending on the requested search direction. + */ + assert(direction); + assert(node); + + if (direction > 0) { + state->_lo = state->_last + 1; + direction = +1; + } else { + state->_hi = state->_last - 1; + direction = -1; + } + + if (state->_lo > state->_hi) { + /* + * We've run out of elements in this node, i.e. we've + * narrowed to nothing but a child pointer. Descend to + * that child, and update _base to the leftmost index of + * its subtree. + */ + for (i = 0; i < state->_lo; i++) + state->_base += 1 + node->counts[i]; + state->_node = node = node->kids[state->_lo]; + state->_last = -1; + } + } + + if (state->_last == -1) { + /* + * We've just entered a new node - either because of the above + * code, or because we were called from search234_start - and + * anything in that node is a viable answer. + */ + state->_lo = 0; + state->_hi = node ? elements234(node)-1 : 0; + } + + /* + * Now we've got something we can return. 
+ */ + if (!node) { + state->element = NULL; + state->index = state->_base; + } else { + state->_last = (state->_lo + state->_hi) / 2; + state->element = node->elems[state->_last]; + state->index = state->_base + state->_last; + for (i = 0; i <= state->_last; i++) + state->index += node->counts[i]; + } +} + /* * Delete an element e in a 2-3-4 tree. Does not free the element, * merely removes all links to it from the tree nodes. @@ -1418,6 +1471,73 @@ int findtest(void) } } +void searchtest_recurse(search234_state ss, int lo, int hi, + char **expected, char *directionbuf, + char *directionptr) +{ + *directionptr = '\0'; + + if (!ss.element) { + if (lo != hi) { + error("search234(%s) gave NULL for non-empty interval [%d,%d)", + directionbuf, lo, hi); + } else if (ss.index != lo) { + error("search234(%s) gave index %d should be %d", + directionbuf, ss.index, lo); + } else { + printf("%*ssearch234(%s) gave NULL,%d\n", + (int)(directionptr-directionbuf) * 2, "", directionbuf, + ss.index); + } + } else if (lo == hi) { + error("search234(%s) gave %s for empty interval [%d,%d)", + directionbuf, (char *)ss.element, lo, hi); + } else if (ss.element != expected[ss.index]) { + error("search234(%s) gave element %s should be %s", + directionbuf, (char *)ss.element, expected[ss.index]); + } else if (ss.index < lo || ss.index >= hi) { + error("search234(%s) gave index %d should be in [%d,%d)", + directionbuf, ss.index, lo, hi); + return; + } else { + search234_state next; + + printf("%*ssearch234(%s) gave %s,%d\n", + (int)(directionptr-directionbuf) * 2, "", directionbuf, + (char *)ss.element, ss.index); + + next = ss; + search234_step(&next, -1); + *directionptr = '-'; + searchtest_recurse(next, lo, ss.index, + expected, directionbuf, directionptr+1); + + next = ss; + search234_step(&next, +1); + *directionptr = '+'; + searchtest_recurse(next, ss.index+1, hi, + expected, directionbuf, directionptr+1); + } +} + +void searchtest(void) +{ + char *expected[NSTR], *p; + char 
directionbuf[NSTR * 10]; + int n; + search234_state ss; + + printf("beginning searchtest:"); + for (n = 0; (p = index234(tree, n)) != NULL; n++) { + expected[n] = p; + printf(" %d=%s", n, p); + } + printf(" count=%d\n", n); + + search234_start(&ss, tree); + searchtest_recurse(ss, 0, n, expected, directionbuf, directionbuf); +} + int main(void) { int in[NSTR]; @@ -1432,6 +1552,7 @@ int main(void) cmp = mycmp; verify(); + searchtest(); for (i = 0; i < 10000; i++) { j = randomnumber(&seed); j %= NSTR; @@ -1446,6 +1567,7 @@ int main(void) in[j] = 1; } findtest(); + searchtest(); } while (arraylen > 0) { diff --git a/tree234.h b/tree234.h index ba743087..55cbe360 100644 --- a/tree234.h +++ b/tree234.h @@ -132,6 +132,41 @@ void *findpos234(tree234 * t, void *e, cmpfn234 cmp, int *index); void *findrelpos234(tree234 * t, void *e, cmpfn234 cmp, int relation, int *index); +/* + * A more general search type still. Use search234_start() to + * initialise one of these state structures; it will fill in + * state->element with an element of the tree, and state->index with + * the index of that element. If you don't like that element, call + * search234_step, with direction == -1 if you want an element earlier + * in the tree, or +1 if you want a later one. + * + * If either function returns state->element == NULL, then you've + * narrowed the search to a point between two adjacent elements, so + * there are no further elements left to return consistent with the + * constraints you've imposed. In this case, state->index tells you + * how many elements come before the point you narrowed down to. After + * this, you mustn't call search234_step again (unless the state + * structure is first reinitialised). + * + * The use of this search system is that you get both the candidate + * element _and_ its index at every stage, so you can use both of them + * to make your decision. Also, you can remember element pointers from + * earlier in the search. 
+ * + * The fields beginning with underscores are private to the + * implementation, and only exposed so that clients can know how much + * space to allocate for the structure as a whole. Don't modify them. + * (Except that it's safe to copy the whole structure.) + */ +typedef struct search234_state { + void *element; + int index; + int _lo, _hi, _last, _base; + void *_node; +} search234_state; +void search234_start(search234_state *state, tree234 *t); +void search234_step(search234_state *state, int direction); + /* * Delete an element e in a 2-3-4 tree. Does not free the element, * merely removes all links to it from the tree nodes.