Merge pull request #139 from Shopify/moar_c_cursor

More C cursor improvements
pull/34/head
Ben Johnson 2014-04-23 12:05:53 -06:00
commit 5524825919
2 changed files with 171 additions and 44 deletions

View File

@ -12,7 +12,7 @@ package c
//------------------------------------------------------------------------------
// This represents the maximum number of levels that a cursor can traverse.
#define MAX_DEPTH 100
#define MAX_DEPTH 64
// These flags mark the type of page and are set in the page.flags.
#define PAGE_BRANCH 0x01
@ -88,8 +88,6 @@ elem_ref *cursor_current(bolt_cursor *c);
elem_ref *cursor_pop(bolt_cursor *c);
void cursor_first_leaf(bolt_cursor *c);
void cursor_key_value(bolt_cursor *c, bolt_val *key, bolt_val *value, uint32_t *flags);
void cursor_search(bolt_cursor *c, bolt_val key, pgid id);
@ -116,7 +114,6 @@ void bolt_cursor_first(bolt_cursor *c, bolt_val *key, bolt_val *value, uint32_t
elem_ref *ref = cursor_push(c, c->root);
// Find first leaf and return key/value.
cursor_first_leaf(c);
cursor_key_value(c, key, value, flags);
}
@ -133,16 +130,7 @@ void bolt_cursor_next(bolt_cursor *c, bolt_val *key, bolt_val *value, uint32_t *
cursor_pop(c);
};
// If we are at the top of the stack then return a blank key/value pair.
if (ref == NULL) {
key->size = value->size = 0;
key->data = value->data = NULL;
*flags = 0;
return;
};
// Find first leaf and return key/value.
cursor_first_leaf(c);
cursor_key_value(c, key, value, flags);
}
@ -153,18 +141,8 @@ void bolt_cursor_seek(bolt_cursor *c, bolt_val seek, bolt_val *key, bolt_val *va
// Start from root page/node and traverse to correct page.
cursor_push(c, c->root);
if (seek.size > 0) cursor_search(c, seek, c->root);
elem_ref *ref = cursor_current(c);
// If the cursor is pointing to the end of page then return nil.
if (ref == NULL) {
key->size = value->size = 0;
key->data = value->data = NULL;
*flags = 0;
return;
};
// Find first leaf and return key/value.
cursor_first_leaf(c);
cursor_key_value(c, key, value, flags);
}
@ -195,6 +173,7 @@ elem_ref *cursor_current(bolt_cursor *c) {
}
// Pop current element ref off the cursor stack
// If stack is empty return null
elem_ref *cursor_pop(bolt_cursor *c) {
elem_ref *ref = cursor_current(c);
if (ref != NULL) c->top--;
@ -216,6 +195,21 @@ leaf_element *page_leaf_element(page *p, uint16_t index) {
// Returns the key/value pair for the current position of the cursor.
void cursor_key_value(bolt_cursor *c, bolt_val *key, bolt_val *value, uint32_t *flags) {
elem_ref *ref = cursor_current(c);
// If stack or current page is empty return null.
if (ref == NULL || ref->page->count == 0) {
key->size = value->size = 0;
key->data = value->data = NULL;
*flags = 0;
return;
};
// Descend to the current leaf page if we're on branch page.
while (ref->page->flags & PAGE_BRANCH) {
branch_element *elem = page_branch_element(ref->page,ref->index);
ref = cursor_push(c, elem->pgid);
};
leaf_element *elem = page_leaf_element(ref->page,ref->index);
// Assign key pointer.
@ -230,20 +224,14 @@ void cursor_key_value(bolt_cursor *c, bolt_val *key, bolt_val *value, uint32_t *
*flags = elem->flags;
}
// Walks down from the element ref on top of the cursor stack: while that ref
// is on a branch page, the child page referenced by the ref's current index is
// pushed and becomes the new top. Stops once the top ref is not a branch page.
// The caller must make sure the stack is not empty before calling this.
void cursor_first_leaf(bolt_cursor *c) {
    elem_ref *top = cursor_current(c);
    while ((top->page->flags & PAGE_BRANCH) != 0) {
        branch_element *branch = page_branch_element(top->page, top->index);
        top = cursor_push(c, branch->pgid);
    }
}
// Recursively performs a binary search against a given page/node until it finds a given key.
void cursor_search(bolt_cursor *c, bolt_val key, pgid id) {
// Push page onto the cursor stack.
elem_ref *ref = cursor_push(c, id);
// int len = key.size > 10 ? 10 : key.size;
// printf("\npage=%d, depth=%d, seek=...%.*s[%d]", (int)id, c->top, len, ((char*)(key.data)) + key.size - len, key.size);
// If we're on a leaf page/node then find the specific node.
if (ref->page->flags & PAGE_LEAF) {
cursor_search_leaf(c, key);
@ -265,9 +253,9 @@ void cursor_search_leaf(bolt_cursor *c, bolt_val key) {
leaf_element *elem = &elems[i];
int rc = memcmp(key.data, ((void*)elem) + elem->pos, (elem->ksize < key.size ? elem->ksize : key.size));
// printf("? %.*s | %.*s\n", key.size, key.data, elem->ksize, ((void*)elem) + elem->pos);
// printf("rc=%d; key.size(%d) >= elem->ksize(%d)\n", rc, key.size, elem->ksize);
if ((rc == 0 && key.size >= elem->ksize) || rc < 0) {
// int len = key.size > 10 ? 10 : key.size;
// printf("\n?L rc=%d; elem=...%.*s[%d]", rc, len, ((char*)elem) + elem->pos + elem->ksize - len, elem->ksize);
if ((rc == 0 && key.size <= elem->ksize) || rc < 0) {
ref->index = i;
return;
}
@ -288,16 +276,19 @@ void cursor_search_branch(bolt_cursor *c, bolt_val key) {
branch_element *elem = &elems[i];
int rc = memcmp(key.data, ((void*)elem) + elem->pos, (elem->ksize < key.size ? elem->ksize : key.size));
// int len = key.size > 10 ? 10 : key.size;
// printf("\n?B rc=%d; elem=...%.*s[%d]", rc, len, ((char*)elem) + elem->pos + elem->ksize - len, elem->ksize);
if (rc == 0 && key.size == elem->ksize) {
// exact match, done
// Exact match, done.
ref->index = i;
return;
} else if ((rc == 0 && key.size < elem->ksize) || rc < 0) {
// if key is less than anything in this subtree we are done
// If key is less than anything in this subtree we are done.
// This should really only happen for a key that's less than anything in the tree.
if (i == 0) return;
// otherwise search the previous subtree
// Otherwise search the previous subtree.
cursor_search(c, key, elems[i-1].pgid);
// didn't find anything greater than key?
// Didn't find anything greater than key?
if (cursor_current(c) == ref)
ref->index = i;
else
@ -306,8 +297,13 @@ void cursor_search_branch(bolt_cursor *c, bolt_val key) {
}
}
// If nothing was greater than the key then pop the current page off the stack.
cursor_pop(c);
// If nothing was greater than the key then search the last child.
cursor_search(c, key, elems[ref->page->count-1].pgid);
// If still didn't find anything greater than key, then pop the page off the stack.
if (cursor_current(c) == ref)
cursor_pop(c);
else
ref->index = ref->page->count-1;
}
*/
@ -341,6 +337,9 @@ func (c *Cursor) First() (key, value []byte) {
var k, v C.bolt_val
var flags C.uint32_t
C.bolt_cursor_first(c.C, &k, &v, &flags)
if k.data == nil {
return nil, nil
}
return C.GoBytes(k.data, C.int(k.size)), C.GoBytes(v.data, C.int(v.size))
}
@ -350,6 +349,9 @@ func (c *Cursor) Next() (key, value []byte) {
var k, v C.bolt_val
var flags C.uint32_t
C.bolt_cursor_next(c.C, &k, &v, &flags)
if k.data == nil {
return nil, nil
}
return C.GoBytes(k.data, C.int(k.size)), C.GoBytes(v.data, C.int(v.size))
}
@ -364,8 +366,6 @@ func (c *Cursor) Seek(seek []byte) (key, value []byte, flags int) {
_seek.data = unsafe.Pointer(&seek[0])
}
C.bolt_cursor_seek(c.C, _seek, &k, &v, &_flags)
//fmt.Printf("Key %v [%v]\n", k.data, k.size)
//fmt.Printf("Value %v [%v]\n", k.data, k.size)
if k.data == nil {
return nil, nil, 0
}

View File

@ -11,7 +11,7 @@ import (
"github.com/stretchr/testify/assert"
)
// Ensure that the C cursor can
// Ensure that the C cursor can seek to first element.
func TestCursor_First(t *testing.T) {
withDB(func(db *bolt.DB) {
db.Update(func(tx *bolt.Tx) error {
@ -28,6 +28,30 @@ func TestCursor_First(t *testing.T) {
})
}
// Ensure that a C cursor handles an empty bucket properly: First, Next and
// Seek must all return a nil key and value, and Seek must return zero flags.
func TestCursor_Empty(t *testing.T) {
	withDB(func(db *bolt.DB) {
		// Propagate the bucket-creation error so that a setup failure is
		// reported here instead of surfacing later as a missing bucket.
		err := db.Update(func(tx *bolt.Tx) error {
			_, err := tx.CreateBucket([]byte("widgets"))
			return err
		})
		if !assert.NoError(t, err) {
			return
		}

		err = db.View(func(tx *bolt.Tx) error {
			c := NewCursor(tx.Bucket([]byte("widgets")))

			key, value := c.First()
			assert.Nil(t, key)
			assert.Nil(t, value)

			key, value = c.Next()
			assert.Nil(t, key)
			assert.Nil(t, value)

			key, value, flags := c.Seek([]byte("bar"))
			assert.Nil(t, key)
			assert.Nil(t, value)
			assert.Equal(t, 0, flags)
			return nil
		})
		assert.NoError(t, err)
	})
}
// Ensure that a C cursor can seek to the appropriate keys.
func TestCursor_Seek(t *testing.T) {
withDB(func(db *bolt.DB) {
@ -56,6 +80,18 @@ func TestCursor_Seek(t *testing.T) {
assert.Equal(t, "0003", string(v))
assert.Equal(t, 0, flags)
// Inexact match with smaller db key should go to the next key.
k, v, flags = c.Seek([]byte("barrrr"))
assert.Equal(t, "baz", string(k))
assert.Equal(t, "0003", string(v))
assert.Equal(t, 0, flags)
// Inexact match with smaller seek key should go to the next key.
k, v, flags = c.Seek([]byte("ba"))
assert.Equal(t, "bar", string(k))
assert.Equal(t, "0002", string(v))
assert.Equal(t, 0, flags)
// Low key should go to the first key.
k, v, flags = c.Seek([]byte(""))
assert.Equal(t, "bar", string(k))
@ -140,6 +176,40 @@ func TestCursor_Iterate_Large(t *testing.T) {
})
}
// Ensure that a C cursor can iterate, in key order, over every element of a
// tree that is more than three levels deep.
func TestCursor_Iterate_Deep(t *testing.T) {
	withDB(func(db *bolt.DB) {
		// Each key/value is zero-padded to a tenth of the page size; the
		// depth assertion below checks that this produced a deep tree.
		pgsz := db.Info().PageSize / 10
		assert.True(t, pgsz > 100)

		err := db.Update(func(tx *bolt.Tx) error {
			b, err := tx.CreateBucket([]byte("widgets"))
			if err != nil {
				return err
			}
			for i := 0; i < 1000; i++ {
				kv := []byte(fmt.Sprintf("%0*d", pgsz, i))
				if err := b.Put(kv, kv); err != nil {
					return err
				}
			}
			return nil
		})
		// Stop early on a failed setup: the bucket may not exist.
		if !assert.NoError(t, err) {
			return
		}

		err = db.View(func(tx *bolt.Tx) error {
			b := tx.Bucket([]byte("widgets"))
			s := b.Stats()
			assert.True(t, s.Depth > 3)

			// Keys were inserted as zero-padded integers, so iteration
			// order must match the insertion counter.
			var index int
			c := NewCursor(b)
			for k, v := c.First(); len(k) > 0; k, v = c.Next() {
				kv := fmt.Sprintf("%0*d", pgsz, index)
				assert.Equal(t, kv, string(k))
				assert.Equal(t, kv, string(v))
				index++
			}
			assert.Equal(t, 1000, index)

			// Advancing past the last element must keep returning nil.
			k, _ := c.Next()
			assert.Nil(t, k)
			return nil
		})
		assert.NoError(t, err)
	})
}
// Ensure that a C cursor can seek over branches and leafs.
func TestCursor_Seek_Large(t *testing.T) {
withDB(func(db *bolt.DB) {
@ -178,6 +248,63 @@ func TestCursor_Seek_Large(t *testing.T) {
})
}
// Ensure that a C cursor can seek to exact, inexact, low and high keys in a
// tree that is more than three levels deep.
func TestCursor_Seek_Deep(t *testing.T) {
	withDB(func(db *bolt.DB) {
		// Each key/value is zero-padded to a tenth of the page size; the
		// depth assertion below checks that this produced a deep tree.
		pgsz := db.Info().PageSize / 10
		assert.True(t, pgsz > 100)

		// Insert the keys 10, 20, ..., 9990 so that there are gaps to seek into.
		err := db.Update(func(tx *bolt.Tx) error {
			b, err := tx.CreateBucket([]byte("widgets"))
			if err != nil {
				return err
			}
			for i := 1; i < 1000; i++ {
				kv := []byte(fmt.Sprintf("%0*d", pgsz, i*10))
				if err := b.Put(kv, kv); err != nil {
					return err
				}
			}
			return nil
		})
		// Stop early on a failed setup: the bucket may not exist.
		if !assert.NoError(t, err) {
			return
		}

		err = db.View(func(tx *bolt.Tx) error {
			b := tx.Bucket([]byte("widgets"))
			s := b.Stats()
			assert.True(t, s.Depth > 3)
			c := NewCursor(b)

			// Exact match should go to the key.
			seek := fmt.Sprintf("%0*d", pgsz, 5000)
			k, v, _ := c.Seek([]byte(seek))
			assert.Equal(t, seek, string(k))
			assert.Equal(t, seek, string(v))

			// Inexact match should go to the next key.
			seek = fmt.Sprintf("%0*d", pgsz, 7495)
			found := fmt.Sprintf("%0*d", pgsz, 7500)
			k, v, _ = c.Seek([]byte(seek))
			assert.Equal(t, found, string(k))
			assert.Equal(t, found, string(v))

			// Low key should go to the first key.
			seek = fmt.Sprintf("%0*d", pgsz, 0)
			found = fmt.Sprintf("%0*d", pgsz, 10)
			k, v, _ = c.Seek([]byte(seek))
			assert.Equal(t, found, string(k))
			assert.Equal(t, found, string(v))

			// High key should return no key.
			seek = fmt.Sprintf("%0*d", pgsz, 40000)
			k, v, _ = c.Seek([]byte(seek))
			assert.Equal(t, "", string(k))
			assert.Equal(t, "", string(v))

			// Exact match in the middle of a branch page.
			seek = fmt.Sprintf("%0*d", pgsz, 4170)
			k, v, _ = c.Seek([]byte(seek))
			assert.Equal(t, seek, string(k))
			assert.Equal(t, seek, string(v))
			return nil
		})
		assert.NoError(t, err)
	})
}
// tempfile returns a temporary path.
func tempfile() string {
f, _ := ioutil.TempFile("", "bolt-c-")