diff --git a/branch.go b/branch.go new file mode 100644 index 0000000..77cc1ca --- /dev/null +++ b/branch.go @@ -0,0 +1,60 @@ +package bolt + +import ( + "bytes" + "sort" + "unsafe" +) + +// branch represents a temporary in-memory branch page. +type branch struct { + parent *branch + items branchItems +} + +// insert inserts a new item after a given pgid. +func (b *branch) insert(key []byte, previd pgid, id pgid) { + // Find previous insertion index. + index := sort.Search(len(b.items), func(i int) bool { return b.items[i].pgid >= previd }) + + // If there is no existing key then add a new item. + b.items = append(b.items, branchItem{}) + if index < len(b.items) { + copy(b.items[index+1:], b.items[index:]) + } + + b.items[index].pgid = id + b.items[index].key = key +} + +// replace swaps out an existing node id for a new one id. +func (b *branch) replace(oldid pgid, newid pgid, key []byte) { + index := sort.Search(len(b.items), func(i int) bool { return b.items[i].pgid >= oldid }) + b.items[index].pgid = newid + b.items[index].key = key +} + +// read initializes the item data from an on-disk page. +func (b *branch) read(page *page) { + ncount := int(page.count) + b.items = make(branchItems, ncount) + bnodes := (*[maxNodesPerPage]bnode)(unsafe.Pointer(&page.ptr)) + for i := 0; i < ncount; i++ { + bnode := &bnodes[i] + item := &b.items[i] + item.key = bnode.key() + } +} + + +type branchItems []branchItem + +type branchItem struct { + pgid pgid + key []byte +} + +func (s branchItems) Len() int { return len(s) } +func (s branchItems) Swap(i, j int) { s[i], s[j] = s[j], s[i] } +func (s branchItems) Less(i, j int) bool { return bytes.Compare(s[i].key, s[j].key) == -1 } + diff --git a/leaf.go b/leaf.go index 23b35eb..dcd343f 100644 --- a/leaf.go +++ b/leaf.go @@ -9,16 +9,10 @@ import ( // leaf represents a temporary in-memory leaf page. // It is deserialized from an memory-mapped page and is not restricted by page size. type leaf struct { + parent *branch items leafItems } -type leafItems []leafItem - -type leafItem struct { - key []byte - value []byte -} - // put inserts or replaces a key on a leaf page. func (l *leaf) put(key []byte, value []byte) { // Find insertion index. @@ -35,11 +29,20 @@ func (l *leaf) put(key []byte, value []byte) { l.items[index].value = value } +// size returns the size of the leaf after serialization. +func (l *leaf) size() int { + var size int = pageHeaderSize + for _, item := range l.items { + size += lnodeSize + len(item.key) + len(item.value) + } + return size +} + // read initializes the item data from an on-disk page. -func (l *leaf) read(page *page) { - ncount := int(page.count) +func (l *leaf) read(p *page) { + ncount := int(p.count) l.items = make(leafItems, ncount) - lnodes := (*[maxNodesPerPage]lnode)(unsafe.Pointer(&page.ptr)) + lnodes := (*[maxNodesPerPage]lnode)(unsafe.Pointer(&p.ptr)) for i := 0; i < ncount; i++ { lnode := &lnodes[i] item := &l.items[i] @@ -49,86 +52,70 @@ func (l *leaf) read(page *page) { } // write writes the items onto one or more leaf pages. -func (l *leaf) write(pageSize int, allocate func(size int) (*page, error)) ([]*page, error) { - var pages []*page +func (l *leaf) write(p *page) { + // Initialize page. + p.flags |= p_leaf + p.count = uint16(len(l.items)) - for _, items := range l.split(pageSize) { - // Determine the total page size. - var size int = pageHeaderSize - for _, item := range l.items { - size += lnodeSize + len(item.key) + len(item.value) - } + // Loop over each item and write it to the page. + lnodes := (*[maxNodesPerPage]lnode)(unsafe.Pointer(&p.ptr)) + b := (*[maxAllocSize]byte)(unsafe.Pointer(&p.ptr))[lnodeSize*len(l.items):] + for index, item := range l.items { + // Write node. + lnode := &lnodes[index] + lnode.pos = uint32(uintptr(unsafe.Pointer(&b[0])) - uintptr(unsafe.Pointer(lnode))) + lnode.ksize = uint32(len(item.key)) + lnode.vsize = uint32(len(item.value)) - // Allocate pages. - page, err := allocate(size) - if err != nil { - return nil, err - } - page.flags |= p_leaf - page.count = uint16(len(items)) - - // Loop over each item and write it to the page. - lnodes := (*[maxNodesPerPage]lnode)(unsafe.Pointer(&page.ptr)) - b := (*[maxAllocSize]byte)(unsafe.Pointer(&page.ptr))[lnodeSize*len(items):] - for index, item := range items { - // Write item. - lnode := &lnodes[index] - lnode.pos = uint32(uintptr(unsafe.Pointer(&b[0])) - uintptr(unsafe.Pointer(lnode))) - lnode.ksize = uint32(len(item.key)) - lnode.vsize = uint32(len(item.value)) - - // Write data to the end of the page. - copy(b[0:], item.key) - b = b[len(item.key):] - copy(b[0:], item.value) - b = b[len(item.value):] - } - - pages = append(pages, page) + // Write data to the end of the page. + copy(b[0:], item.key) + b = b[len(item.key):] + copy(b[0:], item.value) + b = b[len(item.value):] } - - return pages, nil } // split divides up the noes in the page into appropriately sized groups. -func (l *leaf) split(pageSize int) []leafItems { - // If we don't have enough items for multiple pages then just return the items. - if len(l.items) <= (minKeysPerPage * 2) { - return []leafItems{l.items} +func (l *leaf) split(pageSize int) []*leaf { + // Ignore the split if the page doesn't have at least enough nodes for + // multiple pages or if the data can fit on a single page. + if len(l.items) <= (minKeysPerPage * 2) || l.size() < pageSize { + return []*leaf{l} } - // If we're not larger than one page then just return the items. - var totalSize int = pageHeaderSize - for _, item := range l.items { - totalSize += lnodeSize + len(item.key) + len(item.value) - } - if totalSize < pageSize { - return []leafItems{l.items} - } - - // Otherwise group into smaller pages and target a given fill size. - var size int - var group leafItems - var groups []leafItems - // Set fill threshold to 25%. threshold := pageSize >> 4 + // Otherwise group into smaller pages and target a given fill size. + size := 0 + current := &leaf{} + leafs := make([]*leaf, 0) + for index, item := range l.items { nodeSize := lnodeSize + len(item.key) + len(item.value) - if group == nil || (len(group) >= minKeysPerPage && index < len(l.items)-minKeysPerPage && size+nodeSize > threshold) { + if (len(current.items) >= minKeysPerPage && index < len(l.items)-minKeysPerPage && size+nodeSize > threshold) { size = pageHeaderSize - if group != nil { - groups = append(groups, group) - } - group = make(leafItems, 0) + leafs = append(leafs, current) + current = &leaf{} } size += nodeSize - group = append(group, item) + current.items = append(current.items, item) } - groups = append(groups, group) + leafs = append(leafs, current) - return groups + return leafs } + +type leafItems []leafItem + +type leafItem struct { + key []byte + value []byte +} + +func (s leafItems) Len() int { return len(s) } +func (s leafItems) Swap(i, j int) { s[i], s[j] = s[j], s[i] } +func (s leafItems) Less(i, j int) bool { return bytes.Compare(s[i].key, s[j].key) == -1 } + diff --git a/leaf_test.go b/leaf_test.go index ad23074..3b2235f 100644 --- a/leaf_test.go +++ b/leaf_test.go @@ -59,15 +59,12 @@ func TestLeafWrite(t *testing.T) { // Write it to a page. var buf [4096]byte - allocate := func(size int) (*page, error) { - return (*page)(unsafe.Pointer(&buf[0])), nil - } - pages, err := l.write(4096, allocate) - assert.NoError(t, err) + p := (*page)(unsafe.Pointer(&buf[0])) + l.write(p) // Read the page back in. l2 := &leaf{} - l2.read(pages[0]) + l2.read(p) // Check that the two pages are the same. assert.Equal(t, len(l2.items), 3) @@ -79,22 +76,6 @@ func TestLeafWrite(t *testing.T) { assert.Equal(t, l2.items[2].value, []byte("que")) } -// Ensure that an error that an allocation error during writing is returned. -func TestLeafWriteError(t *testing.T) { - // Create a temp page. - l := &leaf{items: make(leafItems, 0)} - l.put([]byte("susy"), []byte("que")) - - // Write it to a page. - exp := &Error{} - allocate := func(size int) (*page, error) { - return nil, exp - } - pages, err := l.write(4096, allocate) - assert.Nil(t, pages) - assert.Equal(t, err, exp) -} - // Ensure that a temporary page can split into appropriate subgroups. func TestLeafSplit(t *testing.T) { // Create a temp page. @@ -106,11 +87,11 @@ func TestLeafSplit(t *testing.T) { l.put([]byte("00000005"), []byte("0123456701234567")) // Split between 3 & 4. - groups := l.split(100) + leafs := l.split(100) - assert.Equal(t, len(groups), 2) - assert.Equal(t, len(groups[0]), 2) - assert.Equal(t, len(groups[1]), 3) + assert.Equal(t, len(leafs), 2) + assert.Equal(t, len(leafs[0].items), 2) + assert.Equal(t, len(leafs[1].items), 3) } // Ensure that a temporary page with the minimum number of items just returns a single split group. @@ -121,9 +102,9 @@ func TestLeafSplitWithMinKeys(t *testing.T) { l.put([]byte("00000002"), []byte("0123456701234567")) // Split. - groups := l.split(20) - assert.Equal(t, len(groups), 1) - assert.Equal(t, len(groups[0]), 2) + leafs := l.split(20) + assert.Equal(t, len(leafs), 1) + assert.Equal(t, len(leafs[0].items), 2) } // Ensure that a temporary page that has keys that all fit on a page just returns one split group. @@ -137,7 +118,7 @@ func TestLeafSplitFitsInPage(t *testing.T) { l.put([]byte("00000005"), []byte("0123456701234567")) // Split. - groups := l.split(4096) - assert.Equal(t, len(groups), 1) - assert.Equal(t, len(groups[0]), 5) + leafs := l.split(4096) + assert.Equal(t, len(leafs), 1) + assert.Equal(t, len(leafs[0].items), 5) } diff --git a/rwtransaction.go b/rwtransaction.go index 4e576cd..5ec0d6e 100644 --- a/rwtransaction.go +++ b/rwtransaction.go @@ -8,6 +8,7 @@ import ( // Only one read/write transaction can be active for a DB at a time. type RWTransaction struct { Transaction + branches map[pgid]*branch leafs map[pgid]*leaf }