mirror of https://github.com/etcd-io/bbolt.git
Refactor leaf.write() / leaf.split().
parent
c94f1fcb11
commit
4fb62e8980
|
@ -0,0 +1,60 @@
|
|||
package bolt
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"sort"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// branch represents a temporary in-memory branch page.
|
||||
type branch struct {
|
||||
parent *branch
|
||||
items branchItems
|
||||
}
|
||||
|
||||
// insert inserts a new item after a given pgid.
|
||||
func (b *branch) insert(key []byte, previd pgid, id pgid) {
|
||||
// Find previous insertion index.
|
||||
index := sort.Search(len(b.items), func(i int) bool { return b.items[i].pgid >= previd })
|
||||
|
||||
// If there is no existing key then add a new item.
|
||||
b.items = append(b.items, branchItem{})
|
||||
if index < len(b.items) {
|
||||
copy(b.items[index+1:], b.items[index:])
|
||||
}
|
||||
|
||||
b.items[index].pgid = id
|
||||
b.items[index].key = key
|
||||
}
|
||||
|
||||
// replace swaps out an existing node id for a new one id.
|
||||
func (b *branch) replace(oldid pgid, newid pgid, key []byte) {
|
||||
index := sort.Search(len(b.items), func(i int) bool { return b.items[i].pgid >= oldid })
|
||||
b.items[index].pgid = newid
|
||||
b.items[index].key = key
|
||||
}
|
||||
|
||||
// read initializes the item data from an on-disk page.
|
||||
func (b *branch) read(page *page) {
|
||||
ncount := int(page.count)
|
||||
b.items = make(branchItems, ncount)
|
||||
bnodes := (*[maxNodesPerPage]bnode)(unsafe.Pointer(&page.ptr))
|
||||
for i := 0; i < ncount; i++ {
|
||||
bnode := &bnodes[i]
|
||||
item := &b.items[i]
|
||||
item.key = bnode.key()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
type branchItems []branchItem
|
||||
|
||||
type branchItem struct {
|
||||
pgid pgid
|
||||
key []byte
|
||||
}
|
||||
|
||||
func (s branchItems) Len() int { return len(s) }
|
||||
func (s branchItems) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
|
||||
func (s branchItems) Less(i, j int) bool { return bytes.Compare(s[i].key, s[j].key) == -1 }
|
||||
|
131
leaf.go
131
leaf.go
|
@ -9,16 +9,10 @@ import (
|
|||
// leaf represents a temporary in-memory leaf page.
|
||||
// It is deserialized from an memory-mapped page and is not restricted by page size.
|
||||
type leaf struct {
|
||||
parent *branch
|
||||
items leafItems
|
||||
}
|
||||
|
||||
type leafItems []leafItem
|
||||
|
||||
type leafItem struct {
|
||||
key []byte
|
||||
value []byte
|
||||
}
|
||||
|
||||
// put inserts or replaces a key on a leaf page.
|
||||
func (l *leaf) put(key []byte, value []byte) {
|
||||
// Find insertion index.
|
||||
|
@ -35,11 +29,20 @@ func (l *leaf) put(key []byte, value []byte) {
|
|||
l.items[index].value = value
|
||||
}
|
||||
|
||||
// size returns the size of the leaf after serialization.
|
||||
func (l *leaf) size() int {
|
||||
var size int = pageHeaderSize
|
||||
for _, item := range l.items {
|
||||
size += lnodeSize + len(item.key) + len(item.value)
|
||||
}
|
||||
return size
|
||||
}
|
||||
|
||||
// read initializes the item data from an on-disk page.
|
||||
func (l *leaf) read(page *page) {
|
||||
ncount := int(page.count)
|
||||
func (l *leaf) read(p *page) {
|
||||
ncount := int(p.count)
|
||||
l.items = make(leafItems, ncount)
|
||||
lnodes := (*[maxNodesPerPage]lnode)(unsafe.Pointer(&page.ptr))
|
||||
lnodes := (*[maxNodesPerPage]lnode)(unsafe.Pointer(&p.ptr))
|
||||
for i := 0; i < ncount; i++ {
|
||||
lnode := &lnodes[i]
|
||||
item := &l.items[i]
|
||||
|
@ -49,86 +52,70 @@ func (l *leaf) read(page *page) {
|
|||
}
|
||||
|
||||
// write writes the items onto one or more leaf pages.
|
||||
func (l *leaf) write(pageSize int, allocate func(size int) (*page, error)) ([]*page, error) {
|
||||
var pages []*page
|
||||
func (l *leaf) write(p *page) {
|
||||
// Initialize page.
|
||||
p.flags |= p_leaf
|
||||
p.count = uint16(len(l.items))
|
||||
|
||||
for _, items := range l.split(pageSize) {
|
||||
// Determine the total page size.
|
||||
var size int = pageHeaderSize
|
||||
for _, item := range l.items {
|
||||
size += lnodeSize + len(item.key) + len(item.value)
|
||||
}
|
||||
// Loop over each item and write it to the page.
|
||||
lnodes := (*[maxNodesPerPage]lnode)(unsafe.Pointer(&p.ptr))
|
||||
b := (*[maxAllocSize]byte)(unsafe.Pointer(&p.ptr))[lnodeSize*len(l.items):]
|
||||
for index, item := range l.items {
|
||||
// Write node.
|
||||
lnode := &lnodes[index]
|
||||
lnode.pos = uint32(uintptr(unsafe.Pointer(&b[0])) - uintptr(unsafe.Pointer(lnode)))
|
||||
lnode.ksize = uint32(len(item.key))
|
||||
lnode.vsize = uint32(len(item.value))
|
||||
|
||||
// Allocate pages.
|
||||
page, err := allocate(size)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
page.flags |= p_leaf
|
||||
page.count = uint16(len(items))
|
||||
|
||||
// Loop over each item and write it to the page.
|
||||
lnodes := (*[maxNodesPerPage]lnode)(unsafe.Pointer(&page.ptr))
|
||||
b := (*[maxAllocSize]byte)(unsafe.Pointer(&page.ptr))[lnodeSize*len(items):]
|
||||
for index, item := range items {
|
||||
// Write item.
|
||||
lnode := &lnodes[index]
|
||||
lnode.pos = uint32(uintptr(unsafe.Pointer(&b[0])) - uintptr(unsafe.Pointer(lnode)))
|
||||
lnode.ksize = uint32(len(item.key))
|
||||
lnode.vsize = uint32(len(item.value))
|
||||
|
||||
// Write data to the end of the page.
|
||||
copy(b[0:], item.key)
|
||||
b = b[len(item.key):]
|
||||
copy(b[0:], item.value)
|
||||
b = b[len(item.value):]
|
||||
}
|
||||
|
||||
pages = append(pages, page)
|
||||
// Write data to the end of the page.
|
||||
copy(b[0:], item.key)
|
||||
b = b[len(item.key):]
|
||||
copy(b[0:], item.value)
|
||||
b = b[len(item.value):]
|
||||
}
|
||||
|
||||
return pages, nil
|
||||
}
|
||||
|
||||
// split divides up the noes in the page into appropriately sized groups.
|
||||
func (l *leaf) split(pageSize int) []leafItems {
|
||||
// If we don't have enough items for multiple pages then just return the items.
|
||||
if len(l.items) <= (minKeysPerPage * 2) {
|
||||
return []leafItems{l.items}
|
||||
func (l *leaf) split(pageSize int) []*leaf {
|
||||
// Ignore the split if the page doesn't have at least enough nodes for
|
||||
// multiple pages or if the data can fit on a single page.
|
||||
if len(l.items) <= (minKeysPerPage * 2) || l.size() < pageSize {
|
||||
return []*leaf{l}
|
||||
}
|
||||
|
||||
// If we're not larger than one page then just return the items.
|
||||
var totalSize int = pageHeaderSize
|
||||
for _, item := range l.items {
|
||||
totalSize += lnodeSize + len(item.key) + len(item.value)
|
||||
}
|
||||
if totalSize < pageSize {
|
||||
return []leafItems{l.items}
|
||||
}
|
||||
|
||||
// Otherwise group into smaller pages and target a given fill size.
|
||||
var size int
|
||||
var group leafItems
|
||||
var groups []leafItems
|
||||
|
||||
// Set fill threshold to 25%.
|
||||
threshold := pageSize >> 4
|
||||
|
||||
// Otherwise group into smaller pages and target a given fill size.
|
||||
size := 0
|
||||
current := &leaf{}
|
||||
leafs := make([]*leaf, 0)
|
||||
|
||||
for index, item := range l.items {
|
||||
nodeSize := lnodeSize + len(item.key) + len(item.value)
|
||||
|
||||
if group == nil || (len(group) >= minKeysPerPage && index < len(l.items)-minKeysPerPage && size+nodeSize > threshold) {
|
||||
if (len(current.items) >= minKeysPerPage && index < len(l.items)-minKeysPerPage && size+nodeSize > threshold) {
|
||||
size = pageHeaderSize
|
||||
if group != nil {
|
||||
groups = append(groups, group)
|
||||
}
|
||||
group = make(leafItems, 0)
|
||||
leafs = append(leafs, current)
|
||||
current = &leaf{}
|
||||
}
|
||||
|
||||
size += nodeSize
|
||||
group = append(group, item)
|
||||
current.items = append(current.items, item)
|
||||
}
|
||||
groups = append(groups, group)
|
||||
leafs = append(leafs, current)
|
||||
|
||||
return groups
|
||||
return leafs
|
||||
}
|
||||
|
||||
type leafItems []leafItem
|
||||
|
||||
type leafItem struct {
|
||||
key []byte
|
||||
value []byte
|
||||
}
|
||||
|
||||
func (s leafItems) Len() int { return len(s) }
|
||||
func (s leafItems) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
|
||||
func (s leafItems) Less(i, j int) bool { return bytes.Compare(s[i].key, s[j].key) == -1 }
|
||||
|
||||
|
|
45
leaf_test.go
45
leaf_test.go
|
@ -59,15 +59,12 @@ func TestLeafWrite(t *testing.T) {
|
|||
|
||||
// Write it to a page.
|
||||
var buf [4096]byte
|
||||
allocate := func(size int) (*page, error) {
|
||||
return (*page)(unsafe.Pointer(&buf[0])), nil
|
||||
}
|
||||
pages, err := l.write(4096, allocate)
|
||||
assert.NoError(t, err)
|
||||
p := (*page)(unsafe.Pointer(&buf[0]))
|
||||
l.write(p)
|
||||
|
||||
// Read the page back in.
|
||||
l2 := &leaf{}
|
||||
l2.read(pages[0])
|
||||
l2.read(p)
|
||||
|
||||
// Check that the two pages are the same.
|
||||
assert.Equal(t, len(l2.items), 3)
|
||||
|
@ -79,22 +76,6 @@ func TestLeafWrite(t *testing.T) {
|
|||
assert.Equal(t, l2.items[2].value, []byte("que"))
|
||||
}
|
||||
|
||||
// Ensure that an error that an allocation error during writing is returned.
|
||||
func TestLeafWriteError(t *testing.T) {
|
||||
// Create a temp page.
|
||||
l := &leaf{items: make(leafItems, 0)}
|
||||
l.put([]byte("susy"), []byte("que"))
|
||||
|
||||
// Write it to a page.
|
||||
exp := &Error{}
|
||||
allocate := func(size int) (*page, error) {
|
||||
return nil, exp
|
||||
}
|
||||
pages, err := l.write(4096, allocate)
|
||||
assert.Nil(t, pages)
|
||||
assert.Equal(t, err, exp)
|
||||
}
|
||||
|
||||
// Ensure that a temporary page can split into appropriate subgroups.
|
||||
func TestLeafSplit(t *testing.T) {
|
||||
// Create a temp page.
|
||||
|
@ -106,11 +87,11 @@ func TestLeafSplit(t *testing.T) {
|
|||
l.put([]byte("00000005"), []byte("0123456701234567"))
|
||||
|
||||
// Split between 3 & 4.
|
||||
groups := l.split(100)
|
||||
leafs := l.split(100)
|
||||
|
||||
assert.Equal(t, len(groups), 2)
|
||||
assert.Equal(t, len(groups[0]), 2)
|
||||
assert.Equal(t, len(groups[1]), 3)
|
||||
assert.Equal(t, len(leafs), 2)
|
||||
assert.Equal(t, len(leafs[0].items), 2)
|
||||
assert.Equal(t, len(leafs[1].items), 3)
|
||||
}
|
||||
|
||||
// Ensure that a temporary page with the minimum number of items just returns a single split group.
|
||||
|
@ -121,9 +102,9 @@ func TestLeafSplitWithMinKeys(t *testing.T) {
|
|||
l.put([]byte("00000002"), []byte("0123456701234567"))
|
||||
|
||||
// Split.
|
||||
groups := l.split(20)
|
||||
assert.Equal(t, len(groups), 1)
|
||||
assert.Equal(t, len(groups[0]), 2)
|
||||
leafs := l.split(20)
|
||||
assert.Equal(t, len(leafs), 1)
|
||||
assert.Equal(t, len(leafs[0].items), 2)
|
||||
}
|
||||
|
||||
// Ensure that a temporary page that has keys that all fit on a page just returns one split group.
|
||||
|
@ -137,7 +118,7 @@ func TestLeafSplitFitsInPage(t *testing.T) {
|
|||
l.put([]byte("00000005"), []byte("0123456701234567"))
|
||||
|
||||
// Split.
|
||||
groups := l.split(4096)
|
||||
assert.Equal(t, len(groups), 1)
|
||||
assert.Equal(t, len(groups[0]), 5)
|
||||
leafs := l.split(4096)
|
||||
assert.Equal(t, len(leafs), 1)
|
||||
assert.Equal(t, len(leafs[0].items), 5)
|
||||
}
|
||||
|
|
|
@ -8,6 +8,7 @@ import (
|
|||
// Only one read/write transaction can be active for a DB at a time.
|
||||
type RWTransaction struct {
|
||||
Transaction
|
||||
branches map[pgid]*branch
|
||||
leafs map[pgid]*leaf
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue