mirror of
https://github.com/etcd-io/bbolt.git
synced 2025-05-31 11:42:30 +00:00
Fix merge-split spill issues.
This commit is contained in:
parent
a96185e8b6
commit
54cad40a78
@ -613,6 +613,7 @@ func (b *Bucket) rebalance() {
|
|||||||
// node creates a node from a page and associates it with a given parent.
|
// node creates a node from a page and associates it with a given parent.
|
||||||
func (b *Bucket) node(pgid pgid, parent *node) *node {
|
func (b *Bucket) node(pgid pgid, parent *node) *node {
|
||||||
_assert(b.nodes != nil, "nodes map expected")
|
_assert(b.nodes != nil, "nodes map expected")
|
||||||
|
|
||||||
// Retrieve node if it's already been created.
|
// Retrieve node if it's already been created.
|
||||||
if n := b.nodes[pgid]; n != nil {
|
if n := b.nodes[pgid]; n != nil {
|
||||||
return n
|
return n
|
||||||
|
@ -580,17 +580,17 @@ func TestBucket_Stats(t *testing.T) {
|
|||||||
stats := b.Stats()
|
stats := b.Stats()
|
||||||
assert.Equal(t, 1, stats.BranchPageN, "BranchPageN")
|
assert.Equal(t, 1, stats.BranchPageN, "BranchPageN")
|
||||||
assert.Equal(t, 0, stats.BranchOverflowN, "BranchOverflowN")
|
assert.Equal(t, 0, stats.BranchOverflowN, "BranchOverflowN")
|
||||||
assert.Equal(t, 6, stats.LeafPageN, "LeafPageN")
|
assert.Equal(t, 7, stats.LeafPageN, "LeafPageN")
|
||||||
assert.Equal(t, 2, stats.LeafOverflowN, "LeafOverflowN")
|
assert.Equal(t, 2, stats.LeafOverflowN, "LeafOverflowN")
|
||||||
assert.Equal(t, 501, stats.KeyN, "KeyN")
|
assert.Equal(t, 501, stats.KeyN, "KeyN")
|
||||||
assert.Equal(t, 2, stats.Depth, "Depth")
|
assert.Equal(t, 2, stats.Depth, "Depth")
|
||||||
|
|
||||||
branchInuse := pageHeaderSize // branch page header
|
branchInuse := pageHeaderSize // branch page header
|
||||||
branchInuse += 6 * branchPageElementSize // branch elements
|
branchInuse += 7 * branchPageElementSize // branch elements
|
||||||
branchInuse += 6 * 3 // branch keys (6 3-byte keys)
|
branchInuse += 7 * 3 // branch keys (6 3-byte keys)
|
||||||
assert.Equal(t, branchInuse, stats.BranchInuse, "BranchInuse")
|
assert.Equal(t, branchInuse, stats.BranchInuse, "BranchInuse")
|
||||||
|
|
||||||
leafInuse := 6 * pageHeaderSize // leaf page header
|
leafInuse := 7 * pageHeaderSize // leaf page header
|
||||||
leafInuse += 501 * leafPageElementSize // leaf elements
|
leafInuse += 501 * leafPageElementSize // leaf elements
|
||||||
leafInuse += 500*3 + len(big_key) // leaf keys
|
leafInuse += 500*3 + len(big_key) // leaf keys
|
||||||
leafInuse += 1*10 + 2*90 + 3*400 + 10000 // leaf values
|
leafInuse += 1*10 + 2*90 + 3*400 + 10000 // leaf values
|
||||||
@ -599,7 +599,7 @@ func TestBucket_Stats(t *testing.T) {
|
|||||||
if os.Getpagesize() == 4096 {
|
if os.Getpagesize() == 4096 {
|
||||||
// Incompatible page size
|
// Incompatible page size
|
||||||
assert.Equal(t, 4096, stats.BranchAlloc, "BranchAlloc")
|
assert.Equal(t, 4096, stats.BranchAlloc, "BranchAlloc")
|
||||||
assert.Equal(t, 32768, stats.LeafAlloc, "LeafAlloc")
|
assert.Equal(t, 36864, stats.LeafAlloc, "LeafAlloc")
|
||||||
}
|
}
|
||||||
|
|
||||||
assert.Equal(t, 1, stats.BucketN, "BucketN")
|
assert.Equal(t, 1, stats.BucketN, "BucketN")
|
||||||
@ -612,6 +612,9 @@ func TestBucket_Stats(t *testing.T) {
|
|||||||
|
|
||||||
// Ensure a bucket with random insertion utilizes fill percentage correctly.
|
// Ensure a bucket with random insertion utilizes fill percentage correctly.
|
||||||
func TestBucket_Stats_RandomFill(t *testing.T) {
|
func TestBucket_Stats_RandomFill(t *testing.T) {
|
||||||
|
if testing.Short() {
|
||||||
|
t.Skip("skipping test in short mode.")
|
||||||
|
}
|
||||||
if os.Getpagesize() != 4096 {
|
if os.Getpagesize() != 4096 {
|
||||||
t.Skip("invalid page size for test")
|
t.Skip("invalid page size for test")
|
||||||
}
|
}
|
||||||
@ -621,6 +624,7 @@ func TestBucket_Stats_RandomFill(t *testing.T) {
|
|||||||
|
|
||||||
// Add a set of values in random order. It will be the same random
|
// Add a set of values in random order. It will be the same random
|
||||||
// order so we can maintain consistency between test runs.
|
// order so we can maintain consistency between test runs.
|
||||||
|
var count int
|
||||||
r := rand.New(rand.NewSource(42))
|
r := rand.New(rand.NewSource(42))
|
||||||
for _, i := range r.Perm(1000) {
|
for _, i := range r.Perm(1000) {
|
||||||
db.Update(func(tx *Tx) error {
|
db.Update(func(tx *Tx) error {
|
||||||
@ -628,11 +632,12 @@ func TestBucket_Stats_RandomFill(t *testing.T) {
|
|||||||
for _, j := range r.Perm(100) {
|
for _, j := range r.Perm(100) {
|
||||||
index := (j * 10000) + i
|
index := (j * 10000) + i
|
||||||
b.Put([]byte(fmt.Sprintf("%d000000000000000", index)), []byte("0000000000"))
|
b.Put([]byte(fmt.Sprintf("%d000000000000000", index)), []byte("0000000000"))
|
||||||
|
count++
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
})
|
})
|
||||||
mustCheck(db)
|
|
||||||
}
|
}
|
||||||
|
mustCheck(db)
|
||||||
|
|
||||||
db.View(func(tx *Tx) error {
|
db.View(func(tx *Tx) error {
|
||||||
s := tx.Bucket([]byte("woojits")).Stats()
|
s := tx.Bucket([]byte("woojits")).Stats()
|
||||||
@ -640,13 +645,13 @@ func TestBucket_Stats_RandomFill(t *testing.T) {
|
|||||||
|
|
||||||
assert.Equal(t, 22, s.BranchPageN, "BranchPageN")
|
assert.Equal(t, 22, s.BranchPageN, "BranchPageN")
|
||||||
assert.Equal(t, 0, s.BranchOverflowN, "BranchOverflowN")
|
assert.Equal(t, 0, s.BranchOverflowN, "BranchOverflowN")
|
||||||
assert.Equal(t, 62963, s.BranchInuse, "BranchInuse")
|
assert.Equal(t, 61708, s.BranchInuse, "BranchInuse")
|
||||||
assert.Equal(t, 90112, s.BranchAlloc, "BranchAlloc")
|
assert.Equal(t, 90112, s.BranchAlloc, "BranchAlloc")
|
||||||
|
|
||||||
assert.Equal(t, 1677, s.LeafPageN, "LeafPageN")
|
assert.Equal(t, 1643, s.LeafPageN, "LeafPageN")
|
||||||
assert.Equal(t, 0, s.LeafOverflowN, "LeafOverflowN")
|
assert.Equal(t, 0, s.LeafOverflowN, "LeafOverflowN")
|
||||||
assert.Equal(t, 4714722, s.LeafInuse, "LeafInuse")
|
assert.Equal(t, 4714178, s.LeafInuse, "LeafInuse")
|
||||||
assert.Equal(t, 6868992, s.LeafAlloc, "LeafAlloc")
|
assert.Equal(t, 6729728, s.LeafAlloc, "LeafAlloc")
|
||||||
return nil
|
return nil
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
34
db_test.go
34
db_test.go
@ -7,6 +7,8 @@ import (
|
|||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
"os"
|
"os"
|
||||||
"regexp"
|
"regexp"
|
||||||
|
"sort"
|
||||||
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
"unsafe"
|
"unsafe"
|
||||||
@ -520,6 +522,38 @@ func mustCheck(db *DB) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// mustContainKeys checks that a bucket contains a given set of keys.
|
||||||
|
func mustContainKeys(b *Bucket, m map[string]string) {
|
||||||
|
found := make(map[string]string)
|
||||||
|
b.ForEach(func(k, _ []byte) error {
|
||||||
|
found[string(k)] = ""
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
|
||||||
|
// Check for keys found in bucket that shouldn't be there.
|
||||||
|
var keys []string
|
||||||
|
for k, _ := range found {
|
||||||
|
if _, ok := m[string(k)]; !ok {
|
||||||
|
keys = append(keys, k)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if len(keys) > 0 {
|
||||||
|
sort.Strings(keys)
|
||||||
|
panic(fmt.Sprintf("keys found(%d): %s", len(keys), strings.Join(keys, ",")))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check for keys not found in bucket that should be there.
|
||||||
|
for k, _ := range m {
|
||||||
|
if _, ok := found[string(k)]; !ok {
|
||||||
|
keys = append(keys, k)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if len(keys) > 0 {
|
||||||
|
sort.Strings(keys)
|
||||||
|
panic(fmt.Sprintf("keys not found(%d): %s", len(keys), strings.Join(keys, ",")))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func trunc(b []byte, length int) []byte {
|
func trunc(b []byte, length int) []byte {
|
||||||
if length < len(b) {
|
if length < len(b) {
|
||||||
return b[:length]
|
return b[:length]
|
||||||
|
@ -70,14 +70,12 @@ func (f *freelist) allocate(n int) pgid {
|
|||||||
// free releases a page and its overflow for a given transaction id.
|
// free releases a page and its overflow for a given transaction id.
|
||||||
// If the page is already free then a panic will occur.
|
// If the page is already free then a panic will occur.
|
||||||
func (f *freelist) free(txid txid, p *page) {
|
func (f *freelist) free(txid txid, p *page) {
|
||||||
warn("free:", txid, p.id, p.overflow)
|
|
||||||
_assert(p.id > 1, "cannot free page 0 or 1: %d", p.id)
|
_assert(p.id > 1, "cannot free page 0 or 1: %d", p.id)
|
||||||
|
|
||||||
// Verify that page is not already free.
|
// Verify that page is not already free.
|
||||||
minid, maxid := p.id, p.id+pgid(p.overflow)
|
minid, maxid := p.id, p.id+pgid(p.overflow)
|
||||||
for _, id := range f.ids {
|
for _, id := range f.ids {
|
||||||
if id >= minid && id <= maxid {
|
if id >= minid && id <= maxid {
|
||||||
warn(" ‡", id, "|", minid, maxid)
|
|
||||||
panic(fmt.Sprintf("page %d already freed in tx", id))
|
panic(fmt.Sprintf("page %d already freed in tx", id))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -92,9 +90,6 @@ func (f *freelist) free(txid txid, p *page) {
|
|||||||
// Free page and all its overflow pages.
|
// Free page and all its overflow pages.
|
||||||
var ids = f.pending[txid]
|
var ids = f.pending[txid]
|
||||||
for i := 0; i < int(p.overflow+1); i++ {
|
for i := 0; i < int(p.overflow+1); i++ {
|
||||||
if p.id+pgid(i) == 55 {
|
|
||||||
warn(" •", txid, p.id+pgid(i))
|
|
||||||
}
|
|
||||||
ids = append(ids, p.id+pgid(i))
|
ids = append(ids, p.id+pgid(i))
|
||||||
}
|
}
|
||||||
f.pending[txid] = ids
|
f.pending[txid] = ids
|
||||||
|
34
node.go
34
node.go
@ -14,7 +14,7 @@ type node struct {
|
|||||||
key []byte
|
key []byte
|
||||||
pgid pgid
|
pgid pgid
|
||||||
parent *node
|
parent *node
|
||||||
children []*node
|
children nodes
|
||||||
inodes inodes
|
inodes inodes
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -231,6 +231,7 @@ func (n *node) split(pageSize int) []*node {
|
|||||||
// If we can fit our extra keys on the next page then merge into it.
|
// If we can fit our extra keys on the next page then merge into it.
|
||||||
if next := n.nextSibling(); next != nil && next.size()+sz1 < threshold {
|
if next := n.nextSibling(); next != nil && next.size()+sz1 < threshold {
|
||||||
next.inodes = append(n.inodes[splitIndex:], next.inodes...)
|
next.inodes = append(n.inodes[splitIndex:], next.inodes...)
|
||||||
|
n.inodes = n.inodes[:splitIndex]
|
||||||
return []*node{n}
|
return []*node{n}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -284,22 +285,29 @@ func (n *node) splitIndex(threshold int) (index, sz int) {
|
|||||||
func (n *node) spill() error {
|
func (n *node) spill() error {
|
||||||
var tx = n.bucket.tx
|
var tx = n.bucket.tx
|
||||||
|
|
||||||
// Spill child nodes first.
|
// Spill child nodes first. Child nodes can materialize sibling nodes in
|
||||||
for _, child := range n.children {
|
// the case of split-merge so we cannot use a range loop. We have to check
|
||||||
if err := child.spill(); err != nil {
|
// the children size on every loop iteration.
|
||||||
|
sort.Sort(n.children)
|
||||||
|
for i := 0; i < len(n.children); i++ {
|
||||||
|
if err := n.children[i].spill(); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add node's page to the freelist if it's not new.
|
// We no longer need the child list because it's only used for spill tracking.
|
||||||
if n.pgid > 0 {
|
n.children = nil
|
||||||
tx.db.freelist.free(tx.id(), tx.page(n.pgid))
|
|
||||||
n.pgid = 0
|
|
||||||
}
|
|
||||||
|
|
||||||
// Spill nodes by deepest first.
|
// Spill nodes by deepest first. The first node returned from split() will
|
||||||
|
// always be "n".
|
||||||
var nodes = n.split(tx.db.pageSize)
|
var nodes = n.split(tx.db.pageSize)
|
||||||
for _, node := range nodes {
|
for _, node := range nodes {
|
||||||
|
// Add node's page to the freelist if it's not new.
|
||||||
|
if node.pgid > 0 {
|
||||||
|
tx.db.freelist.free(tx.id(), tx.page(node.pgid))
|
||||||
|
node.pgid = 0
|
||||||
|
}
|
||||||
|
|
||||||
// Allocate contiguous space for the node.
|
// Allocate contiguous space for the node.
|
||||||
p, err := tx.allocate((node.size() / tx.db.pageSize) + 1)
|
p, err := tx.allocate((node.size() / tx.db.pageSize) + 1)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -565,6 +573,12 @@ func (n *node) dump() {
|
|||||||
}
|
}
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
type nodes []*node
|
||||||
|
|
||||||
|
func (s nodes) Len() int { return len(s) }
|
||||||
|
func (s nodes) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
|
||||||
|
func (s nodes) Less(i, j int) bool { return bytes.Compare(s[i].inodes[0].key, s[j].inodes[0].key) == -1 }
|
||||||
|
|
||||||
// inode represents an internal node inside of a node.
|
// inode represents an internal node inside of a node.
|
||||||
// It can be used to point to elements in a page or point
|
// It can be used to point to elements in a page or point
|
||||||
// to an element which hasn't been added to a page yet.
|
// to an element which hasn't been added to a page yet.
|
||||||
|
Loading…
x
Reference in New Issue
Block a user