Fix merge-split spill issues.

2025-05-31 11:42:30 +00:00 · 2014-06-03 13:21:28 -06:00 · 2014-06-03 13:21:28 -06:00 · 54cad40a78
commit 54cad40a78
parent a96185e8b6
5 changed files with 74 additions and 25 deletions
--- a/bucket.go
+++ b/bucket.go
@ -613,6 +613,7 @@ func (b *Bucket) rebalance() {
 // node creates a node from a page and associates it with a given parent.
 func (b *Bucket) node(pgid pgid, parent *node) *node {
 	_assert(b.nodes != nil, "nodes map expected")
 	// Retrieve node if it's already been created.
 	if n := b.nodes[pgid]; n != nil {
 		return n
--- a/bucket_test.go
+++ b/bucket_test.go
@ -580,17 +580,17 @@ func TestBucket_Stats(t *testing.T) {
 			stats := b.Stats()
 			assert.Equal(t, 1, stats.BranchPageN, "BranchPageN")
 			assert.Equal(t, 0, stats.BranchOverflowN, "BranchOverflowN")
-			assert.Equal(t, 6, stats.LeafPageN, "LeafPageN")
+			assert.Equal(t, 7, stats.LeafPageN, "LeafPageN")
 			assert.Equal(t, 2, stats.LeafOverflowN, "LeafOverflowN")
 			assert.Equal(t, 501, stats.KeyN, "KeyN")
 			assert.Equal(t, 2, stats.Depth, "Depth")
 			branchInuse := pageHeaderSize            // branch page header
-			branchInuse += 6 * branchPageElementSize // branch elements
+			branchInuse += 7 * branchPageElementSize // branch elements
-			branchInuse += 6 * 3                     // branch keys (6 3-byte keys)
+			branchInuse += 7 * 3                     // branch keys (6 3-byte keys)
 			assert.Equal(t, branchInuse, stats.BranchInuse, "BranchInuse")
-			leafInuse := 6 * pageHeaderSize          // leaf page header
+			leafInuse := 7 * pageHeaderSize          // leaf page header
 			leafInuse += 501 * leafPageElementSize   // leaf elements
 			leafInuse += 500*3 + len(big_key)        // leaf keys
 			leafInuse += 1*10 + 2*90 + 3*400 + 10000 // leaf values
@ -599,7 +599,7 @@ func TestBucket_Stats(t *testing.T) {
 			if os.Getpagesize() == 4096 {
 				// Incompatible page size
 				assert.Equal(t, 4096, stats.BranchAlloc, "BranchAlloc")
-				assert.Equal(t, 32768, stats.LeafAlloc, "LeafAlloc")
+				assert.Equal(t, 36864, stats.LeafAlloc, "LeafAlloc")
 			}
 			assert.Equal(t, 1, stats.BucketN, "BucketN")
@ -612,6 +612,9 @@ func TestBucket_Stats(t *testing.T) {
 // Ensure a bucket with random insertion utilizes fill percentage correctly.
 func TestBucket_Stats_RandomFill(t *testing.T) {
 	if testing.Short() {
 		t.Skip("skipping test in short mode.")
 	}
 	if os.Getpagesize() != 4096 {
 		t.Skip("invalid page size for test")
 	}
@ -621,6 +624,7 @@ func TestBucket_Stats_RandomFill(t *testing.T) {
 		// Add a set of values in random order. It will be the same random
 		// order so we can maintain consistency between test runs.
 		var count int
 		r := rand.New(rand.NewSource(42))
 		for _, i := range r.Perm(1000) {
 			db.Update(func(tx *Tx) error {
@ -628,11 +632,12 @@ func TestBucket_Stats_RandomFill(t *testing.T) {
 				for _, j := range r.Perm(100) {
 					index := (j * 10000) + i
 					b.Put([]byte(fmt.Sprintf("%d000000000000000", index)), []byte("0000000000"))
 					count++
 				}
 				return nil
 			})
 			mustCheck(db)
 		}
 		mustCheck(db)
 		db.View(func(tx *Tx) error {
 			s := tx.Bucket([]byte("woojits")).Stats()
@ -640,13 +645,13 @@ func TestBucket_Stats_RandomFill(t *testing.T) {
 			assert.Equal(t, 22, s.BranchPageN, "BranchPageN")
 			assert.Equal(t, 0, s.BranchOverflowN, "BranchOverflowN")
-			assert.Equal(t, 62963, s.BranchInuse, "BranchInuse")
+			assert.Equal(t, 61708, s.BranchInuse, "BranchInuse")
 			assert.Equal(t, 90112, s.BranchAlloc, "BranchAlloc")
-			assert.Equal(t, 1677, s.LeafPageN, "LeafPageN")
+			assert.Equal(t, 1643, s.LeafPageN, "LeafPageN")
 			assert.Equal(t, 0, s.LeafOverflowN, "LeafOverflowN")
-			assert.Equal(t, 4714722, s.LeafInuse, "LeafInuse")
+			assert.Equal(t, 4714178, s.LeafInuse, "LeafInuse")
-			assert.Equal(t, 6868992, s.LeafAlloc, "LeafAlloc")
+			assert.Equal(t, 6729728, s.LeafAlloc, "LeafAlloc")
 			return nil
 		})
 	})
--- a/db_test.go
+++ b/db_test.go
@ -7,6 +7,8 @@ import (
 	"io/ioutil"
 	"os"
 	"regexp"
 	"sort"
 	"strings"
 	"testing"
 	"time"
 	"unsafe"
@ -520,6 +522,38 @@ func mustCheck(db *DB) {
 	}
 }
 // mustContainKeys checks that a bucket contains a given set of keys.
 func mustContainKeys(b *Bucket, m map[string]string) {
 	found := make(map[string]string)
 	b.ForEach(func(k, _ []byte) error {
 		found[string(k)] = ""
 		return nil
 	})
 	// Check for keys found in bucket that shouldn't be there.
 	var keys []string
 	for k, _ := range found {
 		if _, ok := m[string(k)]; !ok {
 			keys = append(keys, k)
 		}
 	}
 	if len(keys) > 0 {
 		sort.Strings(keys)
 		panic(fmt.Sprintf("keys found(%d): %s", len(keys), strings.Join(keys, ",")))
 	}
 	// Check for keys not found in bucket that should be there.
 	for k, _ := range m {
 		if _, ok := found[string(k)]; !ok {
 			keys = append(keys, k)
 		}
 	}
 	if len(keys) > 0 {
 		sort.Strings(keys)
 		panic(fmt.Sprintf("keys not found(%d): %s", len(keys), strings.Join(keys, ",")))
 	}
 }
 func trunc(b []byte, length int) []byte {
 	if length < len(b) {
 		return b[:length]
--- a/freelist.go
+++ b/freelist.go
@ -70,14 +70,12 @@ func (f *freelist) allocate(n int) pgid {
 // free releases a page and its overflow for a given transaction id.
 // If the page is already free then a panic will occur.
 func (f *freelist) free(txid txid, p *page) {
 	warn("free:", txid, p.id, p.overflow)
 	_assert(p.id > 1, "cannot free page 0 or 1: %d", p.id)
 	// Verify that page is not already free.
 	minid, maxid := p.id, p.id+pgid(p.overflow)
 	for _, id := range f.ids {
 		if id >= minid && id <= maxid {
 			warn("  ‡", id, "|", minid, maxid)
 			panic(fmt.Sprintf("page %d already freed in tx", id))
 		}
 	}
@ -92,9 +90,6 @@ func (f *freelist) free(txid txid, p *page) {
 	// Free page and all its overflow pages.
 	var ids = f.pending[txid]
 	for i := 0; i < int(p.overflow+1); i++ {
 		if p.id+pgid(i) == 55 {
 			warn("  •", txid, p.id+pgid(i))
 		}
 		ids = append(ids, p.id+pgid(i))
 	}
 	f.pending[txid] = ids
--- a/node.go
+++ b/node.go
@ -14,7 +14,7 @@ type node struct {
 	key        []byte
 	pgid       pgid
 	parent     *node
-	children   []*node
+	children   nodes
 	inodes     inodes
 }
@ -231,6 +231,7 @@ func (n *node) split(pageSize int) []*node {
 	// If we can fit our extra keys on the next page then merge into it.
 	if next := n.nextSibling(); next != nil && next.size()+sz1 < threshold {
 		next.inodes = append(n.inodes[splitIndex:], next.inodes...)
 		n.inodes = n.inodes[:splitIndex]
 		return []*node{n}
 	}
@ -284,22 +285,29 @@ func (n *node) splitIndex(threshold int) (index, sz int) {
 func (n *node) spill() error {
 	var tx = n.bucket.tx
-	// Spill child nodes first.
+	// Spill child nodes first. Child nodes can materialize sibling nodes in
-	for _, child := range n.children {
+	// the case of split-merge so we cannot use a range loop. We have to check
-		if err := child.spill(); err != nil {
+	// the children size on every loop iteration.
 	sort.Sort(n.children)
 	for i := 0; i < len(n.children); i++ {
 		if err := n.children[i].spill(); err != nil {
 			return err
 		}
 	}
-	// Add node's page to the freelist if it's not new.
+	// We no longer need the child list because it's only used for spill tracking.
-	if n.pgid > 0 {
+	n.children = nil
 		tx.db.freelist.free(tx.id(), tx.page(n.pgid))
 		n.pgid = 0
 	}
-	// Spill nodes by deepest first.
+	// Spill nodes by deepest first. The first node returned from split() will
 	// always be "n".
 	var nodes = n.split(tx.db.pageSize)
 	for _, node := range nodes {
 		// Add node's page to the freelist if it's not new.
 		if node.pgid > 0 {
 			tx.db.freelist.free(tx.id(), tx.page(node.pgid))
 			node.pgid = 0
 		}
 		// Allocate contiguous space for the node.
 		p, err := tx.allocate((node.size() / tx.db.pageSize) + 1)
 		if err != nil {
@ -565,6 +573,12 @@ func (n *node) dump() {
 }
 */
 type nodes []*node
 func (s nodes) Len() int           { return len(s) }
 func (s nodes) Swap(i, j int)      { s[i], s[j] = s[j], s[i] }
 func (s nodes) Less(i, j int) bool { return bytes.Compare(s[i].inodes[0].key, s[j].inodes[0].key) == -1 }
 // inode represents an internal node inside of a node.
 // It can be used to point to elements in a page or point
 // to an element which hasn't been added to a page yet.