Merge pull request #162 from Shopify/nested_stats2

Recursive/aggregate bucket stats
pull/34/head
Ben Johnson 2014-05-14 12:20:46 -06:00
commit a6d6d964b6
5 changed files with 395 additions and 48 deletions

105
bucket.go
View File

@ -129,14 +129,22 @@ func (b *Bucket) Bucket(name []byte) *Bucket {
}
// Otherwise create a bucket and cache it.
var child = b.openBucket(v)
b.buckets[string(name)] = child
return child
}
// Helper method that re-interprets a sub-bucket value
// from a parent into a Bucket
func (b *Bucket) openBucket(value []byte) *Bucket {
var child = newBucket(b.tx)
child.bucket = &bucket{}
*child.bucket = *(*bucket)(unsafe.Pointer(&v[0]))
b.buckets[string(name)] = &child
*child.bucket = *(*bucket)(unsafe.Pointer(&value[0]))
// Save a reference to the inline page if the bucket is inline.
if child.root == 0 {
child.page = (*page)(unsafe.Pointer(&v[bucketHeaderSize]))
child.page = (*page)(unsafe.Pointer(&value[bucketHeaderSize]))
}
return &child
@ -354,32 +362,83 @@ func (b *Bucket) ForEach(fn func(k, v []byte) error) error {
// Stat returns stats on a bucket.
func (b *Bucket) Stats() BucketStats {
var s BucketStats
var s, subStats BucketStats
pageSize := b.tx.db.pageSize
s.BucketN += 1
if b.root == 0 {
s.InlineBucketN += 1
}
b.forEachPage(func(p *page, depth int) {
if (p.flags & leafPageFlag) != 0 {
s.LeafPageN++
s.KeyN += int(p.count)
lastElement := p.leafPageElement(p.count - 1)
used := pageHeaderSize + (leafPageElementSize * int(p.count-1))
used += int(lastElement.pos + lastElement.ksize + lastElement.vsize)
s.LeafInuse += used
s.LeafOverflowN += int(p.overflow)
// used totals the used bytes for the page
used := pageHeaderSize
if p.count != 0 {
// If page has any elements, add all element headers.
used += leafPageElementSize * int(p.count-1)
// Add all element key, value sizes.
// The computation takes advantage of the fact that the position
// of the last element's key/value equals to the total of the sizes
// of all previous elements' keys and values.
// It also includes the last element's header.
lastElement := p.leafPageElement(p.count - 1)
used += int(lastElement.pos + lastElement.ksize + lastElement.vsize)
}
if b.root == 0 {
// For inlined bucket just update the inline stats
s.InlineBucketInuse += used
} else {
// For non-inlined bucket update all the leaf stats
s.LeafPageN++
s.LeafInuse += used
s.LeafOverflowN += int(p.overflow)
// Collect stats from sub-buckets.
// Do that by iterating over all element headers
// looking for the ones with the bucketLeafFlag.
for i := uint16(0); i < p.count; i++ {
e := p.leafPageElement(i)
if (e.flags & bucketLeafFlag) != 0 {
// For any bucket element, open the element value
// and recursively call Stats on the contained bucket.
subStats.Add(b.openBucket(e.value()).Stats())
}
}
}
} else if (p.flags & branchPageFlag) != 0 {
s.BranchPageN++
lastElement := p.branchPageElement(p.count - 1)
// used totals the used bytes for the page
// Add header and all element headers.
used := pageHeaderSize + (branchPageElementSize * int(p.count-1))
// Add size of all keys and values.
// Again, use the fact that last element's position equals to
// the total of key, value sizes of all previous elements.
used += int(lastElement.pos + lastElement.ksize)
s.BranchInuse += used
s.BranchOverflowN += int(p.overflow)
}
// Keep track of maximum page depth.
if depth+1 > s.Depth {
s.Depth = (depth + 1)
}
})
// Alloc stats can be computed from page counts and pageSize.
s.BranchAlloc = (s.BranchPageN + s.BranchOverflowN) * pageSize
s.LeafAlloc = (s.LeafPageN + s.LeafOverflowN) * pageSize
// Add the max depth of sub-buckets to get total nested depth.
s.Depth += subStats.Depth
// Add the stats for all sub-buckets
s.Add(subStats)
return s
}
@ -517,7 +576,7 @@ func (b *Bucket) maxInlineBucketSize() int {
func (b *Bucket) write() []byte {
// Allocate the appropriate size.
var n = b.rootNode
var value = make([]byte, bucketHeaderSize+pageHeaderSize+n.size())
var value = make([]byte, bucketHeaderSize+n.size())
// Write a bucket header.
var bucket = (*bucket)(unsafe.Pointer(&value[0]))
@ -641,6 +700,30 @@ type BucketStats struct {
BranchInuse int // bytes actually used for branch data
LeafAlloc int // bytes allocated for physical leaf pages
LeafInuse int // bytes actually used for leaf data
// Bucket statistics
BucketN int // total number of buckets including the top bucket
InlineBucketN int // total number on inlined buckets
InlineBucketInuse int // bytes used for inlined buckets (also accounted for in LeafInuse)
}
// Add accumulates the counters from other into s, field by field.
// All page, byte, and bucket counters are summed. Depth is the one
// exception: the deeper of the two values wins rather than the sum,
// since the depths of sibling buckets do not stack.
// NOTE(review): Stats() appears to fold sub-bucket depth into s.Depth
// separately before calling Add — confirm against the caller.
func (s *BucketStats) Add(other BucketStats) {
	s.BranchPageN += other.BranchPageN
	s.BranchOverflowN += other.BranchOverflowN
	s.LeafPageN += other.LeafPageN
	s.LeafOverflowN += other.LeafOverflowN
	s.KeyN += other.KeyN
	// Keep the maximum depth instead of summing (see note above the
	// function signature).
	if s.Depth < other.Depth {
		s.Depth = other.Depth
	}
	s.BranchAlloc += other.BranchAlloc
	s.BranchInuse += other.BranchInuse
	s.LeafAlloc += other.LeafAlloc
	s.LeafInuse += other.LeafInuse
	s.BucketN += other.BucketN
	s.InlineBucketN += other.InlineBucketN
	s.InlineBucketInuse += other.InlineBucketInuse
}
// cloneBytes returns a copy of a given slice.

View File

@ -560,15 +560,15 @@ func TestBucket_Put_KeyTooLarge(t *testing.T) {
// Ensure a bucket can calculate stats.
func TestBucket_Stats(t *testing.T) {
withOpenDB(func(db *DB, path string) {
big_key := []byte("really-big-value")
db.Update(func(tx *Tx) error {
// Add bucket with fewer keys but one big value.
_, err := tx.CreateBucket([]byte("woojits"))
b, err := tx.CreateBucket([]byte("woojits"))
assert.NoError(t, err)
b := tx.Bucket([]byte("woojits"))
for i := 0; i < 500; i++ {
b.Put([]byte(strconv.Itoa(i)), []byte(strconv.Itoa(i)))
b.Put([]byte(fmt.Sprintf("%03d", i)), []byte(strconv.Itoa(i)))
}
b.Put([]byte("really-big-value"), []byte(strings.Repeat("*", 10000)))
b.Put(big_key, []byte(strings.Repeat("*", 10000)))
return nil
})
@ -576,19 +576,33 @@ func TestBucket_Stats(t *testing.T) {
db.View(func(tx *Tx) error {
b := tx.Bucket([]byte("woojits"))
stats := b.Stats()
assert.Equal(t, stats.BranchPageN, 1)
assert.Equal(t, stats.BranchOverflowN, 0)
assert.Equal(t, stats.LeafPageN, 6)
assert.Equal(t, stats.LeafOverflowN, 2)
assert.Equal(t, stats.KeyN, 501)
assert.Equal(t, stats.Depth, 2)
if os.Getpagesize() != 4096 {
assert.Equal(t, 1, stats.BranchPageN, "BranchPageN")
assert.Equal(t, 0, stats.BranchOverflowN, "BranchOverflowN")
assert.Equal(t, 6, stats.LeafPageN, "LeafPageN")
assert.Equal(t, 2, stats.LeafOverflowN, "LeafOverflowN")
assert.Equal(t, 501, stats.KeyN, "KeyN")
assert.Equal(t, 2, stats.Depth, "Depth")
branchInuse := pageHeaderSize // branch page header
branchInuse += 6 * branchPageElementSize // branch elements
branchInuse += 6 * 3 // branch keys (6 3-byte keys)
assert.Equal(t, branchInuse, stats.BranchInuse, "BranchInuse")
leafInuse := 6 * pageHeaderSize // leaf page header
leafInuse += 501 * leafPageElementSize // leaf elements
leafInuse += 500*3 + len(big_key) // leaf keys
leafInuse += 1*10 + 2*90 + 3*400 + 10000 // leaf values
assert.Equal(t, leafInuse, stats.LeafInuse, "LeafInuse")
if os.Getpagesize() == 4096 {
// Incompatible page size
assert.Equal(t, stats.BranchInuse, 125)
assert.Equal(t, stats.BranchAlloc, 4096)
assert.Equal(t, stats.LeafInuse, 20908)
assert.Equal(t, stats.LeafAlloc, 32768)
assert.Equal(t, 4096, stats.BranchAlloc, "BranchAlloc")
assert.Equal(t, 32768, stats.LeafAlloc, "LeafAlloc")
}
assert.Equal(t, 1, stats.BucketN, "BucketN")
assert.Equal(t, 0, stats.InlineBucketN, "InlineBucketN")
assert.Equal(t, 0, stats.InlineBucketInuse, "InlineBucketInuse")
return nil
})
})
@ -610,19 +624,119 @@ func TestBucket_Stats_Small(t *testing.T) {
db.View(func(tx *Tx) error {
b := tx.Bucket([]byte("whozawhats"))
stats := b.Stats()
assert.Equal(t, 0, stats.BranchPageN)
assert.Equal(t, 0, stats.BranchOverflowN)
assert.Equal(t, 1, stats.LeafPageN)
assert.Equal(t, 0, stats.LeafOverflowN)
assert.Equal(t, 1, stats.KeyN)
assert.Equal(t, 1, stats.Depth)
if os.Getpagesize() != 4096 {
assert.Equal(t, 0, stats.BranchPageN, "BranchPageN")
assert.Equal(t, 0, stats.BranchOverflowN, "BranchOverflowN")
assert.Equal(t, 0, stats.LeafPageN, "LeafPageN")
assert.Equal(t, 0, stats.LeafOverflowN, "LeafOverflowN")
assert.Equal(t, 1, stats.KeyN, "KeyN")
assert.Equal(t, 1, stats.Depth, "Depth")
assert.Equal(t, 0, stats.BranchInuse, "BranchInuse")
assert.Equal(t, 0, stats.LeafInuse, "LeafInuse")
if os.Getpagesize() == 4096 {
// Incompatible page size
assert.Equal(t, 0, stats.BranchInuse)
assert.Equal(t, 0, stats.BranchAlloc)
assert.Equal(t, 38, stats.LeafInuse)
assert.Equal(t, 4096, stats.LeafAlloc)
assert.Equal(t, 0, stats.BranchAlloc, "BranchAlloc")
assert.Equal(t, 0, stats.LeafAlloc, "LeafAlloc")
}
assert.Equal(t, 1, stats.BucketN, "BucketN")
assert.Equal(t, 1, stats.InlineBucketN, "InlineBucketN")
assert.Equal(t, pageHeaderSize+leafPageElementSize+6, stats.InlineBucketInuse, "InlineBucketInuse")
return nil
})
})
}
// Ensure a bucket that was created but never written to reports
// correct stats: it stays inline, so every page/key counter is zero
// and only Depth, BucketN, InlineBucketN and InlineBucketInuse are
// non-zero.
func TestBucket_Stats_EmptyBucket(t *testing.T) {
	withOpenDB(func(db *DB, path string) {
		db.Update(func(tx *Tx) error {
			// Add a bucket that fits on a single root leaf.
			_, err := tx.CreateBucket([]byte("whozawhats"))
			assert.NoError(t, err)
			return nil
		})
		mustCheck(db)
		db.View(func(tx *Tx) error {
			b := tx.Bucket([]byte("whozawhats"))
			stats := b.Stats()
			// An empty inline bucket occupies no pages of its own.
			assert.Equal(t, 0, stats.BranchPageN, "BranchPageN")
			assert.Equal(t, 0, stats.BranchOverflowN, "BranchOverflowN")
			assert.Equal(t, 0, stats.LeafPageN, "LeafPageN")
			assert.Equal(t, 0, stats.LeafOverflowN, "LeafOverflowN")
			assert.Equal(t, 0, stats.KeyN, "KeyN")
			assert.Equal(t, 1, stats.Depth, "Depth")
			assert.Equal(t, 0, stats.BranchInuse, "BranchInuse")
			assert.Equal(t, 0, stats.LeafInuse, "LeafInuse")
			// Alloc figures depend on the OS page size; only assert
			// them on the common 4096-byte pages.
			if os.Getpagesize() == 4096 {
				assert.Equal(t, 0, stats.BranchAlloc, "BranchAlloc")
				assert.Equal(t, 0, stats.LeafAlloc, "LeafAlloc")
			}
			assert.Equal(t, 1, stats.BucketN, "BucketN")
			assert.Equal(t, 1, stats.InlineBucketN, "InlineBucketN")
			// With zero elements, the inline footprint is just the
			// page header.
			assert.Equal(t, pageHeaderSize, stats.InlineBucketInuse, "InlineBucketInuse")
			return nil
		})
	})
}
// Ensure a bucket can calculate stats.
func TestBucket_Stats_Nested(t *testing.T) {
withOpenDB(func(db *DB, path string) {
db.Update(func(tx *Tx) error {
b, err := tx.CreateBucket([]byte("foo"))
assert.NoError(t, err)
for i := 0; i < 100; i++ {
b.Put([]byte(fmt.Sprintf("%02d", i)), []byte(fmt.Sprintf("%02d", i)))
}
bar, err := b.CreateBucket([]byte("bar"))
assert.NoError(t, err)
for i := 0; i < 10; i++ {
bar.Put([]byte(strconv.Itoa(i)), []byte(strconv.Itoa(i)))
}
baz, err := bar.CreateBucket([]byte("baz"))
assert.NoError(t, err)
for i := 0; i < 10; i++ {
baz.Put([]byte(strconv.Itoa(i)), []byte(strconv.Itoa(i)))
}
return nil
})
mustCheck(db)
db.View(func(tx *Tx) error {
b := tx.Bucket([]byte("foo"))
stats := b.Stats()
assert.Equal(t, 0, stats.BranchPageN, "BranchPageN")
assert.Equal(t, 0, stats.BranchOverflowN, "BranchOverflowN")
assert.Equal(t, 2, stats.LeafPageN, "LeafPageN")
assert.Equal(t, 0, stats.LeafOverflowN, "LeafOverflowN")
assert.Equal(t, 122, stats.KeyN, "KeyN")
assert.Equal(t, 3, stats.Depth, "Depth")
assert.Equal(t, 0, stats.BranchInuse, "BranchInuse")
foo := pageHeaderSize // foo
foo += 101 * leafPageElementSize // foo leaf elements
foo += 100*2 + 100*2 // foo leaf key/values
foo += 3 + bucketHeaderSize // foo -> bar key/value
bar := pageHeaderSize // bar
bar += 11 * leafPageElementSize // bar leaf elements
bar += 10 + 10 // bar leaf key/values
bar += 3 + bucketHeaderSize // bar -> baz key/value
baz := pageHeaderSize // baz (inline)
baz += 10 * leafPageElementSize // baz leaf elements
baz += 10 + 10 // baz leaf key/values
assert.Equal(t, foo+bar+baz, stats.LeafInuse, "LeafInuse")
if os.Getpagesize() == 4096 {
// Incompatible page size
assert.Equal(t, 0, stats.BranchAlloc, "BranchAlloc")
assert.Equal(t, 8192, stats.LeafAlloc, "LeafAlloc")
}
assert.Equal(t, 3, stats.BucketN, "BucketN")
assert.Equal(t, 1, stats.InlineBucketN, "InlineBucketN")
assert.Equal(t, baz, stats.InlineBucketInuse, "InlineBucketInuse")
return nil
})
})
@ -652,19 +766,22 @@ func TestBucket_Stats_Large(t *testing.T) {
db.View(func(tx *Tx) error {
b := tx.Bucket([]byte("widgets"))
stats := b.Stats()
assert.Equal(t, 19, stats.BranchPageN)
assert.Equal(t, 0, stats.BranchOverflowN)
assert.Equal(t, 1291, stats.LeafPageN)
assert.Equal(t, 0, stats.LeafOverflowN)
assert.Equal(t, 100000, stats.KeyN)
assert.Equal(t, 3, stats.Depth)
if os.Getpagesize() != 4096 {
assert.Equal(t, 19, stats.BranchPageN, "BranchPageN")
assert.Equal(t, 0, stats.BranchOverflowN, "BranchOverflowN")
assert.Equal(t, 1291, stats.LeafPageN, "LeafPageN")
assert.Equal(t, 0, stats.LeafOverflowN, "LeafOverflowN")
assert.Equal(t, 100000, stats.KeyN, "KeyN")
assert.Equal(t, 3, stats.Depth, "Depth")
assert.Equal(t, 27007, stats.BranchInuse, "BranchInuse")
assert.Equal(t, 2598436, stats.LeafInuse, "LeafInuse")
if os.Getpagesize() == 4096 {
// Incompatible page size
assert.Equal(t, 27289, stats.BranchInuse)
assert.Equal(t, 61440, stats.BranchAlloc)
assert.Equal(t, 2598276, stats.LeafInuse)
assert.Equal(t, 5246976, stats.LeafAlloc)
assert.Equal(t, 77824, stats.BranchAlloc, "BranchAlloc")
assert.Equal(t, 5287936, stats.LeafAlloc, "LeafAlloc")
}
assert.Equal(t, 1, stats.BucketN, "BucketN")
assert.Equal(t, 0, stats.InlineBucketN, "InlineBucketN")
assert.Equal(t, 0, stats.InlineBucketInuse, "InlineBucketInuse")
return nil
})
})

View File

@ -91,6 +91,14 @@ func NewApp() *cli.App {
Check(path)
},
},
{
Name: "stats",
Usage: "Aggregate statistics for all buckets matching specified prefix",
Action: func(c *cli.Context) {
path, name := c.Args().Get(0), c.Args().Get(1)
Stats(path, name)
},
},
{
Name: "bench",
Usage: "Performs a synthetic benchmark",

77
cmd/bolt/stats.go Normal file
View File

@ -0,0 +1,77 @@
package main
import (
"bytes"
"os"
"github.com/boltdb/bolt"
)
// Collect stats for all top level buckets matching the prefix.
func Stats(path, prefix string) {
if _, err := os.Stat(path); os.IsNotExist(err) {
fatal(err)
return
}
db, err := bolt.Open(path, 0600)
if err != nil {
fatal(err)
return
}
defer db.Close()
err = db.View(func(tx *bolt.Tx) error {
var s bolt.BucketStats
var count int
var prefix = []byte(prefix)
tx.ForEach(func(name []byte, b *bolt.Bucket) error {
if bytes.HasPrefix(name, prefix) {
s.Add(b.Stats())
count += 1
}
return nil
})
printf("Aggregate statistics for %d buckets\n\n", count)
println("Page count statistics")
printf("\tNumber of logical branch pages: %d\n", s.BranchPageN)
printf("\tNumber of physical branch overflow pages: %d\n", s.BranchOverflowN)
printf("\tNumber of logical leaf pages: %d\n", s.LeafPageN)
printf("\tNumber of physical leaf overflow pages: %d\n", s.LeafOverflowN)
println("Tree statistics")
printf("\tNumber of keys/value pairs: %d\n", s.KeyN)
printf("\tNumber of levels in B+tree: %d\n", s.Depth)
println("Page size utilization")
printf("\tBytes allocated for physical branch pages: %d\n", s.BranchAlloc)
var percentage int
if s.BranchAlloc != 0 {
percentage = int(float32(s.BranchInuse) * 100.0 / float32(s.BranchAlloc))
}
printf("\tBytes actually used for branch data: %d (%d%%)\n", s.BranchInuse, percentage)
printf("\tBytes allocated for physical leaf pages: %d\n", s.LeafAlloc)
percentage = 0
if s.LeafAlloc != 0 {
percentage = int(float32(s.LeafInuse) * 100.0 / float32(s.LeafAlloc))
}
printf("\tBytes actually used for leaf data: %d (%d%%)\n", s.LeafInuse, percentage)
println("Bucket statistics")
printf("\tTotal number of buckets: %d\n", s.BucketN)
percentage = int(float32(s.InlineBucketN) * 100.0 / float32(s.BucketN))
printf("\tTotal number on inlined buckets: %d (%d%%)\n", s.InlineBucketN, percentage)
percentage = 0
if s.LeafInuse != 0 {
percentage = int(float32(s.InlineBucketInuse) * 100.0 / float32(s.LeafInuse))
}
printf("\tBytes used for inlined buckets: %d (%d%%)\n", s.InlineBucketInuse, percentage)
return nil
})
if err != nil {
fatal(err)
return
}
}

62
cmd/bolt/stats_test.go Normal file
View File

@ -0,0 +1,62 @@
package main_test
import (
"os"
"strconv"
"testing"
"github.com/boltdb/bolt"
. "github.com/boltdb/bolt/cmd/bolt"
"github.com/stretchr/testify/assert"
)
// Ensure the "stats" CLI command aggregates statistics across all
// buckets whose names share the given prefix and prints the expected
// report.
func TestStats(t *testing.T) {
	// The byte/page figures in the expected output below assume
	// 4096-byte pages; skip elsewhere.
	if os.Getpagesize() != 4096 {
		t.Skip()
	}
	SetTestMode(true)
	open(func(db *bolt.DB, path string) {
		db.Update(func(tx *bolt.Tx) error {
			// "foo" does not match the "b" prefix and must be
			// excluded from the aggregate.
			b, err := tx.CreateBucket([]byte("foo"))
			if err != nil {
				return err
			}
			for i := 0; i < 10; i++ {
				b.Put([]byte(strconv.Itoa(i)), []byte(strconv.Itoa(i)))
			}
			// "bar" matches the prefix and holds 100 keys.
			b, err = tx.CreateBucket([]byte("bar"))
			if err != nil {
				return err
			}
			for i := 0; i < 100; i++ {
				b.Put([]byte(strconv.Itoa(i)), []byte(strconv.Itoa(i)))
			}
			// "baz" matches the prefix and holds a single key — per
			// the expected output, it ends up as the one inlined
			// bucket.
			b, err = tx.CreateBucket([]byte("baz"))
			if err != nil {
				return err
			}
			b.Put([]byte("key"), []byte("value"))
			return nil
		})
		db.Close()
		// Aggregate over buckets with prefix "b": "bar" and "baz".
		output := run("stats", path, "b")
		assert.Equal(t, "Aggregate statistics for 2 buckets\n\n"+
			"Page count statistics\n"+
			"\tNumber of logical branch pages: 0\n"+
			"\tNumber of physical branch overflow pages: 0\n"+
			"\tNumber of logical leaf pages: 1\n"+
			"\tNumber of physical leaf overflow pages: 0\n"+
			"Tree statistics\n"+
			"\tNumber of keys/value pairs: 101\n"+
			"\tNumber of levels in B+tree: 1\n"+
			"Page size utilization\n"+
			"\tBytes allocated for physical branch pages: 0\n"+
			"\tBytes actually used for branch data: 0 (0%)\n"+
			"\tBytes allocated for physical leaf pages: 4096\n"+
			"\tBytes actually used for leaf data: 1996 (48%)\n"+
			"Bucket statistics\n"+
			"\tTotal number of buckets: 2\n"+
			"\tTotal number on inlined buckets: 1 (50%)\n"+
			"\tBytes used for inlined buckets: 40 (2%)", output)
	})
}