refactor both bolt and guts_cli based on the common package

Signed-off-by: Benjamin Wang <wachao@vmware.com>
pull/407/head
Benjamin Wang 2023-01-28 14:37:24 +08:00
parent 34595e7231
commit ea511567eb
30 changed files with 750 additions and 1454 deletions
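Every hunk below applies the same mechanical substitution: types, flags, constants, sentinel errors, and the assertion helper that used to be unexported in package bbolt now live in go.etcd.io/bbolt/internal/common. As a rough orientation, the mapping can be pictured as aliases (illustrative only; the commit moves the definitions rather than aliasing them, and this snippet is not part of the diff):

package bbolt

import "go.etcd.io/bbolt/internal/common"

// Old bbolt-internal name on the left, new internal/common name on the right.
type (
	pgid = common.Pgid // page id
	txid = common.Txid // transaction id
	page = common.Page // on-disk page header plus data
	meta = common.Meta // meta page contents
)

const (
	bucketLeafFlag = common.BucketLeafFlag
	leafPageFlag   = common.LeafPageFlag
	branchPageFlag = common.BranchPageFlag
)

var errTxClosed = common.ErrTxClosed // sentinel errors move as well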


@ -2,20 +2,22 @@ package bbolt
import (
"testing"
"go.etcd.io/bbolt/internal/common"
)
func TestTx_allocatePageStats(t *testing.T) {
f := newTestFreelist()
ids := []pgid{2, 3}
ids := []common.Pgid{2, 3}
f.readIDs(ids)
tx := &Tx{
db: &DB{
freelist: f,
pageSize: defaultPageSize,
pageSize: common.DefaultPageSize,
},
meta: &meta{},
pages: make(map[pgid]*page),
meta: &common.Meta{},
pages: make(map[common.Pgid]*common.Page),
}
txStats := tx.Stats()


@ -10,6 +10,8 @@ import (
"unsafe"
"golang.org/x/sys/unix"
"go.etcd.io/bbolt/internal/common"
)
// flock acquires an advisory lock on a file descriptor.
@ -36,7 +38,7 @@ func flock(db *DB, exclusive bool, timeout time.Duration) error {
// If we timed out then return an error.
if timeout != 0 && time.Since(t) > timeout-flockRetryTimeout {
return ErrTimeout
return common.ErrTimeout
}
// Wait for a bit and try again.
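The flock loop above (and its Windows twin below) is a plain retry-until-deadline pattern: attempt the lock, bail out once the remaining budget is smaller than one retry interval, otherwise sleep and retry. A standalone sketch of the same shape, with an invented acquire function and the 50 ms interval that db.go names flockRetryTimeout:

package main

import (
	"errors"
	"fmt"
	"time"
)

var errTimeout = errors.New("timeout")

// tryLock mirrors the loop structure of flock; acquire is hypothetical.
func tryLock(acquire func() bool, timeout time.Duration) error {
	const retryInterval = 50 * time.Millisecond
	start := time.Now()
	for {
		if acquire() {
			return nil
		}
		// Bail out when less than one retry interval of budget remains.
		if timeout != 0 && time.Since(start) > timeout-retryInterval {
			return errTimeout
		}
		time.Sleep(retryInterval)
	}
}

func main() {
	n := 0
	err := tryLock(func() bool { n++; return n >= 3 }, time.Second)
	fmt.Println(err) // <nil> after three attempts
}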


@ -8,6 +8,8 @@ import (
"unsafe"
"golang.org/x/sys/windows"
"go.etcd.io/bbolt/internal/common"
)
// fdatasync flushes written data to a file descriptor.
@ -42,7 +44,7 @@ func flock(db *DB, exclusive bool, timeout time.Duration) error {
// If we timed out then return an error.
if timeout != 0 && time.Since(t) > timeout-flockRetryTimeout {
return ErrTimeout
return common.ErrTimeout
}
// Wait for a bit and try again.
@ -93,7 +95,7 @@ func mmap(db *DB, sz int) error {
}
// Convert to a byte array.
db.data = ((*[maxMapSize]byte)(unsafe.Pointer(addr)))
db.data = (*[maxMapSize]byte)(unsafe.Pointer(addr))
db.datasz = sz
return nil

bucket.go

@ -4,6 +4,8 @@ import (
"bytes"
"fmt"
"unsafe"
"go.etcd.io/bbolt/internal/common"
)
const (
@ -14,8 +16,6 @@ const (
MaxValueSize = (1 << 31) - 2
)
const bucketHeaderSize = int(unsafe.Sizeof(bucket{}))
const (
minFillPercent = 0.1
maxFillPercent = 1.0
@ -27,12 +27,12 @@ const DefaultFillPercent = 0.5
// Bucket represents a collection of key/value pairs inside the database.
type Bucket struct {
*bucket
tx *Tx // the associated transaction
buckets map[string]*Bucket // subbucket cache
page *page // inline page reference
rootNode *node // materialized node for the root page.
nodes map[pgid]*node // node cache
*common.InBucket
tx *Tx // the associated transaction
buckets map[string]*Bucket // subbucket cache
page *common.Page // inline page reference
rootNode *node // materialized node for the root page.
nodes map[common.Pgid]*node // node cache
// Sets the threshold for filling nodes when they split. By default,
// the bucket will fill to 50% but it can be useful to increase this
@ -42,21 +42,12 @@ type Bucket struct {
FillPercent float64
}
// bucket represents the on-file representation of a bucket.
// This is stored as the "value" of a bucket key. If the bucket is small enough,
// then its root page can be stored inline in the "value", after the bucket
// header. In the case of inline buckets, the "root" will be 0.
type bucket struct {
root pgid // page id of the bucket's root-level page
sequence uint64 // monotonically incrementing, used by NextSequence()
}
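The removed struct resurfaces in internal/common as InBucket. Inferring purely from the call sites in this commit (RootPage, SetRootPage, InSequence, SetInSequence, IncSequence here, NewInBucket in db.go), its surface is roughly the sketch below; the bodies are assumptions, not the actual common source, and Pgid is taken to be defined alongside it:

package common

// InBucket is the on-file bucket header, formerly bbolt's unexported bucket.
type InBucket struct {
	root     Pgid   // page id of the bucket's root-level page
	sequence uint64 // monotonically incrementing, used by NextSequence()
}

func NewInBucket(root Pgid, seq uint64) InBucket {
	return InBucket{root: root, sequence: seq}
}

func (b *InBucket) RootPage() Pgid         { return b.root }
func (b *InBucket) SetRootPage(id Pgid)    { b.root = id }
func (b *InBucket) InSequence() uint64     { return b.sequence }
func (b *InBucket) SetInSequence(v uint64) { b.sequence = v }
func (b *InBucket) IncSequence()           { b.sequence++ }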
// newBucket returns a new bucket associated with a transaction.
func newBucket(tx *Tx) Bucket {
var b = Bucket{tx: tx, FillPercent: DefaultFillPercent}
if tx.writable {
b.buckets = make(map[string]*Bucket)
b.nodes = make(map[pgid]*node)
b.nodes = make(map[common.Pgid]*node)
}
return b
}
@ -67,8 +58,8 @@ func (b *Bucket) Tx() *Tx {
}
// Root returns the root of the bucket.
func (b *Bucket) Root() pgid {
return b.root
func (b *Bucket) Root() common.Pgid {
return b.RootPage()
}
// Writable returns whether the bucket is writable.
@ -105,7 +96,7 @@ func (b *Bucket) Bucket(name []byte) *Bucket {
k, v, flags := c.seek(name)
// Return nil if the key doesn't exist or it is not a bucket.
if !bytes.Equal(name, k) || (flags&bucketLeafFlag) == 0 {
if !bytes.Equal(name, k) || (flags&common.BucketLeafFlag) == 0 {
return nil
}
@ -125,8 +116,8 @@ func (b *Bucket) openBucket(value []byte) *Bucket {
// Unaligned access requires a copy to be made.
const unalignedMask = unsafe.Alignof(struct {
bucket
page
common.InBucket
common.Page
}{}) - 1
unaligned := uintptr(unsafe.Pointer(&value[0]))&unalignedMask != 0
if unaligned {
@ -136,15 +127,15 @@ func (b *Bucket) openBucket(value []byte) *Bucket {
// If this is a writable transaction then we need to copy the bucket entry.
// Read-only transactions can point directly at the mmap entry.
if b.tx.writable && !unaligned {
child.bucket = &bucket{}
*child.bucket = *(*bucket)(unsafe.Pointer(&value[0]))
child.InBucket = &common.InBucket{}
*child.InBucket = *(*common.InBucket)(unsafe.Pointer(&value[0]))
} else {
child.bucket = (*bucket)(unsafe.Pointer(&value[0]))
child.InBucket = (*common.InBucket)(unsafe.Pointer(&value[0]))
}
// Save a reference to the inline page if the bucket is inline.
if child.root == 0 {
child.page = (*page)(unsafe.Pointer(&value[bucketHeaderSize]))
if child.RootPage() == 0 {
child.page = (*common.Page)(unsafe.Pointer(&value[common.BucketHeaderSize]))
}
return &child
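The unalignedMask trick above deserves a note: unsafe.Alignof of the combined header struct is the strictest alignment either type requires, and ANDing the value's address with (alignment - 1) detects misalignment. A minimal standalone illustration with made-up addresses:

package main

import (
	"fmt"
	"unsafe"
)

type header struct {
	a uint64 // forces 8-byte alignment on most platforms
}

func main() {
	const unalignedMask = unsafe.Alignof(header{}) - 1 // 7 when alignment is 8
	for _, addr := range []uintptr{4096, 4100} {       // hypothetical addresses
		fmt.Printf("addr %d unaligned: %v\n", addr, addr&unalignedMask != 0)
	}
	// 4096 & 7 == 0 (aligned); 4100 & 7 == 4 (unaligned, copy required)
}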
@ -155,11 +146,11 @@ func (b *Bucket) openBucket(value []byte) *Bucket {
// The bucket instance is only valid for the lifetime of the transaction.
func (b *Bucket) CreateBucket(key []byte) (*Bucket, error) {
if b.tx.db == nil {
return nil, ErrTxClosed
return nil, common.ErrTxClosed
} else if !b.tx.writable {
return nil, ErrTxNotWritable
return nil, common.ErrTxNotWritable
} else if len(key) == 0 {
return nil, ErrBucketNameRequired
return nil, common.ErrBucketNameRequired
}
// Move cursor to correct position.
@ -168,15 +159,15 @@ func (b *Bucket) CreateBucket(key []byte) (*Bucket, error) {
// Return an error if there is an existing key.
if bytes.Equal(key, k) {
if (flags & bucketLeafFlag) != 0 {
return nil, ErrBucketExists
if (flags & common.BucketLeafFlag) != 0 {
return nil, common.ErrBucketExists
}
return nil, ErrIncompatibleValue
return nil, common.ErrIncompatibleValue
}
// Create empty, inline bucket.
var bucket = Bucket{
bucket: &bucket{},
InBucket: &common.InBucket{},
rootNode: &node{isLeaf: true},
FillPercent: DefaultFillPercent,
}
@ -184,7 +175,7 @@ func (b *Bucket) CreateBucket(key []byte) (*Bucket, error) {
// Insert into node.
key = cloneBytes(key)
c.node().put(key, key, value, 0, bucketLeafFlag)
c.node().put(key, key, value, 0, common.BucketLeafFlag)
// Since subbuckets are not allowed on inline buckets, we need to
// dereference the inline page, if it exists. This will cause the bucket
@ -199,7 +190,7 @@ func (b *Bucket) CreateBucket(key []byte) (*Bucket, error) {
// The bucket instance is only valid for the lifetime of the transaction.
func (b *Bucket) CreateBucketIfNotExists(key []byte) (*Bucket, error) {
child, err := b.CreateBucket(key)
if err == ErrBucketExists {
if err == common.ErrBucketExists {
return b.Bucket(key), nil
} else if err != nil {
return nil, err
@ -211,9 +202,9 @@ func (b *Bucket) CreateBucketIfNotExists(key []byte) (*Bucket, error) {
// Returns an error if the bucket does not exist, or if the key represents a non-bucket value.
func (b *Bucket) DeleteBucket(key []byte) error {
if b.tx.db == nil {
return ErrTxClosed
return common.ErrTxClosed
} else if !b.Writable() {
return ErrTxNotWritable
return common.ErrTxNotWritable
}
// Move cursor to correct position.
@ -222,9 +213,9 @@ func (b *Bucket) DeleteBucket(key []byte) error {
// Return an error if bucket doesn't exist or is not a bucket.
if !bytes.Equal(key, k) {
return ErrBucketNotFound
} else if (flags & bucketLeafFlag) == 0 {
return ErrIncompatibleValue
return common.ErrBucketNotFound
} else if (flags & common.BucketLeafFlag) == 0 {
return common.ErrIncompatibleValue
}
// Recursively delete all child buckets.
@ -260,7 +251,7 @@ func (b *Bucket) Get(key []byte) []byte {
k, v, flags := b.Cursor().seek(key)
// Return nil if this is a bucket.
if (flags & bucketLeafFlag) != 0 {
if (flags & common.BucketLeafFlag) != 0 {
return nil
}
@ -277,15 +268,15 @@ func (b *Bucket) Get(key []byte) []byte {
// Returns an error if the bucket was created from a read-only transaction, if the key is blank, if the key is too large, or if the value is too large.
func (b *Bucket) Put(key []byte, value []byte) error {
if b.tx.db == nil {
return ErrTxClosed
return common.ErrTxClosed
} else if !b.Writable() {
return ErrTxNotWritable
return common.ErrTxNotWritable
} else if len(key) == 0 {
return ErrKeyRequired
return common.ErrKeyRequired
} else if len(key) > MaxKeySize {
return ErrKeyTooLarge
return common.ErrKeyTooLarge
} else if int64(len(value)) > MaxValueSize {
return ErrValueTooLarge
return common.ErrValueTooLarge
}
// Move cursor to correct position.
@ -293,8 +284,8 @@ func (b *Bucket) Put(key []byte, value []byte) error {
k, _, flags := c.seek(key)
// Return an error if there is an existing key with a bucket value.
if bytes.Equal(key, k) && (flags&bucketLeafFlag) != 0 {
return ErrIncompatibleValue
if bytes.Equal(key, k) && (flags&common.BucketLeafFlag) != 0 {
return common.ErrIncompatibleValue
}
// Insert into node.
@ -309,9 +300,9 @@ func (b *Bucket) Put(key []byte, value []byte) error {
// Returns an error if the bucket was created from a read-only transaction.
func (b *Bucket) Delete(key []byte) error {
if b.tx.db == nil {
return ErrTxClosed
return common.ErrTxClosed
} else if !b.Writable() {
return ErrTxNotWritable
return common.ErrTxNotWritable
}
// Move cursor to correct position.
@ -324,8 +315,8 @@ func (b *Bucket) Delete(key []byte) error {
}
// Return an error if there is already existing bucket value.
if (flags & bucketLeafFlag) != 0 {
return ErrIncompatibleValue
if (flags & common.BucketLeafFlag) != 0 {
return common.ErrIncompatibleValue
}
// Delete the node if we have a matching key.
@ -335,44 +326,46 @@ func (b *Bucket) Delete(key []byte) error {
}
// Sequence returns the current integer for the bucket without incrementing it.
func (b *Bucket) Sequence() uint64 { return b.bucket.sequence }
func (b *Bucket) Sequence() uint64 {
return b.InSequence()
}
// SetSequence updates the sequence number for the bucket.
func (b *Bucket) SetSequence(v uint64) error {
if b.tx.db == nil {
return ErrTxClosed
return common.ErrTxClosed
} else if !b.Writable() {
return ErrTxNotWritable
return common.ErrTxNotWritable
}
// Materialize the root node if it hasn't been already so that the
// bucket will be saved during commit.
if b.rootNode == nil {
_ = b.node(b.root, nil)
_ = b.node(b.RootPage(), nil)
}
// Set the sequence.
b.bucket.sequence = v
b.SetInSequence(v)
return nil
}
// NextSequence returns an autoincrementing integer for the bucket.
func (b *Bucket) NextSequence() (uint64, error) {
if b.tx.db == nil {
return 0, ErrTxClosed
return 0, common.ErrTxClosed
} else if !b.Writable() {
return 0, ErrTxNotWritable
return 0, common.ErrTxNotWritable
}
// Materialize the root node if it hasn't been already so that the
// bucket will be saved during commit.
if b.rootNode == nil {
_ = b.node(b.root, nil)
_ = b.node(b.RootPage(), nil)
}
// Increment and return the sequence.
b.bucket.sequence++
return b.bucket.sequence, nil
b.IncSequence()
return b.Sequence(), nil
}
// ForEach executes a function for each key/value pair in a bucket.
@ -382,7 +375,7 @@ func (b *Bucket) NextSequence() (uint64, error) {
// the bucket; this will result in undefined behavior.
func (b *Bucket) ForEach(fn func(k, v []byte) error) error {
if b.tx.db == nil {
return ErrTxClosed
return common.ErrTxClosed
}
c := b.Cursor()
for k, v := c.First(); k != nil; k, v = c.Next() {
@ -395,11 +388,11 @@ func (b *Bucket) ForEach(fn func(k, v []byte) error) error {
func (b *Bucket) ForEachBucket(fn func(k []byte) error) error {
if b.tx.db == nil {
return ErrTxClosed
return common.ErrTxClosed
}
c := b.Cursor()
for k, _, flags := c.first(); k != nil; k, _, flags = c.next() {
if flags&bucketLeafFlag != 0 {
if flags&common.BucketLeafFlag != 0 {
if err := fn(k); err != nil {
return err
}
@ -413,64 +406,64 @@ func (b *Bucket) Stats() BucketStats {
var s, subStats BucketStats
pageSize := b.tx.db.pageSize
s.BucketN += 1
if b.root == 0 {
if b.RootPage() == 0 {
s.InlineBucketN += 1
}
b.forEachPage(func(p *page, depth int, pgstack []pgid) {
if (p.flags & leafPageFlag) != 0 {
s.KeyN += int(p.count)
b.forEachPage(func(p *common.Page, depth int, pgstack []common.Pgid) {
if (p.Flags() & common.LeafPageFlag) != 0 {
s.KeyN += int(p.Count())
// used totals the used bytes for the page
used := pageHeaderSize
used := common.PageHeaderSize
if p.count != 0 {
if p.Count() != 0 {
// If page has any elements, add all element headers.
used += leafPageElementSize * uintptr(p.count-1)
used += common.LeafPageElementSize * uintptr(p.Count()-1)
// Add all element key, value sizes.
// The computation takes advantage of the fact that the position
// of the last element's key/value equals to the total of the sizes
// of all previous elements' keys and values.
// It also includes the last element's header.
lastElement := p.leafPageElement(p.count - 1)
used += uintptr(lastElement.pos + lastElement.ksize + lastElement.vsize)
lastElement := p.LeafPageElement(p.Count() - 1)
used += uintptr(lastElement.Pos() + lastElement.Ksize() + lastElement.Vsize())
}
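The shortcut in the comment works because leaf element headers are packed at the front of the page and each element's pos is measured from its own header, so the last header's pos is exactly one element header plus every earlier key and value. A worked check with invented sizes:

package main

import "fmt"

func main() {
	const pageHeaderSize, elemSize = 16, 16 // invented sizes for illustration
	ks := []int{2, 3, 4}                    // key sizes
	vs := []int{5, 6, 7}                    // value sizes

	// Last element's pos: one element header to reach the data region,
	// plus every earlier key and value.
	lastPos := elemSize + (ks[0] + vs[0]) + (ks[1] + vs[1]) // 16 + 7 + 9 = 32
	used := pageHeaderSize + elemSize*(len(ks)-1) + lastPos + ks[2] + vs[2]

	// The long way: page header + every element header + all keys and values.
	direct := pageHeaderSize + elemSize*len(ks) + (2 + 5 + 3 + 6 + 4 + 7)
	fmt.Println(used, direct) // 91 91
}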
if b.root == 0 {
if b.RootPage() == 0 {
// For inlined bucket just update the inline stats
s.InlineBucketInuse += int(used)
} else {
// For non-inlined bucket update all the leaf stats
s.LeafPageN++
s.LeafInuse += int(used)
s.LeafOverflowN += int(p.overflow)
s.LeafOverflowN += int(p.Overflow())
// Collect stats from sub-buckets.
// Do that by iterating over all element headers
// looking for the ones with the bucketLeafFlag.
for i := uint16(0); i < p.count; i++ {
e := p.leafPageElement(i)
if (e.flags & bucketLeafFlag) != 0 {
for i := uint16(0); i < p.Count(); i++ {
e := p.LeafPageElement(i)
if (e.Flags() & common.BucketLeafFlag) != 0 {
// For any bucket element, open the element value
// and recursively call Stats on the contained bucket.
subStats.Add(b.openBucket(e.value()).Stats())
subStats.Add(b.openBucket(e.Value()).Stats())
}
}
}
} else if (p.flags & branchPageFlag) != 0 {
} else if (p.Flags() & common.BranchPageFlag) != 0 {
s.BranchPageN++
lastElement := p.branchPageElement(p.count - 1)
lastElement := p.BranchPageElement(p.Count() - 1)
// used totals the used bytes for the page
// Add header and all element headers.
used := pageHeaderSize + (branchPageElementSize * uintptr(p.count-1))
used := common.PageHeaderSize + (common.BranchPageElementSize * uintptr(p.Count()-1))
// Add size of all keys and values.
// Again, use the fact that last element's position equals to
// the total of key, value sizes of all previous elements.
used += uintptr(lastElement.pos + lastElement.ksize)
used += uintptr(lastElement.Pos() + lastElement.Ksize())
s.BranchInuse += int(used)
s.BranchOverflowN += int(p.overflow)
s.BranchOverflowN += int(p.Overflow())
}
// Keep track of maximum page depth.
@ -491,29 +484,29 @@ func (b *Bucket) Stats() BucketStats {
}
// forEachPage iterates over every page in a bucket, including inline pages.
func (b *Bucket) forEachPage(fn func(*page, int, []pgid)) {
func (b *Bucket) forEachPage(fn func(*common.Page, int, []common.Pgid)) {
// If we have an inline page then just use that.
if b.page != nil {
fn(b.page, 0, []pgid{b.root})
fn(b.page, 0, []common.Pgid{b.RootPage()})
return
}
// Otherwise traverse the page hierarchy.
b.tx.forEachPage(b.root, fn)
b.tx.forEachPage(b.RootPage(), fn)
}
// forEachPageNode iterates over every page (or node) in a bucket.
// This also includes inline pages.
func (b *Bucket) forEachPageNode(fn func(*page, *node, int)) {
func (b *Bucket) forEachPageNode(fn func(*common.Page, *node, int)) {
// If we have an inline page or root node then just use that.
if b.page != nil {
fn(b.page, nil, 0)
return
}
b._forEachPageNode(b.root, 0, fn)
b._forEachPageNode(b.RootPage(), 0, fn)
}
func (b *Bucket) _forEachPageNode(pgId pgid, depth int, fn func(*page, *node, int)) {
func (b *Bucket) _forEachPageNode(pgId common.Pgid, depth int, fn func(*common.Page, *node, int)) {
var p, n = b.pageNode(pgId)
// Execute function.
@ -521,10 +514,10 @@ func (b *Bucket) _forEachPageNode(pgId pgid, depth int, fn func(*page, *node, in
// Recursively loop over children.
if p != nil {
if (p.flags & branchPageFlag) != 0 {
for i := 0; i < int(p.count); i++ {
elem := p.branchPageElement(uint16(i))
b._forEachPageNode(elem.pgid, depth+1, fn)
if (p.Flags() & common.BranchPageFlag) != 0 {
for i := 0; i < int(p.Count()); i++ {
elem := p.BranchPageElement(uint16(i))
b._forEachPageNode(elem.Pgid(), depth+1, fn)
}
}
} else {
@ -553,9 +546,9 @@ func (b *Bucket) spill() error {
}
// Update the child bucket header in this bucket.
value = make([]byte, unsafe.Sizeof(bucket{}))
var bucket = (*bucket)(unsafe.Pointer(&value[0]))
*bucket = *child.bucket
value = make([]byte, unsafe.Sizeof(common.InBucket{}))
var bucket = (*common.InBucket)(unsafe.Pointer(&value[0]))
*bucket = *child.InBucket
}
// Skip writing the bucket if there are no materialized nodes.
@ -569,10 +562,10 @@ func (b *Bucket) spill() error {
if !bytes.Equal([]byte(name), k) {
panic(fmt.Sprintf("misplaced bucket header: %x -> %x", []byte(name), k))
}
if flags&bucketLeafFlag == 0 {
if flags&common.BucketLeafFlag == 0 {
panic(fmt.Sprintf("unexpected bucket header flag: %x", flags))
}
c.node().put([]byte(name), []byte(name), value, 0, bucketLeafFlag)
c.node().put([]byte(name), []byte(name), value, 0, common.BucketLeafFlag)
}
// Ignore if there's not a materialized root node.
@ -587,16 +580,16 @@ func (b *Bucket) spill() error {
b.rootNode = b.rootNode.root()
// Update the root node for this bucket.
if b.rootNode.pgid >= b.tx.meta.pgid {
panic(fmt.Sprintf("pgid (%d) above high water mark (%d)", b.rootNode.pgid, b.tx.meta.pgid))
if b.rootNode.pgid >= b.tx.meta.Pgid() {
panic(fmt.Sprintf("pgid (%d) above high water mark (%d)", b.rootNode.pgid, b.tx.meta.Pgid()))
}
b.root = b.rootNode.pgid
b.SetRootPage(b.rootNode.pgid)
return nil
}
// inlineable returns true if a bucket is small enough to be written inline
// and if it contains no subbuckets. Otherwise returns false.
// and if it contains no subbuckets. Otherwise, returns false.
func (b *Bucket) inlineable() bool {
var n = b.rootNode
@ -607,11 +600,11 @@ func (b *Bucket) inlineable() bool {
// Bucket is not inlineable if it contains subbuckets or if it goes beyond
// our threshold for inline bucket size.
var size = pageHeaderSize
var size = common.PageHeaderSize
for _, inode := range n.inodes {
size += leafPageElementSize + uintptr(len(inode.key)) + uintptr(len(inode.value))
size += common.LeafPageElementSize + uintptr(len(inode.key)) + uintptr(len(inode.value))
if inode.flags&bucketLeafFlag != 0 {
if inode.flags&common.BucketLeafFlag != 0 {
return false
} else if size > b.maxInlineBucketSize() {
return false
@ -630,14 +623,14 @@ func (b *Bucket) maxInlineBucketSize() uintptr {
func (b *Bucket) write() []byte {
// Allocate the appropriate size.
var n = b.rootNode
var value = make([]byte, bucketHeaderSize+n.size())
var value = make([]byte, common.BucketHeaderSize+n.size())
// Write a bucket header.
var bucket = (*bucket)(unsafe.Pointer(&value[0]))
*bucket = *b.bucket
var bucket = (*common.InBucket)(unsafe.Pointer(&value[0]))
*bucket = *b.InBucket
// Convert byte slice to a fake page and write the root node.
var p = (*page)(unsafe.Pointer(&value[bucketHeaderSize]))
var p = (*common.Page)(unsafe.Pointer(&value[common.BucketHeaderSize]))
n.write(p)
return value
@ -654,8 +647,8 @@ func (b *Bucket) rebalance() {
}
// node creates a node from a page and associates it with a given parent.
func (b *Bucket) node(pgId pgid, parent *node) *node {
_assert(b.nodes != nil, "nodes map expected")
func (b *Bucket) node(pgId common.Pgid, parent *node) *node {
common.Assert(b.nodes != nil, "nodes map expected")
// Retrieve node if it's already been created.
if n := b.nodes[pgId]; n != nil {
@ -688,19 +681,19 @@ func (b *Bucket) node(pgId pgid, parent *node) *node {
// free recursively frees all pages in the bucket.
func (b *Bucket) free() {
if b.root == 0 {
if b.RootPage() == 0 {
return
}
var tx = b.tx
b.forEachPageNode(func(p *page, n *node, _ int) {
b.forEachPageNode(func(p *common.Page, n *node, _ int) {
if p != nil {
tx.db.freelist.free(tx.meta.txid, p)
tx.db.freelist.free(tx.meta.Txid(), p)
} else {
n.free()
}
})
b.root = 0
b.SetRootPage(0)
}
// dereference removes all references to the old mmap.
@ -715,11 +708,11 @@ func (b *Bucket) dereference() {
}
// pageNode returns the in-memory node, if it exists.
// Otherwise returns the underlying page.
func (b *Bucket) pageNode(id pgid) (*page, *node) {
// Otherwise, returns the underlying page.
func (b *Bucket) pageNode(id common.Pgid) (*common.Page, *node) {
// Inline buckets have a fake page embedded in their value so treat them
// differently. We'll return the rootNode (if available) or the fake page.
if b.root == 0 {
if b.RootPage() == 0 {
if id != 0 {
panic(fmt.Sprintf("inline bucket non-zero page access(2): %d != 0", id))
}


@ -18,6 +18,7 @@ import (
bolt "go.etcd.io/bbolt"
"go.etcd.io/bbolt/internal/btesting"
"go.etcd.io/bbolt/internal/common"
)
// Ensure that a bucket that gets a non-existent key returns nil.
@ -246,7 +247,7 @@ func TestBucket_Put_IncompatibleValue(t *testing.T) {
if _, err := tx.Bucket([]byte("widgets")).CreateBucket([]byte("foo")); err != nil {
t.Fatal(err)
}
if err := b0.Put([]byte("foo"), []byte("bar")); err != bolt.ErrIncompatibleValue {
if err := b0.Put([]byte("foo"), []byte("bar")); err != common.ErrIncompatibleValue {
t.Fatalf("unexpected error: %s", err)
}
return nil
@ -272,7 +273,7 @@ func TestBucket_Put_Closed(t *testing.T) {
t.Fatal(err)
}
if err := b.Put([]byte("foo"), []byte("bar")); err != bolt.ErrTxClosed {
if err := b.Put([]byte("foo"), []byte("bar")); err != common.ErrTxClosed {
t.Fatalf("unexpected error: %s", err)
}
}
@ -292,7 +293,7 @@ func TestBucket_Put_ReadOnly(t *testing.T) {
if err := db.View(func(tx *bolt.Tx) error {
b := tx.Bucket([]byte("widgets"))
if err := b.Put([]byte("foo"), []byte("bar")); err != bolt.ErrTxNotWritable {
if err := b.Put([]byte("foo"), []byte("bar")); err != common.ErrTxNotWritable {
t.Fatalf("unexpected error: %s", err)
}
return nil
@ -560,7 +561,7 @@ func TestBucket_Delete_Bucket(t *testing.T) {
if _, err := b.CreateBucket([]byte("foo")); err != nil {
t.Fatal(err)
}
if err := b.Delete([]byte("foo")); err != bolt.ErrIncompatibleValue {
if err := b.Delete([]byte("foo")); err != common.ErrIncompatibleValue {
t.Fatalf("unexpected error: %s", err)
}
return nil
@ -583,7 +584,7 @@ func TestBucket_Delete_ReadOnly(t *testing.T) {
}
if err := db.View(func(tx *bolt.Tx) error {
if err := tx.Bucket([]byte("widgets")).Delete([]byte("foo")); err != bolt.ErrTxNotWritable {
if err := tx.Bucket([]byte("widgets")).Delete([]byte("foo")); err != common.ErrTxNotWritable {
t.Fatalf("unexpected error: %s", err)
}
return nil
@ -609,7 +610,7 @@ func TestBucket_Delete_Closed(t *testing.T) {
if err := tx.Rollback(); err != nil {
t.Fatal(err)
}
if err := b.Delete([]byte("foo")); err != bolt.ErrTxClosed {
if err := b.Delete([]byte("foo")); err != common.ErrTxClosed {
t.Fatalf("unexpected error: %s", err)
}
}
@ -780,7 +781,7 @@ func TestBucket_CreateBucket_IncompatibleValue(t *testing.T) {
if err := widgets.Put([]byte("foo"), []byte("bar")); err != nil {
t.Fatal(err)
}
if _, err := widgets.CreateBucket([]byte("foo")); err != bolt.ErrIncompatibleValue {
if _, err := widgets.CreateBucket([]byte("foo")); err != common.ErrIncompatibleValue {
t.Fatalf("unexpected error: %s", err)
}
return nil
@ -801,7 +802,7 @@ func TestBucket_DeleteBucket_IncompatibleValue(t *testing.T) {
if err := widgets.Put([]byte("foo"), []byte("bar")); err != nil {
t.Fatal(err)
}
if err := tx.Bucket([]byte("widgets")).DeleteBucket([]byte("foo")); err != bolt.ErrIncompatibleValue {
if err := tx.Bucket([]byte("widgets")).DeleteBucket([]byte("foo")); err != common.ErrIncompatibleValue {
t.Fatalf("unexpected error: %s", err)
}
return nil
@ -943,7 +944,7 @@ func TestBucket_NextSequence_ReadOnly(t *testing.T) {
if err := db.View(func(tx *bolt.Tx) error {
_, err := tx.Bucket([]byte("widgets")).NextSequence()
if err != bolt.ErrTxNotWritable {
if err != common.ErrTxNotWritable {
t.Fatalf("unexpected error: %s", err)
}
return nil
@ -966,7 +967,7 @@ func TestBucket_NextSequence_Closed(t *testing.T) {
if err := tx.Rollback(); err != nil {
t.Fatal(err)
}
if _, err := b.NextSequence(); err != bolt.ErrTxClosed {
if _, err := b.NextSequence(); err != common.ErrTxClosed {
t.Fatal(err)
}
}
@ -1158,7 +1159,7 @@ func TestBucket_ForEach_Closed(t *testing.T) {
t.Fatal(err)
}
if err := b.ForEach(func(k, v []byte) error { return nil }); err != bolt.ErrTxClosed {
if err := b.ForEach(func(k, v []byte) error { return nil }); err != common.ErrTxClosed {
t.Fatalf("unexpected error: %s", err)
}
}
@ -1172,10 +1173,10 @@ func TestBucket_Put_EmptyKey(t *testing.T) {
if err != nil {
t.Fatal(err)
}
if err := b.Put([]byte(""), []byte("bar")); err != bolt.ErrKeyRequired {
if err := b.Put([]byte(""), []byte("bar")); err != common.ErrKeyRequired {
t.Fatalf("unexpected error: %s", err)
}
if err := b.Put(nil, []byte("bar")); err != bolt.ErrKeyRequired {
if err := b.Put(nil, []byte("bar")); err != common.ErrKeyRequired {
t.Fatalf("unexpected error: %s", err)
}
return nil
@ -1192,7 +1193,7 @@ func TestBucket_Put_KeyTooLarge(t *testing.T) {
if err != nil {
t.Fatal(err)
}
if err := b.Put(make([]byte, 32769), []byte("bar")); err != bolt.ErrKeyTooLarge {
if err := b.Put(make([]byte, 32769), []byte("bar")); err != common.ErrKeyTooLarge {
t.Fatalf("unexpected error: %s", err)
}
return nil
@ -1215,7 +1216,7 @@ func TestBucket_Put_ValueTooLarge(t *testing.T) {
if err != nil {
t.Fatal(err)
}
if err := b.Put([]byte("foo"), make([]byte, bolt.MaxValueSize+1)); err != bolt.ErrValueTooLarge {
if err := b.Put([]byte("foo"), make([]byte, bolt.MaxValueSize+1)); err != common.ErrValueTooLarge {
t.Fatalf("unexpected error: %s", err)
}
return nil


@ -18,11 +18,10 @@ import (
"time"
"unicode"
"unicode/utf8"
"unsafe"
"go.etcd.io/bbolt/internal/guts_cli"
bolt "go.etcd.io/bbolt"
"go.etcd.io/bbolt/internal/common"
"go.etcd.io/bbolt/internal/guts_cli"
)
var (
@ -52,12 +51,6 @@ var (
// ErrBucketRequired is returned when a bucket is not specified.
ErrBucketRequired = errors.New("bucket required")
// ErrBucketNotFound is returned when a bucket is not found.
ErrBucketNotFound = errors.New("bucket not found")
// ErrKeyRequired is returned when a key is not specified.
ErrKeyRequired = errors.New("key required")
// ErrKeyNotFound is returned when a key is not found.
ErrKeyNotFound = errors.New("key not found")
)
@ -509,16 +502,16 @@ func (cmd *pageItemCommand) Run(args ...string) error {
return nil
}
// leafPageElement retrieves a leaf page element.
func (cmd *pageItemCommand) leafPageElement(pageBytes []byte, index uint16) (*guts_cli.LeafPageElement, error) {
p := (*guts_cli.Page)(unsafe.Pointer(&pageBytes[0]))
func (cmd *pageItemCommand) validateLeafPage(pageBytes []byte, index uint16) (*common.Page, error) {
p := common.LoadPage(pageBytes)
if index >= p.Count() {
return nil, fmt.Errorf("leafPageElement: expected item index less than %d, but got %d.", p.Count(), index)
return nil, fmt.Errorf("leafPageElement: expected item index less than %d, but got %d", p.Count(), index)
}
if p.Type() != "leaf" {
return nil, fmt.Errorf("leafPageElement: expected page type of 'leaf', but got '%s'", p.Type())
if p.Typ() != "leaf" {
return nil, fmt.Errorf("leafPageElement: expected page type of 'leaf', but got '%s'", p.Typ())
}
return p.LeafPageElement(index), nil
return p, nil
}
const FORMAT_MODES = "auto|ascii-encoded|hex|bytes|redacted"
@ -568,19 +561,21 @@ func writelnBytes(w io.Writer, b []byte, format string) error {
// PrintLeafItemKey writes the bytes of a leaf element's key.
func (cmd *pageItemCommand) PrintLeafItemKey(w io.Writer, pageBytes []byte, index uint16, format string) error {
e, err := cmd.leafPageElement(pageBytes, index)
p, err := cmd.validateLeafPage(pageBytes, index)
if err != nil {
return err
}
e := p.LeafPageElement(index)
return writelnBytes(w, e.Key(), format)
}
// PrintLeafItemKey writes the bytes of a leaf element's value.
// PrintLeafItemValue writes the bytes of a leaf element's value.
func (cmd *pageItemCommand) PrintLeafItemValue(w io.Writer, pageBytes []byte, index uint16, format string) error {
e, err := cmd.leafPageElement(pageBytes, index)
p, err := cmd.validateLeafPage(pageBytes, index)
if err != nil {
return err
}
e := p.LeafPageElement(index)
return writelnBytes(w, e.Value(), format)
}
@ -931,12 +926,12 @@ func (cmd *keysCommand) Run(args ...string) error {
// Find bucket.
var lastbucket *bolt.Bucket = tx.Bucket([]byte(buckets[0]))
if lastbucket == nil {
return ErrBucketNotFound
return common.ErrBucketNotFound
}
for _, bucket := range buckets[1:] {
lastbucket = lastbucket.Bucket([]byte(bucket))
if lastbucket == nil {
return ErrBucketNotFound
return common.ErrBucketNotFound
}
}
@ -1007,7 +1002,7 @@ func (cmd *getCommand) Run(args ...string) error {
} else if len(buckets) == 0 {
return ErrBucketRequired
} else if len(key) == 0 {
return ErrKeyRequired
return common.ErrKeyRequired
}
// Open database.
@ -1022,12 +1017,12 @@ func (cmd *getCommand) Run(args ...string) error {
// Find bucket.
var lastbucket *bolt.Bucket = tx.Bucket([]byte(buckets[0]))
if lastbucket == nil {
return ErrBucketNotFound
return common.ErrBucketNotFound
}
for _, bucket := range buckets[1:] {
lastbucket = lastbucket.Bucket([]byte(bucket))
if lastbucket == nil {
return ErrBucketNotFound
return common.ErrBucketNotFound
}
}


@ -8,6 +8,7 @@ import (
"os"
"strings"
"go.etcd.io/bbolt/internal/common"
"go.etcd.io/bbolt/internal/guts_cli"
)
@ -113,12 +114,12 @@ func (cmd *pageCommand) printPage(path string, pageID uint64, formatValue string
// Print basic page info.
fmt.Fprintf(cmd.Stdout, "Page ID: %d\n", p.Id())
fmt.Fprintf(cmd.Stdout, "Page Type: %s\n", p.Type())
fmt.Fprintf(cmd.Stdout, "Page Type: %s\n", p.Typ())
fmt.Fprintf(cmd.Stdout, "Total Size: %d bytes\n", len(buf))
fmt.Fprintf(cmd.Stdout, "Overflow pages: %d\n", p.Overflow())
// Print type-specific data.
switch p.Type() {
switch p.Typ() {
case "meta":
err = cmd.PrintMeta(cmd.Stdout, buf)
case "leaf":
@ -136,14 +137,14 @@ func (cmd *pageCommand) printPage(path string, pageID uint64, formatValue string
// PrintMeta prints the data from the meta page.
func (cmd *pageCommand) PrintMeta(w io.Writer, buf []byte) error {
m := guts_cli.LoadPageMeta(buf)
m := common.LoadPageMeta(buf)
m.Print(w)
return nil
}
// PrintLeaf prints the data for a leaf page.
func (cmd *pageCommand) PrintLeaf(w io.Writer, buf []byte, formatValue string) error {
p := guts_cli.LoadPage(buf)
p := common.LoadPage(buf)
// Print number of items.
fmt.Fprintf(w, "Item Count: %d\n", p.Count())
@ -182,7 +183,7 @@ func (cmd *pageCommand) PrintLeaf(w io.Writer, buf []byte, formatValue string) e
// PrintBranch prints the data for a leaf page.
func (cmd *pageCommand) PrintBranch(w io.Writer, buf []byte) error {
p := guts_cli.LoadPage(buf)
p := common.LoadPage(buf)
// Print number of items.
fmt.Fprintf(w, "Item Count: %d\n", p.Count())
@ -200,7 +201,7 @@ func (cmd *pageCommand) PrintBranch(w io.Writer, buf []byte) error {
k = fmt.Sprintf("%x", string(e.Key()))
}
fmt.Fprintf(w, "%s: <pgid=%d>\n", k, e.PgId())
fmt.Fprintf(w, "%s: <pgid=%d>\n", k, e.Pgid())
}
fmt.Fprintf(w, "\n")
return nil
@ -208,16 +209,17 @@ func (cmd *pageCommand) PrintBranch(w io.Writer, buf []byte) error {
// PrintFreelist prints the data for a freelist page.
func (cmd *pageCommand) PrintFreelist(w io.Writer, buf []byte) error {
p := guts_cli.LoadPage(buf)
p := common.LoadPage(buf)
// Print number of items.
fmt.Fprintf(w, "Item Count: %d\n", p.FreelistPageCount())
_, cnt := p.FreelistPageCount()
fmt.Fprintf(w, "Item Count: %d\n", cnt)
fmt.Fprintf(w, "Overflow: %d\n", p.Overflow())
fmt.Fprintf(w, "\n")
// Print each page in the freelist.
ids := p.FreelistPagePages()
ids := p.FreelistPageIds()
for _, ids := range ids {
fmt.Fprintf(w, "%d\n", ids)
}
@ -244,7 +246,7 @@ func (cmd *pageCommand) PrintPage(w io.Writer, r io.ReaderAt, pageID int, pageSi
for offset := 0; offset < pageSize; offset += bytesPerLineN {
// Retrieve current 16-byte line.
line := buf[offset : offset+bytesPerLineN]
isLastLine := (offset == (pageSize - bytesPerLineN))
isLastLine := offset == (pageSize - bytesPerLineN)
// If it's the same as the previous line then print a skip.
if bytes.Equal(line, prev) && !isLastLine {


@ -9,7 +9,7 @@ import (
"strconv"
"strings"
"go.etcd.io/bbolt/internal/guts_cli"
"go.etcd.io/bbolt/internal/common"
"go.etcd.io/bbolt/internal/surgeon"
)
@ -224,7 +224,7 @@ func (cmd *copyPageCommand) Run(args ...string) error {
}
// copy the page
if err := surgeon.CopyPage(cmd.dstPath, guts_cli.Pgid(srcPageId), guts_cli.Pgid(dstPageId)); err != nil {
if err := surgeon.CopyPage(cmd.dstPath, common.Pgid(srcPageId), common.Pgid(dstPageId)); err != nil {
return fmt.Errorf("copyPageCommand failed: %w", err)
}
@ -279,7 +279,7 @@ func (cmd *clearPageCommand) Run(args ...string) error {
return err
}
if err := surgeon.ClearPage(cmd.dstPath, guts_cli.Pgid(pageId)); err != nil {
if err := surgeon.ClearPage(cmd.dstPath, common.Pgid(pageId)); err != nil {
return fmt.Errorf("clearPageCommand failed: %w", err)
}


@ -11,7 +11,7 @@ import (
bolt "go.etcd.io/bbolt"
"go.etcd.io/bbolt/internal/btesting"
"go.etcd.io/bbolt/internal/guts_cli"
"go.etcd.io/bbolt/internal/common"
)
func TestSurgery_RevertMetaPage(t *testing.T) {
@ -28,8 +28,8 @@ func TestSurgery_RevertMetaPage(t *testing.T) {
// Read both meta0 and meta1 from srcFile
srcBuf0 := readPage(t, srcPath, 0, pageSize)
srcBuf1 := readPage(t, srcPath, 1, pageSize)
meta0Page := guts_cli.LoadPageMeta(srcBuf0)
meta1Page := guts_cli.LoadPageMeta(srcBuf1)
meta0Page := common.LoadPageMeta(srcBuf0)
meta1Page := common.LoadPageMeta(srcBuf1)
// Get the non-active meta page
nonActiveSrcBuf := srcBuf0
@ -115,7 +115,7 @@ func TestSurgery_ClearPage(t *testing.T) {
t.Log("Verify result")
dstPageId3Data := readPage(t, dstPath, 3, pageSize)
p := guts_cli.LoadPage(dstPageId3Data)
p := common.LoadPage(dstPageId3Data)
assert.Equal(t, uint16(0), p.Count())
assert.Equal(t, uint32(0), p.Overflow())
}


@ -4,6 +4,8 @@ import (
"bytes"
"fmt"
"sort"
"go.etcd.io/bbolt/internal/common"
)
// Cursor represents an iterator that can traverse over all key/value pairs in a bucket
@ -30,9 +32,9 @@ func (c *Cursor) Bucket() *Bucket {
// If the bucket is empty then a nil key and value are returned.
// The returned key and value are only valid for the life of the transaction.
func (c *Cursor) First() (key []byte, value []byte) {
_assert(c.bucket.tx.db != nil, "tx closed")
common.Assert(c.bucket.tx.db != nil, "tx closed")
k, v, flags := c.first()
if (flags & uint32(bucketLeafFlag)) != 0 {
if (flags & uint32(common.BucketLeafFlag)) != 0 {
return k, nil
}
return k, v
@ -40,7 +42,7 @@ func (c *Cursor) First() (key []byte, value []byte) {
func (c *Cursor) first() (key []byte, value []byte, flags uint32) {
c.stack = c.stack[:0]
p, n := c.bucket.pageNode(c.bucket.root)
p, n := c.bucket.pageNode(c.bucket.RootPage())
c.stack = append(c.stack, elemRef{page: p, node: n, index: 0})
c.goToFirstElementOnTheStack()
@ -51,7 +53,7 @@ func (c *Cursor) first() (key []byte, value []byte, flags uint32) {
}
k, v, flags := c.keyValue()
if (flags & uint32(bucketLeafFlag)) != 0 {
if (flags & uint32(common.BucketLeafFlag)) != 0 {
return k, nil, flags
}
return k, v, flags
@ -61,9 +63,9 @@ func (c *Cursor) first() (key []byte, value []byte, flags uint32) {
// If the bucket is empty then a nil key and value are returned.
// The returned key and value are only valid for the life of the transaction.
func (c *Cursor) Last() (key []byte, value []byte) {
_assert(c.bucket.tx.db != nil, "tx closed")
common.Assert(c.bucket.tx.db != nil, "tx closed")
c.stack = c.stack[:0]
p, n := c.bucket.pageNode(c.bucket.root)
p, n := c.bucket.pageNode(c.bucket.RootPage())
ref := elemRef{page: p, node: n}
ref.index = ref.count() - 1
c.stack = append(c.stack, ref)
@ -80,7 +82,7 @@ func (c *Cursor) Last() (key []byte, value []byte) {
}
k, v, flags := c.keyValue()
if (flags & uint32(bucketLeafFlag)) != 0 {
if (flags & uint32(common.BucketLeafFlag)) != 0 {
return k, nil
}
return k, v
@ -90,9 +92,9 @@ func (c *Cursor) Last() (key []byte, value []byte) {
// If the cursor is at the end of the bucket then a nil key and value are returned.
// The returned key and value are only valid for the life of the transaction.
func (c *Cursor) Next() (key []byte, value []byte) {
_assert(c.bucket.tx.db != nil, "tx closed")
common.Assert(c.bucket.tx.db != nil, "tx closed")
k, v, flags := c.next()
if (flags & uint32(bucketLeafFlag)) != 0 {
if (flags & uint32(common.BucketLeafFlag)) != 0 {
return k, nil
}
return k, v
@ -102,9 +104,9 @@ func (c *Cursor) Next() (key []byte, value []byte) {
// If the cursor is at the beginning of the bucket then a nil key and value are returned.
// The returned key and value are only valid for the life of the transaction.
func (c *Cursor) Prev() (key []byte, value []byte) {
_assert(c.bucket.tx.db != nil, "tx closed")
common.Assert(c.bucket.tx.db != nil, "tx closed")
k, v, flags := c.prev()
if (flags & uint32(bucketLeafFlag)) != 0 {
if (flags & uint32(common.BucketLeafFlag)) != 0 {
return k, nil
}
return k, v
@ -115,7 +117,7 @@ func (c *Cursor) Prev() (key []byte, value []byte) {
// follow, a nil key is returned.
// The returned key and value are only valid for the life of the transaction.
func (c *Cursor) Seek(seek []byte) (key []byte, value []byte) {
_assert(c.bucket.tx.db != nil, "tx closed")
common.Assert(c.bucket.tx.db != nil, "tx closed")
k, v, flags := c.seek(seek)
@ -126,7 +128,7 @@ func (c *Cursor) Seek(seek []byte) (key []byte, value []byte) {
if k == nil {
return nil, nil
} else if (flags & uint32(bucketLeafFlag)) != 0 {
} else if (flags & uint32(common.BucketLeafFlag)) != 0 {
return k, nil
}
return k, v
@ -136,15 +138,15 @@ func (c *Cursor) Seek(seek []byte) (key []byte, value []byte) {
// Delete fails if current key/value is a bucket or if the transaction is not writable.
func (c *Cursor) Delete() error {
if c.bucket.tx.db == nil {
return ErrTxClosed
return common.ErrTxClosed
} else if !c.bucket.Writable() {
return ErrTxNotWritable
return common.ErrTxNotWritable
}
key, _, flags := c.keyValue()
// Return an error if current value is a bucket.
if (flags & bucketLeafFlag) != 0 {
return ErrIncompatibleValue
if (flags & common.BucketLeafFlag) != 0 {
return common.ErrIncompatibleValue
}
c.node().del(key)
@ -156,7 +158,7 @@ func (c *Cursor) Delete() error {
func (c *Cursor) seek(seek []byte) (key []byte, value []byte, flags uint32) {
// Start from root page/node and traverse to correct page.
c.stack = c.stack[:0]
c.search(seek, c.bucket.root)
c.search(seek, c.bucket.RootPage())
// If this is a bucket then return a nil value.
return c.keyValue()
@ -172,11 +174,11 @@ func (c *Cursor) goToFirstElementOnTheStack() {
}
// Keep adding pages pointing to the first element to the stack.
var pgId pgid
var pgId common.Pgid
if ref.node != nil {
pgId = ref.node.inodes[ref.index].pgid
} else {
pgId = ref.page.branchPageElement(uint16(ref.index)).pgid
pgId = ref.page.BranchPageElement(uint16(ref.index)).Pgid()
}
p, n := c.bucket.pageNode(pgId)
c.stack = append(c.stack, elemRef{page: p, node: n, index: 0})
@ -193,11 +195,11 @@ func (c *Cursor) last() {
}
// Keep adding pages pointing to the last element in the stack.
var pgId pgid
var pgId common.Pgid
if ref.node != nil {
pgId = ref.node.inodes[ref.index].pgid
} else {
pgId = ref.page.branchPageElement(uint16(ref.index)).pgid
pgId = ref.page.BranchPageElement(uint16(ref.index)).Pgid()
}
p, n := c.bucket.pageNode(pgId)
@ -268,10 +270,10 @@ func (c *Cursor) prev() (key []byte, value []byte, flags uint32) {
}
// search recursively performs a binary search against a given page/node until it finds a given key.
func (c *Cursor) search(key []byte, pgId pgid) {
func (c *Cursor) search(key []byte, pgId common.Pgid) {
p, n := c.bucket.pageNode(pgId)
if p != nil && (p.flags&(branchPageFlag|leafPageFlag)) == 0 {
panic(fmt.Sprintf("invalid page type: %d: %x", p.id, p.flags))
if p != nil && (p.Flags()&(common.BranchPageFlag|common.LeafPageFlag)) == 0 {
panic(fmt.Sprintf("invalid page type: %d: %x", p.Id(), p.Flags()))
}
e := elemRef{page: p, node: n}
c.stack = append(c.stack, e)
@ -309,15 +311,15 @@ func (c *Cursor) searchNode(key []byte, n *node) {
c.search(key, n.inodes[index].pgid)
}
func (c *Cursor) searchPage(key []byte, p *page) {
func (c *Cursor) searchPage(key []byte, p *common.Page) {
// Binary search for the correct range.
inodes := p.branchPageElements()
inodes := p.BranchPageElements()
var exact bool
index := sort.Search(int(p.count), func(i int) bool {
index := sort.Search(int(p.Count()), func(i int) bool {
// TODO(benbjohnson): Optimize this range search. It's a bit hacky right now.
// sort.Search() finds the lowest index where f() != -1 but we need the highest index.
ret := bytes.Compare(inodes[i].key(), key)
ret := bytes.Compare(inodes[i].Key(), key)
if ret == 0 {
exact = true
}
@ -329,7 +331,7 @@ func (c *Cursor) searchPage(key []byte, p *page) {
c.stack[len(c.stack)-1].index = index
// Recursively search to the next page.
c.search(key, inodes[index].pgid)
c.search(key, inodes[index].Pgid())
}
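The TODO comment above points at a real subtlety: sort.Search returns the lowest index satisfying the predicate, while a branch lookup needs the highest separator key that is still <= the target, hence the Compare(...) != -1 predicate plus the step back when the match is not exact. A standalone sketch of the same dance:

package main

import (
	"fmt"
	"sort"
)

func main() {
	keys := []string{"b", "e", "h"} // separator keys on a branch page
	target := "f"

	var exact bool
	index := sort.Search(len(keys), func(i int) bool {
		if keys[i] == target {
			exact = true
		}
		return keys[i] >= target // mirrors bytes.Compare(...) != -1
	})
	if index > 0 && !exact {
		index-- // descend into the child whose separator is the highest key <= target
	}
	fmt.Println(index) // 1: "f" belongs under the subtree rooted at "e"
}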
// nsearch searches the leaf node on the top of the stack for a key.
@ -347,9 +349,9 @@ func (c *Cursor) nsearch(key []byte) {
}
// If we have a page then search its leaf elements.
inodes := p.leafPageElements()
index := sort.Search(int(p.count), func(i int) bool {
return bytes.Compare(inodes[i].key(), key) != -1
inodes := p.LeafPageElements()
index := sort.Search(int(p.Count()), func(i int) bool {
return bytes.Compare(inodes[i].Key(), key) != -1
})
e.index = index
}
@ -370,13 +372,13 @@ func (c *Cursor) keyValue() ([]byte, []byte, uint32) {
}
// Or retrieve value from page.
elem := ref.page.leafPageElement(uint16(ref.index))
return elem.key(), elem.value(), elem.flags
elem := ref.page.LeafPageElement(uint16(ref.index))
return elem.Key(), elem.Value(), elem.Flags()
}
// node returns the node that the cursor is currently positioned on.
func (c *Cursor) node() *node {
_assert(len(c.stack) > 0, "accessing a node with a zero-length cursor stack")
common.Assert(len(c.stack) > 0, "accessing a node with a zero-length cursor stack")
// If the top of the stack is a leaf node then just return it.
if ref := &c.stack[len(c.stack)-1]; ref.node != nil && ref.isLeaf() {
@ -386,19 +388,19 @@ func (c *Cursor) node() *node {
// Start from root and traverse down the hierarchy.
var n = c.stack[0].node
if n == nil {
n = c.bucket.node(c.stack[0].page.id, nil)
n = c.bucket.node(c.stack[0].page.Id(), nil)
}
for _, ref := range c.stack[:len(c.stack)-1] {
_assert(!n.isLeaf, "expected branch node")
common.Assert(!n.isLeaf, "expected branch node")
n = n.childAt(ref.index)
}
_assert(n.isLeaf, "expected leaf node")
common.Assert(n.isLeaf, "expected leaf node")
return n
}
// elemRef represents a reference to an element on a given page/node.
type elemRef struct {
page *page
page *common.Page
node *node
index int
}
@ -408,7 +410,7 @@ func (r *elemRef) isLeaf() bool {
if r.node != nil {
return r.node.isLeaf
}
return (r.page.flags & leafPageFlag) != 0
return (r.page.Flags() & common.LeafPageFlag) != 0
}
// count returns the number of inodes or page elements.
@ -416,5 +418,5 @@ func (r *elemRef) count() int {
if r.node != nil {
return len(r.node.inodes)
}
return int(r.page.count)
return int(r.page.Count())
}


@ -13,6 +13,7 @@ import (
bolt "go.etcd.io/bbolt"
"go.etcd.io/bbolt/internal/btesting"
"go.etcd.io/bbolt/internal/common"
)
// Ensure that a cursor can return a reference to the bucket that created it.
@ -139,7 +140,7 @@ func TestCursor_Delete(t *testing.T) {
}
c.Seek([]byte("sub"))
if err := c.Delete(); err != bolt.ErrIncompatibleValue {
if err := c.Delete(); err != common.ErrIncompatibleValue {
t.Fatalf("unexpected error: %s", err)
}

db.go

@ -3,7 +3,6 @@ package bbolt
import (
"errors"
"fmt"
"hash/fnv"
"io"
"os"
"runtime"
@ -11,48 +10,13 @@ import (
"sync"
"time"
"unsafe"
"go.etcd.io/bbolt/internal/common"
)
// The largest step that can be taken when remapping the mmap.
const maxMmapStep = 1 << 30 // 1GB
// The data file format version.
const version = 2
// Represents a marker value to indicate that a file is a Bolt DB.
const magic uint32 = 0xED0CDAED
const pgidNoFreelist pgid = 0xffffffffffffffff
// IgnoreNoSync specifies whether the NoSync field of a DB is ignored when
// syncing changes to a file. This is required as some operating systems,
// such as OpenBSD, do not have a unified buffer cache (UBC) and writes
// must be synchronized using the msync(2) syscall.
const IgnoreNoSync = runtime.GOOS == "openbsd"
// Default values if not set in a DB instance.
const (
DefaultMaxBatchSize int = 1000
DefaultMaxBatchDelay = 10 * time.Millisecond
DefaultAllocSize = 16 * 1024 * 1024
)
// default page size for db is set to the OS page size.
var defaultPageSize = os.Getpagesize()
// The time elapsed between consecutive file locking attempts.
const flockRetryTimeout = 50 * time.Millisecond
// FreelistType is the type of the freelist backend
type FreelistType string
const (
// FreelistArrayType indicates backend freelist type is array
FreelistArrayType = FreelistType("array")
// FreelistMapType indicates backend freelist type is hashmap
FreelistMapType = FreelistType("hashmap")
)
// DB represents a collection of buckets persisted to a file on disk.
// All data access is performed through transactions which can be obtained through the DB.
// All the functions on DB will return a ErrDatabaseNotOpen if accessed before Open() is called.
@ -85,7 +49,7 @@ type DB struct {
// The alternative one is using hashmap, it is faster in almost all circumstances
// but it doesn't guarantee that it offers the smallest page id available. In normal case it is safe.
// The default type is array
FreelistType FreelistType
FreelistType common.FreelistType
// When true, skips the truncate call when growing the database.
// Setting this to true is only safe on non-ext3/ext4 systems.
@ -141,8 +105,8 @@ type DB struct {
data *[maxMapSize]byte
datasz int
filesz int // current on disk file size
meta0 *meta
meta1 *meta
meta0 *common.Meta
meta1 *common.Meta
pageSize int
opened bool
rwtx *Tx
@ -206,9 +170,9 @@ func Open(path string, mode os.FileMode, options *Options) (*DB, error) {
db.Mlock = options.Mlock
// Set default values for later DB operations.
db.MaxBatchSize = DefaultMaxBatchSize
db.MaxBatchDelay = DefaultMaxBatchDelay
db.AllocSize = DefaultAllocSize
db.MaxBatchSize = common.DefaultMaxBatchSize
db.MaxBatchDelay = common.DefaultMaxBatchDelay
db.AllocSize = common.DefaultAllocSize
flag := os.O_RDWR
if options.ReadOnly {
@ -249,7 +213,7 @@ func Open(path string, mode os.FileMode, options *Options) (*DB, error) {
if db.pageSize = options.PageSize; db.pageSize == 0 {
// Set the default page size to the OS page size.
db.pageSize = defaultPageSize
db.pageSize = common.DefaultPageSize
}
// Initialize the database if it doesn't exist.
@ -269,7 +233,7 @@ func Open(path string, mode os.FileMode, options *Options) (*DB, error) {
db.pageSize = pgSize
} else {
_ = db.close()
return nil, ErrInvalid
return nil, common.ErrInvalid
}
}
@ -347,7 +311,7 @@ func (db *DB) getPageSize() (int, error) {
return db.pageSize, nil
}
return 0, ErrInvalid
return 0, common.ErrInvalid
}
// getPageSizeFromFirstMeta reads the pageSize from the first meta page
@ -356,11 +320,11 @@ func (db *DB) getPageSizeFromFirstMeta() (int, bool, error) {
var metaCanRead bool
if bw, err := db.file.ReadAt(buf[:], 0); err == nil && bw == len(buf) {
metaCanRead = true
if m := db.pageInBuffer(buf[:], 0).meta(); m.validate() == nil {
return int(m.pageSize), metaCanRead, nil
if m := db.pageInBuffer(buf[:], 0).Meta(); m.Validate() == nil {
return int(m.PageSize()), metaCanRead, nil
}
}
return 0, metaCanRead, ErrInvalid
return 0, metaCanRead, common.ErrInvalid
}
// getPageSizeFromSecondMeta reads the pageSize from the second meta page
@ -392,13 +356,13 @@ func (db *DB) getPageSizeFromSecondMeta() (int, bool, error) {
bw, err := db.file.ReadAt(buf[:], pos)
if (err == nil && bw == len(buf)) || (err == io.EOF && int64(bw) == (fileSize-pos)) {
metaCanRead = true
if m := db.pageInBuffer(buf[:], 0).meta(); m.validate() == nil {
return int(m.pageSize), metaCanRead, nil
if m := db.pageInBuffer(buf[:], 0).Meta(); m.Validate() == nil {
return int(m.PageSize()), metaCanRead, nil
}
}
}
return 0, metaCanRead, ErrInvalid
return 0, metaCanRead, common.ErrInvalid
}
// loadFreelist reads the freelist if it is synced, or reconstructs it
@ -412,14 +376,14 @@ func (db *DB) loadFreelist() {
db.freelist.readIDs(db.freepages())
} else {
// Read free list from freelist page.
db.freelist.read(db.page(db.meta().freelist))
db.freelist.read(db.page(db.meta().Freelist()))
}
db.stats.FreePageN = db.freelist.free_count()
})
}
func (db *DB) hasSyncedFreelist() bool {
return db.meta().freelist != pgidNoFreelist
return db.meta().Freelist() != common.PgidNoFreelist
}
// mmap opens the underlying memory-mapped file and initializes the meta references.
@ -478,14 +442,14 @@ func (db *DB) mmap(minsz int) error {
}
// Save references to the meta pages.
db.meta0 = db.page(0).meta()
db.meta1 = db.page(1).meta()
db.meta0 = db.page(0).Meta()
db.meta1 = db.page(1).Meta()
// Validate the meta pages. We only return an error if both meta pages fail
// validation, since meta0 failing validation means that it wasn't saved
// properly -- but we can recover using meta1. And vice-versa.
err0 := db.meta0.validate()
err1 := db.meta1.validate()
err0 := db.meta0.Validate()
err1 := db.meta1.Validate()
if err0 != nil && err1 != nil {
return err0
}
@ -533,8 +497,8 @@ func (db *DB) mmapSize(size int) (int, error) {
// If larger than 1GB then grow by 1GB at a time.
sz := int64(size)
if remainder := sz % int64(maxMmapStep); remainder > 0 {
sz += int64(maxMmapStep) - remainder
if remainder := sz % int64(common.MaxMmapStep); remainder > 0 {
sz += int64(common.MaxMmapStep) - remainder
}
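The rounding above grows the map to the next whole 1 GB step once the size exceeds 1 GB. A quick standalone check of the arithmetic, with a made-up request size:

package main

import "fmt"

func main() {
	const maxMmapStep = 1 << 30      // 1GB, as in the deleted constant
	sz := int64(1<<30 + 200<<20)     // 1.2GB requested (invented figure)
	if remainder := sz % maxMmapStep; remainder > 0 {
		sz += maxMmapStep - remainder
	}
	fmt.Println(sz == 2<<30) // true: rounded up to the next 1GB step
}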
// Ensure that the mmap size is a multiple of the page size.
@ -581,33 +545,33 @@ func (db *DB) init() error {
// Create two meta pages on a buffer.
buf := make([]byte, db.pageSize*4)
for i := 0; i < 2; i++ {
p := db.pageInBuffer(buf, pgid(i))
p.id = pgid(i)
p.flags = metaPageFlag
p := db.pageInBuffer(buf, common.Pgid(i))
p.SetId(common.Pgid(i))
p.SetFlags(common.MetaPageFlag)
// Initialize the meta page.
m := p.meta()
m.magic = magic
m.version = version
m.pageSize = uint32(db.pageSize)
m.freelist = 2
m.root = bucket{root: 3}
m.pgid = 4
m.txid = txid(i)
m.checksum = m.sum64()
m := p.Meta()
m.SetMagic(common.Magic)
m.SetVersion(common.Version)
m.SetPageSize(uint32(db.pageSize))
m.SetFreelist(2)
m.SetRootBucket(common.NewInBucket(3, 0))
m.SetPgid(4)
m.SetTxid(common.Txid(i))
m.SetChecksum(m.Sum64())
}
// Write an empty freelist at page 3.
p := db.pageInBuffer(buf, pgid(2))
p.id = pgid(2)
p.flags = freelistPageFlag
p.count = 0
p := db.pageInBuffer(buf, common.Pgid(2))
p.SetId(2)
p.SetFlags(common.FreelistPageFlag)
p.SetCount(0)
// Write an empty leaf page at page 4.
p = db.pageInBuffer(buf, pgid(3))
p.id = pgid(3)
p.flags = leafPageFlag
p.count = 0
p = db.pageInBuffer(buf, common.Pgid(3))
p.SetId(3)
p.SetFlags(common.LeafPageFlag)
p.SetCount(0)
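Read together, init() lays out a fresh file as four pages: two alternating meta pages, then the freelist, then the root bucket's empty leaf (the page ids the code writes are 2 and 3, even though the prose comments count them as 3 and 4). A tiny sketch of the resulting offsets, assuming a 4 KB page size:

package main

import "fmt"

func main() {
	pageSize := 4096 // assumed; the real value comes from db.pageSize
	layout := []string{
		"meta (txid 0)",
		"meta (txid 1)",
		"freelist (empty)",
		"leaf (empty root bucket)",
	}
	for id, role := range layout {
		fmt.Printf("page %d @ offset %d: %s\n", id, id*pageSize, role)
	}
}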
// Write the buffer to our data file.
if _, err := db.ops.writeAt(buf, 0); err != nil {
@ -719,14 +683,14 @@ func (db *DB) beginTx() (*Tx, error) {
if !db.opened {
db.mmaplock.RUnlock()
db.metalock.Unlock()
return nil, ErrDatabaseNotOpen
return nil, common.ErrDatabaseNotOpen
}
// Exit if the database is not correctly mapped.
if db.data == nil {
db.mmaplock.RUnlock()
db.metalock.Unlock()
return nil, ErrInvalidMapping
return nil, common.ErrInvalidMapping
}
// Create a transaction associated with the database.
@ -752,7 +716,7 @@ func (db *DB) beginTx() (*Tx, error) {
func (db *DB) beginRWTx() (*Tx, error) {
// If the database was opened with Options.ReadOnly, return an error.
if db.readOnly {
return nil, ErrDatabaseReadOnly
return nil, common.ErrDatabaseReadOnly
}
// Obtain writer lock. This is released by the transaction when it closes.
@ -767,13 +731,13 @@ func (db *DB) beginRWTx() (*Tx, error) {
// Exit if the database is not open yet.
if !db.opened {
db.rwlock.Unlock()
return nil, ErrDatabaseNotOpen
return nil, common.ErrDatabaseNotOpen
}
// Exit if the database is not correctly mapped.
if db.data == nil {
db.rwlock.Unlock()
return nil, ErrInvalidMapping
return nil, common.ErrInvalidMapping
}
// Create a transaction associated with the database.
@ -788,19 +752,19 @@ func (db *DB) beginRWTx() (*Tx, error) {
func (db *DB) freePages() {
// Free all pending pages prior to earliest open transaction.
sort.Sort(txsById(db.txs))
minid := txid(0xFFFFFFFFFFFFFFFF)
minid := common.Txid(0xFFFFFFFFFFFFFFFF)
if len(db.txs) > 0 {
minid = db.txs[0].meta.txid
minid = db.txs[0].meta.Txid()
}
if minid > 0 {
db.freelist.release(minid - 1)
}
// Release unused txid extents.
for _, t := range db.txs {
db.freelist.releaseRange(minid, t.meta.txid-1)
minid = t.meta.txid + 1
db.freelist.releaseRange(minid, t.meta.Txid()-1)
minid = t.meta.Txid() + 1
}
db.freelist.releaseRange(minid, txid(0xFFFFFFFFFFFFFFFF))
db.freelist.releaseRange(minid, common.Txid(0xFFFFFFFFFFFFFFFF))
// Any page both allocated and freed in an extent is safe to release.
}
@ -808,7 +772,7 @@ type txsById []*Tx
func (t txsById) Len() int { return len(t) }
func (t txsById) Swap(i, j int) { t[i], t[j] = t[j], t[i] }
func (t txsById) Less(i, j int) bool { return t[i].meta.txid < t[j].meta.txid }
func (t txsById) Less(i, j int) bool { return t[i].meta.Txid() < t[j].meta.Txid() }
// removeTx removes a transaction from the database.
func (db *DB) removeTx(tx *Tx) {
@ -1050,37 +1014,37 @@ func (db *DB) Stats() Stats {
// This is for internal access to the raw data bytes from the C cursor, use
// carefully, or not at all.
func (db *DB) Info() *Info {
_assert(db.data != nil, "database file isn't correctly mapped")
common.Assert(db.data != nil, "database file isn't correctly mapped")
return &Info{uintptr(unsafe.Pointer(&db.data[0])), db.pageSize}
}
// page retrieves a page reference from the mmap based on the current page size.
func (db *DB) page(id pgid) *page {
pos := id * pgid(db.pageSize)
return (*page)(unsafe.Pointer(&db.data[pos]))
func (db *DB) page(id common.Pgid) *common.Page {
pos := id * common.Pgid(db.pageSize)
return (*common.Page)(unsafe.Pointer(&db.data[pos]))
}
// pageInBuffer retrieves a page reference from a given byte array based on the current page size.
func (db *DB) pageInBuffer(b []byte, id pgid) *page {
return (*page)(unsafe.Pointer(&b[id*pgid(db.pageSize)]))
func (db *DB) pageInBuffer(b []byte, id common.Pgid) *common.Page {
return (*common.Page)(unsafe.Pointer(&b[id*common.Pgid(db.pageSize)]))
}
// meta retrieves the current meta page reference.
func (db *DB) meta() *meta {
func (db *DB) meta() *common.Meta {
// We have to return the meta with the highest txid which doesn't fail
// validation. Otherwise, we can cause errors when in fact the database is
// in a consistent state. metaA is the one with the higher txid.
metaA := db.meta0
metaB := db.meta1
if db.meta1.txid > db.meta0.txid {
if db.meta1.Txid() > db.meta0.Txid() {
metaA = db.meta1
metaB = db.meta0
}
// Use higher meta page if valid. Otherwise, fallback to previous, if valid.
if err := metaA.validate(); err == nil {
if err := metaA.Validate(); err == nil {
return metaA
} else if err := metaB.validate(); err == nil {
} else if err := metaB.Validate(); err == nil {
return metaB
}
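The fallback order above is the crash-recovery story: prefer the meta with the higher txid, but if it fails validation (say, a torn write during the last commit), fall back one commit to the other meta. A small sketch of the same selection, with Validate() stood in by a boolean:

package main

import "fmt"

type metaPage struct {
	txid  uint64
	valid bool // stand-in for Validate() == nil
}

// pick mirrors db.meta(): prefer the higher txid, fall back to the other
// meta page if the higher one fails validation.
func pick(m0, m1 metaPage) (metaPage, error) {
	a, b := m0, m1
	if m1.txid > m0.txid {
		a, b = m1, m0
	}
	if a.valid {
		return a, nil
	}
	if b.valid {
		return b, nil
	}
	return metaPage{}, fmt.Errorf("both meta pages invalid")
}

func main() {
	m, _ := pick(metaPage{txid: 8, valid: true}, metaPage{txid: 9, valid: false})
	fmt.Println(m.txid) // 8: falls back one commit after a torn write
}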
@ -1090,7 +1054,7 @@ func (db *DB) meta() *meta {
}
// allocate returns a contiguous block of memory starting at a given page.
func (db *DB) allocate(txid txid, count int) (*page, error) {
func (db *DB) allocate(txid common.Txid, count int) (*common.Page, error) {
// Allocate a temporary buffer for the page.
var buf []byte
if count == 1 {
@ -1098,17 +1062,18 @@ func (db *DB) allocate(txid txid, count int) (*page, error) {
} else {
buf = make([]byte, count*db.pageSize)
}
p := (*page)(unsafe.Pointer(&buf[0]))
p.overflow = uint32(count - 1)
p := (*common.Page)(unsafe.Pointer(&buf[0]))
p.SetOverflow(uint32(count - 1))
// Use pages from the freelist if they are available.
if p.id = db.freelist.allocate(txid, count); p.id != 0 {
p.SetId(db.freelist.allocate(txid, count))
if p.Id() != 0 {
return p, nil
}
// Resize mmap() if we're at the end.
p.id = db.rwtx.meta.pgid
var minsz = int((p.id+pgid(count))+1) * db.pageSize
p.SetId(db.rwtx.meta.Pgid())
var minsz = int((p.Id()+common.Pgid(count))+1) * db.pageSize
if minsz >= db.datasz {
if err := db.mmap(minsz); err != nil {
return nil, fmt.Errorf("mmap allocate error: %s", err)
@ -1116,7 +1081,8 @@ func (db *DB) allocate(txid txid, count int) (*page, error) {
}
// Move the page id high water mark.
db.rwtx.meta.pgid += pgid(count)
curPgid := db.rwtx.meta.Pgid()
db.rwtx.meta.SetPgid(curPgid + common.Pgid(count))
return p, nil
}
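
When the freelist has nothing contiguous, the new block starts at the high water mark, and the file may need to be re-mmapped first; the `+1` in `minsz` keeps one page of slack past the end of the block. A small worked example with hypothetical numbers:

```go
package main

import "fmt"

const pageSize = 4096

func main() {
	hwm := uint64(100) // high water mark: id of the next page to be added
	count := uint64(3) // pages requested

	id := hwm
	// The mmap must cover one page past the new block before it can be
	// written, hence the +1.
	minsz := (id + count + 1) * pageSize
	fmt.Println(id, minsz) // 100 425984

	// After handing out the block, the high water mark moves up.
	hwm += count
	fmt.Println(hwm) // 103
}
```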
@ -1163,7 +1129,7 @@ func (db *DB) IsReadOnly() bool {
return db.readOnly
}
func (db *DB) freepages() []pgid {
func (db *DB) freepages() []common.Pgid {
tx, err := db.beginTx()
defer func() {
err = tx.Rollback()
@ -1175,8 +1141,8 @@ func (db *DB) freepages() []pgid {
panic("freepages: failed to open read only tx")
}
reachable := make(map[pgid]*page)
nofreed := make(map[pgid]bool)
reachable := make(map[common.Pgid]*common.Page)
nofreed := make(map[common.Pgid]bool)
ech := make(chan error)
go func() {
for e := range ech {
@ -1188,8 +1154,8 @@ func (db *DB) freepages() []pgid {
// TODO: If check bucket reported any corruptions (ech) we shouldn't proceed to freeing the pages.
var fids []pgid
for i := pgid(2); i < db.meta().pgid; i++ {
var fids []common.Pgid
for i := common.Pgid(2); i < db.meta().Pgid(); i++ {
if _, ok := reachable[i]; !ok {
fids = append(fids, i)
}
@ -1221,7 +1187,7 @@ type Options struct {
// The alternative one is using hashmap. It is faster in almost all circumstances,
// but it doesn't guarantee that it offers the smallest page id available. In normal cases it is safe.
// The default type is array
FreelistType FreelistType
FreelistType common.FreelistType
// Open database in read-only mode. Uses flock(..., LOCK_SH |LOCK_NB) to
// grab a shared lock (UNIX).
@ -1263,7 +1229,7 @@ type Options struct {
var DefaultOptions = &Options{
Timeout: 0,
NoGrowSync: false,
FreelistType: FreelistArrayType,
FreelistType: common.FreelistArrayType,
}
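
Within this repository's tests (`internal/common` cannot be imported from outside the module), opting into the hashmap backend looks roughly like this sketch (not an existing test):

```go
package bbolt_test

import (
	"path/filepath"
	"testing"

	bolt "go.etcd.io/bbolt"
	"go.etcd.io/bbolt/internal/common"
)

// Sketch: copy the defaults and switch the freelist backend before
// opening the database.
func TestOpenWithHashmapFreelist(t *testing.T) {
	o := *bolt.DefaultOptions
	o.FreelistType = common.FreelistMapType

	db, err := bolt.Open(filepath.Join(t.TempDir(), "db"), 0600, &o)
	if err != nil {
		t.Fatal(err)
	}
	defer db.Close()
}
```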
// Stats represents statistics about the database.
@ -1302,65 +1268,3 @@ type Info struct {
Data uintptr
PageSize int
}
type meta struct {
magic uint32
version uint32
pageSize uint32
flags uint32
root bucket
freelist pgid
pgid pgid
txid txid
checksum uint64
}
// validate checks the marker bytes and version of the meta page to ensure it matches this binary.
func (m *meta) validate() error {
if m.magic != magic {
return ErrInvalid
} else if m.version != version {
return ErrVersionMismatch
} else if m.checksum != m.sum64() {
return ErrChecksum
}
return nil
}
// copy copies one meta object to another.
func (m *meta) copy(dest *meta) {
*dest = *m
}
// write writes the meta onto a page.
func (m *meta) write(p *page) {
if m.root.root >= m.pgid {
panic(fmt.Sprintf("root bucket pgid (%d) above high water mark (%d)", m.root.root, m.pgid))
} else if m.freelist >= m.pgid && m.freelist != pgidNoFreelist {
// TODO: reject pgidNoFreeList if !NoFreelistSync
panic(fmt.Sprintf("freelist pgid (%d) above high water mark (%d)", m.freelist, m.pgid))
}
// Page id is either going to be 0 or 1 which we can determine by the transaction ID.
p.id = pgid(m.txid % 2)
p.flags |= metaPageFlag
// Calculate the checksum.
m.checksum = m.sum64()
m.copy(p.meta())
}
// generates the checksum for the meta.
func (m *meta) sum64() uint64 {
var h = fnv.New64a()
_, _ = h.Write((*[unsafe.Offsetof(meta{}.checksum)]byte)(unsafe.Pointer(m))[:])
return h.Sum64()
}
// _assert will panic with a given formatted message if the given condition is false.
func _assert(condition bool, msg string, v ...interface{}) {
if !condition {
panic(fmt.Sprintf("assertion failed: "+msg, v...))
}
}

View File

@ -21,6 +21,7 @@ import (
bolt "go.etcd.io/bbolt"
"go.etcd.io/bbolt/internal/btesting"
"go.etcd.io/bbolt/internal/common"
)
// pageSize is the size of one page in the data file.
@ -136,7 +137,7 @@ func TestOpen_ErrInvalid(t *testing.T) {
t.Fatal(err)
}
if _, err := bolt.Open(path, 0666, nil); err != bolt.ErrInvalid {
if _, err := bolt.Open(path, 0666, nil); err != common.ErrInvalid {
t.Fatalf("unexpected error: %s", err)
}
}
@ -172,7 +173,7 @@ func TestOpen_ErrVersionMismatch(t *testing.T) {
}
// Reopen data file.
if _, err := bolt.Open(path, 0666, nil); err != bolt.ErrVersionMismatch {
if _, err := bolt.Open(path, 0666, nil); err != common.ErrVersionMismatch {
t.Fatalf("unexpected error: %s", err)
}
}
@ -208,7 +209,7 @@ func TestOpen_ErrChecksum(t *testing.T) {
}
// Reopen data file.
if _, err := bolt.Open(path, 0666, nil); err != bolt.ErrChecksum {
if _, err := bolt.Open(path, 0666, nil); err != common.ErrChecksum {
t.Fatalf("unexpected error: %s", err)
}
}
@ -552,7 +553,7 @@ func TestDB_Open_ReadOnly(t *testing.T) {
}
// Can't launch read-write transaction.
if _, err := readOnlyDB.Begin(true); err != bolt.ErrDatabaseReadOnly {
if _, err := readOnlyDB.Begin(true); err != common.ErrDatabaseReadOnly {
t.Fatalf("unexpected error: %s", err)
}
@ -641,7 +642,7 @@ func TestOpen_RecoverFreeList(t *testing.T) {
// Ensure that a database cannot open a transaction when it's not open.
func TestDB_Begin_ErrDatabaseNotOpen(t *testing.T) {
var db bolt.DB
if _, err := db.Begin(false); err != bolt.ErrDatabaseNotOpen {
if _, err := db.Begin(false); err != common.ErrDatabaseNotOpen {
t.Fatalf("unexpected error: %s", err)
}
}
@ -727,7 +728,7 @@ func TestDB_Concurrent_WriteTo(t *testing.T) {
// Ensure that opening a transaction while the DB is closed returns an error.
func TestDB_BeginRW_Closed(t *testing.T) {
var db bolt.DB
if _, err := db.Begin(true); err != bolt.ErrDatabaseNotOpen {
if _, err := db.Begin(true); err != common.ErrDatabaseNotOpen {
t.Fatalf("unexpected error: %s", err)
}
}
@ -828,7 +829,7 @@ func TestDB_Update_Closed(t *testing.T) {
t.Fatal(err)
}
return nil
}); err != bolt.ErrDatabaseNotOpen {
}); err != common.ErrDatabaseNotOpen {
t.Fatalf("unexpected error: %s", err)
}
}

View File

@ -6,6 +6,8 @@ import (
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"go.etcd.io/bbolt/internal/common"
)
func TestOpenWithPreLoadFreelist(t *testing.T) {
@ -76,7 +78,7 @@ func TestMethodPage(t *testing.T) {
name: "readonly mode without preloading free pages",
readonly: true,
preLoadFreePage: false,
expectedError: ErrFreePagesNotLoaded,
expectedError: common.ErrFreePagesNotLoaded,
},
}

View File

@ -1,78 +0,0 @@
package bbolt
import "errors"
// These errors can be returned when opening or calling methods on a DB.
var (
// ErrDatabaseNotOpen is returned when a DB instance is accessed before it
// is opened or after it is closed.
ErrDatabaseNotOpen = errors.New("database not open")
// ErrDatabaseOpen is returned when opening a database that is
// already open.
ErrDatabaseOpen = errors.New("database already open")
// ErrInvalid is returned when both meta pages on a database are invalid.
// This typically occurs when a file is not a bolt database.
ErrInvalid = errors.New("invalid database")
// ErrInvalidMapping is returned when the database file fails to get mapped.
ErrInvalidMapping = errors.New("database isn't correctly mapped")
// ErrVersionMismatch is returned when the data file was created with a
// different version of Bolt.
ErrVersionMismatch = errors.New("version mismatch")
// ErrChecksum is returned when either meta page checksum does not match.
ErrChecksum = errors.New("checksum error")
// ErrTimeout is returned when a database cannot obtain an exclusive lock
// on the data file after the timeout passed to Open().
ErrTimeout = errors.New("timeout")
)
// These errors can occur when beginning or committing a Tx.
var (
// ErrTxNotWritable is returned when performing a write operation on a
// read-only transaction.
ErrTxNotWritable = errors.New("tx not writable")
// ErrTxClosed is returned when committing or rolling back a transaction
// that has already been committed or rolled back.
ErrTxClosed = errors.New("tx closed")
// ErrDatabaseReadOnly is returned when a mutating transaction is started on a
// read-only database.
ErrDatabaseReadOnly = errors.New("database is in read-only mode")
// ErrFreePagesNotLoaded is returned when a readonly transaction without
// preloading the free pages is trying to access the free pages.
ErrFreePagesNotLoaded = errors.New("free pages are not pre-loaded")
)
// These errors can occur when putting or deleting a value or a bucket.
var (
// ErrBucketNotFound is returned when trying to access a bucket that has
// not been created yet.
ErrBucketNotFound = errors.New("bucket not found")
// ErrBucketExists is returned when creating a bucket that already exists.
ErrBucketExists = errors.New("bucket already exists")
// ErrBucketNameRequired is returned when creating a bucket with a blank name.
ErrBucketNameRequired = errors.New("bucket name required")
// ErrKeyRequired is returned when inserting a zero-length key.
ErrKeyRequired = errors.New("key required")
// ErrKeyTooLarge is returned when inserting a key that is larger than MaxKeySize.
ErrKeyTooLarge = errors.New("key too large")
// ErrValueTooLarge is returned when inserting a value that is larger than MaxValueSize.
ErrValueTooLarge = errors.New("value too large")
// ErrIncompatibleValue is returned when trying to create or delete a bucket
// on an existing non-bucket key or when trying to create or delete a
// non-bucket key on an existing bucket key.
ErrIncompatibleValue = errors.New("incompatible value")
)

View File

@ -4,50 +4,52 @@ import (
"fmt"
"sort"
"unsafe"
"go.etcd.io/bbolt/internal/common"
)
// txPending holds a list of pgids and corresponding allocation txns
// that are pending to be freed.
type txPending struct {
ids []pgid
alloctx []txid // txids allocating the ids
lastReleaseBegin txid // beginning txid of last matching releaseRange
ids []common.Pgid
alloctx []common.Txid // txids allocating the ids
lastReleaseBegin common.Txid // beginning txid of last matching releaseRange
}
// pidSet holds the set of starting pgids which have the same span size
type pidSet map[pgid]struct{}
type pidSet map[common.Pgid]struct{}
// freelist represents a list of all pages that are available for allocation.
// It also tracks pages that have been freed but are still in use by open transactions.
type freelist struct {
freelistType FreelistType // freelist type
ids []pgid // all free and available free page ids.
allocs map[pgid]txid // mapping of txid that allocated a pgid.
pending map[txid]*txPending // mapping of soon-to-be free page ids by tx.
cache map[pgid]struct{} // fast lookup of all free and pending page ids.
freemaps map[uint64]pidSet // key is the size of continuous pages(span), value is a set which contains the starting pgids of same size
forwardMap map[pgid]uint64 // key is start pgid, value is its span size
backwardMap map[pgid]uint64 // key is end pgid, value is its span size
allocate func(txid txid, n int) pgid // the freelist allocate func
free_count func() int // the function which gives you free page number
mergeSpans func(ids pgids) // the mergeSpan func
getFreePageIDs func() []pgid // get free pgids func
readIDs func(pgids []pgid) // readIDs func reads list of pages and init the freelist
freelistType common.FreelistType // freelist type
ids []common.Pgid // all free and available free page ids.
allocs map[common.Pgid]common.Txid // mapping of Txid that allocated a pgid.
pending map[common.Txid]*txPending // mapping of soon-to-be free page ids by tx.
cache map[common.Pgid]struct{} // fast lookup of all free and pending page ids.
freemaps map[uint64]pidSet // key is the size of continuous pages(span), value is a set which contains the starting pgids of same size
forwardMap map[common.Pgid]uint64 // key is start pgid, value is its span size
backwardMap map[common.Pgid]uint64 // key is end pgid, value is its span size
allocate func(txid common.Txid, n int) common.Pgid // the freelist allocate func
free_count func() int // the function which gives you free page number
mergeSpans func(ids common.Pgids) // the mergeSpan func
getFreePageIDs func() []common.Pgid // get free pgids func
readIDs func(pgids []common.Pgid) // readIDs func reads list of pages and init the freelist
}
// newFreelist returns an empty, initialized freelist.
func newFreelist(freelistType FreelistType) *freelist {
func newFreelist(freelistType common.FreelistType) *freelist {
f := &freelist{
freelistType: freelistType,
allocs: make(map[pgid]txid),
pending: make(map[txid]*txPending),
cache: make(map[pgid]struct{}),
allocs: make(map[common.Pgid]common.Txid),
pending: make(map[common.Txid]*txPending),
cache: make(map[common.Pgid]struct{}),
freemaps: make(map[uint64]pidSet),
forwardMap: make(map[pgid]uint64),
backwardMap: make(map[pgid]uint64),
forwardMap: make(map[common.Pgid]uint64),
backwardMap: make(map[common.Pgid]uint64),
}
if freelistType == FreelistMapType {
if freelistType == common.FreelistMapType {
f.allocate = f.hashmapAllocate
f.free_count = f.hashmapFreeCount
f.mergeSpans = f.hashmapMergeSpans
@ -71,7 +73,7 @@ func (f *freelist) size() int {
// The first element will be used to store the count. See freelist.write.
n++
}
return int(pageHeaderSize) + (int(unsafe.Sizeof(pgid(0))) * n)
return int(common.PageHeaderSize) + (int(unsafe.Sizeof(common.Pgid(0))) * n)
}
// count returns count of pages on the freelist
@ -95,23 +97,23 @@ func (f *freelist) pending_count() int {
// copyall copies a list of all free ids and all pending ids in one sorted list.
// f.count returns the minimum length required for dst.
func (f *freelist) copyall(dst []pgid) {
m := make(pgids, 0, f.pending_count())
func (f *freelist) copyall(dst []common.Pgid) {
m := make(common.Pgids, 0, f.pending_count())
for _, txp := range f.pending {
m = append(m, txp.ids...)
}
sort.Sort(m)
mergepgids(dst, f.getFreePageIDs(), m)
common.Mergepgids(dst, f.getFreePageIDs(), m)
}
// arrayAllocate returns the starting page id of a contiguous list of pages of a given size.
// If a contiguous block cannot be found then 0 is returned.
func (f *freelist) arrayAllocate(txid txid, n int) pgid {
func (f *freelist) arrayAllocate(txid common.Txid, n int) common.Pgid {
if len(f.ids) == 0 {
return 0
}
var initial, previd pgid
var initial, previd common.Pgid
for i, id := range f.ids {
if id <= 1 {
panic(fmt.Sprintf("invalid page allocation: %d", id))
@ -123,7 +125,7 @@ func (f *freelist) arrayAllocate(txid txid, n int) pgid {
}
// If we found a contiguous block then remove it and return it.
if (id-initial)+1 == pgid(n) {
if (id-initial)+1 == common.Pgid(n) {
// If we're allocating off the beginning then take the fast path
// and just adjust the existing slice. This will use extra memory
// temporarily but the append() in free() will realloc the slice
@ -136,7 +138,7 @@ func (f *freelist) arrayAllocate(txid txid, n int) pgid {
}
// Remove from the free cache.
for i := pgid(0); i < pgid(n); i++ {
for i := common.Pgid(0); i < common.Pgid(n); i++ {
delete(f.cache, initial+i)
}
f.allocs[initial] = txid
@ -150,9 +152,9 @@ func (f *freelist) arrayAllocate(txid txid, n int) pgid {
// free releases a page and its overflow for a given transaction id.
// If the page is already free then a panic will occur.
func (f *freelist) free(txid txid, p *page) {
if p.id <= 1 {
panic(fmt.Sprintf("cannot free page 0 or 1: %d", p.id))
func (f *freelist) free(txid common.Txid, p *common.Page) {
if p.Id() <= 1 {
panic(fmt.Sprintf("cannot free page 0 or 1: %d", p.Id()))
}
// Free page and all its overflow pages.
@ -161,15 +163,15 @@ func (f *freelist) free(txid txid, p *page) {
txp = &txPending{}
f.pending[txid] = txp
}
allocTxid, ok := f.allocs[p.id]
allocTxid, ok := f.allocs[p.Id()]
if ok {
delete(f.allocs, p.id)
} else if (p.flags & freelistPageFlag) != 0 {
delete(f.allocs, p.Id())
} else if (p.Flags() & common.FreelistPageFlag) != 0 {
// Freelist is always allocated by prior tx.
allocTxid = txid - 1
}
for id := p.id; id <= p.id+pgid(p.overflow); id++ {
for id := p.Id(); id <= p.Id()+common.Pgid(p.Overflow()); id++ {
// Verify that page is not already free.
if _, ok := f.cache[id]; ok {
panic(fmt.Sprintf("page %d already freed", id))
@ -182,8 +184,8 @@ func (f *freelist) free(txid txid, p *page) {
}
// release moves all page ids for a transaction id (or older) to the freelist.
func (f *freelist) release(txid txid) {
m := make(pgids, 0)
func (f *freelist) release(txid common.Txid) {
m := make(common.Pgids, 0)
for tid, txp := range f.pending {
if tid <= txid {
// Move transaction's pending pages to the available freelist.
@ -196,11 +198,11 @@ func (f *freelist) release(txid txid) {
}
// releaseRange moves pending pages allocated within an extent [begin,end] to the free list.
func (f *freelist) releaseRange(begin, end txid) {
func (f *freelist) releaseRange(begin, end common.Txid) {
if begin > end {
return
}
var m pgids
var m common.Pgids
for tid, txp := range f.pending {
if tid < begin || tid > end {
continue
@ -229,13 +231,13 @@ func (f *freelist) releaseRange(begin, end txid) {
}
// rollback removes the pages from a given pending tx.
func (f *freelist) rollback(txid txid) {
func (f *freelist) rollback(txid common.Txid) {
// Remove page ids from cache.
txp := f.pending[txid]
if txp == nil {
return
}
var m pgids
var m common.Pgids
for i, pgid := range txp.ids {
delete(f.cache, pgid)
tx := txp.alloctx[i]
@ -256,82 +258,69 @@ func (f *freelist) rollback(txid txid) {
}
// freed returns whether a given page is in the free list.
func (f *freelist) freed(pgId pgid) bool {
func (f *freelist) freed(pgId common.Pgid) bool {
_, ok := f.cache[pgId]
return ok
}
// read initializes the freelist from a freelist page.
func (f *freelist) read(p *page) {
if (p.flags & freelistPageFlag) == 0 {
panic(fmt.Sprintf("invalid freelist page: %d, page type is %s", p.id, p.typ()))
}
// If the page.count is at the max uint16 value (64k) then it's considered
// an overflow and the size of the freelist is stored as the first element.
var idx, count = 0, int(p.count)
if count == 0xFFFF {
idx = 1
c := *(*pgid)(unsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p)))
count = int(c)
if count < 0 {
panic(fmt.Sprintf("leading element count %d overflows int", c))
}
func (f *freelist) read(p *common.Page) {
if (p.Flags() & common.FreelistPageFlag) == 0 {
panic(fmt.Sprintf("invalid freelist page: %d, page type is %s", p.Id(), p.Typ()))
}
ids := p.FreelistPageIds()
// Copy the list of page ids from the freelist.
if count == 0 {
if len(ids) == 0 {
f.ids = nil
} else {
var ids []pgid
data := unsafeIndex(unsafe.Pointer(p), unsafe.Sizeof(*p), unsafe.Sizeof(ids[0]), idx)
unsafeSlice(unsafe.Pointer(&ids), data, count)
// copy the ids, so we don't modify the freelist page directly
idsCopy := make([]pgid, count)
idsCopy := make([]common.Pgid, len(ids))
copy(idsCopy, ids)
// Make sure they're sorted.
sort.Sort(pgids(idsCopy))
sort.Sort(common.Pgids(idsCopy))
f.readIDs(idsCopy)
}
}
// arrayReadIDs initializes the freelist from a given list of ids.
func (f *freelist) arrayReadIDs(ids []pgid) {
func (f *freelist) arrayReadIDs(ids []common.Pgid) {
f.ids = ids
f.reindex()
}
func (f *freelist) arrayGetFreePageIDs() []pgid {
func (f *freelist) arrayGetFreePageIDs() []common.Pgid {
return f.ids
}
// write writes the page ids onto a freelist page. All free and pending ids are
// saved to disk since in the event of a program crash, all pending ids will
// become free.
func (f *freelist) write(p *page) error {
func (f *freelist) write(p *common.Page) error {
// Combine the old free pgids and pgids waiting on an open transaction.
// Update the header flag.
p.flags |= freelistPageFlag
p.FlagsXOR(common.FreelistPageFlag)
// The page.count can only hold up to 64k elements so if we overflow that
// number then we handle it by putting the size in the first element.
l := f.count()
if l == 0 {
p.count = uint16(l)
p.SetCount(uint16(l))
} else if l < 0xFFFF {
p.count = uint16(l)
var ids []pgid
data := unsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p))
unsafeSlice(unsafe.Pointer(&ids), data, l)
p.SetCount(uint16(l))
var ids []common.Pgid
data := common.UnsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p))
common.UnsafeSlice(unsafe.Pointer(&ids), data, l)
f.copyall(ids)
} else {
p.count = 0xFFFF
var ids []pgid
data := unsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p))
unsafeSlice(unsafe.Pointer(&ids), data, l+1)
ids[0] = pgid(l)
p.SetCount(0xFFFF)
var ids []common.Pgid
data := common.UnsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p))
common.UnsafeSlice(unsafe.Pointer(&ids), data, l+1)
ids[0] = common.Pgid(l)
f.copyall(ids[1:])
}
@ -339,11 +328,11 @@ func (f *freelist) write(p *page) error {
}
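
Because `page.count` is a `uint16`, a freelist with 64K-1 or more ids stores `0xFFFF` in the header and moves the real count into the first list element; `read` undoes the same convention. A sketch of the encoding with a plain slice standing in for the page body:

```go
package main

import "fmt"

// encode stores the id count in the uint16 header when it fits, and
// otherwise writes 0xFFFF and prepends the real count to the body.
func encode(ids []uint64) (headerCount uint16, body []uint64) {
	if len(ids) < 0xFFFF {
		return uint16(len(ids)), ids
	}
	return 0xFFFF, append([]uint64{uint64(len(ids))}, ids...)
}

func decode(headerCount uint16, body []uint64) []uint64 {
	if headerCount == 0xFFFF {
		n := body[0] // leading element carries the real count
		return body[1 : 1+n]
	}
	return body[:headerCount]
}

func main() {
	small := []uint64{3, 11, 12}
	c, b := encode(small)
	fmt.Println(c, decode(c, b)) // 3 [3 11 12]

	big := make([]uint64, 70000) // forces the overflow encoding
	c, b = encode(big)
	fmt.Println(c, b[0], len(decode(c, b))) // 65535 70000 70000
}
```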
// reload reads the freelist from a page and filters out pending items.
func (f *freelist) reload(p *page) {
func (f *freelist) reload(p *common.Page) {
f.read(p)
// Build a cache of only pending pages.
pcache := make(map[pgid]bool)
pcache := make(map[common.Pgid]bool)
for _, txp := range f.pending {
for _, pendingID := range txp.ids {
pcache[pendingID] = true
@ -352,7 +341,7 @@ func (f *freelist) reload(p *page) {
// Check each page in the freelist and build a new available freelist
// with any pages not in the pending lists.
var a []pgid
var a []common.Pgid
for _, id := range f.getFreePageIDs() {
if !pcache[id] {
a = append(a, id)
@ -362,10 +351,10 @@ func (f *freelist) reload(p *page) {
f.readIDs(a)
}
// noSyncReload reads the freelist from pgids and filters out pending items.
func (f *freelist) noSyncReload(pgids []pgid) {
// noSyncReload reads the freelist from Pgids and filters out pending items.
func (f *freelist) noSyncReload(Pgids []common.Pgid) {
// Build a cache of only pending pages.
pcache := make(map[pgid]bool)
pcache := make(map[common.Pgid]bool)
for _, txp := range f.pending {
for _, pendingID := range txp.ids {
pcache[pendingID] = true
@ -374,8 +363,8 @@ func (f *freelist) noSyncReload(pgids []pgid) {
// Check each page in the freelist and build a new available freelist
// with any pages not in the pending lists.
var a []pgid
for _, id := range pgids {
var a []common.Pgid
for _, id := range Pgids {
if !pcache[id] {
a = append(a, id)
}
@ -387,7 +376,7 @@ func (f *freelist) noSyncReload(pgids []pgid) {
// reindex rebuilds the free cache based on available and pending free lists.
func (f *freelist) reindex() {
ids := f.getFreePageIDs()
f.cache = make(map[pgid]struct{}, len(ids))
f.cache = make(map[common.Pgid]struct{}, len(ids))
for _, id := range ids {
f.cache[id] = struct{}{}
}
@ -399,7 +388,7 @@ func (f *freelist) reindex() {
}
// arrayMergeSpans tries to merge the list of pages (represented by pgids) with existing spans, using the array backend
func (f *freelist) arrayMergeSpans(ids pgids) {
func (f *freelist) arrayMergeSpans(ids common.Pgids) {
sort.Sort(ids)
f.ids = pgids(f.ids).merge(ids)
f.ids = common.Pgids(f.ids).Merge(ids)
}

View File

@ -1,6 +1,10 @@
package bbolt
import "sort"
import (
"sort"
"go.etcd.io/bbolt/internal/common"
)
// hashmapFreeCount returns the count of free pages (hashmap version)
func (f *freelist) hashmapFreeCount() int {
@ -13,7 +17,7 @@ func (f *freelist) hashmapFreeCount() int {
}
// hashmapAllocate serves the same purpose as arrayAllocate, but uses the hashmap as backend
func (f *freelist) hashmapAllocate(txid txid, n int) pgid {
func (f *freelist) hashmapAllocate(txid common.Txid, n int) common.Pgid {
if n == 0 {
return 0
}
@ -26,7 +30,7 @@ func (f *freelist) hashmapAllocate(txid txid, n int) pgid {
f.allocs[pid] = txid
for i := pgid(0); i < pgid(n); i++ {
for i := common.Pgid(0); i < common.Pgid(n); i++ {
delete(f.cache, pid+i)
}
return pid
@ -48,9 +52,9 @@ func (f *freelist) hashmapAllocate(txid txid, n int) pgid {
remain := size - uint64(n)
// add remain span
f.addSpan(pid+pgid(n), remain)
f.addSpan(pid+common.Pgid(n), remain)
for i := pgid(0); i < pgid(n); i++ {
for i := common.Pgid(0); i < common.Pgid(n); i++ {
delete(f.cache, pid+i)
}
return pid
@ -61,7 +65,7 @@ func (f *freelist) hashmapAllocate(txid txid, n int) pgid {
}
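
`hashmapAllocate` first checks `freemaps` for a span of exactly the requested size, and otherwise splits any larger span in two. A simplified, self-contained sketch of that lookup-then-split logic, stripped of the cache and txid bookkeeping:

```go
package main

import "fmt"

type pgid uint64
type pidSet map[pgid]struct{}

// spans indexes free spans by size, like the freemaps field.
type spans struct{ freemaps map[uint64]pidSet }

func (s *spans) add(start pgid, size uint64) {
	if _, ok := s.freemaps[size]; !ok {
		s.freemaps[size] = pidSet{}
	}
	s.freemaps[size][start] = struct{}{}
}

func (s *spans) del(start pgid, size uint64) {
	delete(s.freemaps[size], start)
	if len(s.freemaps[size]) == 0 {
		delete(s.freemaps, size)
	}
}

func (s *spans) allocate(n uint64) pgid {
	// Exact fit first: O(1) thanks to the size index.
	if pids, ok := s.freemaps[n]; ok {
		for pid := range pids {
			s.del(pid, n)
			return pid
		}
	}
	// Otherwise split the first larger span we find.
	for size, pids := range s.freemaps {
		if size <= n { // exact fits were handled above
			continue
		}
		for pid := range pids {
			s.del(pid, size)
			s.add(pid+pgid(n), size-n) // keep the remainder free
			return pid
		}
	}
	return 0
}

func main() {
	s := &spans{freemaps: map[uint64]pidSet{}}
	s.add(3, 5)  // pages 3..7
	s.add(12, 2) // pages 12..13

	fmt.Println(s.allocate(2)) // 12 (exact fit)
	fmt.Println(s.allocate(3)) // 3, leaving span 6..7
	fmt.Println(s.freemaps)    // map[2:map[6:{}]]
}
```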
// hashmapReadIDs reads pgids as input and initializes the freelist (hashmap version)
func (f *freelist) hashmapReadIDs(pgids []pgid) {
func (f *freelist) hashmapReadIDs(pgids []common.Pgid) {
f.init(pgids)
// Rebuild the page cache.
@ -69,25 +73,25 @@ func (f *freelist) hashmapReadIDs(pgids []pgid) {
}
// hashmapGetFreePageIDs returns the sorted free page ids
func (f *freelist) hashmapGetFreePageIDs() []pgid {
func (f *freelist) hashmapGetFreePageIDs() []common.Pgid {
count := f.free_count()
if count == 0 {
return nil
}
m := make([]pgid, 0, count)
m := make([]common.Pgid, 0, count)
for start, size := range f.forwardMap {
for i := 0; i < int(size); i++ {
m = append(m, start+pgid(i))
m = append(m, start+common.Pgid(i))
}
}
sort.Sort(pgids(m))
sort.Sort(common.Pgids(m))
return m
}
// hashmapMergeSpans tries to merge the list of pages (represented by pgids) with existing spans
func (f *freelist) hashmapMergeSpans(ids pgids) {
func (f *freelist) hashmapMergeSpans(ids common.Pgids) {
for _, id := range ids {
// try to see if we can merge and update
f.mergeWithExistingSpan(id)
@ -95,7 +99,7 @@ func (f *freelist) hashmapMergeSpans(ids pgids) {
}
// mergeWithExistingSpan merges pid into the existing free spans, trying to merge it backward and forward
func (f *freelist) mergeWithExistingSpan(pid pgid) {
func (f *freelist) mergeWithExistingSpan(pid common.Pgid) {
prev := pid - 1
next := pid + 1
@ -106,10 +110,10 @@ func (f *freelist) mergeWithExistingSpan(pid pgid) {
if mergeWithPrev {
//merge with previous span
start := prev + 1 - pgid(preSize)
start := prev + 1 - common.Pgid(preSize)
f.delSpan(start, preSize)
newStart -= pgid(preSize)
newStart -= common.Pgid(preSize)
newSize += preSize
}
@ -122,19 +126,19 @@ func (f *freelist) mergeWithExistingSpan(pid pgid) {
f.addSpan(newStart, newSize)
}
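
The two index maps are what make this merge O(1): `backwardMap` finds a span ending immediately before the freed page and `forwardMap` one starting immediately after it. A sketch that reproduces test1 from freelist_test.go below, freeing page 3 between spans 1..2 and 4..6 (`freemaps` is omitted for brevity):

```go
package main

import "fmt"

type pgid uint64

// merge frees pid and fuses it with adjacent spans, keeping both
// indexes consistent.
func merge(forward, backward map[pgid]uint64, pid pgid) {
	newStart, newSize := pid, uint64(1)

	if preSize, ok := backward[pid-1]; ok { // span ends just before pid
		start := pid - pgid(preSize)
		delete(forward, start)
		delete(backward, pid-1)
		newStart, newSize = start, newSize+preSize
	}
	if nextSize, ok := forward[pid+1]; ok { // span starts just after pid
		delete(forward, pid+1)
		delete(backward, pid+pgid(nextSize))
		newSize += nextSize
	}
	forward[newStart] = newSize
	backward[newStart+pgid(newSize)-1] = newSize
}

func main() {
	// Free spans 1..2 and 4..6; freeing page 3 fuses them into 1..6.
	forward := map[pgid]uint64{1: 2, 4: 3}
	backward := map[pgid]uint64{2: 2, 6: 3}
	merge(forward, backward, 3)
	fmt.Println(forward, backward) // map[1:6] map[6:6]
}
```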
func (f *freelist) addSpan(start pgid, size uint64) {
f.backwardMap[start-1+pgid(size)] = size
func (f *freelist) addSpan(start common.Pgid, size uint64) {
f.backwardMap[start-1+common.Pgid(size)] = size
f.forwardMap[start] = size
if _, ok := f.freemaps[size]; !ok {
f.freemaps[size] = make(map[pgid]struct{})
f.freemaps[size] = make(map[common.Pgid]struct{})
}
f.freemaps[size][start] = struct{}{}
}
func (f *freelist) delSpan(start pgid, size uint64) {
func (f *freelist) delSpan(start common.Pgid, size uint64) {
delete(f.forwardMap, start)
delete(f.backwardMap, start+pgid(size-1))
delete(f.backwardMap, start+common.Pgid(size-1))
delete(f.freemaps[size], start)
if len(f.freemaps[size]) == 0 {
delete(f.freemaps, size)
@ -143,7 +147,7 @@ func (f *freelist) delSpan(start pgid, size uint64) {
// init initializes the freelist from the given pgids (hashmap version)
// pgids must be sorted
func (f *freelist) init(pgids []pgid) {
func (f *freelist) init(pgids []common.Pgid) {
if len(pgids) == 0 {
return
}
@ -151,13 +155,13 @@ func (f *freelist) init(pgids []pgid) {
size := uint64(1)
start := pgids[0]
if !sort.SliceIsSorted([]pgid(pgids), func(i, j int) bool { return pgids[i] < pgids[j] }) {
if !sort.SliceIsSorted([]common.Pgid(pgids), func(i, j int) bool { return pgids[i] < pgids[j] }) {
panic("pgids not sorted")
}
f.freemaps = make(map[uint64]pidSet)
f.forwardMap = make(map[pgid]uint64)
f.backwardMap = make(map[pgid]uint64)
f.forwardMap = make(map[common.Pgid]uint64)
f.backwardMap = make(map[common.Pgid]uint64)
for i := 1; i < len(pgids); i++ {
// continuous page

View File

@ -7,6 +7,8 @@ import (
"sort"
"testing"
"unsafe"
"go.etcd.io/bbolt/internal/common"
)
// TestFreelistType is used as an env variable for tests to indicate the freelist backend type
@ -15,17 +17,17 @@ const TestFreelistType = "TEST_FREELIST_TYPE"
// Ensure that a page is added to a transaction's freelist.
func TestFreelist_free(t *testing.T) {
f := newTestFreelist()
f.free(100, &page{id: 12})
if !reflect.DeepEqual([]pgid{12}, f.pending[100].ids) {
t.Fatalf("exp=%v; got=%v", []pgid{12}, f.pending[100].ids)
f.free(100, common.NewPage(12, 0, 0, 0))
if !reflect.DeepEqual([]common.Pgid{12}, f.pending[100].ids) {
t.Fatalf("exp=%v; got=%v", []common.Pgid{12}, f.pending[100].ids)
}
}
// Ensure that a page and its overflow is added to a transaction's freelist.
func TestFreelist_free_overflow(t *testing.T) {
f := newTestFreelist()
f.free(100, &page{id: 12, overflow: 3})
if exp := []pgid{12, 13, 14, 15}; !reflect.DeepEqual(exp, f.pending[100].ids) {
f.free(100, common.NewPage(12, 0, 0, 3))
if exp := []common.Pgid{12, 13, 14, 15}; !reflect.DeepEqual(exp, f.pending[100].ids) {
t.Fatalf("exp=%v; got=%v", exp, f.pending[100].ids)
}
}
@ -33,17 +35,17 @@ func TestFreelist_free_overflow(t *testing.T) {
// Ensure that a transaction's free pages can be released.
func TestFreelist_release(t *testing.T) {
f := newTestFreelist()
f.free(100, &page{id: 12, overflow: 1})
f.free(100, &page{id: 9})
f.free(102, &page{id: 39})
f.free(100, common.NewPage(12, 0, 0, 1))
f.free(100, common.NewPage(9, 0, 0, 0))
f.free(102, common.NewPage(39, 0, 0, 0))
f.release(100)
f.release(101)
if exp := []pgid{9, 12, 13}; !reflect.DeepEqual(exp, f.getFreePageIDs()) {
if exp := []common.Pgid{9, 12, 13}; !reflect.DeepEqual(exp, f.getFreePageIDs()) {
t.Fatalf("exp=%v; got=%v", exp, f.getFreePageIDs())
}
f.release(102)
if exp := []pgid{9, 12, 13, 39}; !reflect.DeepEqual(exp, f.getFreePageIDs()) {
if exp := []common.Pgid{9, 12, 13, 39}; !reflect.DeepEqual(exp, f.getFreePageIDs()) {
t.Fatalf("exp=%v; got=%v", exp, f.getFreePageIDs())
}
}
@ -51,33 +53,33 @@ func TestFreelist_release(t *testing.T) {
// Ensure that releaseRange handles boundary conditions correctly
func TestFreelist_releaseRange(t *testing.T) {
type testRange struct {
begin, end txid
begin, end common.Txid
}
type testPage struct {
id pgid
id common.Pgid
n int
allocTxn txid
freeTxn txid
allocTxn common.Txid
freeTxn common.Txid
}
var releaseRangeTests = []struct {
title string
pagesIn []testPage
releaseRanges []testRange
wantFree []pgid
wantFree []common.Pgid
}{
{
title: "Single pending in range",
pagesIn: []testPage{{id: 3, n: 1, allocTxn: 100, freeTxn: 200}},
releaseRanges: []testRange{{1, 300}},
wantFree: []pgid{3},
wantFree: []common.Pgid{3},
},
{
title: "Single pending with minimum end range",
pagesIn: []testPage{{id: 3, n: 1, allocTxn: 100, freeTxn: 200}},
releaseRanges: []testRange{{1, 200}},
wantFree: []pgid{3},
wantFree: []common.Pgid{3},
},
{
title: "Single pending outsize minimum end range",
@ -89,7 +91,7 @@ func TestFreelist_releaseRange(t *testing.T) {
title: "Single pending with minimum begin range",
pagesIn: []testPage{{id: 3, n: 1, allocTxn: 100, freeTxn: 200}},
releaseRanges: []testRange{{100, 300}},
wantFree: []pgid{3},
wantFree: []common.Pgid{3},
},
{
title: "Single pending outside minimum begin range",
@ -101,7 +103,7 @@ func TestFreelist_releaseRange(t *testing.T) {
title: "Single pending in minimum range",
pagesIn: []testPage{{id: 3, n: 1, allocTxn: 199, freeTxn: 200}},
releaseRanges: []testRange{{199, 200}},
wantFree: []pgid{3},
wantFree: []common.Pgid{3},
},
{
title: "Single pending and read transaction at 199",
@ -146,16 +148,16 @@ func TestFreelist_releaseRange(t *testing.T) {
{id: 9, n: 2, allocTxn: 175, freeTxn: 200},
},
releaseRanges: []testRange{{50, 149}, {151, 300}},
wantFree: []pgid{4, 9, 10},
wantFree: []common.Pgid{4, 9, 10},
},
}
for _, c := range releaseRangeTests {
f := newTestFreelist()
var ids []pgid
var ids []common.Pgid
for _, p := range c.pagesIn {
for i := uint64(0); i < uint64(p.n); i++ {
ids = append(ids, pgid(uint64(p.id)+i))
ids = append(ids, common.Pgid(uint64(p.id)+i))
}
}
f.readIDs(ids)
@ -164,7 +166,7 @@ func TestFreelist_releaseRange(t *testing.T) {
}
for _, p := range c.pagesIn {
f.free(p.freeTxn, &page{id: p.id, overflow: uint32(p.n - 1)})
f.free(p.freeTxn, common.NewPage(p.id, 0, 0, uint32(p.n-1)))
}
for _, r := range c.releaseRanges {
@ -179,11 +181,11 @@ func TestFreelist_releaseRange(t *testing.T) {
func TestFreelistHashmap_allocate(t *testing.T) {
f := newTestFreelist()
if f.freelistType != FreelistMapType {
if f.freelistType != common.FreelistMapType {
t.Skip()
}
ids := []pgid{3, 4, 5, 6, 7, 9, 12, 13, 18}
ids := []common.Pgid{3, 4, 5, 6, 7, 9, 12, 13, 18}
f.readIDs(ids)
f.allocate(1, 3)
@ -209,10 +211,10 @@ func TestFreelistHashmap_allocate(t *testing.T) {
// Ensure that a freelist can find contiguous blocks of pages.
func TestFreelistArray_allocate(t *testing.T) {
f := newTestFreelist()
if f.freelistType != FreelistArrayType {
if f.freelistType != common.FreelistArrayType {
t.Skip()
}
ids := []pgid{3, 4, 5, 6, 7, 9, 12, 13, 18}
ids := []common.Pgid{3, 4, 5, 6, 7, 9, 12, 13, 18}
f.readIDs(ids)
if id := int(f.allocate(1, 3)); id != 3 {
t.Fatalf("exp=3; got=%v", id)
@ -235,7 +237,7 @@ func TestFreelistArray_allocate(t *testing.T) {
if id := int(f.allocate(1, 0)); id != 0 {
t.Fatalf("exp=0; got=%v", id)
}
if exp := []pgid{9, 18}; !reflect.DeepEqual(exp, f.getFreePageIDs()) {
if exp := []common.Pgid{9, 18}; !reflect.DeepEqual(exp, f.getFreePageIDs()) {
t.Fatalf("exp=%v; got=%v", exp, f.getFreePageIDs())
}
@ -248,7 +250,7 @@ func TestFreelistArray_allocate(t *testing.T) {
if id := int(f.allocate(1, 1)); id != 0 {
t.Fatalf("exp=0; got=%v", id)
}
if exp := []pgid{}; !reflect.DeepEqual(exp, f.getFreePageIDs()) {
if exp := []common.Pgid{}; !reflect.DeepEqual(exp, f.getFreePageIDs()) {
t.Fatalf("exp=%v; got=%v", exp, f.getFreePageIDs())
}
}
@ -257,12 +259,12 @@ func TestFreelistArray_allocate(t *testing.T) {
func TestFreelist_read(t *testing.T) {
// Create a page.
var buf [4096]byte
page := (*page)(unsafe.Pointer(&buf[0]))
page.flags = freelistPageFlag
page.count = 2
page := (*common.Page)(unsafe.Pointer(&buf[0]))
page.SetFlags(common.FreelistPageFlag)
page.SetCount(2)
// Insert 2 page ids.
ids := (*[3]pgid)(unsafe.Pointer(uintptr(unsafe.Pointer(page)) + unsafe.Sizeof(*page)))
ids := (*[3]common.Pgid)(unsafe.Pointer(uintptr(unsafe.Pointer(page)) + unsafe.Sizeof(*page)))
ids[0] = 23
ids[1] = 50
@ -271,7 +273,7 @@ func TestFreelist_read(t *testing.T) {
f.read(page)
// Ensure that there are two page ids in the freelist.
if exp := []pgid{23, 50}; !reflect.DeepEqual(exp, f.getFreePageIDs()) {
if exp := []common.Pgid{23, 50}; !reflect.DeepEqual(exp, f.getFreePageIDs()) {
t.Fatalf("exp=%v; got=%v", exp, f.getFreePageIDs())
}
}
@ -282,10 +284,10 @@ func TestFreelist_write(t *testing.T) {
var buf [4096]byte
f := newTestFreelist()
f.readIDs([]pgid{12, 39})
f.pending[100] = &txPending{ids: []pgid{28, 11}}
f.pending[101] = &txPending{ids: []pgid{3}}
p := (*page)(unsafe.Pointer(&buf[0]))
f.readIDs([]common.Pgid{12, 39})
f.pending[100] = &txPending{ids: []common.Pgid{28, 11}}
f.pending[101] = &txPending{ids: []common.Pgid{3}}
p := (*common.Page)(unsafe.Pointer(&buf[0]))
if err := f.write(p); err != nil {
t.Fatal(err)
}
@ -296,7 +298,7 @@ func TestFreelist_write(t *testing.T) {
// Ensure that the freelist is correct.
// All pages should be present and in reverse order.
if exp := []pgid{3, 11, 12, 28, 39}; !reflect.DeepEqual(exp, f2.getFreePageIDs()) {
if exp := []common.Pgid{3, 11, 12, 28, 39}; !reflect.DeepEqual(exp, f2.getFreePageIDs()) {
t.Fatalf("exp=%v; got=%v", exp, f2.getFreePageIDs())
}
}
@ -313,17 +315,17 @@ func benchmark_FreelistRelease(b *testing.B, size int) {
for i := 0; i < b.N; i++ {
txp := &txPending{ids: pending}
f := newTestFreelist()
f.pending = map[txid]*txPending{1: txp}
f.pending = map[common.Txid]*txPending{1: txp}
f.readIDs(ids)
f.release(1)
}
}
func randomPgids(n int) []pgid {
func randomPgids(n int) []common.Pgid {
rand.Seed(42)
pgids := make(pgids, n)
pgids := make(common.Pgids, n)
for i := range pgids {
pgids[i] = pgid(rand.Int63())
pgids[i] = common.Pgid(rand.Int63())
}
sort.Sort(pgids)
return pgids
@ -331,7 +333,7 @@ func randomPgids(n int) []pgid {
func Test_freelist_ReadIDs_and_getFreePageIDs(t *testing.T) {
f := newTestFreelist()
exp := []pgid{3, 4, 5, 6, 7, 9, 12, 13, 18}
exp := []common.Pgid{3, 4, 5, 6, 7, 9, 12, 13, 18}
f.readIDs(exp)
@ -340,7 +342,7 @@ func Test_freelist_ReadIDs_and_getFreePageIDs(t *testing.T) {
}
f2 := newTestFreelist()
var exp2 []pgid
var exp2 []common.Pgid
f2.readIDs(exp2)
if got2 := f2.getFreePageIDs(); !reflect.DeepEqual(got2, exp2) {
@ -355,53 +357,53 @@ func Test_freelist_mergeWithExist(t *testing.T) {
bm2 := pidSet{5: struct{}{}}
tests := []struct {
name string
ids []pgid
pgid pgid
want []pgid
wantForwardmap map[pgid]uint64
wantBackwardmap map[pgid]uint64
ids []common.Pgid
pgid common.Pgid
want []common.Pgid
wantForwardmap map[common.Pgid]uint64
wantBackwardmap map[common.Pgid]uint64
wantfreemap map[uint64]pidSet
}{
{
name: "test1",
ids: []pgid{1, 2, 4, 5, 6},
ids: []common.Pgid{1, 2, 4, 5, 6},
pgid: 3,
want: []pgid{1, 2, 3, 4, 5, 6},
wantForwardmap: map[pgid]uint64{1: 6},
wantBackwardmap: map[pgid]uint64{6: 6},
want: []common.Pgid{1, 2, 3, 4, 5, 6},
wantForwardmap: map[common.Pgid]uint64{1: 6},
wantBackwardmap: map[common.Pgid]uint64{6: 6},
wantfreemap: map[uint64]pidSet{6: bm1},
},
{
name: "test2",
ids: []pgid{1, 2, 5, 6},
ids: []common.Pgid{1, 2, 5, 6},
pgid: 3,
want: []pgid{1, 2, 3, 5, 6},
wantForwardmap: map[pgid]uint64{1: 3, 5: 2},
wantBackwardmap: map[pgid]uint64{6: 2, 3: 3},
want: []common.Pgid{1, 2, 3, 5, 6},
wantForwardmap: map[common.Pgid]uint64{1: 3, 5: 2},
wantBackwardmap: map[common.Pgid]uint64{6: 2, 3: 3},
wantfreemap: map[uint64]pidSet{3: bm1, 2: bm2},
},
{
name: "test3",
ids: []pgid{1, 2},
ids: []common.Pgid{1, 2},
pgid: 3,
want: []pgid{1, 2, 3},
wantForwardmap: map[pgid]uint64{1: 3},
wantBackwardmap: map[pgid]uint64{3: 3},
want: []common.Pgid{1, 2, 3},
wantForwardmap: map[common.Pgid]uint64{1: 3},
wantBackwardmap: map[common.Pgid]uint64{3: 3},
wantfreemap: map[uint64]pidSet{3: bm1},
},
{
name: "test4",
ids: []pgid{2, 3},
ids: []common.Pgid{2, 3},
pgid: 1,
want: []pgid{1, 2, 3},
wantForwardmap: map[pgid]uint64{1: 3},
wantBackwardmap: map[pgid]uint64{3: 3},
want: []common.Pgid{1, 2, 3},
wantForwardmap: map[common.Pgid]uint64{1: 3},
wantBackwardmap: map[common.Pgid]uint64{3: 3},
wantfreemap: map[uint64]pidSet{3: bm1},
},
}
for _, tt := range tests {
f := newTestFreelist()
if f.freelistType == FreelistArrayType {
if f.freelistType == common.FreelistArrayType {
t.Skip()
}
f.readIDs(tt.ids)
@ -425,9 +427,9 @@ func Test_freelist_mergeWithExist(t *testing.T) {
// newTestFreelist gets the freelist type from the env variable and initializes the freelist
func newTestFreelist() *freelist {
freelistType := FreelistArrayType
if env := os.Getenv(TestFreelistType); env == string(FreelistMapType) {
freelistType = FreelistMapType
freelistType := common.FreelistArrayType
if env := os.Getenv(TestFreelistType); env == string(common.FreelistMapType) {
freelistType = common.FreelistMapType
}
return newFreelist(freelistType)

View File

@ -12,6 +12,7 @@ import (
"github.com/stretchr/testify/require"
bolt "go.etcd.io/bbolt"
"go.etcd.io/bbolt/internal/common"
)
var statsFlag = flag.Bool("stats", false, "show performance stats")
@ -44,9 +45,9 @@ func MustOpenDBWithOption(t testing.TB, f string, o *bolt.Options) *DB {
o = bolt.DefaultOptions
}
freelistType := bolt.FreelistArrayType
if env := os.Getenv(TestFreelistType); env == string(bolt.FreelistMapType) {
freelistType = bolt.FreelistMapType
freelistType := common.FreelistArrayType
if env := os.Getenv(TestFreelistType); env == string(common.FreelistMapType) {
freelistType = common.FreelistMapType
}
o.FreelistType = freelistType

View File

@ -2,14 +2,13 @@ package guts_cli
// Low level access to pages / data-structures of the bbolt file.
// TODO(ptab): Merge with bbolt/page file that should get ported to internal.
import (
"errors"
"fmt"
"io"
"os"
"unsafe"
"go.etcd.io/bbolt/internal/common"
)
var (
@ -17,231 +16,9 @@ var (
ErrCorrupt = errors.New("invalid value")
)
// PageHeaderSize represents the size of the bolt.Page header.
const PageHeaderSize = 16
// Represents a marker value to indicate that a file (Meta Page) is a Bolt DB.
const magic uint32 = 0xED0CDAED
// DO NOT EDIT. Copied from the "bolt" package.
const maxAllocSize = 0xFFFFFFF
// DO NOT EDIT. Copied from the "bolt" package.
const (
branchPageFlag = 0x01
leafPageFlag = 0x02
metaPageFlag = 0x04
freelistPageFlag = 0x10
)
// DO NOT EDIT. Copied from the "bolt" package.
const bucketLeafFlag = 0x01
// DO NOT EDIT. Copied from the "bolt" package.
type Pgid uint64
// DO NOT EDIT. Copied from the "bolt" package.
type txid uint64
// DO NOT EDIT. Copied from the "bolt" package.
type Meta struct {
magic uint32
version uint32
pageSize uint32
flags uint32
root Bucket
freelist Pgid
pgid Pgid // High Water Mark (id of next added Page if the file grows)
txid txid
checksum uint64
}
func LoadPageMeta(buf []byte) *Meta {
return (*Meta)(unsafe.Pointer(&buf[PageHeaderSize]))
}
func (m *Meta) RootBucket() *Bucket {
return &m.root
}
func (m *Meta) Txid() uint64 {
return uint64(m.txid)
}
func (m *Meta) Print(w io.Writer) {
fmt.Fprintf(w, "Version: %d\n", m.version)
fmt.Fprintf(w, "Page Size: %d bytes\n", m.pageSize)
fmt.Fprintf(w, "Flags: %08x\n", m.flags)
fmt.Fprintf(w, "Root: <pgid=%d>\n", m.root.root)
fmt.Fprintf(w, "Freelist: <pgid=%d>\n", m.freelist)
fmt.Fprintf(w, "HWM: <pgid=%d>\n", m.pgid)
fmt.Fprintf(w, "Txn ID: %d\n", m.txid)
fmt.Fprintf(w, "Checksum: %016x\n", m.checksum)
fmt.Fprintf(w, "\n")
}
// DO NOT EDIT. Copied from the "bolt" package.
type Bucket struct {
root Pgid
sequence uint64
}
const bucketHeaderSize = int(unsafe.Sizeof(Bucket{}))
func LoadBucket(buf []byte) *Bucket {
return (*Bucket)(unsafe.Pointer(&buf[0]))
}
func (b *Bucket) String() string {
return fmt.Sprintf("<pgid=%d,seq=%d>", b.root, b.sequence)
}
func (b *Bucket) RootPage() Pgid {
return b.root
}
func (b *Bucket) InlinePage(v []byte) *Page {
return (*Page)(unsafe.Pointer(&v[bucketHeaderSize]))
}
// DO NOT EDIT. Copied from the "bolt" package.
type Page struct {
id Pgid
flags uint16
count uint16
overflow uint32
ptr uintptr
}
func LoadPage(buf []byte) *Page {
return (*Page)(unsafe.Pointer(&buf[0]))
}
func (p *Page) FreelistPageCount() int {
// Check for overflow and, if present, adjust actual element count.
if p.count == 0xFFFF {
return int(((*[maxAllocSize]Pgid)(unsafe.Pointer(&p.ptr)))[0])
} else {
return int(p.count)
}
}
func (p *Page) FreelistPagePages() []Pgid {
// Check for overflow and, if present, adjust starting index.
idx := 0
if p.count == 0xFFFF {
idx = 1
}
return (*[maxAllocSize]Pgid)(unsafe.Pointer(&p.ptr))[idx:p.FreelistPageCount()]
}
func (p *Page) Overflow() uint32 {
return p.overflow
}
func (p *Page) String() string {
return fmt.Sprintf("ID: %d, Type: %s, count: %d, overflow: %d", p.id, p.Type(), p.count, p.overflow)
}
// DO NOT EDIT. Copied from the "bolt" package.
// TODO(ptabor): Make the page-types an enum.
func (p *Page) Type() string {
if (p.flags & branchPageFlag) != 0 {
return "branch"
} else if (p.flags & leafPageFlag) != 0 {
return "leaf"
} else if (p.flags & metaPageFlag) != 0 {
return "meta"
} else if (p.flags & freelistPageFlag) != 0 {
return "freelist"
}
return fmt.Sprintf("unknown<%02x>", p.flags)
}
func (p *Page) Count() uint16 {
return p.count
}
func (p *Page) Id() Pgid {
return p.id
}
// DO NOT EDIT. Copied from the "bolt" package.
func (p *Page) LeafPageElement(index uint16) *LeafPageElement {
n := &((*[0x7FFFFFF]LeafPageElement)(unsafe.Pointer(&p.ptr)))[index]
return n
}
// DO NOT EDIT. Copied from the "bolt" package.
func (p *Page) BranchPageElement(index uint16) *BranchPageElement {
return &((*[0x7FFFFFF]BranchPageElement)(unsafe.Pointer(&p.ptr)))[index]
}
func (p *Page) SetId(target Pgid) {
p.id = target
}
func (p *Page) SetCount(target uint16) {
p.count = target
}
func (p *Page) SetOverflow(target uint32) {
p.overflow = target
}
// DO NOT EDIT. Copied from the "bolt" package.
type BranchPageElement struct {
pos uint32
ksize uint32
pgid Pgid
}
// DO NOT EDIT. Copied from the "bolt" package.
func (n *BranchPageElement) Key() []byte {
buf := (*[maxAllocSize]byte)(unsafe.Pointer(n))
return buf[n.pos : n.pos+n.ksize]
}
func (n *BranchPageElement) PgId() Pgid {
return n.pgid
}
// DO NOT EDIT. Copied from the "bolt" package.
type LeafPageElement struct {
flags uint32
pos uint32
ksize uint32
vsize uint32
}
// DO NOT EDIT. Copied from the "bolt" package.
func (n *LeafPageElement) Key() []byte {
buf := (*[maxAllocSize]byte)(unsafe.Pointer(n))
return buf[n.pos : n.pos+n.ksize]
}
// DO NOT EDIT. Copied from the "bolt" package.
func (n *LeafPageElement) Value() []byte {
buf := (*[maxAllocSize]byte)(unsafe.Pointer(n))
return buf[n.pos+n.ksize : n.pos+n.ksize+n.vsize]
}
func (n *LeafPageElement) IsBucketEntry() bool {
return n.flags&uint32(bucketLeafFlag) != 0
}
func (n *LeafPageElement) Bucket() *Bucket {
if n.IsBucketEntry() {
return LoadBucket(n.Value())
} else {
return nil
}
}
// ReadPage reads Page info & full Page data from a path.
// This is not transactionally safe.
func ReadPage(path string, pageID uint64) (*Page, []byte, error) {
func ReadPage(path string, pageID uint64) (*common.Page, []byte, error) {
// Find Page size.
pageSize, hwm, err := ReadPageAndHWMSize(path)
if err != nil {
@ -264,11 +41,11 @@ func ReadPage(path string, pageID uint64) (*Page, []byte, error) {
}
// Determine total number of blocks.
p := LoadPage(buf)
if p.id != Pgid(pageID) {
return nil, nil, fmt.Errorf("error: %w due to unexpected Page id: %d != %d", ErrCorrupt, p.id, pageID)
p := common.LoadPage(buf)
if p.Id() != common.Pgid(pageID) {
return nil, nil, fmt.Errorf("error: %w due to unexpected Page id: %d != %d", ErrCorrupt, p.Id(), pageID)
}
overflowN := p.overflow
overflowN := p.Overflow()
if overflowN >= uint32(hwm)-3 { // we exclude 2 Meta pages and the current Page.
return nil, nil, fmt.Errorf("error: %w, Page claims to have %d overflow pages (>=hwm=%d). Interrupting to avoid risky OOM", ErrCorrupt, overflowN, hwm)
}
@ -280,16 +57,16 @@ func ReadPage(path string, pageID uint64) (*Page, []byte, error) {
} else if n != len(buf) {
return nil, nil, io.ErrUnexpectedEOF
}
p = LoadPage(buf)
if p.id != Pgid(pageID) {
return nil, nil, fmt.Errorf("error: %w due to unexpected Page id: %d != %d", ErrCorrupt, p.id, pageID)
p = common.LoadPage(buf)
if p.Id() != common.Pgid(pageID) {
return nil, nil, fmt.Errorf("error: %w due to unexpected Page id: %d != %d", ErrCorrupt, p.Id(), pageID)
}
return p, buf, nil
}
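
A hedged usage sketch for `ReadPage` (compilable only inside this module because of the `internal` import; page id 3 and the CLI argument are arbitrary choices for illustration):

```go
package main

import (
	"fmt"
	"log"
	"os"

	"go.etcd.io/bbolt/internal/guts_cli"
)

// Print the header of one page of the bbolt file given on the command line.
func main() {
	p, _, err := guts_cli.ReadPage(os.Args[1], 3)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("id=%d type=%s count=%d overflow=%d\n",
		p.Id(), p.Typ(), p.Count(), p.Overflow())
}
```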
func WritePage(path string, pageBuf []byte) error {
page := LoadPage(pageBuf)
page := common.LoadPage(pageBuf)
pageSize, _, err := ReadPageAndHWMSize(path)
if err != nil {
return err
@ -309,7 +86,7 @@ func WritePage(path string, pageBuf []byte) error {
// ReadPageAndHWMSize reads Page size and HWM (id of the last+1 Page).
// This is not transactionally safe.
func ReadPageAndHWMSize(path string) (uint64, Pgid, error) {
func ReadPageAndHWMSize(path string) (uint64, common.Pgid, error) {
// Open database file.
f, err := os.Open(path)
if err != nil {
@ -324,28 +101,28 @@ func ReadPageAndHWMSize(path string) (uint64, Pgid, error) {
}
// Read Page size from metadata.
m := LoadPageMeta(buf)
if m.magic != magic {
m := common.LoadPageMeta(buf)
if m.Magic() != common.Magic {
return 0, 0, fmt.Errorf("the Meta Page has wrong (unexpected) magic")
}
return uint64(m.pageSize), Pgid(m.pgid), nil
return uint64(m.PageSize()), common.Pgid(m.Pgid()), nil
}
// GetRootPage returns the root-page (according to the most recent transaction).
func GetRootPage(path string) (root Pgid, activeMeta Pgid, err error) {
func GetRootPage(path string) (root common.Pgid, activeMeta common.Pgid, err error) {
_, buf0, err0 := ReadPage(path, 0)
if err0 != nil {
return 0, 0, err0
}
m0 := LoadPageMeta(buf0)
m0 := common.LoadPageMeta(buf0)
_, buf1, err1 := ReadPage(path, 1)
if err1 != nil {
return 0, 1, err1
}
m1 := LoadPageMeta(buf1)
if m0.txid < m1.txid {
return m1.root.root, 1, nil
m1 := common.LoadPageMeta(buf1)
if m0.Txid() < m1.Txid() {
return m1.RootBucket().RootPage(), 1, nil
} else {
return m0.root.root, 0, nil
return m0.RootBucket().RootPage(), 0, nil
}
}

View File

@ -2,10 +2,11 @@ package surgeon
import (
"fmt"
"go.etcd.io/bbolt/internal/common"
"go.etcd.io/bbolt/internal/guts_cli"
)
func CopyPage(path string, srcPage guts_cli.Pgid, target guts_cli.Pgid) error {
func CopyPage(path string, srcPage common.Pgid, target common.Pgid) error {
p1, d1, err1 := guts_cli.ReadPage(path, uint64(srcPage))
if err1 != nil {
return err1
@ -14,7 +15,7 @@ func CopyPage(path string, srcPage guts_cli.Pgid, target guts_cli.Pgid) error {
return guts_cli.WritePage(path, d1)
}
func ClearPage(path string, pgId guts_cli.Pgid) error {
func ClearPage(path string, pgId common.Pgid) error {
// Read the page
p, buf, err := guts_cli.ReadPage(path, uint64(pgId))
if err != nil {

View File

@ -9,6 +9,7 @@ import (
"bytes"
"fmt"
"go.etcd.io/bbolt/internal/common"
"go.etcd.io/bbolt/internal/guts_cli"
)
@ -20,7 +21,7 @@ func NewXRay(path string) XRay {
return XRay{path}
}
func (n XRay) traverse(stack []guts_cli.Pgid, callback func(page *guts_cli.Page, stack []guts_cli.Pgid) error) error {
func (n XRay) traverse(stack []common.Pgid, callback func(page *common.Page, stack []common.Pgid) error) error {
p, data, err := guts_cli.ReadPage(n.path, uint64(stack[len(stack)-1]))
if err != nil {
return fmt.Errorf("failed reading page (stack %v): %w", stack, err)
@ -29,10 +30,10 @@ func (n XRay) traverse(stack []guts_cli.Pgid, callback func(page *guts_cli.Page,
if err != nil {
return fmt.Errorf("failed callback for page (stack %v): %w", stack, err)
}
switch p.Type() {
switch p.Typ() {
case "meta":
{
m := guts_cli.LoadPageMeta(data)
m := common.LoadPageMeta(data)
r := m.RootBucket().RootPage()
return n.traverse(append(stack, r), callback)
}
@ -40,7 +41,7 @@ func (n XRay) traverse(stack []guts_cli.Pgid, callback func(page *guts_cli.Page,
{
for i := uint16(0); i < p.Count(); i++ {
bpe := p.BranchPageElement(i)
if err := n.traverse(append(stack, bpe.PgId()), callback); err != nil {
if err := n.traverse(append(stack, bpe.Pgid()), callback); err != nil {
return err
}
}
@ -73,19 +74,19 @@ func (n XRay) traverse(stack []guts_cli.Pgid, callback func(page *guts_cli.Page,
// As it traverses multiple buckets, in theory there might be multiple keys with the given name.
// Note: For simplicity it's currently implemented as a traversal of the whole reachable tree.
// If key is a bucket name, a page-path referencing the key will be returned as well.
func (n XRay) FindPathsToKey(key []byte) ([][]guts_cli.Pgid, error) {
var found [][]guts_cli.Pgid
func (n XRay) FindPathsToKey(key []byte) ([][]common.Pgid, error) {
var found [][]common.Pgid
rootPage, _, err := guts_cli.GetRootPage(n.path)
if err != nil {
return nil, err
}
err = n.traverse([]guts_cli.Pgid{rootPage},
func(page *guts_cli.Page, stack []guts_cli.Pgid) error {
if page.Type() == "leaf" {
err = n.traverse([]common.Pgid{rootPage},
func(page *common.Page, stack []common.Pgid) error {
if page.Typ() == "leaf" {
for i := uint16(0); i < page.Count(); i++ {
if bytes.Equal(page.LeafPageElement(i).Key(), key) {
var copyPath []guts_cli.Pgid
var copyPath []common.Pgid
copyPath = append(copyPath, stack...)
found = append(found, copyPath)
}

120
node.go
View File

@ -5,6 +5,8 @@ import (
"fmt"
"sort"
"unsafe"
"go.etcd.io/bbolt/internal/common"
)
// node represents an in-memory, deserialized page.
@ -14,7 +16,7 @@ type node struct {
unbalanced bool
spilled bool
key []byte
pgid pgid
pgid common.Pgid
parent *node
children nodes
inodes inodes
@ -38,7 +40,7 @@ func (n *node) minKeys() int {
// size returns the size of the node after serialization.
func (n *node) size() int {
sz, elsz := pageHeaderSize, n.pageElementSize()
sz, elsz := common.PageHeaderSize, n.pageElementSize()
for i := 0; i < len(n.inodes); i++ {
item := &n.inodes[i]
sz += elsz + uintptr(len(item.key)) + uintptr(len(item.value))
@ -50,7 +52,7 @@ func (n *node) size() int {
// This is an optimization to avoid calculating a large node when we only need
// to know if it fits inside a certain page size.
func (n *node) sizeLessThan(v uintptr) bool {
sz, elsz := pageHeaderSize, n.pageElementSize()
sz, elsz := common.PageHeaderSize, n.pageElementSize()
for i := 0; i < len(n.inodes); i++ {
item := &n.inodes[i]
sz += elsz + uintptr(len(item.key)) + uintptr(len(item.value))
@ -64,9 +66,9 @@ func (n *node) sizeLessThan(v uintptr) bool {
// pageElementSize returns the size of each page element based on the type of node.
func (n *node) pageElementSize() uintptr {
if n.isLeaf {
return leafPageElementSize
return common.LeafPageElementSize
}
return branchPageElementSize
return common.BranchPageElementSize
}
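
So a node's serialized size is one page header plus, per inode, one element header and the raw key/value bytes. A sketch with illustrative constants (the 16-byte headers are assumptions for this example, not the package's values):

```go
package main

import "fmt"

const (
	pageHeaderSize      = 16 // assumed, for illustration
	leafPageElementSize = 16 // assumed, for illustration
)

// nodeSize mirrors node.size() for a leaf: header + per-element
// overhead + raw key/value bytes.
func nodeSize(kvs [][2]string) int {
	sz := pageHeaderSize
	for _, kv := range kvs {
		sz += leafPageElementSize + len(kv[0]) + len(kv[1])
	}
	return sz
}

func main() {
	kvs := [][2]string{{"foo", "bar"}, {"hello", "world"}}
	fmt.Println(nodeSize(kvs)) // 16 + (16+3+3) + (16+5+5) = 64
}
```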
// childAt returns the child node at a given index.
@ -113,9 +115,9 @@ func (n *node) prevSibling() *node {
}
// put inserts a key/value.
func (n *node) put(oldKey, newKey, value []byte, pgId pgid, flags uint32) {
if pgId >= n.bucket.tx.meta.pgid {
panic(fmt.Sprintf("pgId (%d) above high water mark (%d)", pgId, n.bucket.tx.meta.pgid))
func (n *node) put(oldKey, newKey, value []byte, pgId common.Pgid, flags uint32) {
if pgId >= n.bucket.tx.meta.Pgid() {
panic(fmt.Sprintf("pgId (%d) above high water mark (%d)", pgId, n.bucket.tx.meta.Pgid()))
} else if len(oldKey) <= 0 {
panic("put: zero-length old key")
} else if len(newKey) <= 0 {
@ -126,7 +128,7 @@ func (n *node) put(oldKey, newKey, value []byte, pgId pgid, flags uint32) {
index := sort.Search(len(n.inodes), func(i int) bool { return bytes.Compare(n.inodes[i].key, oldKey) != -1 })
// Add capacity and shift nodes if we don't have an exact match and need to insert.
exact := (len(n.inodes) > 0 && index < len(n.inodes) && bytes.Equal(n.inodes[index].key, oldKey))
exact := len(n.inodes) > 0 && index < len(n.inodes) && bytes.Equal(n.inodes[index].key, oldKey)
if !exact {
n.inodes = append(n.inodes, inode{})
copy(n.inodes[index+1:], n.inodes[index:])
@ -137,7 +139,7 @@ func (n *node) put(oldKey, newKey, value []byte, pgId pgid, flags uint32) {
inode.key = newKey
inode.value = value
inode.pgid = pgId
_assert(len(inode.key) > 0, "put: zero-length inode key")
common.Assert(len(inode.key) > 0, "put: zero-length inode key")
}
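
`put` binary-searches for the slot with `sort.Search` and shifts the tail right only when the key is not already present, so updates happen in place. A self-contained sketch of just that insertion path:

```go
package main

import (
	"bytes"
	"fmt"
	"sort"
)

type inode struct{ key, value []byte }

// put inserts or replaces a key in a slice kept sorted by key.
func put(inodes []inode, key, value []byte) []inode {
	index := sort.Search(len(inodes), func(i int) bool {
		return bytes.Compare(inodes[i].key, key) != -1
	})
	exact := index < len(inodes) && bytes.Equal(inodes[index].key, key)
	if !exact {
		// Grow by one and shift the tail to open the slot.
		inodes = append(inodes, inode{})
		copy(inodes[index+1:], inodes[index:])
	}
	inodes[index] = inode{key: key, value: value}
	return inodes
}

func main() {
	var ns []inode
	for _, k := range []string{"m", "a", "z", "m"} {
		ns = put(ns, []byte(k), []byte("v-"+k))
	}
	for _, n := range ns {
		fmt.Printf("%s ", n.key)
	}
	fmt.Println() // a m z
}
```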
// del removes a key from the node.
@ -158,30 +160,30 @@ func (n *node) del(key []byte) {
}
// read initializes the node from a page.
func (n *node) read(p *page) {
n.pgid = p.id
n.isLeaf = ((p.flags & leafPageFlag) != 0)
n.inodes = make(inodes, int(p.count))
func (n *node) read(p *common.Page) {
n.pgid = p.Id()
n.isLeaf = (p.Flags() & common.LeafPageFlag) != 0
n.inodes = make(inodes, int(p.Count()))
for i := 0; i < int(p.count); i++ {
for i := 0; i < int(p.Count()); i++ {
inode := &n.inodes[i]
if n.isLeaf {
elem := p.leafPageElement(uint16(i))
inode.flags = elem.flags
inode.key = elem.key()
inode.value = elem.value()
elem := p.LeafPageElement(uint16(i))
inode.flags = elem.Flags()
inode.key = elem.Key()
inode.value = elem.Value()
} else {
elem := p.branchPageElement(uint16(i))
inode.pgid = elem.pgid
inode.key = elem.key()
elem := p.BranchPageElement(uint16(i))
inode.pgid = elem.Pgid()
inode.key = elem.Key()
}
_assert(len(inode.key) > 0, "read: zero-length inode key")
common.Assert(len(inode.key) > 0, "read: zero-length inode key")
}
// Save first key so we can find the node in the parent when we spill.
// Save first key, so we can find the node in the parent when we spill.
if len(n.inodes) > 0 {
n.key = n.inodes[0].key
_assert(len(n.key) > 0, "read: zero-length node key")
common.Assert(len(n.key) > 0, "read: zero-length node key")
} else {
n.key = nil
}
@ -190,23 +192,23 @@ func (n *node) read(p *page) {
// write writes the items onto one or more pages.
// The page should have p.id (might be 0 for meta or bucket-inline page) and p.overflow set
// and the rest should be zeroed.
func (n *node) write(p *page) {
_assert(p.count == 0 && p.flags == 0, "node cannot be written into a not empty page")
func (n *node) write(p *common.Page) {
common.Assert(p.Count() == 0 && p.Flags() == 0, "node cannot be written into a not empty page")
// Initialize page.
if n.isLeaf {
p.flags = leafPageFlag
p.SetFlags(common.LeafPageFlag)
} else {
p.flags = branchPageFlag
p.SetFlags(common.BranchPageFlag)
}
if len(n.inodes) >= 0xFFFF {
panic(fmt.Sprintf("inode overflow: %d (pgid=%d)", len(n.inodes), p.id))
panic(fmt.Sprintf("inode overflow: %d (pgid=%d)", len(n.inodes), p.Id()))
}
p.count = uint16(len(n.inodes))
p.SetCount(uint16(len(n.inodes)))
// Stop here if there are no items to write.
if p.count == 0 {
if p.Count() == 0 {
return
}
@ -214,27 +216,27 @@ func (n *node) write(p *page) {
// off tracks the offset into p of the start of the next data.
off := unsafe.Sizeof(*p) + n.pageElementSize()*uintptr(len(n.inodes))
for i, item := range n.inodes {
_assert(len(item.key) > 0, "write: zero-length inode key")
common.Assert(len(item.key) > 0, "write: zero-length inode key")
// Create a slice to write into of needed size and advance
// byte pointer for next iteration.
sz := len(item.key) + len(item.value)
b := unsafeByteSlice(unsafe.Pointer(p), off, 0, sz)
b := common.UnsafeByteSlice(unsafe.Pointer(p), off, 0, sz)
off += uintptr(sz)
// Write the page element.
if n.isLeaf {
elem := p.leafPageElement(uint16(i))
elem.pos = uint32(uintptr(unsafe.Pointer(&b[0])) - uintptr(unsafe.Pointer(elem)))
elem.flags = item.flags
elem.ksize = uint32(len(item.key))
elem.vsize = uint32(len(item.value))
elem := p.LeafPageElement(uint16(i))
elem.SetPos(uint32(uintptr(unsafe.Pointer(&b[0])) - uintptr(unsafe.Pointer(elem))))
elem.SetFlags(item.flags)
elem.SetKsize(uint32(len(item.key)))
elem.SetVsize(uint32(len(item.value)))
} else {
elem := p.branchPageElement(uint16(i))
elem.pos = uint32(uintptr(unsafe.Pointer(&b[0])) - uintptr(unsafe.Pointer(elem)))
elem.ksize = uint32(len(item.key))
elem.pgid = item.pgid
_assert(elem.pgid != p.id, "write: circular dependency occurred")
elem := p.BranchPageElement(uint16(i))
elem.SetPos(uint32(uintptr(unsafe.Pointer(&b[0])) - uintptr(unsafe.Pointer(elem))))
elem.SetKsize(uint32(len(item.key)))
elem.SetPgid(item.pgid)
common.Assert(elem.Pgid() != p.Id(), "write: circular dependency occurred")
}
// Write data for the element to the end of the page.
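
The SetPos calls above encode a detail worth spelling out: an element's pos is the offset from the element header itself to its key/value data, not an offset from the page start. A small self-contained sketch of that relative-offset scheme (toy element type and 4 KiB buffer are illustrative assumptions, not the real structs):

package main

import (
	"fmt"
	"unsafe"
)

// Toy element mirroring the relative-offset idea; not the real bbolt struct.
type element struct {
	pos   uint32 // offset from this element header to its data
	ksize uint32
}

func main() {
	buf := make([]byte, 4096)
	base := unsafe.Pointer(&buf[0])

	elem := (*element)(base)            // element 0 sits at the buffer start
	dataOff := unsafe.Sizeof(element{}) // data begins right after one element
	data := unsafe.Pointer(uintptr(base) + dataOff)

	// pos is measured from the element itself, exactly like elem.SetPos above.
	elem.pos = uint32(uintptr(data) - uintptr(unsafe.Pointer(elem)))
	elem.ksize = 3
	copy(buf[dataOff:], "key")

	// Reading back: follow pos from the element to recover the key.
	start := uintptr(unsafe.Pointer(elem)) + uintptr(elem.pos) - uintptr(base)
	fmt.Println(string(buf[start : start+uintptr(elem.ksize)])) // "key"
}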
@ -273,7 +275,7 @@ func (n *node) split(pageSize uintptr) []*node {
func (n *node) splitTwo(pageSize uintptr) (*node, *node) {
// Ignore the split if the page doesn't have at least enough nodes for
// two pages or if the nodes can fit in a single page.
if len(n.inodes) <= (minKeysPerPage*2) || n.sizeLessThan(pageSize) {
if len(n.inodes) <= (common.MinKeysPerPage*2) || n.sizeLessThan(pageSize) {
return n, nil
}
@ -313,17 +315,17 @@ func (n *node) splitTwo(pageSize uintptr) (*node, *node) {
// It returns the index as well as the size of the first page.
// This is only called from split().
func (n *node) splitIndex(threshold int) (index, sz uintptr) {
sz = pageHeaderSize
sz = common.PageHeaderSize
// Loop until we only have the minimum number of keys required for the second page.
for i := 0; i < len(n.inodes)-minKeysPerPage; i++ {
for i := 0; i < len(n.inodes)-common.MinKeysPerPage; i++ {
index = uintptr(i)
inode := n.inodes[i]
elsize := n.pageElementSize() + uintptr(len(inode.key)) + uintptr(len(inode.value))
// If we have at least the minimum number of keys and adding another
// node would put us over the threshold then exit and return.
if index >= minKeysPerPage && sz+elsize > uintptr(threshold) {
if index >= common.MinKeysPerPage && sz+elsize > uintptr(threshold) {
break
}
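
splitIndex accumulates element sizes until adding one more would push the first page past the threshold, while keeping at least MinKeysPerPage keys on each side. A sketch of the same loop over plain ints (the constant and the sizes here are illustrative assumptions):

package main

import "fmt"

const minKeysPerPage = 2 // mirrors common.MinKeysPerPage

// splitIndex walks element sizes, stopping once the first page has at least
// minKeysPerPage keys and adding the next element would exceed threshold.
func splitIndex(sizes []int, headerSize, threshold int) (index, sz int) {
	sz = headerSize
	for i := 0; i < len(sizes)-minKeysPerPage; i++ {
		index = i
		if i >= minKeysPerPage && sz+sizes[i] > threshold {
			break
		}
		sz += sizes[i]
	}
	return index, sz
}

func main() {
	sizes := []int{100, 100, 100, 100, 100, 100} // six equally sized elements
	idx, sz := splitIndex(sizes, 16, 350)
	fmt.Println(idx, sz) // 3 316: elements 0-2 stay, the split begins at index 3
}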
@ -360,7 +362,7 @@ func (n *node) spill() error {
for _, node := range nodes {
// Add node's page to the freelist if it's not new.
if node.pgid > 0 {
tx.db.freelist.free(tx.meta.txid, tx.page(node.pgid))
tx.db.freelist.free(tx.meta.Txid(), tx.page(node.pgid))
node.pgid = 0
}
@ -371,10 +373,10 @@ func (n *node) spill() error {
}
// Write the node.
if p.id >= tx.meta.pgid {
panic(fmt.Sprintf("pgid (%d) above high water mark (%d)", p.id, tx.meta.pgid))
if p.Id() >= tx.meta.Pgid() {
panic(fmt.Sprintf("pgid (%d) above high water mark (%d)", p.Id(), tx.meta.Pgid()))
}
node.pgid = p.id
node.pgid = p.Id()
node.write(p)
node.spilled = true
@ -387,7 +389,7 @@ func (n *node) spill() error {
node.parent.put(key, node.inodes[0].key, nil, node.pgid, 0)
node.key = node.inodes[0].key
_assert(len(node.key) > 0, "spill: zero-length node key")
common.Assert(len(node.key) > 0, "spill: zero-length node key")
}
// Update the statistics.
@ -457,11 +459,11 @@ func (n *node) rebalance() {
return
}
_assert(n.parent.numChildren() > 1, "parent must have at least 2 children")
common.Assert(n.parent.numChildren() > 1, "parent must have at least 2 children")
// Destination node is right sibling if idx == 0, otherwise left sibling.
var target *node
var useNextSibling = (n.parent.childIndex(n) == 0)
var useNextSibling = n.parent.childIndex(n) == 0
if useNextSibling {
target = n.nextSibling()
} else {
@ -525,7 +527,7 @@ func (n *node) dereference() {
key := make([]byte, len(n.key))
copy(key, n.key)
n.key = key
_assert(n.pgid == 0 || len(n.key) > 0, "dereference: zero-length node key on existing node")
common.Assert(n.pgid == 0 || len(n.key) > 0, "dereference: zero-length node key on existing node")
}
for i := range n.inodes {
@ -534,7 +536,7 @@ func (n *node) dereference() {
key := make([]byte, len(inode.key))
copy(key, inode.key)
inode.key = key
_assert(len(inode.key) > 0, "dereference: zero-length inode key")
common.Assert(len(inode.key) > 0, "dereference: zero-length inode key")
value := make([]byte, len(inode.value))
copy(value, inode.value)
@ -553,7 +555,7 @@ func (n *node) dereference() {
// free adds the node's underlying page to the freelist.
func (n *node) free() {
if n.pgid != 0 {
n.bucket.tx.db.freelist.free(n.bucket.tx.meta.txid, n.bucket.tx.page(n.pgid))
n.bucket.tx.db.freelist.free(n.bucket.tx.meta.Txid(), n.bucket.tx.page(n.pgid))
n.pgid = 0
}
}
@ -602,7 +604,7 @@ func (s nodes) Less(i, j int) bool {
// to an element which hasn't been added to a page yet.
type inode struct {
flags uint32
pgid pgid
pgid common.Pgid
key []byte
value []byte
}
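
As the (truncated) comment above says, an inode either points at an element already on a page or holds data that has not been spilled yet. A tiny illustration with a simplified struct: a leaf inode carries a value and pgid 0, while a branch inode carries only a child page id:

package main

import "fmt"

// Simplified mirror of the inode concept; field types are illustrative.
type inode struct {
	pgid  uint64 // child page id (branch inodes only)
	key   []byte
	value []byte // present on leaf inodes only
}

func main() {
	leaf := inode{key: []byte("k"), value: []byte("v")} // pgid 0: data not yet on a page
	branch := inode{pgid: 42, key: []byte("k")}         // points at child page 42
	fmt.Println(leaf.pgid == 0, branch.value == nil)    // true true
}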

View File

@ -3,15 +3,19 @@ package bbolt
import (
"testing"
"unsafe"
"go.etcd.io/bbolt/internal/common"
)
// Ensure that a node can insert a key/value.
func TestNode_put(t *testing.T) {
n := &node{inodes: make(inodes, 0), bucket: &Bucket{tx: &Tx{meta: &meta{pgid: 1}}}}
m := &common.Meta{}
m.SetPgid(1)
n := &node{inodes: make(inodes, 0), bucket: &Bucket{tx: &Tx{meta: m}}}
n.put([]byte("baz"), []byte("baz"), []byte("2"), 0, 0)
n.put([]byte("foo"), []byte("foo"), []byte("0"), 0, 0)
n.put([]byte("bar"), []byte("bar"), []byte("1"), 0, 0)
n.put([]byte("foo"), []byte("foo"), []byte("3"), 0, leafPageFlag)
n.put([]byte("foo"), []byte("foo"), []byte("3"), 0, common.LeafPageFlag)
if len(n.inodes) != 3 {
t.Fatalf("exp=3; got=%d", len(n.inodes))
@ -25,7 +29,7 @@ func TestNode_put(t *testing.T) {
if k, v := n.inodes[2].key, n.inodes[2].value; string(k) != "foo" || string(v) != "3" {
t.Fatalf("exp=<foo,3>; got=<%s,%s>", k, v)
}
if n.inodes[2].flags != uint32(leafPageFlag) {
if n.inodes[2].flags != uint32(common.LeafPageFlag) {
t.Fatalf("not a leaf: %d", n.inodes[2].flags)
}
}
@ -34,18 +38,19 @@ func TestNode_put(t *testing.T) {
func TestNode_read_LeafPage(t *testing.T) {
// Create a page.
var buf [4096]byte
page := (*page)(unsafe.Pointer(&buf[0]))
page.flags = leafPageFlag
page.count = 2
page := (*common.Page)(unsafe.Pointer(&buf[0]))
page.SetFlags(common.LeafPageFlag)
page.SetCount(2)
// Insert 2 elements at the beginning. sizeof(leafPageElement) == 16
nodes := (*[3]leafPageElement)(unsafe.Pointer(uintptr(unsafe.Pointer(page)) + unsafe.Sizeof(*page)))
nodes[0] = leafPageElement{flags: 0, pos: 32, ksize: 3, vsize: 4} // pos = sizeof(leafPageElement) * 2
nodes[1] = leafPageElement{flags: 0, pos: 23, ksize: 10, vsize: 3} // pos = sizeof(leafPageElement) + 3 + 4
nodes := page.LeafPageElements()
//nodes := (*[3]leafPageElement)(unsafe.Pointer(uintptr(unsafe.Pointer(page)) + unsafe.Sizeof(*page)))
nodes[0] = *common.NewLeafPageElement(0, 32, 3, 4) // pos = sizeof(leafPageElement) * 2
nodes[1] = *common.NewLeafPageElement(0, 23, 10, 3) // pos = sizeof(leafPageElement) + 3 + 4
// Write data for the nodes at the end.
const s = "barfoozhelloworldbye"
data := unsafeByteSlice(unsafe.Pointer(&nodes[2]), 0, 0, len(s))
data := common.UnsafeByteSlice(unsafe.Pointer(uintptr(unsafe.Pointer(page))+unsafe.Sizeof(*page)+common.LeafPageElementSize*2), 0, 0, len(s))
copy(data, s)
// Deserialize page into a leaf.
@ -70,14 +75,16 @@ func TestNode_read_LeafPage(t *testing.T) {
// Ensure that a node can serialize into a leaf page.
func TestNode_write_LeafPage(t *testing.T) {
// Create a node.
n := &node{isLeaf: true, inodes: make(inodes, 0), bucket: &Bucket{tx: &Tx{db: &DB{}, meta: &meta{pgid: 1}}}}
m := &common.Meta{}
m.SetPgid(1)
n := &node{isLeaf: true, inodes: make(inodes, 0), bucket: &Bucket{tx: &Tx{db: &DB{}, meta: m}}}
n.put([]byte("susy"), []byte("susy"), []byte("que"), 0, 0)
n.put([]byte("ricki"), []byte("ricki"), []byte("lake"), 0, 0)
n.put([]byte("john"), []byte("john"), []byte("johnson"), 0, 0)
// Write it to a page.
var buf [4096]byte
p := (*page)(unsafe.Pointer(&buf[0]))
p := (*common.Page)(unsafe.Pointer(&buf[0]))
n.write(p)
// Read the page back in.
@ -102,7 +109,9 @@ func TestNode_write_LeafPage(t *testing.T) {
// Ensure that a node can split into appropriate subgroups.
func TestNode_split(t *testing.T) {
// Create a node.
n := &node{inodes: make(inodes, 0), bucket: &Bucket{tx: &Tx{db: &DB{}, meta: &meta{pgid: 1}}}}
m := &common.Meta{}
m.SetPgid(1)
n := &node{inodes: make(inodes, 0), bucket: &Bucket{tx: &Tx{db: &DB{}, meta: m}}}
n.put([]byte("00000001"), []byte("00000001"), []byte("0123456701234567"), 0, 0)
n.put([]byte("00000002"), []byte("00000002"), []byte("0123456701234567"), 0, 0)
n.put([]byte("00000003"), []byte("00000003"), []byte("0123456701234567"), 0, 0)
@ -127,7 +136,9 @@ func TestNode_split(t *testing.T) {
// Ensure that a page with the minimum number of inodes just returns a single node.
func TestNode_split_MinKeys(t *testing.T) {
// Create a node.
n := &node{inodes: make(inodes, 0), bucket: &Bucket{tx: &Tx{db: &DB{}, meta: &meta{pgid: 1}}}}
m := &common.Meta{}
m.SetPgid(1)
n := &node{inodes: make(inodes, 0), bucket: &Bucket{tx: &Tx{db: &DB{}, meta: m}}}
n.put([]byte("00000001"), []byte("00000001"), []byte("0123456701234567"), 0, 0)
n.put([]byte("00000002"), []byte("00000002"), []byte("0123456701234567"), 0, 0)
@ -141,7 +152,9 @@ func TestNode_split_MinKeys(t *testing.T) {
// Ensure that a node that has keys that all fit on a page just returns one leaf.
func TestNode_split_SinglePage(t *testing.T) {
// Create a node.
n := &node{inodes: make(inodes, 0), bucket: &Bucket{tx: &Tx{db: &DB{}, meta: &meta{pgid: 1}}}}
m := &common.Meta{}
m.SetPgid(1)
n := &node{inodes: make(inodes, 0), bucket: &Bucket{tx: &Tx{db: &DB{}, meta: m}}}
n.put([]byte("00000001"), []byte("00000001"), []byte("0123456701234567"), 0, 0)
n.put([]byte("00000002"), []byte("00000002"), []byte("0123456701234567"), 0, 0)
n.put([]byte("00000003"), []byte("00000003"), []byte("0123456701234567"), 0, 0)

page.go
View File

@ -1,214 +0,0 @@
package bbolt
import (
"fmt"
"os"
"sort"
"unsafe"
)
const pageHeaderSize = unsafe.Sizeof(page{})
const minKeysPerPage = 2
const branchPageElementSize = unsafe.Sizeof(branchPageElement{})
const leafPageElementSize = unsafe.Sizeof(leafPageElement{})
const (
branchPageFlag = 0x01
leafPageFlag = 0x02
metaPageFlag = 0x04
freelistPageFlag = 0x10
)
const (
bucketLeafFlag = 0x01
)
type pgid uint64
type page struct {
id pgid
flags uint16
count uint16
overflow uint32
}
// typ returns a human readable page type string used for debugging.
func (p *page) typ() string {
if (p.flags & branchPageFlag) != 0 {
return "branch"
} else if (p.flags & leafPageFlag) != 0 {
return "leaf"
} else if (p.flags & metaPageFlag) != 0 {
return "meta"
} else if (p.flags & freelistPageFlag) != 0 {
return "freelist"
}
return fmt.Sprintf("unknown<%02x>", p.flags)
}
// meta returns a pointer to the metadata section of the page.
func (p *page) meta() *meta {
return (*meta)(unsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p)))
}
func (p *page) fastCheck(id pgid) {
_assert(p.id == id, "Page expected to be: %v, but self identifies as %v", id, p.id)
// Only one page-type flag can be set.
_assert(p.flags == branchPageFlag ||
p.flags == leafPageFlag ||
p.flags == metaPageFlag ||
p.flags == freelistPageFlag,
"page %v: has unexpected type/flags: %x", p.id, p.flags)
}
// leafPageElement retrieves the leaf node by index
func (p *page) leafPageElement(index uint16) *leafPageElement {
return (*leafPageElement)(unsafeIndex(unsafe.Pointer(p), unsafe.Sizeof(*p),
leafPageElementSize, int(index)))
}
// leafPageElements retrieves a list of leaf nodes.
func (p *page) leafPageElements() []leafPageElement {
if p.count == 0 {
return nil
}
var elems []leafPageElement
data := unsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p))
unsafeSlice(unsafe.Pointer(&elems), data, int(p.count))
return elems
}
// branchPageElement retrieves the branch node by index
func (p *page) branchPageElement(index uint16) *branchPageElement {
return (*branchPageElement)(unsafeIndex(unsafe.Pointer(p), unsafe.Sizeof(*p),
unsafe.Sizeof(branchPageElement{}), int(index)))
}
// branchPageElements retrieves a list of branch nodes.
func (p *page) branchPageElements() []branchPageElement {
if p.count == 0 {
return nil
}
var elems []branchPageElement
data := unsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p))
unsafeSlice(unsafe.Pointer(&elems), data, int(p.count))
return elems
}
// hexdump writes n bytes of the page to STDERR as hex output.
func (p *page) hexdump(n int) {
buf := unsafeByteSlice(unsafe.Pointer(p), 0, 0, n)
fmt.Fprintf(os.Stderr, "%x\n", buf)
}
type pages []*page
func (s pages) Len() int { return len(s) }
func (s pages) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
func (s pages) Less(i, j int) bool { return s[i].id < s[j].id }
// branchPageElement represents a node on a branch page.
type branchPageElement struct {
pos uint32
ksize uint32
pgid pgid
}
// key returns a byte slice of the node key.
func (n *branchPageElement) key() []byte {
return unsafeByteSlice(unsafe.Pointer(n), 0, int(n.pos), int(n.pos)+int(n.ksize))
}
// leafPageElement represents a node on a leaf page.
type leafPageElement struct {
flags uint32
pos uint32
ksize uint32
vsize uint32
}
// key returns a byte slice of the node key.
func (n *leafPageElement) key() []byte {
i := int(n.pos)
j := i + int(n.ksize)
return unsafeByteSlice(unsafe.Pointer(n), 0, i, j)
}
// value returns a byte slice of the node value.
func (n *leafPageElement) value() []byte {
i := int(n.pos) + int(n.ksize)
j := i + int(n.vsize)
return unsafeByteSlice(unsafe.Pointer(n), 0, i, j)
}
// PageInfo represents human readable information about a page.
type PageInfo struct {
ID int
Type string
Count int
OverflowCount int
}
type pgids []pgid
func (s pgids) Len() int { return len(s) }
func (s pgids) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
func (s pgids) Less(i, j int) bool { return s[i] < s[j] }
// merge returns the sorted union of a and b.
func (a pgids) merge(b pgids) pgids {
// Return the opposite slice if one is nil.
if len(a) == 0 {
return b
}
if len(b) == 0 {
return a
}
merged := make(pgids, len(a)+len(b))
mergepgids(merged, a, b)
return merged
}
// mergepgids copies the sorted union of a and b into dst.
// If dst is too small, it panics.
func mergepgids(dst, a, b pgids) {
if len(dst) < len(a)+len(b) {
panic(fmt.Errorf("mergepgids bad len %d < %d + %d", len(dst), len(a), len(b)))
}
// Copy in the opposite slice if one is nil.
if len(a) == 0 {
copy(dst, b)
return
}
if len(b) == 0 {
copy(dst, a)
return
}
// Merged will hold all elements from both lists.
merged := dst[:0]
// Assign lead to the slice with a lower starting value, follow to the higher value.
lead, follow := a, b
if b[0] < a[0] {
lead, follow = b, a
}
// Continue while there are elements in the lead.
for len(lead) > 0 {
// Merge largest prefix of lead that is ahead of follow[0].
n := sort.Search(len(lead), func(i int) bool { return lead[i] > follow[0] })
merged = append(merged, lead[:n]...)
if n >= len(lead) {
break
}
// Swap lead and follow.
lead, follow = follow, lead[n:]
}
// Append what's left in follow.
_ = append(merged, follow...)
}
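
mergepgids is a leapfrog merge: binary-search for the longest prefix of the leading list that stays at or below the other list's head, append it, then swap roles. A self-contained re-implementation over plain uint64s to show the behavior (the real code writes into a caller-provided dst instead of allocating):

package main

import (
	"fmt"
	"sort"
)

// merge returns the sorted union of two sorted slices using the same
// leapfrog strategy as mergepgids above.
func merge(a, b []uint64) []uint64 {
	if len(a) == 0 {
		return b
	}
	if len(b) == 0 {
		return a
	}
	merged := make([]uint64, 0, len(a)+len(b))
	lead, follow := a, b
	if b[0] < a[0] {
		lead, follow = b, a
	}
	for len(lead) > 0 {
		// Longest prefix of lead that is not ahead of follow[0].
		n := sort.Search(len(lead), func(i int) bool { return lead[i] > follow[0] })
		merged = append(merged, lead[:n]...)
		if n >= len(lead) {
			break
		}
		lead, follow = follow, lead[n:]
	}
	return append(merged, follow...)
}

func main() {
	fmt.Println(merge([]uint64{4, 5, 6, 10}, []uint64{1, 3, 8, 9}))
	// [1 3 4 5 6 8 9 10]
}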

View File

@ -1,72 +0,0 @@
package bbolt
import (
"reflect"
"sort"
"testing"
"testing/quick"
)
// Ensure that the page type can be returned in human readable format.
func TestPage_typ(t *testing.T) {
if typ := (&page{flags: branchPageFlag}).typ(); typ != "branch" {
t.Fatalf("exp=branch; got=%v", typ)
}
if typ := (&page{flags: leafPageFlag}).typ(); typ != "leaf" {
t.Fatalf("exp=leaf; got=%v", typ)
}
if typ := (&page{flags: metaPageFlag}).typ(); typ != "meta" {
t.Fatalf("exp=meta; got=%v", typ)
}
if typ := (&page{flags: freelistPageFlag}).typ(); typ != "freelist" {
t.Fatalf("exp=freelist; got=%v", typ)
}
if typ := (&page{flags: 20000}).typ(); typ != "unknown<4e20>" {
t.Fatalf("exp=unknown<4e20>; got=%v", typ)
}
}
// Ensure that the hexdump debugging function doesn't blow up.
func TestPage_dump(t *testing.T) {
(&page{id: 256}).hexdump(16)
}
func TestPgids_merge(t *testing.T) {
a := pgids{4, 5, 6, 10, 11, 12, 13, 27}
b := pgids{1, 3, 8, 9, 25, 30}
c := a.merge(b)
if !reflect.DeepEqual(c, pgids{1, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 25, 27, 30}) {
t.Errorf("mismatch: %v", c)
}
a = pgids{4, 5, 6, 10, 11, 12, 13, 27, 35, 36}
b = pgids{8, 9, 25, 30}
c = a.merge(b)
if !reflect.DeepEqual(c, pgids{4, 5, 6, 8, 9, 10, 11, 12, 13, 25, 27, 30, 35, 36}) {
t.Errorf("mismatch: %v", c)
}
}
func TestPgids_merge_quick(t *testing.T) {
if err := quick.Check(func(a, b pgids) bool {
// Sort incoming lists.
sort.Sort(a)
sort.Sort(b)
// Merge the two lists together.
got := a.merge(b)
// The expected value should be the two lists combined and sorted.
exp := append(a, b...)
sort.Sort(exp)
if !reflect.DeepEqual(exp, got) {
t.Errorf("\nexp=%+v\ngot=%+v\n", exp, got)
return false
}
return true
}, nil); err != nil {
t.Fatal(err)
}
}

tx.go
View File

@ -9,10 +9,9 @@ import (
"sync/atomic"
"time"
"unsafe"
)
// txid represents the internal transaction identifier.
type txid uint64
"go.etcd.io/bbolt/internal/common"
)
// Tx represents a read-only or read/write transaction on the database.
// Read-only transactions can be used for retrieving values for keys and creating cursors.
@ -26,9 +25,9 @@ type Tx struct {
writable bool
managed bool
db *DB
meta *meta
meta *common.Meta
root Bucket
pages map[pgid]*page
pages map[common.Pgid]*common.Page
stats TxStats
commitHandlers []func()
@ -47,24 +46,24 @@ func (tx *Tx) init(db *DB) {
tx.pages = nil
// Copy the meta page since it can be changed by the writer.
tx.meta = &meta{}
db.meta().copy(tx.meta)
tx.meta = &common.Meta{}
db.meta().Copy(tx.meta)
// Copy over the root bucket.
tx.root = newBucket(tx)
tx.root.bucket = &bucket{}
*tx.root.bucket = tx.meta.root
tx.root.InBucket = &common.InBucket{}
*tx.root.InBucket = *(tx.meta.RootBucket())
// Increment the transaction id and add a page cache for writable transactions.
if tx.writable {
tx.pages = make(map[pgid]*page)
tx.meta.txid += txid(1)
tx.pages = make(map[common.Pgid]*common.Page)
tx.meta.IncTxid()
}
}
// ID returns the transaction id.
func (tx *Tx) ID() int {
return int(tx.meta.txid)
return int(tx.meta.Txid())
}
// DB returns a reference to the database that created the transaction.
@ -74,7 +73,7 @@ func (tx *Tx) DB() *DB {
// Size returns current database size in bytes as seen by this transaction.
func (tx *Tx) Size() int64 {
return int64(tx.meta.pgid) * int64(tx.db.pageSize)
return int64(tx.meta.Pgid()) * int64(tx.db.pageSize)
}
// Writable returns whether the transaction can perform write operations.
@ -140,11 +139,11 @@ func (tx *Tx) OnCommit(fn func()) {
// Returns an error if a disk write error occurs, or if Commit is
// called on a read-only transaction.
func (tx *Tx) Commit() error {
_assert(!tx.managed, "managed tx commit not allowed")
common.Assert(!tx.managed, "managed tx commit not allowed")
if tx.db == nil {
return ErrTxClosed
return common.ErrTxClosed
} else if !tx.writable {
return ErrTxNotWritable
return common.ErrTxNotWritable
}
// TODO(benbjohnson): Use vectorized I/O to write out dirty pages.
@ -156,7 +155,7 @@ func (tx *Tx) Commit() error {
tx.stats.IncRebalanceTime(time.Since(startTime))
}
opgid := tx.meta.pgid
opgid := tx.meta.Pgid()
// spill data onto dirty pages.
startTime = time.Now()
@ -167,11 +166,11 @@ func (tx *Tx) Commit() error {
tx.stats.IncSpillTime(time.Since(startTime))
// Free the old root bucket.
tx.meta.root.root = tx.root.root
tx.meta.RootBucket().SetRootPage(tx.root.RootPage())
// Free the old freelist because commit writes out a fresh freelist.
if tx.meta.freelist != pgidNoFreelist {
tx.db.freelist.free(tx.meta.txid, tx.db.page(tx.meta.freelist))
if tx.meta.Freelist() != common.PgidNoFreelist {
tx.db.freelist.free(tx.meta.Txid(), tx.db.page(tx.meta.Freelist()))
}
if !tx.db.NoFreelistSync {
@ -180,12 +179,12 @@ func (tx *Tx) Commit() error {
return err
}
} else {
tx.meta.freelist = pgidNoFreelist
tx.meta.SetFreelist(common.PgidNoFreelist)
}
// If the high water mark has moved up then attempt to grow the database.
if tx.meta.pgid > opgid {
if err := tx.db.grow(int(tx.meta.pgid+1) * tx.db.pageSize); err != nil {
if tx.meta.Pgid() > opgid {
if err := tx.db.grow(int(tx.meta.Pgid()+1) * tx.db.pageSize); err != nil {
tx.rollback()
return err
}
@ -244,7 +243,7 @@ func (tx *Tx) commitFreelist() error {
tx.rollback()
return err
}
tx.meta.freelist = p.id
tx.meta.SetFreelist(p.Id())
return nil
}
@ -252,9 +251,9 @@ func (tx *Tx) commitFreelist() error {
// Rollback closes the transaction and ignores all previous updates. Read-only
// transactions must be rolled back and not committed.
func (tx *Tx) Rollback() error {
_assert(!tx.managed, "managed tx rollback not allowed")
common.Assert(!tx.managed, "managed tx rollback not allowed")
if tx.db == nil {
return ErrTxClosed
return common.ErrTxClosed
}
tx.nonPhysicalRollback()
return nil
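
For callers, these guards matter mostly when managing transactions by hand. A usage sketch against the public API (the file name is illustrative): the deferred Rollback is a safe cleanup that returns ErrTxClosed after a successful Commit, which the defer discards:

package main

import (
	"log"

	bolt "go.etcd.io/bbolt"
)

func main() {
	db, err := bolt.Open("my.db", 0600, nil) // illustrative path
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	tx, err := db.Begin(true) // writable transaction, managed by the caller
	if err != nil {
		log.Fatal(err)
	}
	defer tx.Rollback() // returns ErrTxClosed after a successful Commit; ignored

	if _, err := tx.CreateBucketIfNotExists([]byte("widgets")); err != nil {
		log.Fatal(err)
	}
	if err := tx.Commit(); err != nil {
		log.Fatal(err)
	}
}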
@ -266,7 +265,7 @@ func (tx *Tx) nonPhysicalRollback() {
return
}
if tx.writable {
tx.db.freelist.rollback(tx.meta.txid)
tx.db.freelist.rollback(tx.meta.Txid())
}
tx.close()
}
@ -277,7 +276,7 @@ func (tx *Tx) rollback() {
return
}
if tx.writable {
tx.db.freelist.rollback(tx.meta.txid)
tx.db.freelist.rollback(tx.meta.Txid())
// When mmap fails, the `data`, `dataref` and `datasz` may be reset to
// zero values, and there is no way to reload free page IDs in this case.
if tx.db.data != nil {
@ -287,7 +286,7 @@ func (tx *Tx) rollback() {
tx.db.freelist.noSyncReload(tx.db.freepages())
} else {
// Read free page list from freelist page.
tx.db.freelist.reload(tx.db.page(tx.db.meta().freelist))
tx.db.freelist.reload(tx.db.page(tx.db.meta().Freelist()))
}
}
}
@ -352,13 +351,13 @@ func (tx *Tx) WriteTo(w io.Writer) (n int64, err error) {
// Generate a meta page. We use the same page data for both meta pages.
buf := make([]byte, tx.db.pageSize)
page := (*page)(unsafe.Pointer(&buf[0]))
page.flags = metaPageFlag
*page.meta() = *tx.meta
page := (*common.Page)(unsafe.Pointer(&buf[0]))
page.SetFlags(common.MetaPageFlag)
*page.Meta() = *tx.meta
// Write meta 0.
page.id = 0
page.meta().checksum = page.meta().sum64()
page.SetId(0)
page.Meta().SetChecksum(page.Meta().Sum64())
nn, err := w.Write(buf)
n += int64(nn)
if err != nil {
@ -366,9 +365,9 @@ func (tx *Tx) WriteTo(w io.Writer) (n int64, err error) {
}
// Write meta 1 with a lower transaction id.
page.id = 1
page.meta().txid -= 1
page.meta().checksum = page.meta().sum64()
page.SetId(1)
page.Meta().DecTxid()
page.Meta().SetChecksum(page.Meta().Sum64())
nn, err = w.Write(buf)
n += int64(nn)
if err != nil {
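
Writing meta 1 with a decremented txid ensures that, when the copied file is later opened, meta 0 wins as the newest valid meta. A simplified sketch of that selection rule; the real meta validation also checks magic, version and checksum, which the valid flag stands in for:

package main

import "fmt"

// Simplified meta: valid stands in for the checksum/magic/version checks.
type meta struct {
	txid  uint64
	valid bool
}

// pick prefers the valid meta with the higher txid, falling back to the
// other copy if the preferred one fails validation.
func pick(m0, m1 *meta) *meta {
	if m0.valid && (!m1.valid || m0.txid >= m1.txid) {
		return m0
	}
	if m1.valid {
		return m1
	}
	return nil // both metas invalid: the file is unusable
}

func main() {
	m0 := &meta{txid: 7, valid: true}
	m1 := &meta{txid: 6, valid: true} // WriteTo: meta 1 = snapshot txid - 1
	fmt.Println(pick(m0, m1).txid)    // 7
}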
@ -408,14 +407,14 @@ func (tx *Tx) CopyFile(path string, mode os.FileMode) error {
}
// allocate returns a contiguous block of memory starting at a given page.
func (tx *Tx) allocate(count int) (*page, error) {
p, err := tx.db.allocate(tx.meta.txid, count)
func (tx *Tx) allocate(count int) (*common.Page, error) {
p, err := tx.db.allocate(tx.meta.Txid(), count)
if err != nil {
return nil, err
}
// Save to our page cache.
tx.pages[p.id] = p
tx.pages[p.Id()] = p
// Update statistics.
tx.stats.IncPageCount(int64(count))
@ -427,18 +426,18 @@ func (tx *Tx) allocate(count int) (*page, error) {
// write writes any dirty pages to disk.
func (tx *Tx) write() error {
// Sort pages by id.
pages := make(pages, 0, len(tx.pages))
pages := make(common.Pages, 0, len(tx.pages))
for _, p := range tx.pages {
pages = append(pages, p)
}
// Clear out page cache early.
tx.pages = make(map[pgid]*page)
tx.pages = make(map[common.Pgid]*common.Page)
sort.Sort(pages)
// Write pages to disk in order.
for _, p := range pages {
rem := (uint64(p.overflow) + 1) * uint64(tx.db.pageSize)
offset := int64(p.id) * int64(tx.db.pageSize)
rem := (uint64(p.Overflow()) + 1) * uint64(tx.db.pageSize)
offset := int64(p.Id()) * int64(tx.db.pageSize)
var written uintptr
// Write out page in "max allocation" sized chunks.
@ -447,7 +446,7 @@ func (tx *Tx) write() error {
if sz > maxAllocSize-1 {
sz = maxAllocSize - 1
}
buf := unsafeByteSlice(unsafe.Pointer(p), written, 0, int(sz))
buf := common.UnsafeByteSlice(unsafe.Pointer(p), written, 0, int(sz))
if _, err := tx.db.ops.writeAt(buf, offset); err != nil {
return err
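
The surrounding loop writes each dirty page in chunks capped just below maxAllocSize. The same pattern, reduced to a self-contained helper with a tiny chunk size for demonstration:

package main

import (
	"fmt"
	"os"
)

const maxChunk = 8 // stand-in for maxAllocSize-1; tiny on purpose

// writeChunked mirrors the loop above: write buf at offset in chunks no
// larger than maxChunk, advancing the file offset as it goes.
func writeChunked(f *os.File, buf []byte, offset int64) error {
	for written := 0; written < len(buf); {
		sz := len(buf) - written
		if sz > maxChunk {
			sz = maxChunk
		}
		if _, err := f.WriteAt(buf[written:written+sz], offset); err != nil {
			return err
		}
		offset += int64(sz)
		written += sz
	}
	return nil
}

func main() {
	f, err := os.CreateTemp("", "chunk")
	if err != nil {
		panic(err)
	}
	defer os.Remove(f.Name())
	defer f.Close()
	if err := writeChunked(f, []byte("0123456789abcdef"), 0); err != nil {
		panic(err)
	}
	fmt.Println("wrote 16 bytes in 8-byte chunks")
}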
@ -469,7 +468,7 @@ func (tx *Tx) write() error {
}
// Ignore file sync if flag is set on DB.
if !tx.db.NoSync || IgnoreNoSync {
if !tx.db.NoSync || common.IgnoreNoSync {
if err := fdatasync(tx.db); err != nil {
return err
}
@ -479,11 +478,11 @@ func (tx *Tx) write() error {
for _, p := range pages {
// Ignore pages larger than 1 page.
// These are allocated using make() instead of the page pool.
if int(p.overflow) != 0 {
if int(p.Overflow()) != 0 {
continue
}
buf := unsafeByteSlice(unsafe.Pointer(p), 0, 0, tx.db.pageSize)
buf := common.UnsafeByteSlice(unsafe.Pointer(p), 0, 0, tx.db.pageSize)
// See https://go.googlesource.com/go/+/f03c9202c43e0abb130669852082117ca50aa9b1
for i := range buf {
@ -500,13 +499,13 @@ func (tx *Tx) writeMeta() error {
// Create a temporary buffer for the meta page.
buf := make([]byte, tx.db.pageSize)
p := tx.db.pageInBuffer(buf, 0)
tx.meta.write(p)
tx.meta.Write(p)
// Write the meta page to file.
if _, err := tx.db.ops.writeAt(buf, int64(p.id)*int64(tx.db.pageSize)); err != nil {
if _, err := tx.db.ops.writeAt(buf, int64(p.Id())*int64(tx.db.pageSize)); err != nil {
return err
}
if !tx.db.NoSync || IgnoreNoSync {
if !tx.db.NoSync || common.IgnoreNoSync {
if err := fdatasync(tx.db); err != nil {
return err
}
@ -520,69 +519,69 @@ func (tx *Tx) writeMeta() error {
// page returns a reference to the page with a given id.
// If page has been written to then a temporary buffered page is returned.
func (tx *Tx) page(id pgid) *page {
func (tx *Tx) page(id common.Pgid) *common.Page {
// Check the dirty pages first.
if tx.pages != nil {
if p, ok := tx.pages[id]; ok {
p.fastCheck(id)
p.FastCheck(id)
return p
}
}
// Otherwise return directly from the mmap.
p := tx.db.page(id)
p.fastCheck(id)
p.FastCheck(id)
return p
}
// forEachPage iterates over every page within a given page and executes a function.
func (tx *Tx) forEachPage(pgidnum pgid, fn func(*page, int, []pgid)) {
stack := make([]pgid, 10)
func (tx *Tx) forEachPage(pgidnum common.Pgid, fn func(*common.Page, int, []common.Pgid)) {
stack := make([]common.Pgid, 10)
stack[0] = pgidnum
tx.forEachPageInternal(stack[:1], fn)
}
func (tx *Tx) forEachPageInternal(pgidstack []pgid, fn func(*page, int, []pgid)) {
func (tx *Tx) forEachPageInternal(pgidstack []common.Pgid, fn func(*common.Page, int, []common.Pgid)) {
p := tx.page(pgidstack[len(pgidstack)-1])
// Execute function.
fn(p, len(pgidstack)-1, pgidstack)
// Recursively loop over children.
if (p.flags & branchPageFlag) != 0 {
for i := 0; i < int(p.count); i++ {
elem := p.branchPageElement(uint16(i))
tx.forEachPageInternal(append(pgidstack, elem.pgid), fn)
if (p.Flags() & common.BranchPageFlag) != 0 {
for i := 0; i < int(p.Count()); i++ {
elem := p.BranchPageElement(uint16(i))
tx.forEachPageInternal(append(pgidstack, elem.Pgid()), fn)
}
}
}
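
forEachPage is a depth-first walk that carries the pgid stack so callers (like the consistency checker below) can report the full path to a bad page. A toy version over a map-based tree, with made-up page ids:

package main

import "fmt"

// Toy page tree: branch pages map to child ids; leaves have no entry.
var children = map[uint64][]uint64{
	3: {4, 5},
}

// forEachPage mirrors tx.forEachPage: depth-first, carrying the id stack
// so the callback can see the path from the root to the current page.
func forEachPage(id uint64, stack []uint64, fn func(id uint64, depth int, stack []uint64)) {
	stack = append(stack, id)
	fn(id, len(stack)-1, stack)
	for _, c := range children[id] {
		forEachPage(c, stack, fn)
	}
}

func main() {
	forEachPage(3, nil, func(id uint64, depth int, stack []uint64) {
		fmt.Println(id, depth, stack)
	})
	// Output:
	// 3 0 [3]
	// 4 1 [3 4]
	// 5 1 [3 5]
}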
// Page returns page information for a given page number.
// This is only safe for concurrent use when used by a writable transaction.
func (tx *Tx) Page(id int) (*PageInfo, error) {
func (tx *Tx) Page(id int) (*common.PageInfo, error) {
if tx.db == nil {
return nil, ErrTxClosed
} else if pgid(id) >= tx.meta.pgid {
return nil, common.ErrTxClosed
} else if common.Pgid(id) >= tx.meta.Pgid() {
return nil, nil
}
if tx.db.freelist == nil {
return nil, ErrFreePagesNotLoaded
return nil, common.ErrFreePagesNotLoaded
}
// Build the page info.
p := tx.db.page(pgid(id))
info := &PageInfo{
p := tx.db.page(common.Pgid(id))
info := &common.PageInfo{
ID: id,
Count: int(p.count),
OverflowCount: int(p.overflow),
Count: int(p.Count()),
OverflowCount: int(p.Overflow()),
}
// Determine the type (or if it's free).
if tx.db.freelist.freed(pgid(id)) {
if tx.db.freelist.freed(common.Pgid(id)) {
info.Type = "free"
} else {
info.Type = p.typ()
info.Type = p.Typ()
}
return info, nil

View File

@ -3,6 +3,8 @@ package bbolt
import (
"encoding/hex"
"fmt"
"go.etcd.io/bbolt/internal/common"
)
// Check performs several consistency checks on the database for this transaction.
@ -37,8 +39,8 @@ func (tx *Tx) check(kvStringer KVStringer, ch chan error) {
tx.db.loadFreelist()
// Check if any pages are double freed.
freed := make(map[pgid]bool)
all := make([]pgid, tx.db.freelist.count())
freed := make(map[common.Pgid]bool)
all := make([]common.Pgid, tx.db.freelist.count())
tx.db.freelist.copyall(all)
for _, id := range all {
if freed[id] {
@ -48,12 +50,12 @@ func (tx *Tx) check(kvStringer KVStringer, ch chan error) {
}
// Track every reachable page.
reachable := make(map[pgid]*page)
reachable := make(map[common.Pgid]*common.Page)
reachable[0] = tx.page(0) // meta0
reachable[1] = tx.page(1) // meta1
if tx.meta.freelist != pgidNoFreelist {
for i := uint32(0); i <= tx.page(tx.meta.freelist).overflow; i++ {
reachable[tx.meta.freelist+pgid(i)] = tx.page(tx.meta.freelist)
if tx.meta.Freelist() != common.PgidNoFreelist {
for i := uint32(0); i <= tx.page(tx.meta.Freelist()).Overflow(); i++ {
reachable[tx.meta.Freelist()+common.Pgid(i)] = tx.page(tx.meta.Freelist())
}
}
@ -61,7 +63,7 @@ func (tx *Tx) check(kvStringer KVStringer, ch chan error) {
tx.checkBucket(&tx.root, reachable, freed, kvStringer, ch)
// Ensure all pages below high water mark are either reachable or freed.
for i := pgid(0); i < tx.meta.pgid; i++ {
for i := common.Pgid(0); i < tx.meta.Pgid(); i++ {
_, isReachable := reachable[i]
if !isReachable && !freed[i] {
ch <- fmt.Errorf("page %d: unreachable unfreed", int(i))
@ -72,22 +74,22 @@ func (tx *Tx) check(kvStringer KVStringer, ch chan error) {
close(ch)
}
func (tx *Tx) checkBucket(b *Bucket, reachable map[pgid]*page, freed map[pgid]bool,
func (tx *Tx) checkBucket(b *Bucket, reachable map[common.Pgid]*common.Page, freed map[common.Pgid]bool,
kvStringer KVStringer, ch chan error) {
// Ignore inline buckets.
if b.root == 0 {
if b.RootPage() == 0 {
return
}
// Check every page used by this bucket.
b.tx.forEachPage(b.root, func(p *page, _ int, stack []pgid) {
if p.id > tx.meta.pgid {
ch <- fmt.Errorf("page %d: out of bounds: %d (stack: %v)", int(p.id), int(b.tx.meta.pgid), stack)
b.tx.forEachPage(b.RootPage(), func(p *common.Page, _ int, stack []common.Pgid) {
if p.Id() > tx.meta.Pgid() {
ch <- fmt.Errorf("page %d: out of bounds: %d (stack: %v)", int(p.Id()), int(b.tx.meta.Pgid()), stack)
}
// Ensure each page is only referenced once.
for i := pgid(0); i <= pgid(p.overflow); i++ {
var id = p.id + i
for i := common.Pgid(0); i <= common.Pgid(p.Overflow()); i++ {
var id = p.Id() + i
if _, ok := reachable[id]; ok {
ch <- fmt.Errorf("page %d: multiple references (stack: %v)", int(id), stack)
}
@ -95,14 +97,14 @@ func (tx *Tx) checkBucket(b *Bucket, reachable map[pgid]*page, freed map[pgid]bo
}
// We should only encounter un-freed leaf and branch pages.
if freed[p.id] {
ch <- fmt.Errorf("page %d: reachable freed", int(p.id))
} else if (p.flags&branchPageFlag) == 0 && (p.flags&leafPageFlag) == 0 {
ch <- fmt.Errorf("page %d: invalid type: %s (stack: %v)", int(p.id), p.typ(), stack)
if freed[p.Id()] {
ch <- fmt.Errorf("page %d: reachable freed", int(p.Id()))
} else if (p.Flags()&common.BranchPageFlag) == 0 && (p.Flags()&common.LeafPageFlag) == 0 {
ch <- fmt.Errorf("page %d: invalid type: %s (stack: %v)", int(p.Id()), p.Typ(), stack)
}
})
tx.recursivelyCheckPages(b.root, kvStringer.KeyToString, ch)
tx.recursivelyCheckPages(b.RootPage(), kvStringer.KeyToString, ch)
// Check each bucket within this bucket.
_ = b.ForEachBucket(func(k []byte) error {
@ -117,7 +119,7 @@ func (tx *Tx) checkBucket(b *Bucket, reachable map[pgid]*page, freed map[pgid]bo
// key order constraints:
// - keys on pages must be sorted
// - keys on child pages are between 2 consecutive keys on the parent's branch page.
func (tx *Tx) recursivelyCheckPages(pgId pgid, keyToString func([]byte) string, ch chan error) {
func (tx *Tx) recursivelyCheckPages(pgId common.Pgid, keyToString func([]byte) string, ch chan error) {
tx.recursivelyCheckPagesInternal(pgId, nil, nil, nil, keyToString, ch)
}
@ -127,36 +129,36 @@ func (tx *Tx) recursivelyCheckPages(pgId pgid, keyToString func([]byte) string,
// - Are in the right ordering relationship to their parents.
// `pagesStack` is expected to contain IDs of pages from the tree root to `pgid`, for clean debugging messages.
func (tx *Tx) recursivelyCheckPagesInternal(
pgId pgid, minKeyClosed, maxKeyOpen []byte, pagesStack []pgid,
pgId common.Pgid, minKeyClosed, maxKeyOpen []byte, pagesStack []common.Pgid,
keyToString func([]byte) string, ch chan error) (maxKeyInSubtree []byte) {
p := tx.page(pgId)
pagesStack = append(pagesStack, pgId)
switch {
case p.flags&branchPageFlag != 0:
case p.Flags()&common.BranchPageFlag != 0:
// For branch page we navigate ranges of all subpages.
runningMin := minKeyClosed
for i := range p.branchPageElements() {
elem := p.branchPageElement(uint16(i))
verifyKeyOrder(elem.pgid, "branch", i, elem.key(), runningMin, maxKeyOpen, ch, keyToString, pagesStack)
for i := range p.BranchPageElements() {
elem := p.BranchPageElement(uint16(i))
verifyKeyOrder(elem.Pgid(), "branch", i, elem.Key(), runningMin, maxKeyOpen, ch, keyToString, pagesStack)
maxKey := maxKeyOpen
if i < len(p.branchPageElements())-1 {
maxKey = p.branchPageElement(uint16(i + 1)).key()
if i < len(p.BranchPageElements())-1 {
maxKey = p.BranchPageElement(uint16(i + 1)).Key()
}
maxKeyInSubtree = tx.recursivelyCheckPagesInternal(elem.pgid, elem.key(), maxKey, pagesStack, keyToString, ch)
maxKeyInSubtree = tx.recursivelyCheckPagesInternal(elem.Pgid(), elem.Key(), maxKey, pagesStack, keyToString, ch)
runningMin = maxKeyInSubtree
}
return maxKeyInSubtree
case p.flags&leafPageFlag != 0:
case p.Flags()&common.LeafPageFlag != 0:
runningMin := minKeyClosed
for i := range p.leafPageElements() {
elem := p.leafPageElement(uint16(i))
verifyKeyOrder(pgId, "leaf", i, elem.key(), runningMin, maxKeyOpen, ch, keyToString, pagesStack)
runningMin = elem.key()
for i := range p.LeafPageElements() {
elem := p.LeafPageElement(uint16(i))
verifyKeyOrder(pgId, "leaf", i, elem.Key(), runningMin, maxKeyOpen, ch, keyToString, pagesStack)
runningMin = elem.Key()
}
if p.count > 0 {
return p.leafPageElement(p.count - 1).key()
if p.Count() > 0 {
return p.LeafPageElement(p.Count() - 1).Key()
}
default:
ch <- fmt.Errorf("unexpected page type for pgId:%d", pgId)
@ -168,7 +170,7 @@ func (tx *Tx) recursivelyCheckPagesInternal(
* verifyKeyOrder checks whether the entry at the given index on pgId (pageType: "branch|leaf"), with the given key,
* is within the range determined by (previousKey..maxKeyOpen), and reports any violations to the channel (ch).
*/
func verifyKeyOrder(pgId pgid, pageType string, index int, key []byte, previousKey []byte, maxKeyOpen []byte, ch chan error, keyToString func([]byte) string, pagesStack []pgid) {
func verifyKeyOrder(pgId common.Pgid, pageType string, index int, key []byte, previousKey []byte, maxKeyOpen []byte, ch chan error, keyToString func([]byte) string, pagesStack []common.Pgid) {
if index == 0 && previousKey != nil && compareKeys(previousKey, key) > 0 {
ch <- fmt.Errorf("the first key[%d]=(hex)%s on %s page(%d) needs to be >= the key in the ancestor (%s). Stack: %v",
index, keyToString(key), pageType, pgId, keyToString(previousKey), pagesStack)
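
Condensed, the invariants verifyKeyOrder enforces per page are: the first key must be >= the closed lower bound inherited from the ancestor, keys must be strictly increasing within the page, and every key must stay below the open upper bound. A compact sketch of those three checks (not the exact function):

package main

import (
	"bytes"
	"fmt"
)

// checkKeys applies the three ordering checks to one page's keys:
// key[0] >= minClosed, keys strictly increasing, every key < maxOpen.
func checkKeys(keys [][]byte, minClosed, maxOpen []byte) []error {
	var errs []error
	prev := minClosed
	for i, k := range keys {
		if i == 0 && prev != nil && bytes.Compare(prev, k) > 0 {
			errs = append(errs, fmt.Errorf("key[0]=%q below closed lower bound %q", k, prev))
		}
		if i > 0 && bytes.Compare(prev, k) >= 0 {
			errs = append(errs, fmt.Errorf("key[%d]=%q not greater than previous %q", i, k, prev))
		}
		if maxOpen != nil && bytes.Compare(k, maxOpen) >= 0 {
			errs = append(errs, fmt.Errorf("key[%d]=%q outside open upper bound %q", i, k, maxOpen))
		}
		prev = k
	}
	return errs
}

func main() {
	for _, e := range checkKeys([][]byte{[]byte("b"), []byte("a")}, []byte("a"), []byte("z")) {
		fmt.Println(e) // key[1]="a" not greater than previous "b"
	}
}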

View File

@ -15,6 +15,7 @@ import (
bolt "go.etcd.io/bbolt"
"go.etcd.io/bbolt/internal/btesting"
"go.etcd.io/bbolt/internal/common"
)
// TestTx_Check_ReadOnly tests consistency checking on a ReadOnly database.
@ -84,7 +85,7 @@ func TestTx_Commit_ErrTxClosed(t *testing.T) {
t.Fatal(err)
}
if err := tx.Commit(); err != bolt.ErrTxClosed {
if err := tx.Commit(); err != common.ErrTxClosed {
t.Fatalf("unexpected error: %s", err)
}
}
@ -101,7 +102,7 @@ func TestTx_Rollback_ErrTxClosed(t *testing.T) {
if err := tx.Rollback(); err != nil {
t.Fatal(err)
}
if err := tx.Rollback(); err != bolt.ErrTxClosed {
if err := tx.Rollback(); err != common.ErrTxClosed {
t.Fatalf("unexpected error: %s", err)
}
}
@ -113,7 +114,7 @@ func TestTx_Commit_ErrTxNotWritable(t *testing.T) {
if err != nil {
t.Fatal(err)
}
if err := tx.Commit(); err != bolt.ErrTxNotWritable {
if err := tx.Commit(); err != common.ErrTxNotWritable {
t.Fatal(err)
}
// Close the view transaction
@ -165,7 +166,7 @@ func TestTx_CreateBucket_ErrTxNotWritable(t *testing.T) {
db := btesting.MustCreateDB(t)
if err := db.View(func(tx *bolt.Tx) error {
_, err := tx.CreateBucket([]byte("foo"))
if err != bolt.ErrTxNotWritable {
if err != common.ErrTxNotWritable {
t.Fatalf("unexpected error: %s", err)
}
return nil
@ -185,7 +186,7 @@ func TestTx_CreateBucket_ErrTxClosed(t *testing.T) {
t.Fatal(err)
}
if _, err := tx.CreateBucket([]byte("foo")); err != bolt.ErrTxClosed {
if _, err := tx.CreateBucket([]byte("foo")); err != common.ErrTxClosed {
t.Fatalf("unexpected error: %s", err)
}
}
@ -293,11 +294,11 @@ func TestTx_CreateBucketIfNotExists(t *testing.T) {
func TestTx_CreateBucketIfNotExists_ErrBucketNameRequired(t *testing.T) {
db := btesting.MustCreateDB(t)
if err := db.Update(func(tx *bolt.Tx) error {
if _, err := tx.CreateBucketIfNotExists([]byte{}); err != bolt.ErrBucketNameRequired {
if _, err := tx.CreateBucketIfNotExists([]byte{}); err != common.ErrBucketNameRequired {
t.Fatalf("unexpected error: %s", err)
}
if _, err := tx.CreateBucketIfNotExists(nil); err != bolt.ErrBucketNameRequired {
if _, err := tx.CreateBucketIfNotExists(nil); err != common.ErrBucketNameRequired {
t.Fatalf("unexpected error: %s", err)
}
@ -323,7 +324,7 @@ func TestTx_CreateBucket_ErrBucketExists(t *testing.T) {
// Create the same bucket again.
if err := db.Update(func(tx *bolt.Tx) error {
if _, err := tx.CreateBucket([]byte("widgets")); err != bolt.ErrBucketExists {
if _, err := tx.CreateBucket([]byte("widgets")); err != common.ErrBucketExists {
t.Fatalf("unexpected error: %s", err)
}
return nil
@ -336,7 +337,7 @@ func TestTx_CreateBucket_ErrBucketExists(t *testing.T) {
func TestTx_CreateBucket_ErrBucketNameRequired(t *testing.T) {
db := btesting.MustCreateDB(t)
if err := db.Update(func(tx *bolt.Tx) error {
if _, err := tx.CreateBucket(nil); err != bolt.ErrBucketNameRequired {
if _, err := tx.CreateBucket(nil); err != common.ErrBucketNameRequired {
t.Fatalf("unexpected error: %s", err)
}
return nil
@ -401,7 +402,7 @@ func TestTx_DeleteBucket_ErrTxClosed(t *testing.T) {
if err := tx.Commit(); err != nil {
t.Fatal(err)
}
if err := tx.DeleteBucket([]byte("foo")); err != bolt.ErrTxClosed {
if err := tx.DeleteBucket([]byte("foo")); err != common.ErrTxClosed {
t.Fatalf("unexpected error: %s", err)
}
}
@ -410,7 +411,7 @@ func TestTx_DeleteBucket_ErrTxClosed(t *testing.T) {
func TestTx_DeleteBucket_ReadOnly(t *testing.T) {
db := btesting.MustCreateDB(t)
if err := db.View(func(tx *bolt.Tx) error {
if err := tx.DeleteBucket([]byte("foo")); err != bolt.ErrTxNotWritable {
if err := tx.DeleteBucket([]byte("foo")); err != common.ErrTxNotWritable {
t.Fatalf("unexpected error: %s", err)
}
return nil
@ -423,7 +424,7 @@ func TestTx_DeleteBucket_ReadOnly(t *testing.T) {
func TestTx_DeleteBucket_NotFound(t *testing.T) {
db := btesting.MustCreateDB(t)
if err := db.Update(func(tx *bolt.Tx) error {
if err := tx.DeleteBucket([]byte("widgets")); err != bolt.ErrBucketNotFound {
if err := tx.DeleteBucket([]byte("widgets")); err != common.ErrBucketNotFound {
t.Fatalf("unexpected error: %s", err)
}
return nil

View File

@ -1,39 +0,0 @@
package bbolt
import (
"reflect"
"unsafe"
)
func unsafeAdd(base unsafe.Pointer, offset uintptr) unsafe.Pointer {
return unsafe.Pointer(uintptr(base) + offset)
}
func unsafeIndex(base unsafe.Pointer, offset uintptr, elemsz uintptr, n int) unsafe.Pointer {
return unsafe.Pointer(uintptr(base) + offset + uintptr(n)*elemsz)
}
func unsafeByteSlice(base unsafe.Pointer, offset uintptr, i, j int) []byte {
// See: https://github.com/golang/go/wiki/cgo#turning-c-arrays-into-go-slices
//
// This memory is not allocated from C, but it is unmanaged by Go's
// garbage collector and should behave similarly, and the compiler
// should produce similar code. Note that this conversion allows a
// subslice to begin after the base address, with an optional offset,
// while the URL above does not cover this case and only slices from
// index 0. However, the wiki never says that the address must be to
// the beginning of a C allocation (or even that malloc was used at
// all), so this is believed to be correct.
return (*[maxAllocSize]byte)(unsafeAdd(base, offset))[i:j:j]
}
// unsafeSlice modifies the data, len, and cap of a slice variable pointed to by
// the slice parameter. This helper should be used over other direct
// manipulation of reflect.SliceHeader to prevent misuse, namely, converting
// from reflect.SliceHeader to a Go slice type.
func unsafeSlice(slice, data unsafe.Pointer, len int) {
s := (*reflect.SliceHeader)(slice)
s.Data = uintptr(data)
s.Cap = len
s.Len = len
}