From ea511567eb216de0ef8539eacbd56bed8d1aa2a7 Mon Sep 17 00:00:00 2001 From: Benjamin Wang Date: Sat, 28 Jan 2023 14:37:24 +0800 Subject: [PATCH] refactor both bolt and guts_cli based on the common package Signed-off-by: Benjamin Wang --- allocate_test.go | 10 +- bolt_unix.go | 4 +- bolt_windows.go | 6 +- bucket.go | 237 +++++++++++++------------- bucket_test.go | 31 ++-- cmd/bbolt/main.go | 43 +++-- cmd/bbolt/page_command.go | 22 +-- cmd/bbolt/surgery_commands.go | 6 +- cmd/bbolt/surgery_commands_test.go | 8 +- cursor.go | 86 +++++----- cursor_test.go | 3 +- db.go | 260 +++++++++------------------- db_test.go | 15 +- db_whitebox_test.go | 4 +- errors.go | 78 --------- freelist.go | 173 +++++++++---------- freelist_hmap.go | 50 +++--- freelist_test.go | 142 ++++++++-------- internal/btesting/btesting.go | 7 +- internal/guts_cli/guts_cli.go | 265 +++-------------------------- internal/surgeon/surgeon.go | 5 +- internal/surgeon/xray.go | 21 +-- node.go | 120 ++++++------- node_test.go | 43 +++-- page.go | 214 ----------------------- page_test.go | 72 -------- tx.go | 141 ++++++++------- tx_check.go | 74 ++++---- tx_test.go | 25 +-- unsafe.go | 39 ----- 30 files changed, 750 insertions(+), 1454 deletions(-) delete mode 100644 errors.go delete mode 100644 page.go delete mode 100644 page_test.go delete mode 100644 unsafe.go diff --git a/allocate_test.go b/allocate_test.go index 94e9116..9f08be1 100644 --- a/allocate_test.go +++ b/allocate_test.go @@ -2,20 +2,22 @@ package bbolt import ( "testing" + + "go.etcd.io/bbolt/internal/common" ) func TestTx_allocatePageStats(t *testing.T) { f := newTestFreelist() - ids := []pgid{2, 3} + ids := []common.Pgid{2, 3} f.readIDs(ids) tx := &Tx{ db: &DB{ freelist: f, - pageSize: defaultPageSize, + pageSize: common.DefaultPageSize, }, - meta: &meta{}, - pages: make(map[pgid]*page), + meta: &common.Meta{}, + pages: make(map[common.Pgid]*common.Page), } txStats := tx.Stats() diff --git a/bolt_unix.go b/bolt_unix.go index 757ae4d..e901e56 100644 --- a/bolt_unix.go +++ b/bolt_unix.go @@ -10,6 +10,8 @@ import ( "unsafe" "golang.org/x/sys/unix" + + "go.etcd.io/bbolt/internal/common" ) // flock acquires an advisory lock on a file descriptor. @@ -36,7 +38,7 @@ func flock(db *DB, exclusive bool, timeout time.Duration) error { // If we timed out then return an error. if timeout != 0 && time.Since(t) > timeout-flockRetryTimeout { - return ErrTimeout + return common.ErrTimeout } // Wait for a bit and try again. diff --git a/bolt_windows.go b/bolt_windows.go index e5dde27..1981c64 100644 --- a/bolt_windows.go +++ b/bolt_windows.go @@ -8,6 +8,8 @@ import ( "unsafe" "golang.org/x/sys/windows" + + "go.etcd.io/bbolt/internal/common" ) // fdatasync flushes written data to a file descriptor. @@ -42,7 +44,7 @@ func flock(db *DB, exclusive bool, timeout time.Duration) error { // If we timed oumercit then return an error. if timeout != 0 && time.Since(t) > timeout-flockRetryTimeout { - return ErrTimeout + return common.ErrTimeout } // Wait for a bit and try again. @@ -93,7 +95,7 @@ func mmap(db *DB, sz int) error { } // Convert to a byte array. 
- db.data = ((*[maxMapSize]byte)(unsafe.Pointer(addr))) + db.data = (*[maxMapSize]byte)(unsafe.Pointer(addr)) db.datasz = sz return nil diff --git a/bucket.go b/bucket.go index 054467a..0950f77 100644 --- a/bucket.go +++ b/bucket.go @@ -4,6 +4,8 @@ import ( "bytes" "fmt" "unsafe" + + "go.etcd.io/bbolt/internal/common" ) const ( @@ -14,8 +16,6 @@ const ( MaxValueSize = (1 << 31) - 2 ) -const bucketHeaderSize = int(unsafe.Sizeof(bucket{})) - const ( minFillPercent = 0.1 maxFillPercent = 1.0 @@ -27,12 +27,12 @@ const DefaultFillPercent = 0.5 // Bucket represents a collection of key/value pairs inside the database. type Bucket struct { - *bucket - tx *Tx // the associated transaction - buckets map[string]*Bucket // subbucket cache - page *page // inline page reference - rootNode *node // materialized node for the root page. - nodes map[pgid]*node // node cache + *common.InBucket + tx *Tx // the associated transaction + buckets map[string]*Bucket // subbucket cache + page *common.Page // inline page reference + rootNode *node // materialized node for the root page. + nodes map[common.Pgid]*node // node cache // Sets the threshold for filling nodes when they split. By default, // the bucket will fill to 50% but it can be useful to increase this @@ -42,21 +42,12 @@ type Bucket struct { FillPercent float64 } -// bucket represents the on-file representation of a bucket. -// This is stored as the "value" of a bucket key. If the bucket is small enough, -// then its root page can be stored inline in the "value", after the bucket -// header. In the case of inline buckets, the "root" will be 0. -type bucket struct { - root pgid // page id of the bucket's root-level page - sequence uint64 // monotonically incrementing, used by NextSequence() -} - // newBucket returns a new bucket associated with a transaction. func newBucket(tx *Tx) Bucket { var b = Bucket{tx: tx, FillPercent: DefaultFillPercent} if tx.writable { b.buckets = make(map[string]*Bucket) - b.nodes = make(map[pgid]*node) + b.nodes = make(map[common.Pgid]*node) } return b } @@ -67,8 +58,8 @@ func (b *Bucket) Tx() *Tx { } // Root returns the root of the bucket. -func (b *Bucket) Root() pgid { - return b.root +func (b *Bucket) Root() common.Pgid { + return b.RootPage() } // Writable returns whether the bucket is writable. @@ -105,7 +96,7 @@ func (b *Bucket) Bucket(name []byte) *Bucket { k, v, flags := c.seek(name) // Return nil if the key doesn't exist or it is not a bucket. - if !bytes.Equal(name, k) || (flags&bucketLeafFlag) == 0 { + if !bytes.Equal(name, k) || (flags&common.BucketLeafFlag) == 0 { return nil } @@ -125,8 +116,8 @@ func (b *Bucket) openBucket(value []byte) *Bucket { // Unaligned access requires a copy to be made. const unalignedMask = unsafe.Alignof(struct { - bucket - page + common.InBucket + common.Page }{}) - 1 unaligned := uintptr(unsafe.Pointer(&value[0]))&unalignedMask != 0 if unaligned { @@ -136,15 +127,15 @@ func (b *Bucket) openBucket(value []byte) *Bucket { // If this is a writable transaction then we need to copy the bucket entry. // Read-only transactions can point directly at the mmap entry. if b.tx.writable && !unaligned { - child.bucket = &bucket{} - *child.bucket = *(*bucket)(unsafe.Pointer(&value[0])) + child.InBucket = &common.InBucket{} + *child.InBucket = *(*common.InBucket)(unsafe.Pointer(&value[0])) } else { - child.bucket = (*bucket)(unsafe.Pointer(&value[0])) + child.InBucket = (*common.InBucket)(unsafe.Pointer(&value[0])) } // Save a reference to the inline page if the bucket is inline. 
- if child.root == 0 { - child.page = (*page)(unsafe.Pointer(&value[bucketHeaderSize])) + if child.RootPage() == 0 { + child.page = (*common.Page)(unsafe.Pointer(&value[common.BucketHeaderSize])) } return &child @@ -155,11 +146,11 @@ func (b *Bucket) openBucket(value []byte) *Bucket { // The bucket instance is only valid for the lifetime of the transaction. func (b *Bucket) CreateBucket(key []byte) (*Bucket, error) { if b.tx.db == nil { - return nil, ErrTxClosed + return nil, common.ErrTxClosed } else if !b.tx.writable { - return nil, ErrTxNotWritable + return nil, common.ErrTxNotWritable } else if len(key) == 0 { - return nil, ErrBucketNameRequired + return nil, common.ErrBucketNameRequired } // Move cursor to correct position. @@ -168,15 +159,15 @@ func (b *Bucket) CreateBucket(key []byte) (*Bucket, error) { // Return an error if there is an existing key. if bytes.Equal(key, k) { - if (flags & bucketLeafFlag) != 0 { - return nil, ErrBucketExists + if (flags & common.BucketLeafFlag) != 0 { + return nil, common.ErrBucketExists } - return nil, ErrIncompatibleValue + return nil, common.ErrIncompatibleValue } // Create empty, inline bucket. var bucket = Bucket{ - bucket: &bucket{}, + InBucket: &common.InBucket{}, rootNode: &node{isLeaf: true}, FillPercent: DefaultFillPercent, } @@ -184,7 +175,7 @@ func (b *Bucket) CreateBucket(key []byte) (*Bucket, error) { // Insert into node. key = cloneBytes(key) - c.node().put(key, key, value, 0, bucketLeafFlag) + c.node().put(key, key, value, 0, common.BucketLeafFlag) // Since subbuckets are not allowed on inline buckets, we need to // dereference the inline page, if it exists. This will cause the bucket @@ -199,7 +190,7 @@ func (b *Bucket) CreateBucket(key []byte) (*Bucket, error) { // The bucket instance is only valid for the lifetime of the transaction. func (b *Bucket) CreateBucketIfNotExists(key []byte) (*Bucket, error) { child, err := b.CreateBucket(key) - if err == ErrBucketExists { + if err == common.ErrBucketExists { return b.Bucket(key), nil } else if err != nil { return nil, err @@ -211,9 +202,9 @@ func (b *Bucket) CreateBucketIfNotExists(key []byte) (*Bucket, error) { // Returns an error if the bucket does not exist, or if the key represents a non-bucket value. func (b *Bucket) DeleteBucket(key []byte) error { if b.tx.db == nil { - return ErrTxClosed + return common.ErrTxClosed } else if !b.Writable() { - return ErrTxNotWritable + return common.ErrTxNotWritable } // Move cursor to correct position. @@ -222,9 +213,9 @@ func (b *Bucket) DeleteBucket(key []byte) error { // Return an error if bucket doesn't exist or is not a bucket. if !bytes.Equal(key, k) { - return ErrBucketNotFound - } else if (flags & bucketLeafFlag) == 0 { - return ErrIncompatibleValue + return common.ErrBucketNotFound + } else if (flags & common.BucketLeafFlag) == 0 { + return common.ErrIncompatibleValue } // Recursively delete all child buckets. @@ -260,7 +251,7 @@ func (b *Bucket) Get(key []byte) []byte { k, v, flags := b.Cursor().seek(key) // Return nil if this is a bucket. - if (flags & bucketLeafFlag) != 0 { + if (flags & common.BucketLeafFlag) != 0 { return nil } @@ -277,15 +268,15 @@ func (b *Bucket) Get(key []byte) []byte { // Returns an error if the bucket was created from a read-only transaction, if the key is blank, if the key is too large, or if the value is too large. 
func (b *Bucket) Put(key []byte, value []byte) error { if b.tx.db == nil { - return ErrTxClosed + return common.ErrTxClosed } else if !b.Writable() { - return ErrTxNotWritable + return common.ErrTxNotWritable } else if len(key) == 0 { - return ErrKeyRequired + return common.ErrKeyRequired } else if len(key) > MaxKeySize { - return ErrKeyTooLarge + return common.ErrKeyTooLarge } else if int64(len(value)) > MaxValueSize { - return ErrValueTooLarge + return common.ErrValueTooLarge } // Move cursor to correct position. @@ -293,8 +284,8 @@ func (b *Bucket) Put(key []byte, value []byte) error { k, _, flags := c.seek(key) // Return an error if there is an existing key with a bucket value. - if bytes.Equal(key, k) && (flags&bucketLeafFlag) != 0 { - return ErrIncompatibleValue + if bytes.Equal(key, k) && (flags&common.BucketLeafFlag) != 0 { + return common.ErrIncompatibleValue } // Insert into node. @@ -309,9 +300,9 @@ func (b *Bucket) Put(key []byte, value []byte) error { // Returns an error if the bucket was created from a read-only transaction. func (b *Bucket) Delete(key []byte) error { if b.tx.db == nil { - return ErrTxClosed + return common.ErrTxClosed } else if !b.Writable() { - return ErrTxNotWritable + return common.ErrTxNotWritable } // Move cursor to correct position. @@ -324,8 +315,8 @@ func (b *Bucket) Delete(key []byte) error { } // Return an error if there is already existing bucket value. - if (flags & bucketLeafFlag) != 0 { - return ErrIncompatibleValue + if (flags & common.BucketLeafFlag) != 0 { + return common.ErrIncompatibleValue } // Delete the node if we have a matching key. @@ -335,44 +326,46 @@ func (b *Bucket) Delete(key []byte) error { } // Sequence returns the current integer for the bucket without incrementing it. -func (b *Bucket) Sequence() uint64 { return b.bucket.sequence } +func (b *Bucket) Sequence() uint64 { + return b.InSequence() +} // SetSequence updates the sequence number for the bucket. func (b *Bucket) SetSequence(v uint64) error { if b.tx.db == nil { - return ErrTxClosed + return common.ErrTxClosed } else if !b.Writable() { - return ErrTxNotWritable + return common.ErrTxNotWritable } // Materialize the root node if it hasn't been already so that the // bucket will be saved during commit. if b.rootNode == nil { - _ = b.node(b.root, nil) + _ = b.node(b.RootPage(), nil) } // Set the sequence. - b.bucket.sequence = v + b.SetInSequence(v) return nil } // NextSequence returns an autoincrementing integer for the bucket. func (b *Bucket) NextSequence() (uint64, error) { if b.tx.db == nil { - return 0, ErrTxClosed + return 0, common.ErrTxClosed } else if !b.Writable() { - return 0, ErrTxNotWritable + return 0, common.ErrTxNotWritable } // Materialize the root node if it hasn't been already so that the // bucket will be saved during commit. if b.rootNode == nil { - _ = b.node(b.root, nil) + _ = b.node(b.RootPage(), nil) } // Increment and return the sequence. - b.bucket.sequence++ - return b.bucket.sequence, nil + b.IncSequence() + return b.Sequence(), nil } // ForEach executes a function for each key/value pair in a bucket. @@ -382,7 +375,7 @@ func (b *Bucket) NextSequence() (uint64, error) { // the bucket; this will result in undefined behavior. 
func (b *Bucket) ForEach(fn func(k, v []byte) error) error { if b.tx.db == nil { - return ErrTxClosed + return common.ErrTxClosed } c := b.Cursor() for k, v := c.First(); k != nil; k, v = c.Next() { @@ -395,11 +388,11 @@ func (b *Bucket) ForEach(fn func(k, v []byte) error) error { func (b *Bucket) ForEachBucket(fn func(k []byte) error) error { if b.tx.db == nil { - return ErrTxClosed + return common.ErrTxClosed } c := b.Cursor() for k, _, flags := c.first(); k != nil; k, _, flags = c.next() { - if flags&bucketLeafFlag != 0 { + if flags&common.BucketLeafFlag != 0 { if err := fn(k); err != nil { return err } @@ -413,64 +406,64 @@ func (b *Bucket) Stats() BucketStats { var s, subStats BucketStats pageSize := b.tx.db.pageSize s.BucketN += 1 - if b.root == 0 { + if b.RootPage() == 0 { s.InlineBucketN += 1 } - b.forEachPage(func(p *page, depth int, pgstack []pgid) { - if (p.flags & leafPageFlag) != 0 { - s.KeyN += int(p.count) + b.forEachPage(func(p *common.Page, depth int, pgstack []common.Pgid) { + if (p.Flags() & common.LeafPageFlag) != 0 { + s.KeyN += int(p.Count()) // used totals the used bytes for the page - used := pageHeaderSize + used := common.PageHeaderSize - if p.count != 0 { + if p.Count() != 0 { // If page has any elements, add all element headers. - used += leafPageElementSize * uintptr(p.count-1) + used += common.LeafPageElementSize * uintptr(p.Count()-1) // Add all element key, value sizes. // The computation takes advantage of the fact that the position // of the last element's key/value equals to the total of the sizes // of all previous elements' keys and values. // It also includes the last element's header. - lastElement := p.leafPageElement(p.count - 1) - used += uintptr(lastElement.pos + lastElement.ksize + lastElement.vsize) + lastElement := p.LeafPageElement(p.Count() - 1) + used += uintptr(lastElement.Pos() + lastElement.Ksize() + lastElement.Vsize()) } - if b.root == 0 { + if b.RootPage() == 0 { // For inlined bucket just update the inline stats s.InlineBucketInuse += int(used) } else { // For non-inlined bucket update all the leaf stats s.LeafPageN++ s.LeafInuse += int(used) - s.LeafOverflowN += int(p.overflow) + s.LeafOverflowN += int(p.Overflow()) // Collect stats from sub-buckets. // Do that by iterating over all element headers // looking for the ones with the bucketLeafFlag. - for i := uint16(0); i < p.count; i++ { - e := p.leafPageElement(i) - if (e.flags & bucketLeafFlag) != 0 { + for i := uint16(0); i < p.Count(); i++ { + e := p.LeafPageElement(i) + if (e.Flags() & common.BucketLeafFlag) != 0 { // For any bucket element, open the element value // and recursively call Stats on the contained bucket. - subStats.Add(b.openBucket(e.value()).Stats()) + subStats.Add(b.openBucket(e.Value()).Stats()) } } } - } else if (p.flags & branchPageFlag) != 0 { + } else if (p.Flags() & common.BranchPageFlag) != 0 { s.BranchPageN++ - lastElement := p.branchPageElement(p.count - 1) + lastElement := p.BranchPageElement(p.Count() - 1) // used totals the used bytes for the page // Add header and all element headers. - used := pageHeaderSize + (branchPageElementSize * uintptr(p.count-1)) + used := common.PageHeaderSize + (common.BranchPageElementSize * uintptr(p.Count()-1)) // Add size of all keys and values. // Again, use the fact that last element's position equals to // the total of key, value sizes of all previous elements. 
- used += uintptr(lastElement.pos + lastElement.ksize) + used += uintptr(lastElement.Pos() + lastElement.Ksize()) s.BranchInuse += int(used) - s.BranchOverflowN += int(p.overflow) + s.BranchOverflowN += int(p.Overflow()) } // Keep track of maximum page depth. @@ -491,29 +484,29 @@ func (b *Bucket) Stats() BucketStats { } // forEachPage iterates over every page in a bucket, including inline pages. -func (b *Bucket) forEachPage(fn func(*page, int, []pgid)) { +func (b *Bucket) forEachPage(fn func(*common.Page, int, []common.Pgid)) { // If we have an inline page then just use that. if b.page != nil { - fn(b.page, 0, []pgid{b.root}) + fn(b.page, 0, []common.Pgid{b.RootPage()}) return } // Otherwise traverse the page hierarchy. - b.tx.forEachPage(b.root, fn) + b.tx.forEachPage(b.RootPage(), fn) } // forEachPageNode iterates over every page (or node) in a bucket. // This also includes inline pages. -func (b *Bucket) forEachPageNode(fn func(*page, *node, int)) { +func (b *Bucket) forEachPageNode(fn func(*common.Page, *node, int)) { // If we have an inline page or root node then just use that. if b.page != nil { fn(b.page, nil, 0) return } - b._forEachPageNode(b.root, 0, fn) + b._forEachPageNode(b.RootPage(), 0, fn) } -func (b *Bucket) _forEachPageNode(pgId pgid, depth int, fn func(*page, *node, int)) { +func (b *Bucket) _forEachPageNode(pgId common.Pgid, depth int, fn func(*common.Page, *node, int)) { var p, n = b.pageNode(pgId) // Execute function. @@ -521,10 +514,10 @@ func (b *Bucket) _forEachPageNode(pgId pgid, depth int, fn func(*page, *node, in // Recursively loop over children. if p != nil { - if (p.flags & branchPageFlag) != 0 { - for i := 0; i < int(p.count); i++ { - elem := p.branchPageElement(uint16(i)) - b._forEachPageNode(elem.pgid, depth+1, fn) + if (p.Flags() & common.BranchPageFlag) != 0 { + for i := 0; i < int(p.Count()); i++ { + elem := p.BranchPageElement(uint16(i)) + b._forEachPageNode(elem.Pgid(), depth+1, fn) } } } else { @@ -553,9 +546,9 @@ func (b *Bucket) spill() error { } // Update the child bucket header in this bucket. - value = make([]byte, unsafe.Sizeof(bucket{})) - var bucket = (*bucket)(unsafe.Pointer(&value[0])) - *bucket = *child.bucket + value = make([]byte, unsafe.Sizeof(common.InBucket{})) + var bucket = (*common.InBucket)(unsafe.Pointer(&value[0])) + *bucket = *child.InBucket } // Skip writing the bucket if there are no materialized nodes. @@ -569,10 +562,10 @@ func (b *Bucket) spill() error { if !bytes.Equal([]byte(name), k) { panic(fmt.Sprintf("misplaced bucket header: %x -> %x", []byte(name), k)) } - if flags&bucketLeafFlag == 0 { + if flags&common.BucketLeafFlag == 0 { panic(fmt.Sprintf("unexpected bucket header flag: %x", flags)) } - c.node().put([]byte(name), []byte(name), value, 0, bucketLeafFlag) + c.node().put([]byte(name), []byte(name), value, 0, common.BucketLeafFlag) } // Ignore if there's not a materialized root node. @@ -587,16 +580,16 @@ func (b *Bucket) spill() error { b.rootNode = b.rootNode.root() // Update the root node for this bucket. - if b.rootNode.pgid >= b.tx.meta.pgid { - panic(fmt.Sprintf("pgid (%d) above high water mark (%d)", b.rootNode.pgid, b.tx.meta.pgid)) + if b.rootNode.pgid >= b.tx.meta.Pgid() { + panic(fmt.Sprintf("pgid (%d) above high water mark (%d)", b.rootNode.pgid, b.tx.meta.Pgid())) } - b.root = b.rootNode.pgid + b.SetRootPage(b.rootNode.pgid) return nil } // inlineable returns true if a bucket is small enough to be written inline -// and if it contains no subbuckets. Otherwise returns false. 
+// and if it contains no subbuckets. Otherwise, returns false. func (b *Bucket) inlineable() bool { var n = b.rootNode @@ -607,11 +600,11 @@ func (b *Bucket) inlineable() bool { // Bucket is not inlineable if it contains subbuckets or if it goes beyond // our threshold for inline bucket size. - var size = pageHeaderSize + var size = common.PageHeaderSize for _, inode := range n.inodes { - size += leafPageElementSize + uintptr(len(inode.key)) + uintptr(len(inode.value)) + size += common.LeafPageElementSize + uintptr(len(inode.key)) + uintptr(len(inode.value)) - if inode.flags&bucketLeafFlag != 0 { + if inode.flags&common.BucketLeafFlag != 0 { return false } else if size > b.maxInlineBucketSize() { return false @@ -630,14 +623,14 @@ func (b *Bucket) maxInlineBucketSize() uintptr { func (b *Bucket) write() []byte { // Allocate the appropriate size. var n = b.rootNode - var value = make([]byte, bucketHeaderSize+n.size()) + var value = make([]byte, common.BucketHeaderSize+n.size()) // Write a bucket header. - var bucket = (*bucket)(unsafe.Pointer(&value[0])) - *bucket = *b.bucket + var bucket = (*common.InBucket)(unsafe.Pointer(&value[0])) + *bucket = *b.InBucket // Convert byte slice to a fake page and write the root node. - var p = (*page)(unsafe.Pointer(&value[bucketHeaderSize])) + var p = (*common.Page)(unsafe.Pointer(&value[common.BucketHeaderSize])) n.write(p) return value @@ -654,8 +647,8 @@ func (b *Bucket) rebalance() { } // node creates a node from a page and associates it with a given parent. -func (b *Bucket) node(pgId pgid, parent *node) *node { - _assert(b.nodes != nil, "nodes map expected") +func (b *Bucket) node(pgId common.Pgid, parent *node) *node { + common.Assert(b.nodes != nil, "nodes map expected") // Retrieve node if it's already been created. if n := b.nodes[pgId]; n != nil { @@ -688,19 +681,19 @@ func (b *Bucket) node(pgId pgid, parent *node) *node { // free recursively frees all pages in the bucket. func (b *Bucket) free() { - if b.root == 0 { + if b.RootPage() == 0 { return } var tx = b.tx - b.forEachPageNode(func(p *page, n *node, _ int) { + b.forEachPageNode(func(p *common.Page, n *node, _ int) { if p != nil { - tx.db.freelist.free(tx.meta.txid, p) + tx.db.freelist.free(tx.meta.Txid(), p) } else { n.free() } }) - b.root = 0 + b.SetRootPage(0) } // dereference removes all references to the old mmap. @@ -715,11 +708,11 @@ func (b *Bucket) dereference() { } // pageNode returns the in-memory node, if it exists. -// Otherwise returns the underlying page. -func (b *Bucket) pageNode(id pgid) (*page, *node) { +// Otherwise, returns the underlying page. +func (b *Bucket) pageNode(id common.Pgid) (*common.Page, *node) { // Inline buckets have a fake page embedded in their value so treat them // differently. We'll return the rootNode (if available) or the fake page. - if b.root == 0 { + if b.RootPage() == 0 { if id != 0 { panic(fmt.Sprintf("inline bucket non-zero page access(2): %d != 0", id)) } diff --git a/bucket_test.go b/bucket_test.go index 1370612..33ff149 100644 --- a/bucket_test.go +++ b/bucket_test.go @@ -18,6 +18,7 @@ import ( bolt "go.etcd.io/bbolt" "go.etcd.io/bbolt/internal/btesting" + "go.etcd.io/bbolt/internal/common" ) // Ensure that a bucket that gets a non-existent key returns nil. 
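// Illustrative sketch (not from this patch): after this refactor the sentinel
// errors live in internal/common, so in-module callers and tests match against
// common.ErrBucketExists and friends, as the test changes below show. The
// package and helper names here are hypothetical, and internal/common is only
// importable from inside the go.etcd.io/bbolt module.
package example

import (
	"errors"

	bolt "go.etcd.io/bbolt"
	"go.etcd.io/bbolt/internal/common"
)

// ensureBucket creates the "widgets" bucket, treating "already exists" as success.
func ensureBucket(db *bolt.DB) error {
	return db.Update(func(tx *bolt.Tx) error {
		_, err := tx.CreateBucket([]byte("widgets"))
		if errors.Is(err, common.ErrBucketExists) {
			return nil // bucket already present; nothing to do
		}
		return err
	})
}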
@@ -246,7 +247,7 @@ func TestBucket_Put_IncompatibleValue(t *testing.T) { if _, err := tx.Bucket([]byte("widgets")).CreateBucket([]byte("foo")); err != nil { t.Fatal(err) } - if err := b0.Put([]byte("foo"), []byte("bar")); err != bolt.ErrIncompatibleValue { + if err := b0.Put([]byte("foo"), []byte("bar")); err != common.ErrIncompatibleValue { t.Fatalf("unexpected error: %s", err) } return nil @@ -272,7 +273,7 @@ func TestBucket_Put_Closed(t *testing.T) { t.Fatal(err) } - if err := b.Put([]byte("foo"), []byte("bar")); err != bolt.ErrTxClosed { + if err := b.Put([]byte("foo"), []byte("bar")); err != common.ErrTxClosed { t.Fatalf("unexpected error: %s", err) } } @@ -292,7 +293,7 @@ func TestBucket_Put_ReadOnly(t *testing.T) { if err := db.View(func(tx *bolt.Tx) error { b := tx.Bucket([]byte("widgets")) - if err := b.Put([]byte("foo"), []byte("bar")); err != bolt.ErrTxNotWritable { + if err := b.Put([]byte("foo"), []byte("bar")); err != common.ErrTxNotWritable { t.Fatalf("unexpected error: %s", err) } return nil @@ -560,7 +561,7 @@ func TestBucket_Delete_Bucket(t *testing.T) { if _, err := b.CreateBucket([]byte("foo")); err != nil { t.Fatal(err) } - if err := b.Delete([]byte("foo")); err != bolt.ErrIncompatibleValue { + if err := b.Delete([]byte("foo")); err != common.ErrIncompatibleValue { t.Fatalf("unexpected error: %s", err) } return nil @@ -583,7 +584,7 @@ func TestBucket_Delete_ReadOnly(t *testing.T) { } if err := db.View(func(tx *bolt.Tx) error { - if err := tx.Bucket([]byte("widgets")).Delete([]byte("foo")); err != bolt.ErrTxNotWritable { + if err := tx.Bucket([]byte("widgets")).Delete([]byte("foo")); err != common.ErrTxNotWritable { t.Fatalf("unexpected error: %s", err) } return nil @@ -609,7 +610,7 @@ func TestBucket_Delete_Closed(t *testing.T) { if err := tx.Rollback(); err != nil { t.Fatal(err) } - if err := b.Delete([]byte("foo")); err != bolt.ErrTxClosed { + if err := b.Delete([]byte("foo")); err != common.ErrTxClosed { t.Fatalf("unexpected error: %s", err) } } @@ -780,7 +781,7 @@ func TestBucket_CreateBucket_IncompatibleValue(t *testing.T) { if err := widgets.Put([]byte("foo"), []byte("bar")); err != nil { t.Fatal(err) } - if _, err := widgets.CreateBucket([]byte("foo")); err != bolt.ErrIncompatibleValue { + if _, err := widgets.CreateBucket([]byte("foo")); err != common.ErrIncompatibleValue { t.Fatalf("unexpected error: %s", err) } return nil @@ -801,7 +802,7 @@ func TestBucket_DeleteBucket_IncompatibleValue(t *testing.T) { if err := widgets.Put([]byte("foo"), []byte("bar")); err != nil { t.Fatal(err) } - if err := tx.Bucket([]byte("widgets")).DeleteBucket([]byte("foo")); err != bolt.ErrIncompatibleValue { + if err := tx.Bucket([]byte("widgets")).DeleteBucket([]byte("foo")); err != common.ErrIncompatibleValue { t.Fatalf("unexpected error: %s", err) } return nil @@ -943,7 +944,7 @@ func TestBucket_NextSequence_ReadOnly(t *testing.T) { if err := db.View(func(tx *bolt.Tx) error { _, err := tx.Bucket([]byte("widgets")).NextSequence() - if err != bolt.ErrTxNotWritable { + if err != common.ErrTxNotWritable { t.Fatalf("unexpected error: %s", err) } return nil @@ -966,7 +967,7 @@ func TestBucket_NextSequence_Closed(t *testing.T) { if err := tx.Rollback(); err != nil { t.Fatal(err) } - if _, err := b.NextSequence(); err != bolt.ErrTxClosed { + if _, err := b.NextSequence(); err != common.ErrTxClosed { t.Fatal(err) } } @@ -1158,7 +1159,7 @@ func TestBucket_ForEach_Closed(t *testing.T) { t.Fatal(err) } - if err := b.ForEach(func(k, v []byte) error { return nil }); err != bolt.ErrTxClosed { 
+ if err := b.ForEach(func(k, v []byte) error { return nil }); err != common.ErrTxClosed { t.Fatalf("unexpected error: %s", err) } } @@ -1172,10 +1173,10 @@ func TestBucket_Put_EmptyKey(t *testing.T) { if err != nil { t.Fatal(err) } - if err := b.Put([]byte(""), []byte("bar")); err != bolt.ErrKeyRequired { + if err := b.Put([]byte(""), []byte("bar")); err != common.ErrKeyRequired { t.Fatalf("unexpected error: %s", err) } - if err := b.Put(nil, []byte("bar")); err != bolt.ErrKeyRequired { + if err := b.Put(nil, []byte("bar")); err != common.ErrKeyRequired { t.Fatalf("unexpected error: %s", err) } return nil @@ -1192,7 +1193,7 @@ func TestBucket_Put_KeyTooLarge(t *testing.T) { if err != nil { t.Fatal(err) } - if err := b.Put(make([]byte, 32769), []byte("bar")); err != bolt.ErrKeyTooLarge { + if err := b.Put(make([]byte, 32769), []byte("bar")); err != common.ErrKeyTooLarge { t.Fatalf("unexpected error: %s", err) } return nil @@ -1215,7 +1216,7 @@ func TestBucket_Put_ValueTooLarge(t *testing.T) { if err != nil { t.Fatal(err) } - if err := b.Put([]byte("foo"), make([]byte, bolt.MaxValueSize+1)); err != bolt.ErrValueTooLarge { + if err := b.Put([]byte("foo"), make([]byte, bolt.MaxValueSize+1)); err != common.ErrValueTooLarge { t.Fatalf("unexpected error: %s", err) } return nil diff --git a/cmd/bbolt/main.go b/cmd/bbolt/main.go index 96661b6..a06dd08 100644 --- a/cmd/bbolt/main.go +++ b/cmd/bbolt/main.go @@ -18,11 +18,10 @@ import ( "time" "unicode" "unicode/utf8" - "unsafe" - - "go.etcd.io/bbolt/internal/guts_cli" bolt "go.etcd.io/bbolt" + "go.etcd.io/bbolt/internal/common" + "go.etcd.io/bbolt/internal/guts_cli" ) var ( @@ -52,12 +51,6 @@ var ( // ErrBucketRequired is returned when a bucket is not specified. ErrBucketRequired = errors.New("bucket required") - // ErrBucketNotFound is returned when a bucket is not found. - ErrBucketNotFound = errors.New("bucket not found") - - // ErrKeyRequired is returned when a key is not specified. - ErrKeyRequired = errors.New("key required") - // ErrKeyNotFound is returned when a key is not found. ErrKeyNotFound = errors.New("key not found") ) @@ -509,16 +502,16 @@ func (cmd *pageItemCommand) Run(args ...string) error { return nil } -// leafPageElement retrieves a leaf page element. -func (cmd *pageItemCommand) leafPageElement(pageBytes []byte, index uint16) (*guts_cli.LeafPageElement, error) { - p := (*guts_cli.Page)(unsafe.Pointer(&pageBytes[0])) +func (cmd *pageItemCommand) validateLeafPage(pageBytes []byte, index uint16) (*common.Page, error) { + p := common.LoadPage(pageBytes) if index >= p.Count() { - return nil, fmt.Errorf("leafPageElement: expected item index less than %d, but got %d.", p.Count(), index) + return nil, fmt.Errorf("leafPageElement: expected item index less than %d, but got %d", p.Count(), index) } - if p.Type() != "leaf" { - return nil, fmt.Errorf("leafPageElement: expected page type of 'leaf', but got '%s'", p.Type()) + if p.Typ() != "leaf" { + return nil, fmt.Errorf("leafPageElement: expected page type of 'leaf', but got '%s'", p.Typ()) } - return p.LeafPageElement(index), nil + + return p, nil } const FORMAT_MODES = "auto|ascii-encoded|hex|bytes|redacted" @@ -568,19 +561,21 @@ func writelnBytes(w io.Writer, b []byte, format string) error { // PrintLeafItemKey writes the bytes of a leaf element's key. 
func (cmd *pageItemCommand) PrintLeafItemKey(w io.Writer, pageBytes []byte, index uint16, format string) error { - e, err := cmd.leafPageElement(pageBytes, index) + p, err := cmd.validateLeafPage(pageBytes, index) if err != nil { return err } + e := p.LeafPageElement(index) return writelnBytes(w, e.Key(), format) } -// PrintLeafItemKey writes the bytes of a leaf element's value. +// PrintLeafItemValue writes the bytes of a leaf element's value. func (cmd *pageItemCommand) PrintLeafItemValue(w io.Writer, pageBytes []byte, index uint16, format string) error { - e, err := cmd.leafPageElement(pageBytes, index) + p, err := cmd.validateLeafPage(pageBytes, index) if err != nil { return err } + e := p.LeafPageElement(index) return writelnBytes(w, e.Value(), format) } @@ -931,12 +926,12 @@ func (cmd *keysCommand) Run(args ...string) error { // Find bucket. var lastbucket *bolt.Bucket = tx.Bucket([]byte(buckets[0])) if lastbucket == nil { - return ErrBucketNotFound + return common.ErrBucketNotFound } for _, bucket := range buckets[1:] { lastbucket = lastbucket.Bucket([]byte(bucket)) if lastbucket == nil { - return ErrBucketNotFound + return common.ErrBucketNotFound } } @@ -1007,7 +1002,7 @@ func (cmd *getCommand) Run(args ...string) error { } else if len(buckets) == 0 { return ErrBucketRequired } else if len(key) == 0 { - return ErrKeyRequired + return common.ErrKeyRequired } // Open database. @@ -1022,12 +1017,12 @@ func (cmd *getCommand) Run(args ...string) error { // Find bucket. var lastbucket *bolt.Bucket = tx.Bucket([]byte(buckets[0])) if lastbucket == nil { - return ErrBucketNotFound + return common.ErrBucketNotFound } for _, bucket := range buckets[1:] { lastbucket = lastbucket.Bucket([]byte(bucket)) if lastbucket == nil { - return ErrBucketNotFound + return common.ErrBucketNotFound } } diff --git a/cmd/bbolt/page_command.go b/cmd/bbolt/page_command.go index 6789ba5..c608d84 100644 --- a/cmd/bbolt/page_command.go +++ b/cmd/bbolt/page_command.go @@ -8,6 +8,7 @@ import ( "os" "strings" + "go.etcd.io/bbolt/internal/common" "go.etcd.io/bbolt/internal/guts_cli" ) @@ -113,12 +114,12 @@ func (cmd *pageCommand) printPage(path string, pageID uint64, formatValue string // Print basic page info. fmt.Fprintf(cmd.Stdout, "Page ID: %d\n", p.Id()) - fmt.Fprintf(cmd.Stdout, "Page Type: %s\n", p.Type()) + fmt.Fprintf(cmd.Stdout, "Page Type: %s\n", p.Typ()) fmt.Fprintf(cmd.Stdout, "Total Size: %d bytes\n", len(buf)) fmt.Fprintf(cmd.Stdout, "Overflow pages: %d\n", p.Overflow()) // Print type-specific data. - switch p.Type() { + switch p.Typ() { case "meta": err = cmd.PrintMeta(cmd.Stdout, buf) case "leaf": @@ -136,14 +137,14 @@ func (cmd *pageCommand) printPage(path string, pageID uint64, formatValue string // PrintMeta prints the data from the meta page. func (cmd *pageCommand) PrintMeta(w io.Writer, buf []byte) error { - m := guts_cli.LoadPageMeta(buf) + m := common.LoadPageMeta(buf) m.Print(w) return nil } // PrintLeaf prints the data for a leaf page. func (cmd *pageCommand) PrintLeaf(w io.Writer, buf []byte, formatValue string) error { - p := guts_cli.LoadPage(buf) + p := common.LoadPage(buf) // Print number of items. fmt.Fprintf(w, "Item Count: %d\n", p.Count()) @@ -182,7 +183,7 @@ func (cmd *pageCommand) PrintLeaf(w io.Writer, buf []byte, formatValue string) e // PrintBranch prints the data for a leaf page. func (cmd *pageCommand) PrintBranch(w io.Writer, buf []byte) error { - p := guts_cli.LoadPage(buf) + p := common.LoadPage(buf) // Print number of items. 
fmt.Fprintf(w, "Item Count: %d\n", p.Count()) @@ -200,7 +201,7 @@ func (cmd *pageCommand) PrintBranch(w io.Writer, buf []byte) error { k = fmt.Sprintf("%x", string(e.Key())) } - fmt.Fprintf(w, "%s: \n", k, e.PgId()) + fmt.Fprintf(w, "%s: \n", k, e.Pgid()) } fmt.Fprintf(w, "\n") return nil @@ -208,16 +209,17 @@ func (cmd *pageCommand) PrintBranch(w io.Writer, buf []byte) error { // PrintFreelist prints the data for a freelist page. func (cmd *pageCommand) PrintFreelist(w io.Writer, buf []byte) error { - p := guts_cli.LoadPage(buf) + p := common.LoadPage(buf) // Print number of items. - fmt.Fprintf(w, "Item Count: %d\n", p.FreelistPageCount()) + _, cnt := p.FreelistPageCount() + fmt.Fprintf(w, "Item Count: %d\n", cnt) fmt.Fprintf(w, "Overflow: %d\n", p.Overflow()) fmt.Fprintf(w, "\n") // Print each page in the freelist. - ids := p.FreelistPagePages() + ids := p.FreelistPageIds() for _, ids := range ids { fmt.Fprintf(w, "%d\n", ids) } @@ -244,7 +246,7 @@ func (cmd *pageCommand) PrintPage(w io.Writer, r io.ReaderAt, pageID int, pageSi for offset := 0; offset < pageSize; offset += bytesPerLineN { // Retrieve current 16-byte line. line := buf[offset : offset+bytesPerLineN] - isLastLine := (offset == (pageSize - bytesPerLineN)) + isLastLine := offset == (pageSize - bytesPerLineN) // If it's the same as the previous line then print a skip. if bytes.Equal(line, prev) && !isLastLine { diff --git a/cmd/bbolt/surgery_commands.go b/cmd/bbolt/surgery_commands.go index 9685d3a..ace121f 100644 --- a/cmd/bbolt/surgery_commands.go +++ b/cmd/bbolt/surgery_commands.go @@ -9,7 +9,7 @@ import ( "strconv" "strings" - "go.etcd.io/bbolt/internal/guts_cli" + "go.etcd.io/bbolt/internal/common" "go.etcd.io/bbolt/internal/surgeon" ) @@ -224,7 +224,7 @@ func (cmd *copyPageCommand) Run(args ...string) error { } // copy the page - if err := surgeon.CopyPage(cmd.dstPath, guts_cli.Pgid(srcPageId), guts_cli.Pgid(dstPageId)); err != nil { + if err := surgeon.CopyPage(cmd.dstPath, common.Pgid(srcPageId), common.Pgid(dstPageId)); err != nil { return fmt.Errorf("copyPageCommand failed: %w", err) } @@ -279,7 +279,7 @@ func (cmd *clearPageCommand) Run(args ...string) error { return err } - if err := surgeon.ClearPage(cmd.dstPath, guts_cli.Pgid(pageId)); err != nil { + if err := surgeon.ClearPage(cmd.dstPath, common.Pgid(pageId)); err != nil { return fmt.Errorf("clearPageCommand failed: %w", err) } diff --git a/cmd/bbolt/surgery_commands_test.go b/cmd/bbolt/surgery_commands_test.go index 9978368..8d96eb3 100644 --- a/cmd/bbolt/surgery_commands_test.go +++ b/cmd/bbolt/surgery_commands_test.go @@ -11,7 +11,7 @@ import ( bolt "go.etcd.io/bbolt" "go.etcd.io/bbolt/internal/btesting" - "go.etcd.io/bbolt/internal/guts_cli" + "go.etcd.io/bbolt/internal/common" ) func TestSurgery_RevertMetaPage(t *testing.T) { @@ -28,8 +28,8 @@ func TestSurgery_RevertMetaPage(t *testing.T) { // Read both meta0 and meta1 from srcFile srcBuf0 := readPage(t, srcPath, 0, pageSize) srcBuf1 := readPage(t, srcPath, 1, pageSize) - meta0Page := guts_cli.LoadPageMeta(srcBuf0) - meta1Page := guts_cli.LoadPageMeta(srcBuf1) + meta0Page := common.LoadPageMeta(srcBuf0) + meta1Page := common.LoadPageMeta(srcBuf1) // Get the non-active meta page nonActiveSrcBuf := srcBuf0 @@ -115,7 +115,7 @@ func TestSurgery_ClearPage(t *testing.T) { t.Log("Verify result") dstPageId3Data := readPage(t, dstPath, 3, pageSize) - p := guts_cli.LoadPage(dstPageId3Data) + p := common.LoadPage(dstPageId3Data) assert.Equal(t, uint16(0), p.Count()) assert.Equal(t, uint32(0), p.Overflow()) } diff 
--git a/cursor.go b/cursor.go index 5dafb0c..f08da54 100644 --- a/cursor.go +++ b/cursor.go @@ -4,6 +4,8 @@ import ( "bytes" "fmt" "sort" + + "go.etcd.io/bbolt/internal/common" ) // Cursor represents an iterator that can traverse over all key/value pairs in a bucket @@ -30,9 +32,9 @@ func (c *Cursor) Bucket() *Bucket { // If the bucket is empty then a nil key and value are returned. // The returned key and value are only valid for the life of the transaction. func (c *Cursor) First() (key []byte, value []byte) { - _assert(c.bucket.tx.db != nil, "tx closed") + common.Assert(c.bucket.tx.db != nil, "tx closed") k, v, flags := c.first() - if (flags & uint32(bucketLeafFlag)) != 0 { + if (flags & uint32(common.BucketLeafFlag)) != 0 { return k, nil } return k, v @@ -40,7 +42,7 @@ func (c *Cursor) First() (key []byte, value []byte) { func (c *Cursor) first() (key []byte, value []byte, flags uint32) { c.stack = c.stack[:0] - p, n := c.bucket.pageNode(c.bucket.root) + p, n := c.bucket.pageNode(c.bucket.RootPage()) c.stack = append(c.stack, elemRef{page: p, node: n, index: 0}) c.goToFirstElementOnTheStack() @@ -51,7 +53,7 @@ func (c *Cursor) first() (key []byte, value []byte, flags uint32) { } k, v, flags := c.keyValue() - if (flags & uint32(bucketLeafFlag)) != 0 { + if (flags & uint32(common.BucketLeafFlag)) != 0 { return k, nil, flags } return k, v, flags @@ -61,9 +63,9 @@ func (c *Cursor) first() (key []byte, value []byte, flags uint32) { // If the bucket is empty then a nil key and value are returned. // The returned key and value are only valid for the life of the transaction. func (c *Cursor) Last() (key []byte, value []byte) { - _assert(c.bucket.tx.db != nil, "tx closed") + common.Assert(c.bucket.tx.db != nil, "tx closed") c.stack = c.stack[:0] - p, n := c.bucket.pageNode(c.bucket.root) + p, n := c.bucket.pageNode(c.bucket.RootPage()) ref := elemRef{page: p, node: n} ref.index = ref.count() - 1 c.stack = append(c.stack, ref) @@ -80,7 +82,7 @@ func (c *Cursor) Last() (key []byte, value []byte) { } k, v, flags := c.keyValue() - if (flags & uint32(bucketLeafFlag)) != 0 { + if (flags & uint32(common.BucketLeafFlag)) != 0 { return k, nil } return k, v @@ -90,9 +92,9 @@ func (c *Cursor) Last() (key []byte, value []byte) { // If the cursor is at the end of the bucket then a nil key and value are returned. // The returned key and value are only valid for the life of the transaction. func (c *Cursor) Next() (key []byte, value []byte) { - _assert(c.bucket.tx.db != nil, "tx closed") + common.Assert(c.bucket.tx.db != nil, "tx closed") k, v, flags := c.next() - if (flags & uint32(bucketLeafFlag)) != 0 { + if (flags & uint32(common.BucketLeafFlag)) != 0 { return k, nil } return k, v @@ -102,9 +104,9 @@ func (c *Cursor) Next() (key []byte, value []byte) { // If the cursor is at the beginning of the bucket then a nil key and value are returned. // The returned key and value are only valid for the life of the transaction. func (c *Cursor) Prev() (key []byte, value []byte) { - _assert(c.bucket.tx.db != nil, "tx closed") + common.Assert(c.bucket.tx.db != nil, "tx closed") k, v, flags := c.prev() - if (flags & uint32(bucketLeafFlag)) != 0 { + if (flags & uint32(common.BucketLeafFlag)) != 0 { return k, nil } return k, v @@ -115,7 +117,7 @@ func (c *Cursor) Prev() (key []byte, value []byte) { // follow, a nil key is returned. // The returned key and value are only valid for the life of the transaction. 
func (c *Cursor) Seek(seek []byte) (key []byte, value []byte) { - _assert(c.bucket.tx.db != nil, "tx closed") + common.Assert(c.bucket.tx.db != nil, "tx closed") k, v, flags := c.seek(seek) @@ -126,7 +128,7 @@ func (c *Cursor) Seek(seek []byte) (key []byte, value []byte) { if k == nil { return nil, nil - } else if (flags & uint32(bucketLeafFlag)) != 0 { + } else if (flags & uint32(common.BucketLeafFlag)) != 0 { return k, nil } return k, v @@ -136,15 +138,15 @@ func (c *Cursor) Seek(seek []byte) (key []byte, value []byte) { // Delete fails if current key/value is a bucket or if the transaction is not writable. func (c *Cursor) Delete() error { if c.bucket.tx.db == nil { - return ErrTxClosed + return common.ErrTxClosed } else if !c.bucket.Writable() { - return ErrTxNotWritable + return common.ErrTxNotWritable } key, _, flags := c.keyValue() // Return an error if current value is a bucket. - if (flags & bucketLeafFlag) != 0 { - return ErrIncompatibleValue + if (flags & common.BucketLeafFlag) != 0 { + return common.ErrIncompatibleValue } c.node().del(key) @@ -156,7 +158,7 @@ func (c *Cursor) Delete() error { func (c *Cursor) seek(seek []byte) (key []byte, value []byte, flags uint32) { // Start from root page/node and traverse to correct page. c.stack = c.stack[:0] - c.search(seek, c.bucket.root) + c.search(seek, c.bucket.RootPage()) // If this is a bucket then return a nil value. return c.keyValue() @@ -172,11 +174,11 @@ func (c *Cursor) goToFirstElementOnTheStack() { } // Keep adding pages pointing to the first element to the stack. - var pgId pgid + var pgId common.Pgid if ref.node != nil { pgId = ref.node.inodes[ref.index].pgid } else { - pgId = ref.page.branchPageElement(uint16(ref.index)).pgid + pgId = ref.page.BranchPageElement(uint16(ref.index)).Pgid() } p, n := c.bucket.pageNode(pgId) c.stack = append(c.stack, elemRef{page: p, node: n, index: 0}) @@ -193,11 +195,11 @@ func (c *Cursor) last() { } // Keep adding pages pointing to the last element in the stack. - var pgId pgid + var pgId common.Pgid if ref.node != nil { pgId = ref.node.inodes[ref.index].pgid } else { - pgId = ref.page.branchPageElement(uint16(ref.index)).pgid + pgId = ref.page.BranchPageElement(uint16(ref.index)).Pgid() } p, n := c.bucket.pageNode(pgId) @@ -268,10 +270,10 @@ func (c *Cursor) prev() (key []byte, value []byte, flags uint32) { } // search recursively performs a binary search against a given page/node until it finds a given key. -func (c *Cursor) search(key []byte, pgId pgid) { +func (c *Cursor) search(key []byte, pgId common.Pgid) { p, n := c.bucket.pageNode(pgId) - if p != nil && (p.flags&(branchPageFlag|leafPageFlag)) == 0 { - panic(fmt.Sprintf("invalid page type: %d: %x", p.id, p.flags)) + if p != nil && (p.Flags()&(common.BranchPageFlag|common.LeafPageFlag)) == 0 { + panic(fmt.Sprintf("invalid page type: %d: %x", p.Id(), p.Flags())) } e := elemRef{page: p, node: n} c.stack = append(c.stack, e) @@ -309,15 +311,15 @@ func (c *Cursor) searchNode(key []byte, n *node) { c.search(key, n.inodes[index].pgid) } -func (c *Cursor) searchPage(key []byte, p *page) { +func (c *Cursor) searchPage(key []byte, p *common.Page) { // Binary search for the correct range. - inodes := p.branchPageElements() + inodes := p.BranchPageElements() var exact bool - index := sort.Search(int(p.count), func(i int) bool { + index := sort.Search(int(p.Count()), func(i int) bool { // TODO(benbjohnson): Optimize this range search. It's a bit hacky right now. 
// sort.Search() finds the lowest index where f() != -1 but we need the highest index. - ret := bytes.Compare(inodes[i].key(), key) + ret := bytes.Compare(inodes[i].Key(), key) if ret == 0 { exact = true } @@ -329,7 +331,7 @@ func (c *Cursor) searchPage(key []byte, p *page) { c.stack[len(c.stack)-1].index = index // Recursively search to the next page. - c.search(key, inodes[index].pgid) + c.search(key, inodes[index].Pgid()) } // nsearch searches the leaf node on the top of the stack for a key. @@ -347,9 +349,9 @@ func (c *Cursor) nsearch(key []byte) { } // If we have a page then search its leaf elements. - inodes := p.leafPageElements() - index := sort.Search(int(p.count), func(i int) bool { - return bytes.Compare(inodes[i].key(), key) != -1 + inodes := p.LeafPageElements() + index := sort.Search(int(p.Count()), func(i int) bool { + return bytes.Compare(inodes[i].Key(), key) != -1 }) e.index = index } @@ -370,13 +372,13 @@ func (c *Cursor) keyValue() ([]byte, []byte, uint32) { } // Or retrieve value from page. - elem := ref.page.leafPageElement(uint16(ref.index)) - return elem.key(), elem.value(), elem.flags + elem := ref.page.LeafPageElement(uint16(ref.index)) + return elem.Key(), elem.Value(), elem.Flags() } // node returns the node that the cursor is currently positioned on. func (c *Cursor) node() *node { - _assert(len(c.stack) > 0, "accessing a node with a zero-length cursor stack") + common.Assert(len(c.stack) > 0, "accessing a node with a zero-length cursor stack") // If the top of the stack is a leaf node then just return it. if ref := &c.stack[len(c.stack)-1]; ref.node != nil && ref.isLeaf() { @@ -386,19 +388,19 @@ func (c *Cursor) node() *node { // Start from root and traverse down the hierarchy. var n = c.stack[0].node if n == nil { - n = c.bucket.node(c.stack[0].page.id, nil) + n = c.bucket.node(c.stack[0].page.Id(), nil) } for _, ref := range c.stack[:len(c.stack)-1] { - _assert(!n.isLeaf, "expected branch node") + common.Assert(!n.isLeaf, "expected branch node") n = n.childAt(ref.index) } - _assert(n.isLeaf, "expected leaf node") + common.Assert(n.isLeaf, "expected leaf node") return n } // elemRef represents a reference to an element on a given page/node. type elemRef struct { - page *page + page *common.Page node *node index int } @@ -408,7 +410,7 @@ func (r *elemRef) isLeaf() bool { if r.node != nil { return r.node.isLeaf } - return (r.page.flags & leafPageFlag) != 0 + return (r.page.Flags() & common.LeafPageFlag) != 0 } // count returns the number of inodes or page elements. @@ -416,5 +418,5 @@ func (r *elemRef) count() int { if r.node != nil { return len(r.node.inodes) } - return int(r.page.count) + return int(r.page.Count()) } diff --git a/cursor_test.go b/cursor_test.go index 8e112c1..8fff82e 100644 --- a/cursor_test.go +++ b/cursor_test.go @@ -13,6 +13,7 @@ import ( bolt "go.etcd.io/bbolt" "go.etcd.io/bbolt/internal/btesting" + "go.etcd.io/bbolt/internal/common" ) // Ensure that a cursor can return a reference to the bucket that created it. 
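// Illustrative sketch (not from this patch): the cursor changes above are a
// mechanical rename (page/pgid -> common.Page/common.Pgid), so the public
// iteration pattern is unchanged. A minimal example against the public API;
// the package and function names here are hypothetical.
package example

import (
	"fmt"

	bolt "go.etcd.io/bbolt"
)

// dumpBucket prints every key/value pair in the named bucket, if it exists.
func dumpBucket(db *bolt.DB, name []byte) error {
	return db.View(func(tx *bolt.Tx) error {
		b := tx.Bucket(name)
		if b == nil {
			return nil // Bucket returns nil for a missing bucket
		}
		c := b.Cursor()
		for k, v := c.First(); k != nil; k, v = c.Next() {
			fmt.Printf("%s=%s\n", k, v)
		}
		return nil
	})
}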
@@ -139,7 +140,7 @@ func TestCursor_Delete(t *testing.T) { } c.Seek([]byte("sub")) - if err := c.Delete(); err != bolt.ErrIncompatibleValue { + if err := c.Delete(); err != common.ErrIncompatibleValue { t.Fatalf("unexpected error: %s", err) } diff --git a/db.go b/db.go index 5f45d96..5e125d6 100644 --- a/db.go +++ b/db.go @@ -3,7 +3,6 @@ package bbolt import ( "errors" "fmt" - "hash/fnv" "io" "os" "runtime" @@ -11,48 +10,13 @@ import ( "sync" "time" "unsafe" + + "go.etcd.io/bbolt/internal/common" ) -// The largest step that can be taken when remapping the mmap. -const maxMmapStep = 1 << 30 // 1GB - -// The data file format version. -const version = 2 - -// Represents a marker value to indicate that a file is a Bolt DB. -const magic uint32 = 0xED0CDAED - -const pgidNoFreelist pgid = 0xffffffffffffffff - -// IgnoreNoSync specifies whether the NoSync field of a DB is ignored when -// syncing changes to a file. This is required as some operating systems, -// such as OpenBSD, do not have a unified buffer cache (UBC) and writes -// must be synchronized using the msync(2) syscall. -const IgnoreNoSync = runtime.GOOS == "openbsd" - -// Default values if not set in a DB instance. -const ( - DefaultMaxBatchSize int = 1000 - DefaultMaxBatchDelay = 10 * time.Millisecond - DefaultAllocSize = 16 * 1024 * 1024 -) - -// default page size for db is set to the OS page size. -var defaultPageSize = os.Getpagesize() - // The time elapsed between consecutive file locking attempts. const flockRetryTimeout = 50 * time.Millisecond -// FreelistType is the type of the freelist backend -type FreelistType string - -const ( - // FreelistArrayType indicates backend freelist type is array - FreelistArrayType = FreelistType("array") - // FreelistMapType indicates backend freelist type is hashmap - FreelistMapType = FreelistType("hashmap") -) - // DB represents a collection of buckets persisted to a file on disk. // All data access is performed through transactions which can be obtained through the DB. // All the functions on DB will return a ErrDatabaseNotOpen if accessed before Open() is called. @@ -85,7 +49,7 @@ type DB struct { // The alternative one is using hashmap, it is faster in almost all circumstances // but it doesn't guarantee that it offers the smallest page id available. In normal case it is safe. // The default type is array - FreelistType FreelistType + FreelistType common.FreelistType // When true, skips the truncate call when growing the database. // Setting this to true is only safe on non-ext3/ext4 systems. @@ -141,8 +105,8 @@ type DB struct { data *[maxMapSize]byte datasz int filesz int // current on disk file size - meta0 *meta - meta1 *meta + meta0 *common.Meta + meta1 *common.Meta pageSize int opened bool rwtx *Tx @@ -206,9 +170,9 @@ func Open(path string, mode os.FileMode, options *Options) (*DB, error) { db.Mlock = options.Mlock // Set default values for later DB operations. - db.MaxBatchSize = DefaultMaxBatchSize - db.MaxBatchDelay = DefaultMaxBatchDelay - db.AllocSize = DefaultAllocSize + db.MaxBatchSize = common.DefaultMaxBatchSize + db.MaxBatchDelay = common.DefaultMaxBatchDelay + db.AllocSize = common.DefaultAllocSize flag := os.O_RDWR if options.ReadOnly { @@ -249,7 +213,7 @@ func Open(path string, mode os.FileMode, options *Options) (*DB, error) { if db.pageSize = options.PageSize; db.pageSize == 0 { // Set the default page size to the OS page size. - db.pageSize = defaultPageSize + db.pageSize = common.DefaultPageSize } // Initialize the database if it doesn't exist. 
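// Illustrative sketch (not from this patch): with Meta moved into
// internal/common, a raw meta page can be decoded the same way the surgery
// tests above do (common.LoadPageMeta, Meta.Validate, Meta.PageSize). The
// package and helper names are hypothetical, this assumes in-module code, and
// the fixed 4 KiB read covers meta page 0 for the default OS page size;
// databases created with a smaller custom page size would need a smaller read.
package example

import (
	"os"

	"go.etcd.io/bbolt/internal/common"
)

// readPageSize returns the page size recorded in meta page 0 of a bbolt file.
func readPageSize(path string) (uint32, error) {
	f, err := os.Open(path)
	if err != nil {
		return 0, err
	}
	defer f.Close()

	buf := make([]byte, 4096) // meta 0 starts at offset 0
	if _, err := f.ReadAt(buf, 0); err != nil {
		return 0, err
	}

	m := common.LoadPageMeta(buf)
	if err := m.Validate(); err != nil { // checks magic, version, checksum
		return 0, err
	}
	return m.PageSize(), nil
}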
@@ -269,7 +233,7 @@ func Open(path string, mode os.FileMode, options *Options) (*DB, error) { db.pageSize = pgSize } else { _ = db.close() - return nil, ErrInvalid + return nil, common.ErrInvalid } } @@ -347,7 +311,7 @@ func (db *DB) getPageSize() (int, error) { return db.pageSize, nil } - return 0, ErrInvalid + return 0, common.ErrInvalid } // getPageSizeFromFirstMeta reads the pageSize from the first meta page @@ -356,11 +320,11 @@ func (db *DB) getPageSizeFromFirstMeta() (int, bool, error) { var metaCanRead bool if bw, err := db.file.ReadAt(buf[:], 0); err == nil && bw == len(buf) { metaCanRead = true - if m := db.pageInBuffer(buf[:], 0).meta(); m.validate() == nil { - return int(m.pageSize), metaCanRead, nil + if m := db.pageInBuffer(buf[:], 0).Meta(); m.Validate() == nil { + return int(m.PageSize()), metaCanRead, nil } } - return 0, metaCanRead, ErrInvalid + return 0, metaCanRead, common.ErrInvalid } // getPageSizeFromSecondMeta reads the pageSize from the second meta page @@ -392,13 +356,13 @@ func (db *DB) getPageSizeFromSecondMeta() (int, bool, error) { bw, err := db.file.ReadAt(buf[:], pos) if (err == nil && bw == len(buf)) || (err == io.EOF && int64(bw) == (fileSize-pos)) { metaCanRead = true - if m := db.pageInBuffer(buf[:], 0).meta(); m.validate() == nil { - return int(m.pageSize), metaCanRead, nil + if m := db.pageInBuffer(buf[:], 0).Meta(); m.Validate() == nil { + return int(m.PageSize()), metaCanRead, nil } } } - return 0, metaCanRead, ErrInvalid + return 0, metaCanRead, common.ErrInvalid } // loadFreelist reads the freelist if it is synced, or reconstructs it @@ -412,14 +376,14 @@ func (db *DB) loadFreelist() { db.freelist.readIDs(db.freepages()) } else { // Read free list from freelist page. - db.freelist.read(db.page(db.meta().freelist)) + db.freelist.read(db.page(db.meta().Freelist())) } db.stats.FreePageN = db.freelist.free_count() }) } func (db *DB) hasSyncedFreelist() bool { - return db.meta().freelist != pgidNoFreelist + return db.meta().Freelist() != common.PgidNoFreelist } // mmap opens the underlying memory-mapped file and initializes the meta references. @@ -478,14 +442,14 @@ func (db *DB) mmap(minsz int) error { } // Save references to the meta pages. - db.meta0 = db.page(0).meta() - db.meta1 = db.page(1).meta() + db.meta0 = db.page(0).Meta() + db.meta1 = db.page(1).Meta() // Validate the meta pages. We only return an error if both meta pages fail // validation, since meta0 failing validation means that it wasn't saved // properly -- but we can recover using meta1. And vice-versa. - err0 := db.meta0.validate() - err1 := db.meta1.validate() + err0 := db.meta0.Validate() + err1 := db.meta1.Validate() if err0 != nil && err1 != nil { return err0 } @@ -533,8 +497,8 @@ func (db *DB) mmapSize(size int) (int, error) { // If larger than 1GB then grow by 1GB at a time. sz := int64(size) - if remainder := sz % int64(maxMmapStep); remainder > 0 { - sz += int64(maxMmapStep) - remainder + if remainder := sz % int64(common.MaxMmapStep); remainder > 0 { + sz += int64(common.MaxMmapStep) - remainder } // Ensure that the mmap size is a multiple of the page size. @@ -581,33 +545,33 @@ func (db *DB) init() error { // Create two meta pages on a buffer. buf := make([]byte, db.pageSize*4) for i := 0; i < 2; i++ { - p := db.pageInBuffer(buf, pgid(i)) - p.id = pgid(i) - p.flags = metaPageFlag + p := db.pageInBuffer(buf, common.Pgid(i)) + p.SetId(common.Pgid(i)) + p.SetFlags(common.MetaPageFlag) // Initialize the meta page. 
- m := p.meta() - m.magic = magic - m.version = version - m.pageSize = uint32(db.pageSize) - m.freelist = 2 - m.root = bucket{root: 3} - m.pgid = 4 - m.txid = txid(i) - m.checksum = m.sum64() + m := p.Meta() + m.SetMagic(common.Magic) + m.SetVersion(common.Version) + m.SetPageSize(uint32(db.pageSize)) + m.SetFreelist(2) + m.SetRootBucket(common.NewInBucket(3, 0)) + m.SetPgid(4) + m.SetTxid(common.Txid(i)) + m.SetChecksum(m.Sum64()) } // Write an empty freelist at page 3. - p := db.pageInBuffer(buf, pgid(2)) - p.id = pgid(2) - p.flags = freelistPageFlag - p.count = 0 + p := db.pageInBuffer(buf, common.Pgid(2)) + p.SetId(2) + p.SetFlags(common.FreelistPageFlag) + p.SetCount(0) // Write an empty leaf page at page 4. - p = db.pageInBuffer(buf, pgid(3)) - p.id = pgid(3) - p.flags = leafPageFlag - p.count = 0 + p = db.pageInBuffer(buf, common.Pgid(3)) + p.SetId(3) + p.SetFlags(common.LeafPageFlag) + p.SetCount(0) // Write the buffer to our data file. if _, err := db.ops.writeAt(buf, 0); err != nil { @@ -719,14 +683,14 @@ func (db *DB) beginTx() (*Tx, error) { if !db.opened { db.mmaplock.RUnlock() db.metalock.Unlock() - return nil, ErrDatabaseNotOpen + return nil, common.ErrDatabaseNotOpen } // Exit if the database is not correctly mapped. if db.data == nil { db.mmaplock.RUnlock() db.metalock.Unlock() - return nil, ErrInvalidMapping + return nil, common.ErrInvalidMapping } // Create a transaction associated with the database. @@ -752,7 +716,7 @@ func (db *DB) beginTx() (*Tx, error) { func (db *DB) beginRWTx() (*Tx, error) { // If the database was opened with Options.ReadOnly, return an error. if db.readOnly { - return nil, ErrDatabaseReadOnly + return nil, common.ErrDatabaseReadOnly } // Obtain writer lock. This is released by the transaction when it closes. @@ -767,13 +731,13 @@ func (db *DB) beginRWTx() (*Tx, error) { // Exit if the database is not open yet. if !db.opened { db.rwlock.Unlock() - return nil, ErrDatabaseNotOpen + return nil, common.ErrDatabaseNotOpen } // Exit if the database is not correctly mapped. if db.data == nil { db.rwlock.Unlock() - return nil, ErrInvalidMapping + return nil, common.ErrInvalidMapping } // Create a transaction associated with the database. @@ -788,19 +752,19 @@ func (db *DB) beginRWTx() (*Tx, error) { func (db *DB) freePages() { // Free all pending pages prior to earliest open transaction. sort.Sort(txsById(db.txs)) - minid := txid(0xFFFFFFFFFFFFFFFF) + minid := common.Txid(0xFFFFFFFFFFFFFFFF) if len(db.txs) > 0 { - minid = db.txs[0].meta.txid + minid = db.txs[0].meta.Txid() } if minid > 0 { db.freelist.release(minid - 1) } // Release unused txid extents. for _, t := range db.txs { - db.freelist.releaseRange(minid, t.meta.txid-1) - minid = t.meta.txid + 1 + db.freelist.releaseRange(minid, t.meta.Txid()-1) + minid = t.meta.Txid() + 1 } - db.freelist.releaseRange(minid, txid(0xFFFFFFFFFFFFFFFF)) + db.freelist.releaseRange(minid, common.Txid(0xFFFFFFFFFFFFFFFF)) // Any page both allocated and freed in an extent is safe to release. } @@ -808,7 +772,7 @@ type txsById []*Tx func (t txsById) Len() int { return len(t) } func (t txsById) Swap(i, j int) { t[i], t[j] = t[j], t[i] } -func (t txsById) Less(i, j int) bool { return t[i].meta.txid < t[j].meta.txid } +func (t txsById) Less(i, j int) bool { return t[i].meta.Txid() < t[j].meta.Txid() } // removeTx removes a transaction from the database. 
func (db *DB) removeTx(tx *Tx) { @@ -1050,37 +1014,37 @@ func (db *DB) Stats() Stats { // This is for internal access to the raw data bytes from the C cursor, use // carefully, or not at all. func (db *DB) Info() *Info { - _assert(db.data != nil, "database file isn't correctly mapped") + common.Assert(db.data != nil, "database file isn't correctly mapped") return &Info{uintptr(unsafe.Pointer(&db.data[0])), db.pageSize} } // page retrieves a page reference from the mmap based on the current page size. -func (db *DB) page(id pgid) *page { - pos := id * pgid(db.pageSize) - return (*page)(unsafe.Pointer(&db.data[pos])) +func (db *DB) page(id common.Pgid) *common.Page { + pos := id * common.Pgid(db.pageSize) + return (*common.Page)(unsafe.Pointer(&db.data[pos])) } // pageInBuffer retrieves a page reference from a given byte array based on the current page size. -func (db *DB) pageInBuffer(b []byte, id pgid) *page { - return (*page)(unsafe.Pointer(&b[id*pgid(db.pageSize)])) +func (db *DB) pageInBuffer(b []byte, id common.Pgid) *common.Page { + return (*common.Page)(unsafe.Pointer(&b[id*common.Pgid(db.pageSize)])) } // meta retrieves the current meta page reference. -func (db *DB) meta() *meta { +func (db *DB) meta() *common.Meta { // We have to return the meta with the highest txid which doesn't fail // validation. Otherwise, we can cause errors when in fact the database is // in a consistent state. metaA is the one with the higher txid. metaA := db.meta0 metaB := db.meta1 - if db.meta1.txid > db.meta0.txid { + if db.meta1.Txid() > db.meta0.Txid() { metaA = db.meta1 metaB = db.meta0 } // Use higher meta page if valid. Otherwise, fallback to previous, if valid. - if err := metaA.validate(); err == nil { + if err := metaA.Validate(); err == nil { return metaA - } else if err := metaB.validate(); err == nil { + } else if err := metaB.Validate(); err == nil { return metaB } @@ -1090,7 +1054,7 @@ func (db *DB) meta() *meta { } // allocate returns a contiguous block of memory starting at a given page. -func (db *DB) allocate(txid txid, count int) (*page, error) { +func (db *DB) allocate(txid common.Txid, count int) (*common.Page, error) { // Allocate a temporary buffer for the page. var buf []byte if count == 1 { @@ -1098,17 +1062,18 @@ func (db *DB) allocate(txid txid, count int) (*page, error) { } else { buf = make([]byte, count*db.pageSize) } - p := (*page)(unsafe.Pointer(&buf[0])) - p.overflow = uint32(count - 1) + p := (*common.Page)(unsafe.Pointer(&buf[0])) + p.SetOverflow(uint32(count - 1)) // Use pages from the freelist if they are available. - if p.id = db.freelist.allocate(txid, count); p.id != 0 { + p.SetId(db.freelist.allocate(txid, count)) + if p.Id() != 0 { return p, nil } // Resize mmap() if we're at the end. - p.id = db.rwtx.meta.pgid - var minsz = int((p.id+pgid(count))+1) * db.pageSize + p.SetId(db.rwtx.meta.Pgid()) + var minsz = int((p.Id()+common.Pgid(count))+1) * db.pageSize if minsz >= db.datasz { if err := db.mmap(minsz); err != nil { return nil, fmt.Errorf("mmap allocate error: %s", err) @@ -1116,7 +1081,8 @@ func (db *DB) allocate(txid txid, count int) (*page, error) { } // Move the page id high water mark. 
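// Editor's note (illustrative sketch, not part of the patch): db.allocate
// above tries the freelist first and only then extends the file by moving the
// high-water mark. The same policy in miniature, with hypothetical names:
func sketchAllocate(takeFromFreelist func(n int) uint64, hwm *uint64, n int) uint64 {
	// Reuse a contiguous run of freed pages when one is available.
	if id := takeFromFreelist(n); id != 0 {
		return id
	}
	// Otherwise allocate at the end of the file and advance the high-water mark.
	id := *hwm
	*hwm += uint64(n)
	return id
}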
- db.rwtx.meta.pgid += pgid(count) + curPgid := db.rwtx.meta.Pgid() + db.rwtx.meta.SetPgid(curPgid + common.Pgid(count)) return p, nil } @@ -1163,7 +1129,7 @@ func (db *DB) IsReadOnly() bool { return db.readOnly } -func (db *DB) freepages() []pgid { +func (db *DB) freepages() []common.Pgid { tx, err := db.beginTx() defer func() { err = tx.Rollback() @@ -1175,8 +1141,8 @@ func (db *DB) freepages() []pgid { panic("freepages: failed to open read only tx") } - reachable := make(map[pgid]*page) - nofreed := make(map[pgid]bool) + reachable := make(map[common.Pgid]*common.Page) + nofreed := make(map[common.Pgid]bool) ech := make(chan error) go func() { for e := range ech { @@ -1188,8 +1154,8 @@ func (db *DB) freepages() []pgid { // TODO: If check bucket reported any corruptions (ech) we shouldn't proceed to freeing the pages. - var fids []pgid - for i := pgid(2); i < db.meta().pgid; i++ { + var fids []common.Pgid + for i := common.Pgid(2); i < db.meta().Pgid(); i++ { if _, ok := reachable[i]; !ok { fids = append(fids, i) } @@ -1221,7 +1187,7 @@ type Options struct { // The alternative one is using hashmap, it is faster in almost all circumstances // but it doesn't guarantee that it offers the smallest page id available. In normal case it is safe. // The default type is array - FreelistType FreelistType + FreelistType common.FreelistType // Open database in read-only mode. Uses flock(..., LOCK_SH |LOCK_NB) to // grab a shared lock (UNIX). @@ -1263,7 +1229,7 @@ type Options struct { var DefaultOptions = &Options{ Timeout: 0, NoGrowSync: false, - FreelistType: FreelistArrayType, + FreelistType: common.FreelistArrayType, } // Stats represents statistics about the database. @@ -1302,65 +1268,3 @@ type Info struct { Data uintptr PageSize int } - -type meta struct { - magic uint32 - version uint32 - pageSize uint32 - flags uint32 - root bucket - freelist pgid - pgid pgid - txid txid - checksum uint64 -} - -// validate checks the marker bytes and version of the meta page to ensure it matches this binary. -func (m *meta) validate() error { - if m.magic != magic { - return ErrInvalid - } else if m.version != version { - return ErrVersionMismatch - } else if m.checksum != m.sum64() { - return ErrChecksum - } - return nil -} - -// copy copies one meta object to another. -func (m *meta) copy(dest *meta) { - *dest = *m -} - -// write writes the meta onto a page. -func (m *meta) write(p *page) { - if m.root.root >= m.pgid { - panic(fmt.Sprintf("root bucket pgid (%d) above high water mark (%d)", m.root.root, m.pgid)) - } else if m.freelist >= m.pgid && m.freelist != pgidNoFreelist { - // TODO: reject pgidNoFreeList if !NoFreelistSync - panic(fmt.Sprintf("freelist pgid (%d) above high water mark (%d)", m.freelist, m.pgid)) - } - - // Page id is either going to be 0 or 1 which we can determine by the transaction ID. - p.id = pgid(m.txid % 2) - p.flags |= metaPageFlag - - // Calculate the checksum. - m.checksum = m.sum64() - - m.copy(p.meta()) -} - -// generates the checksum for the meta. -func (m *meta) sum64() uint64 { - var h = fnv.New64a() - _, _ = h.Write((*[unsafe.Offsetof(meta{}.checksum)]byte)(unsafe.Pointer(m))[:]) - return h.Sum64() -} - -// _assert will panic with a given formatted message if the given condition is false. 
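// Editor's note (illustrative sketch, not part of the patch): the meta
// checksum convention deleted above (now provided by common.Meta.Sum64) is
// FNV-1a over the meta bytes up to, but not including, the checksum field.
// A simplified mirror with a hypothetical struct; requires "hash/fnv" and
// "unsafe":
type metaSketch struct {
	magic, version, pageSize, flags uint32
	checksum                        uint64
}

func (m *metaSketch) sum64() uint64 {
	h := fnv.New64a()
	// Hash every field that precedes the checksum itself.
	_, _ = h.Write((*[unsafe.Offsetof(metaSketch{}.checksum)]byte)(unsafe.Pointer(m))[:])
	return h.Sum64()
}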
-func _assert(condition bool, msg string, v ...interface{}) { - if !condition { - panic(fmt.Sprintf("assertion failed: "+msg, v...)) - } -} diff --git a/db_test.go b/db_test.go index 9f1076f..db7c619 100644 --- a/db_test.go +++ b/db_test.go @@ -21,6 +21,7 @@ import ( bolt "go.etcd.io/bbolt" "go.etcd.io/bbolt/internal/btesting" + "go.etcd.io/bbolt/internal/common" ) // pageSize is the size of one page in the data file. @@ -136,7 +137,7 @@ func TestOpen_ErrInvalid(t *testing.T) { t.Fatal(err) } - if _, err := bolt.Open(path, 0666, nil); err != bolt.ErrInvalid { + if _, err := bolt.Open(path, 0666, nil); err != common.ErrInvalid { t.Fatalf("unexpected error: %s", err) } } @@ -172,7 +173,7 @@ func TestOpen_ErrVersionMismatch(t *testing.T) { } // Reopen data file. - if _, err := bolt.Open(path, 0666, nil); err != bolt.ErrVersionMismatch { + if _, err := bolt.Open(path, 0666, nil); err != common.ErrVersionMismatch { t.Fatalf("unexpected error: %s", err) } } @@ -208,7 +209,7 @@ func TestOpen_ErrChecksum(t *testing.T) { } // Reopen data file. - if _, err := bolt.Open(path, 0666, nil); err != bolt.ErrChecksum { + if _, err := bolt.Open(path, 0666, nil); err != common.ErrChecksum { t.Fatalf("unexpected error: %s", err) } } @@ -552,7 +553,7 @@ func TestDB_Open_ReadOnly(t *testing.T) { } // Can't launch read-write transaction. - if _, err := readOnlyDB.Begin(true); err != bolt.ErrDatabaseReadOnly { + if _, err := readOnlyDB.Begin(true); err != common.ErrDatabaseReadOnly { t.Fatalf("unexpected error: %s", err) } @@ -641,7 +642,7 @@ func TestOpen_RecoverFreeList(t *testing.T) { // Ensure that a database cannot open a transaction when it's not open. func TestDB_Begin_ErrDatabaseNotOpen(t *testing.T) { var db bolt.DB - if _, err := db.Begin(false); err != bolt.ErrDatabaseNotOpen { + if _, err := db.Begin(false); err != common.ErrDatabaseNotOpen { t.Fatalf("unexpected error: %s", err) } } @@ -727,7 +728,7 @@ func TestDB_Concurrent_WriteTo(t *testing.T) { // Ensure that opening a transaction while the DB is closed returns an error. func TestDB_BeginRW_Closed(t *testing.T) { var db bolt.DB - if _, err := db.Begin(true); err != bolt.ErrDatabaseNotOpen { + if _, err := db.Begin(true); err != common.ErrDatabaseNotOpen { t.Fatalf("unexpected error: %s", err) } } @@ -828,7 +829,7 @@ func TestDB_Update_Closed(t *testing.T) { t.Fatal(err) } return nil - }); err != bolt.ErrDatabaseNotOpen { + }); err != common.ErrDatabaseNotOpen { t.Fatalf("unexpected error: %s", err) } } diff --git a/db_whitebox_test.go b/db_whitebox_test.go index eb95155..8b195f7 100644 --- a/db_whitebox_test.go +++ b/db_whitebox_test.go @@ -6,6 +6,8 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + + "go.etcd.io/bbolt/internal/common" ) func TestOpenWithPreLoadFreelist(t *testing.T) { @@ -76,7 +78,7 @@ func TestMethodPage(t *testing.T) { name: "readonly mode without preloading free pages", readonly: true, preLoadFreePage: false, - expectedError: ErrFreePagesNotLoaded, + expectedError: common.ErrFreePagesNotLoaded, }, } diff --git a/errors.go b/errors.go deleted file mode 100644 index f2c3b20..0000000 --- a/errors.go +++ /dev/null @@ -1,78 +0,0 @@ -package bbolt - -import "errors" - -// These errors can be returned when opening or calling methods on a DB. -var ( - // ErrDatabaseNotOpen is returned when a DB instance is accessed before it - // is opened or after it is closed. 
- ErrDatabaseNotOpen = errors.New("database not open") - - // ErrDatabaseOpen is returned when opening a database that is - // already open. - ErrDatabaseOpen = errors.New("database already open") - - // ErrInvalid is returned when both meta pages on a database are invalid. - // This typically occurs when a file is not a bolt database. - ErrInvalid = errors.New("invalid database") - - // ErrInvalidMapping is returned when the database file fails to get mapped. - ErrInvalidMapping = errors.New("database isn't correctly mapped") - - // ErrVersionMismatch is returned when the data file was created with a - // different version of Bolt. - ErrVersionMismatch = errors.New("version mismatch") - - // ErrChecksum is returned when either meta page checksum does not match. - ErrChecksum = errors.New("checksum error") - - // ErrTimeout is returned when a database cannot obtain an exclusive lock - // on the data file after the timeout passed to Open(). - ErrTimeout = errors.New("timeout") -) - -// These errors can occur when beginning or committing a Tx. -var ( - // ErrTxNotWritable is returned when performing a write operation on a - // read-only transaction. - ErrTxNotWritable = errors.New("tx not writable") - - // ErrTxClosed is returned when committing or rolling back a transaction - // that has already been committed or rolled back. - ErrTxClosed = errors.New("tx closed") - - // ErrDatabaseReadOnly is returned when a mutating transaction is started on a - // read-only database. - ErrDatabaseReadOnly = errors.New("database is in read-only mode") - - // ErrFreePagesNotLoaded is returned when a readonly transaction without - // preloading the free pages is trying to access the free pages. - ErrFreePagesNotLoaded = errors.New("free pages are not pre-loaded") -) - -// These errors can occur when putting or deleting a value or a bucket. -var ( - // ErrBucketNotFound is returned when trying to access a bucket that has - // not been created yet. - ErrBucketNotFound = errors.New("bucket not found") - - // ErrBucketExists is returned when creating a bucket that already exists. - ErrBucketExists = errors.New("bucket already exists") - - // ErrBucketNameRequired is returned when creating a bucket with a blank name. - ErrBucketNameRequired = errors.New("bucket name required") - - // ErrKeyRequired is returned when inserting a zero-length key. - ErrKeyRequired = errors.New("key required") - - // ErrKeyTooLarge is returned when inserting a key that is larger than MaxKeySize. - ErrKeyTooLarge = errors.New("key too large") - - // ErrValueTooLarge is returned when inserting a value that is larger than MaxValueSize. - ErrValueTooLarge = errors.New("value too large") - - // ErrIncompatibleValue is returned when trying create or delete a bucket - // on an existing non-bucket key or when trying to create or delete a - // non-bucket key on an existing bucket key. - ErrIncompatibleValue = errors.New("incompatible value") -) diff --git a/freelist.go b/freelist.go index 50f2d0e..dfccc50 100644 --- a/freelist.go +++ b/freelist.go @@ -4,50 +4,52 @@ import ( "fmt" "sort" "unsafe" + + "go.etcd.io/bbolt/internal/common" ) // txPending holds a list of pgids and corresponding allocation txns // that are pending to be freed. 
type txPending struct { - ids []pgid - alloctx []txid // txids allocating the ids - lastReleaseBegin txid // beginning txid of last matching releaseRange + ids []common.Pgid + alloctx []common.Txid // txids allocating the ids + lastReleaseBegin common.Txid // beginning txid of last matching releaseRange } // pidSet holds the set of starting pgids which have the same span size -type pidSet map[pgid]struct{} +type pidSet map[common.Pgid]struct{} // freelist represents a list of all pages that are available for allocation. // It also tracks pages that have been freed but are still in use by open transactions. type freelist struct { - freelistType FreelistType // freelist type - ids []pgid // all free and available free page ids. - allocs map[pgid]txid // mapping of txid that allocated a pgid. - pending map[txid]*txPending // mapping of soon-to-be free page ids by tx. - cache map[pgid]struct{} // fast lookup of all free and pending page ids. - freemaps map[uint64]pidSet // key is the size of continuous pages(span), value is a set which contains the starting pgids of same size - forwardMap map[pgid]uint64 // key is start pgid, value is its span size - backwardMap map[pgid]uint64 // key is end pgid, value is its span size - allocate func(txid txid, n int) pgid // the freelist allocate func - free_count func() int // the function which gives you free page number - mergeSpans func(ids pgids) // the mergeSpan func - getFreePageIDs func() []pgid // get free pgids func - readIDs func(pgids []pgid) // readIDs func reads list of pages and init the freelist + freelistType common.FreelistType // freelist type + ids []common.Pgid // all free and available free page ids. + allocs map[common.Pgid]common.Txid // mapping of Txid that allocated a pgid. + pending map[common.Txid]*txPending // mapping of soon-to-be free page ids by tx. + cache map[common.Pgid]struct{} // fast lookup of all free and pending page ids. + freemaps map[uint64]pidSet // key is the size of continuous pages(span), value is a set which contains the starting pgids of same size + forwardMap map[common.Pgid]uint64 // key is start pgid, value is its span size + backwardMap map[common.Pgid]uint64 // key is end pgid, value is its span size + allocate func(txid common.Txid, n int) common.Pgid // the freelist allocate func + free_count func() int // the function which gives you free page number + mergeSpans func(ids common.Pgids) // the mergeSpan func + getFreePageIDs func() []common.Pgid // get free pgids func + readIDs func(pgids []common.Pgid) // readIDs func reads list of pages and init the freelist } // newFreelist returns an empty, initialized freelist. -func newFreelist(freelistType FreelistType) *freelist { +func newFreelist(freelistType common.FreelistType) *freelist { f := &freelist{ freelistType: freelistType, - allocs: make(map[pgid]txid), - pending: make(map[txid]*txPending), - cache: make(map[pgid]struct{}), + allocs: make(map[common.Pgid]common.Txid), + pending: make(map[common.Txid]*txPending), + cache: make(map[common.Pgid]struct{}), freemaps: make(map[uint64]pidSet), - forwardMap: make(map[pgid]uint64), - backwardMap: make(map[pgid]uint64), + forwardMap: make(map[common.Pgid]uint64), + backwardMap: make(map[common.Pgid]uint64), } - if freelistType == FreelistMapType { + if freelistType == common.FreelistMapType { f.allocate = f.hashmapAllocate f.free_count = f.hashmapFreeCount f.mergeSpans = f.hashmapMergeSpans @@ -71,7 +73,7 @@ func (f *freelist) size() int { // The first element will be used to store the count. 
See freelist.write. n++ } - return int(pageHeaderSize) + (int(unsafe.Sizeof(pgid(0))) * n) + return int(common.PageHeaderSize) + (int(unsafe.Sizeof(common.Pgid(0))) * n) } // count returns count of pages on the freelist @@ -95,23 +97,23 @@ func (f *freelist) pending_count() int { // copyall copies a list of all free ids and all pending ids in one sorted list. // f.count returns the minimum length required for dst. -func (f *freelist) copyall(dst []pgid) { - m := make(pgids, 0, f.pending_count()) +func (f *freelist) copyall(dst []common.Pgid) { + m := make(common.Pgids, 0, f.pending_count()) for _, txp := range f.pending { m = append(m, txp.ids...) } sort.Sort(m) - mergepgids(dst, f.getFreePageIDs(), m) + common.Mergepgids(dst, f.getFreePageIDs(), m) } // arrayAllocate returns the starting page id of a contiguous list of pages of a given size. // If a contiguous block cannot be found then 0 is returned. -func (f *freelist) arrayAllocate(txid txid, n int) pgid { +func (f *freelist) arrayAllocate(txid common.Txid, n int) common.Pgid { if len(f.ids) == 0 { return 0 } - var initial, previd pgid + var initial, previd common.Pgid for i, id := range f.ids { if id <= 1 { panic(fmt.Sprintf("invalid page allocation: %d", id)) @@ -123,7 +125,7 @@ func (f *freelist) arrayAllocate(txid txid, n int) pgid { } // If we found a contiguous block then remove it and return it. - if (id-initial)+1 == pgid(n) { + if (id-initial)+1 == common.Pgid(n) { // If we're allocating off the beginning then take the fast path // and just adjust the existing slice. This will use extra memory // temporarily but the append() in free() will realloc the slice @@ -136,7 +138,7 @@ func (f *freelist) arrayAllocate(txid txid, n int) pgid { } // Remove from the free cache. - for i := pgid(0); i < pgid(n); i++ { + for i := common.Pgid(0); i < common.Pgid(n); i++ { delete(f.cache, initial+i) } f.allocs[initial] = txid @@ -150,9 +152,9 @@ func (f *freelist) arrayAllocate(txid txid, n int) pgid { // free releases a page and its overflow for a given transaction id. // If the page is already free then a panic will occur. -func (f *freelist) free(txid txid, p *page) { - if p.id <= 1 { - panic(fmt.Sprintf("cannot free page 0 or 1: %d", p.id)) +func (f *freelist) free(txid common.Txid, p *common.Page) { + if p.Id() <= 1 { + panic(fmt.Sprintf("cannot free page 0 or 1: %d", p.Id())) } // Free page and all its overflow pages. @@ -161,15 +163,15 @@ func (f *freelist) free(txid txid, p *page) { txp = &txPending{} f.pending[txid] = txp } - allocTxid, ok := f.allocs[p.id] + allocTxid, ok := f.allocs[p.Id()] if ok { - delete(f.allocs, p.id) - } else if (p.flags & freelistPageFlag) != 0 { + delete(f.allocs, p.Id()) + } else if (p.Flags() & common.FreelistPageFlag) != 0 { // Freelist is always allocated by prior tx. allocTxid = txid - 1 } - for id := p.id; id <= p.id+pgid(p.overflow); id++ { + for id := p.Id(); id <= p.Id()+common.Pgid(p.Overflow()); id++ { // Verify that page is not already free. if _, ok := f.cache[id]; ok { panic(fmt.Sprintf("page %d already freed", id)) @@ -182,8 +184,8 @@ func (f *freelist) free(txid txid, p *page) { } // release moves all page ids for a transaction id (or older) to the freelist. -func (f *freelist) release(txid txid) { - m := make(pgids, 0) +func (f *freelist) release(txid common.Txid) { + m := make(common.Pgids, 0) for tid, txp := range f.pending { if tid <= txid { // Move transaction's pending pages to the available freelist. 
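// Editor's note (illustrative sketch, not part of the patch): the release step
// above in miniature — pending pages from every transaction with id <= txid
// move to the free list. Hypothetical helper; requires "sort":
func sketchRelease(pending map[uint64][]uint64, upTo uint64) []uint64 {
	var freed []uint64
	for tid, ids := range pending {
		if tid <= upTo {
			freed = append(freed, ids...)
			delete(pending, tid) // deleting during range is allowed in Go
		}
	}
	sort.Slice(freed, func(i, j int) bool { return freed[i] < freed[j] })
	return freed
}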
@@ -196,11 +198,11 @@ func (f *freelist) release(txid txid) { } // releaseRange moves pending pages allocated within an extent [begin,end] to the free list. -func (f *freelist) releaseRange(begin, end txid) { +func (f *freelist) releaseRange(begin, end common.Txid) { if begin > end { return } - var m pgids + var m common.Pgids for tid, txp := range f.pending { if tid < begin || tid > end { continue @@ -229,13 +231,13 @@ func (f *freelist) releaseRange(begin, end txid) { } // rollback removes the pages from a given pending tx. -func (f *freelist) rollback(txid txid) { +func (f *freelist) rollback(txid common.Txid) { // Remove page ids from cache. txp := f.pending[txid] if txp == nil { return } - var m pgids + var m common.Pgids for i, pgid := range txp.ids { delete(f.cache, pgid) tx := txp.alloctx[i] @@ -256,82 +258,69 @@ func (f *freelist) rollback(txid txid) { } // freed returns whether a given page is in the free list. -func (f *freelist) freed(pgId pgid) bool { +func (f *freelist) freed(pgId common.Pgid) bool { _, ok := f.cache[pgId] return ok } // read initializes the freelist from a freelist page. -func (f *freelist) read(p *page) { - if (p.flags & freelistPageFlag) == 0 { - panic(fmt.Sprintf("invalid freelist page: %d, page type is %s", p.id, p.typ())) - } - // If the page.count is at the max uint16 value (64k) then it's considered - // an overflow and the size of the freelist is stored as the first element. - var idx, count = 0, int(p.count) - if count == 0xFFFF { - idx = 1 - c := *(*pgid)(unsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p))) - count = int(c) - if count < 0 { - panic(fmt.Sprintf("leading element count %d overflows int", c)) - } +func (f *freelist) read(p *common.Page) { + if (p.Flags() & common.FreelistPageFlag) == 0 { + panic(fmt.Sprintf("invalid freelist page: %d, page type is %s", p.Id(), p.Typ())) } + ids := p.FreelistPageIds() + // Copy the list of page ids from the freelist. - if count == 0 { + if len(ids) == 0 { f.ids = nil } else { - var ids []pgid - data := unsafeIndex(unsafe.Pointer(p), unsafe.Sizeof(*p), unsafe.Sizeof(ids[0]), idx) - unsafeSlice(unsafe.Pointer(&ids), data, count) - // copy the ids, so we don't modify on the freelist page directly - idsCopy := make([]pgid, count) + idsCopy := make([]common.Pgid, len(ids)) copy(idsCopy, ids) // Make sure they're sorted. - sort.Sort(pgids(idsCopy)) + sort.Sort(common.Pgids(idsCopy)) f.readIDs(idsCopy) } } // arrayReadIDs initializes the freelist from a given list of ids. -func (f *freelist) arrayReadIDs(ids []pgid) { +func (f *freelist) arrayReadIDs(ids []common.Pgid) { f.ids = ids f.reindex() } -func (f *freelist) arrayGetFreePageIDs() []pgid { +func (f *freelist) arrayGetFreePageIDs() []common.Pgid { return f.ids } // write writes the page ids onto a freelist page. All free and pending ids are // saved to disk since in the event of a program crash, all pending ids will // become free. -func (f *freelist) write(p *page) error { +func (f *freelist) write(p *common.Page) error { // Combine the old free pgids and pgids waiting on an open transaction. // Update the header flag. - p.flags |= freelistPageFlag + p.FlagsXOR(common.FreelistPageFlag) // The page.count can only hold up to 64k elements so if we overflow that // number then we handle it by putting the size in the first element. 
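// Editor's note (illustrative sketch, not part of the patch): the on-page
// count convention described above — page.count is a uint16, so a freelist
// with 0xFFFF or more ids stores its true length in the first pgid slot.
// Slice-level sketch of the encoding, not the real unsafe page layout:
func sketchEncodeFreelist(ids []uint64) (count uint16, payload []uint64) {
	if len(ids) < 0xFFFF {
		return uint16(len(ids)), ids
	}
	// Overflow: sentinel count, real length leads the payload.
	payload = append([]uint64{uint64(len(ids))}, ids...)
	return 0xFFFF, payload
}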
l := f.count() if l == 0 { - p.count = uint16(l) + p.SetCount(uint16(l)) } else if l < 0xFFFF { - p.count = uint16(l) - var ids []pgid - data := unsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p)) - unsafeSlice(unsafe.Pointer(&ids), data, l) + p.SetCount(uint16(l)) + var ids []common.Pgid + data := common.UnsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p)) + common.UnsafeSlice(unsafe.Pointer(&ids), data, l) f.copyall(ids) } else { - p.count = 0xFFFF - var ids []pgid - data := unsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p)) - unsafeSlice(unsafe.Pointer(&ids), data, l+1) - ids[0] = pgid(l) + p.SetCount(0xFFFF) + var ids []common.Pgid + data := common.UnsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p)) + common.UnsafeSlice(unsafe.Pointer(&ids), data, l+1) + ids[0] = common.Pgid(l) f.copyall(ids[1:]) } @@ -339,11 +328,11 @@ func (f *freelist) write(p *page) error { } // reload reads the freelist from a page and filters out pending items. -func (f *freelist) reload(p *page) { +func (f *freelist) reload(p *common.Page) { f.read(p) // Build a cache of only pending pages. - pcache := make(map[pgid]bool) + pcache := make(map[common.Pgid]bool) for _, txp := range f.pending { for _, pendingID := range txp.ids { pcache[pendingID] = true @@ -352,7 +341,7 @@ func (f *freelist) reload(p *page) { // Check each page in the freelist and build a new available freelist // with any pages not in the pending lists. - var a []pgid + var a []common.Pgid for _, id := range f.getFreePageIDs() { if !pcache[id] { a = append(a, id) @@ -362,10 +351,10 @@ func (f *freelist) reload(p *page) { f.readIDs(a) } -// noSyncReload reads the freelist from pgids and filters out pending items. -func (f *freelist) noSyncReload(pgids []pgid) { +// noSyncReload reads the freelist from Pgids and filters out pending items. +func (f *freelist) noSyncReload(Pgids []common.Pgid) { // Build a cache of only pending pages. - pcache := make(map[pgid]bool) + pcache := make(map[common.Pgid]bool) for _, txp := range f.pending { for _, pendingID := range txp.ids { pcache[pendingID] = true @@ -374,8 +363,8 @@ func (f *freelist) noSyncReload(pgids []pgid) { // Check each page in the freelist and build a new available freelist // with any pages not in the pending lists. - var a []pgid - for _, id := range pgids { + var a []common.Pgid + for _, id := range Pgids { if !pcache[id] { a = append(a, id) } @@ -387,7 +376,7 @@ func (f *freelist) noSyncReload(pgids []pgid) { // reindex rebuilds the free cache based on available and pending free lists. 
func (f *freelist) reindex() { ids := f.getFreePageIDs() - f.cache = make(map[pgid]struct{}, len(ids)) + f.cache = make(map[common.Pgid]struct{}, len(ids)) for _, id := range ids { f.cache[id] = struct{}{} } @@ -399,7 +388,7 @@ func (f *freelist) reindex() { } // arrayMergeSpans try to merge list of pages(represented by pgids) with existing spans but using array -func (f *freelist) arrayMergeSpans(ids pgids) { +func (f *freelist) arrayMergeSpans(ids common.Pgids) { sort.Sort(ids) - f.ids = pgids(f.ids).merge(ids) + f.ids = common.Pgids(f.ids).Merge(ids) } diff --git a/freelist_hmap.go b/freelist_hmap.go index dbd67a1..6e01bc1 100644 --- a/freelist_hmap.go +++ b/freelist_hmap.go @@ -1,6 +1,10 @@ package bbolt -import "sort" +import ( + "sort" + + "go.etcd.io/bbolt/internal/common" +) // hashmapFreeCount returns count of free pages(hashmap version) func (f *freelist) hashmapFreeCount() int { @@ -13,7 +17,7 @@ func (f *freelist) hashmapFreeCount() int { } // hashmapAllocate serves the same purpose as arrayAllocate, but use hashmap as backend -func (f *freelist) hashmapAllocate(txid txid, n int) pgid { +func (f *freelist) hashmapAllocate(txid common.Txid, n int) common.Pgid { if n == 0 { return 0 } @@ -26,7 +30,7 @@ func (f *freelist) hashmapAllocate(txid txid, n int) pgid { f.allocs[pid] = txid - for i := pgid(0); i < pgid(n); i++ { + for i := common.Pgid(0); i < common.Pgid(n); i++ { delete(f.cache, pid+i) } return pid @@ -48,9 +52,9 @@ func (f *freelist) hashmapAllocate(txid txid, n int) pgid { remain := size - uint64(n) // add remain span - f.addSpan(pid+pgid(n), remain) + f.addSpan(pid+common.Pgid(n), remain) - for i := pgid(0); i < pgid(n); i++ { + for i := common.Pgid(0); i < common.Pgid(n); i++ { delete(f.cache, pid+i) } return pid @@ -61,7 +65,7 @@ func (f *freelist) hashmapAllocate(txid txid, n int) pgid { } // hashmapReadIDs reads pgids as input an initial the freelist(hashmap version) -func (f *freelist) hashmapReadIDs(pgids []pgid) { +func (f *freelist) hashmapReadIDs(pgids []common.Pgid) { f.init(pgids) // Rebuild the page cache. 
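// Editor's note (illustrative sketch, not part of the patch): the hashmap
// freelist indexes each free span three ways — by start pgid (forwardMap), by
// end pgid (backwardMap), and by span size (freemaps) — so neighbour merges
// and best-fit lookups are cheap. A hypothetical, simplified mirror of addSpan:
func sketchAddSpan(forward, backward map[uint64]uint64, bySize map[uint64]map[uint64]struct{}, start, size uint64) {
	forward[start] = size           // lookup by first page of the span
	backward[start+size-1] = size   // lookup by last page of the span
	if bySize[size] == nil {
		bySize[size] = make(map[uint64]struct{})
	}
	bySize[size][start] = struct{}{} // lookup by span length
}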
@@ -69,25 +73,25 @@ func (f *freelist) hashmapReadIDs(pgids []pgid) { } // hashmapGetFreePageIDs returns the sorted free page ids -func (f *freelist) hashmapGetFreePageIDs() []pgid { +func (f *freelist) hashmapGetFreePageIDs() []common.Pgid { count := f.free_count() if count == 0 { return nil } - m := make([]pgid, 0, count) + m := make([]common.Pgid, 0, count) for start, size := range f.forwardMap { for i := 0; i < int(size); i++ { - m = append(m, start+pgid(i)) + m = append(m, start+common.Pgid(i)) } } - sort.Sort(pgids(m)) + sort.Sort(common.Pgids(m)) return m } // hashmapMergeSpans try to merge list of pages(represented by pgids) with existing spans -func (f *freelist) hashmapMergeSpans(ids pgids) { +func (f *freelist) hashmapMergeSpans(ids common.Pgids) { for _, id := range ids { // try to see if we can merge and update f.mergeWithExistingSpan(id) @@ -95,7 +99,7 @@ func (f *freelist) hashmapMergeSpans(ids pgids) { } // mergeWithExistingSpan merges pid to the existing free spans, try to merge it backward and forward -func (f *freelist) mergeWithExistingSpan(pid pgid) { +func (f *freelist) mergeWithExistingSpan(pid common.Pgid) { prev := pid - 1 next := pid + 1 @@ -106,10 +110,10 @@ func (f *freelist) mergeWithExistingSpan(pid pgid) { if mergeWithPrev { //merge with previous span - start := prev + 1 - pgid(preSize) + start := prev + 1 - common.Pgid(preSize) f.delSpan(start, preSize) - newStart -= pgid(preSize) + newStart -= common.Pgid(preSize) newSize += preSize } @@ -122,19 +126,19 @@ func (f *freelist) mergeWithExistingSpan(pid pgid) { f.addSpan(newStart, newSize) } -func (f *freelist) addSpan(start pgid, size uint64) { - f.backwardMap[start-1+pgid(size)] = size +func (f *freelist) addSpan(start common.Pgid, size uint64) { + f.backwardMap[start-1+common.Pgid(size)] = size f.forwardMap[start] = size if _, ok := f.freemaps[size]; !ok { - f.freemaps[size] = make(map[pgid]struct{}) + f.freemaps[size] = make(map[common.Pgid]struct{}) } f.freemaps[size][start] = struct{}{} } -func (f *freelist) delSpan(start pgid, size uint64) { +func (f *freelist) delSpan(start common.Pgid, size uint64) { delete(f.forwardMap, start) - delete(f.backwardMap, start+pgid(size-1)) + delete(f.backwardMap, start+common.Pgid(size-1)) delete(f.freemaps[size], start) if len(f.freemaps[size]) == 0 { delete(f.freemaps, size) @@ -143,7 +147,7 @@ func (f *freelist) delSpan(start pgid, size uint64) { // initial from pgids using when use hashmap version // pgids must be sorted -func (f *freelist) init(pgids []pgid) { +func (f *freelist) init(pgids []common.Pgid) { if len(pgids) == 0 { return } @@ -151,13 +155,13 @@ func (f *freelist) init(pgids []pgid) { size := uint64(1) start := pgids[0] - if !sort.SliceIsSorted([]pgid(pgids), func(i, j int) bool { return pgids[i] < pgids[j] }) { + if !sort.SliceIsSorted([]common.Pgid(pgids), func(i, j int) bool { return pgids[i] < pgids[j] }) { panic("pgids not sorted") } f.freemaps = make(map[uint64]pidSet) - f.forwardMap = make(map[pgid]uint64) - f.backwardMap = make(map[pgid]uint64) + f.forwardMap = make(map[common.Pgid]uint64) + f.backwardMap = make(map[common.Pgid]uint64) for i := 1; i < len(pgids); i++ { // continuous page diff --git a/freelist_test.go b/freelist_test.go index 97656f4..1fffff2 100644 --- a/freelist_test.go +++ b/freelist_test.go @@ -7,6 +7,8 @@ import ( "sort" "testing" "unsafe" + + "go.etcd.io/bbolt/internal/common" ) // TestFreelistType is used as a env variable for test to indicate the backend type @@ -15,17 +17,17 @@ const TestFreelistType = 
"TEST_FREELIST_TYPE" // Ensure that a page is added to a transaction's freelist. func TestFreelist_free(t *testing.T) { f := newTestFreelist() - f.free(100, &page{id: 12}) - if !reflect.DeepEqual([]pgid{12}, f.pending[100].ids) { - t.Fatalf("exp=%v; got=%v", []pgid{12}, f.pending[100].ids) + f.free(100, common.NewPage(12, 0, 0, 0)) + if !reflect.DeepEqual([]common.Pgid{12}, f.pending[100].ids) { + t.Fatalf("exp=%v; got=%v", []common.Pgid{12}, f.pending[100].ids) } } // Ensure that a page and its overflow is added to a transaction's freelist. func TestFreelist_free_overflow(t *testing.T) { f := newTestFreelist() - f.free(100, &page{id: 12, overflow: 3}) - if exp := []pgid{12, 13, 14, 15}; !reflect.DeepEqual(exp, f.pending[100].ids) { + f.free(100, common.NewPage(12, 0, 0, 3)) + if exp := []common.Pgid{12, 13, 14, 15}; !reflect.DeepEqual(exp, f.pending[100].ids) { t.Fatalf("exp=%v; got=%v", exp, f.pending[100].ids) } } @@ -33,17 +35,17 @@ func TestFreelist_free_overflow(t *testing.T) { // Ensure that a transaction's free pages can be released. func TestFreelist_release(t *testing.T) { f := newTestFreelist() - f.free(100, &page{id: 12, overflow: 1}) - f.free(100, &page{id: 9}) - f.free(102, &page{id: 39}) + f.free(100, common.NewPage(12, 0, 0, 1)) + f.free(100, common.NewPage(9, 0, 0, 0)) + f.free(102, common.NewPage(39, 0, 0, 0)) f.release(100) f.release(101) - if exp := []pgid{9, 12, 13}; !reflect.DeepEqual(exp, f.getFreePageIDs()) { + if exp := []common.Pgid{9, 12, 13}; !reflect.DeepEqual(exp, f.getFreePageIDs()) { t.Fatalf("exp=%v; got=%v", exp, f.getFreePageIDs()) } f.release(102) - if exp := []pgid{9, 12, 13, 39}; !reflect.DeepEqual(exp, f.getFreePageIDs()) { + if exp := []common.Pgid{9, 12, 13, 39}; !reflect.DeepEqual(exp, f.getFreePageIDs()) { t.Fatalf("exp=%v; got=%v", exp, f.getFreePageIDs()) } } @@ -51,33 +53,33 @@ func TestFreelist_release(t *testing.T) { // Ensure that releaseRange handles boundary conditions correctly func TestFreelist_releaseRange(t *testing.T) { type testRange struct { - begin, end txid + begin, end common.Txid } type testPage struct { - id pgid + id common.Pgid n int - allocTxn txid - freeTxn txid + allocTxn common.Txid + freeTxn common.Txid } var releaseRangeTests = []struct { title string pagesIn []testPage releaseRanges []testRange - wantFree []pgid + wantFree []common.Pgid }{ { title: "Single pending in range", pagesIn: []testPage{{id: 3, n: 1, allocTxn: 100, freeTxn: 200}}, releaseRanges: []testRange{{1, 300}}, - wantFree: []pgid{3}, + wantFree: []common.Pgid{3}, }, { title: "Single pending with minimum end range", pagesIn: []testPage{{id: 3, n: 1, allocTxn: 100, freeTxn: 200}}, releaseRanges: []testRange{{1, 200}}, - wantFree: []pgid{3}, + wantFree: []common.Pgid{3}, }, { title: "Single pending outsize minimum end range", @@ -89,7 +91,7 @@ func TestFreelist_releaseRange(t *testing.T) { title: "Single pending with minimum begin range", pagesIn: []testPage{{id: 3, n: 1, allocTxn: 100, freeTxn: 200}}, releaseRanges: []testRange{{100, 300}}, - wantFree: []pgid{3}, + wantFree: []common.Pgid{3}, }, { title: "Single pending outside minimum begin range", @@ -101,7 +103,7 @@ func TestFreelist_releaseRange(t *testing.T) { title: "Single pending in minimum range", pagesIn: []testPage{{id: 3, n: 1, allocTxn: 199, freeTxn: 200}}, releaseRanges: []testRange{{199, 200}}, - wantFree: []pgid{3}, + wantFree: []common.Pgid{3}, }, { title: "Single pending and read transaction at 199", @@ -146,16 +148,16 @@ func TestFreelist_releaseRange(t *testing.T) { {id: 9, n: 2, 
allocTxn: 175, freeTxn: 200}, }, releaseRanges: []testRange{{50, 149}, {151, 300}}, - wantFree: []pgid{4, 9, 10}, + wantFree: []common.Pgid{4, 9, 10}, }, } for _, c := range releaseRangeTests { f := newTestFreelist() - var ids []pgid + var ids []common.Pgid for _, p := range c.pagesIn { for i := uint64(0); i < uint64(p.n); i++ { - ids = append(ids, pgid(uint64(p.id)+i)) + ids = append(ids, common.Pgid(uint64(p.id)+i)) } } f.readIDs(ids) @@ -164,7 +166,7 @@ func TestFreelist_releaseRange(t *testing.T) { } for _, p := range c.pagesIn { - f.free(p.freeTxn, &page{id: p.id, overflow: uint32(p.n - 1)}) + f.free(p.freeTxn, common.NewPage(p.id, 0, 0, uint32(p.n-1))) } for _, r := range c.releaseRanges { @@ -179,11 +181,11 @@ func TestFreelist_releaseRange(t *testing.T) { func TestFreelistHashmap_allocate(t *testing.T) { f := newTestFreelist() - if f.freelistType != FreelistMapType { + if f.freelistType != common.FreelistMapType { t.Skip() } - ids := []pgid{3, 4, 5, 6, 7, 9, 12, 13, 18} + ids := []common.Pgid{3, 4, 5, 6, 7, 9, 12, 13, 18} f.readIDs(ids) f.allocate(1, 3) @@ -209,10 +211,10 @@ func TestFreelistHashmap_allocate(t *testing.T) { // Ensure that a freelist can find contiguous blocks of pages. func TestFreelistArray_allocate(t *testing.T) { f := newTestFreelist() - if f.freelistType != FreelistArrayType { + if f.freelistType != common.FreelistArrayType { t.Skip() } - ids := []pgid{3, 4, 5, 6, 7, 9, 12, 13, 18} + ids := []common.Pgid{3, 4, 5, 6, 7, 9, 12, 13, 18} f.readIDs(ids) if id := int(f.allocate(1, 3)); id != 3 { t.Fatalf("exp=3; got=%v", id) @@ -235,7 +237,7 @@ func TestFreelistArray_allocate(t *testing.T) { if id := int(f.allocate(1, 0)); id != 0 { t.Fatalf("exp=0; got=%v", id) } - if exp := []pgid{9, 18}; !reflect.DeepEqual(exp, f.getFreePageIDs()) { + if exp := []common.Pgid{9, 18}; !reflect.DeepEqual(exp, f.getFreePageIDs()) { t.Fatalf("exp=%v; got=%v", exp, f.getFreePageIDs()) } @@ -248,7 +250,7 @@ func TestFreelistArray_allocate(t *testing.T) { if id := int(f.allocate(1, 1)); id != 0 { t.Fatalf("exp=0; got=%v", id) } - if exp := []pgid{}; !reflect.DeepEqual(exp, f.getFreePageIDs()) { + if exp := []common.Pgid{}; !reflect.DeepEqual(exp, f.getFreePageIDs()) { t.Fatalf("exp=%v; got=%v", exp, f.getFreePageIDs()) } } @@ -257,12 +259,12 @@ func TestFreelistArray_allocate(t *testing.T) { func TestFreelist_read(t *testing.T) { // Create a page. var buf [4096]byte - page := (*page)(unsafe.Pointer(&buf[0])) - page.flags = freelistPageFlag - page.count = 2 + page := (*common.Page)(unsafe.Pointer(&buf[0])) + page.SetFlags(common.FreelistPageFlag) + page.SetCount(2) // Insert 2 page ids. - ids := (*[3]pgid)(unsafe.Pointer(uintptr(unsafe.Pointer(page)) + unsafe.Sizeof(*page))) + ids := (*[3]common.Pgid)(unsafe.Pointer(uintptr(unsafe.Pointer(page)) + unsafe.Sizeof(*page))) ids[0] = 23 ids[1] = 50 @@ -271,7 +273,7 @@ func TestFreelist_read(t *testing.T) { f.read(page) // Ensure that there are two page ids in the freelist. 
- if exp := []pgid{23, 50}; !reflect.DeepEqual(exp, f.getFreePageIDs()) { + if exp := []common.Pgid{23, 50}; !reflect.DeepEqual(exp, f.getFreePageIDs()) { t.Fatalf("exp=%v; got=%v", exp, f.getFreePageIDs()) } } @@ -282,10 +284,10 @@ func TestFreelist_write(t *testing.T) { var buf [4096]byte f := newTestFreelist() - f.readIDs([]pgid{12, 39}) - f.pending[100] = &txPending{ids: []pgid{28, 11}} - f.pending[101] = &txPending{ids: []pgid{3}} - p := (*page)(unsafe.Pointer(&buf[0])) + f.readIDs([]common.Pgid{12, 39}) + f.pending[100] = &txPending{ids: []common.Pgid{28, 11}} + f.pending[101] = &txPending{ids: []common.Pgid{3}} + p := (*common.Page)(unsafe.Pointer(&buf[0])) if err := f.write(p); err != nil { t.Fatal(err) } @@ -296,7 +298,7 @@ func TestFreelist_write(t *testing.T) { // Ensure that the freelist is correct. // All pages should be present and in reverse order. - if exp := []pgid{3, 11, 12, 28, 39}; !reflect.DeepEqual(exp, f2.getFreePageIDs()) { + if exp := []common.Pgid{3, 11, 12, 28, 39}; !reflect.DeepEqual(exp, f2.getFreePageIDs()) { t.Fatalf("exp=%v; got=%v", exp, f2.getFreePageIDs()) } } @@ -313,17 +315,17 @@ func benchmark_FreelistRelease(b *testing.B, size int) { for i := 0; i < b.N; i++ { txp := &txPending{ids: pending} f := newTestFreelist() - f.pending = map[txid]*txPending{1: txp} + f.pending = map[common.Txid]*txPending{1: txp} f.readIDs(ids) f.release(1) } } -func randomPgids(n int) []pgid { +func randomPgids(n int) []common.Pgid { rand.Seed(42) - pgids := make(pgids, n) + pgids := make(common.Pgids, n) for i := range pgids { - pgids[i] = pgid(rand.Int63()) + pgids[i] = common.Pgid(rand.Int63()) } sort.Sort(pgids) return pgids @@ -331,7 +333,7 @@ func randomPgids(n int) []pgid { func Test_freelist_ReadIDs_and_getFreePageIDs(t *testing.T) { f := newTestFreelist() - exp := []pgid{3, 4, 5, 6, 7, 9, 12, 13, 18} + exp := []common.Pgid{3, 4, 5, 6, 7, 9, 12, 13, 18} f.readIDs(exp) @@ -340,7 +342,7 @@ func Test_freelist_ReadIDs_and_getFreePageIDs(t *testing.T) { } f2 := newTestFreelist() - var exp2 []pgid + var exp2 []common.Pgid f2.readIDs(exp2) if got2 := f2.getFreePageIDs(); !reflect.DeepEqual(got2, exp2) { @@ -355,53 +357,53 @@ func Test_freelist_mergeWithExist(t *testing.T) { bm2 := pidSet{5: struct{}{}} tests := []struct { name string - ids []pgid - pgid pgid - want []pgid - wantForwardmap map[pgid]uint64 - wantBackwardmap map[pgid]uint64 + ids []common.Pgid + pgid common.Pgid + want []common.Pgid + wantForwardmap map[common.Pgid]uint64 + wantBackwardmap map[common.Pgid]uint64 wantfreemap map[uint64]pidSet }{ { name: "test1", - ids: []pgid{1, 2, 4, 5, 6}, + ids: []common.Pgid{1, 2, 4, 5, 6}, pgid: 3, - want: []pgid{1, 2, 3, 4, 5, 6}, - wantForwardmap: map[pgid]uint64{1: 6}, - wantBackwardmap: map[pgid]uint64{6: 6}, + want: []common.Pgid{1, 2, 3, 4, 5, 6}, + wantForwardmap: map[common.Pgid]uint64{1: 6}, + wantBackwardmap: map[common.Pgid]uint64{6: 6}, wantfreemap: map[uint64]pidSet{6: bm1}, }, { name: "test2", - ids: []pgid{1, 2, 5, 6}, + ids: []common.Pgid{1, 2, 5, 6}, pgid: 3, - want: []pgid{1, 2, 3, 5, 6}, - wantForwardmap: map[pgid]uint64{1: 3, 5: 2}, - wantBackwardmap: map[pgid]uint64{6: 2, 3: 3}, + want: []common.Pgid{1, 2, 3, 5, 6}, + wantForwardmap: map[common.Pgid]uint64{1: 3, 5: 2}, + wantBackwardmap: map[common.Pgid]uint64{6: 2, 3: 3}, wantfreemap: map[uint64]pidSet{3: bm1, 2: bm2}, }, { name: "test3", - ids: []pgid{1, 2}, + ids: []common.Pgid{1, 2}, pgid: 3, - want: []pgid{1, 2, 3}, - wantForwardmap: map[pgid]uint64{1: 3}, - wantBackwardmap: map[pgid]uint64{3: 3}, + 
want: []common.Pgid{1, 2, 3}, + wantForwardmap: map[common.Pgid]uint64{1: 3}, + wantBackwardmap: map[common.Pgid]uint64{3: 3}, wantfreemap: map[uint64]pidSet{3: bm1}, }, { name: "test4", - ids: []pgid{2, 3}, + ids: []common.Pgid{2, 3}, pgid: 1, - want: []pgid{1, 2, 3}, - wantForwardmap: map[pgid]uint64{1: 3}, - wantBackwardmap: map[pgid]uint64{3: 3}, + want: []common.Pgid{1, 2, 3}, + wantForwardmap: map[common.Pgid]uint64{1: 3}, + wantBackwardmap: map[common.Pgid]uint64{3: 3}, wantfreemap: map[uint64]pidSet{3: bm1}, }, } for _, tt := range tests { f := newTestFreelist() - if f.freelistType == FreelistArrayType { + if f.freelistType == common.FreelistArrayType { t.Skip() } f.readIDs(tt.ids) @@ -425,9 +427,9 @@ func Test_freelist_mergeWithExist(t *testing.T) { // newTestFreelist get the freelist type from env and initial the freelist func newTestFreelist() *freelist { - freelistType := FreelistArrayType - if env := os.Getenv(TestFreelistType); env == string(FreelistMapType) { - freelistType = FreelistMapType + freelistType := common.FreelistArrayType + if env := os.Getenv(TestFreelistType); env == string(common.FreelistMapType) { + freelistType = common.FreelistMapType } return newFreelist(freelistType) diff --git a/internal/btesting/btesting.go b/internal/btesting/btesting.go index b305072..b5b8145 100644 --- a/internal/btesting/btesting.go +++ b/internal/btesting/btesting.go @@ -12,6 +12,7 @@ import ( "github.com/stretchr/testify/require" bolt "go.etcd.io/bbolt" + "go.etcd.io/bbolt/internal/common" ) var statsFlag = flag.Bool("stats", false, "show performance stats") @@ -44,9 +45,9 @@ func MustOpenDBWithOption(t testing.TB, f string, o *bolt.Options) *DB { o = bolt.DefaultOptions } - freelistType := bolt.FreelistArrayType - if env := os.Getenv(TestFreelistType); env == string(bolt.FreelistMapType) { - freelistType = bolt.FreelistMapType + freelistType := common.FreelistArrayType + if env := os.Getenv(TestFreelistType); env == string(common.FreelistMapType) { + freelistType = common.FreelistMapType } o.FreelistType = freelistType diff --git a/internal/guts_cli/guts_cli.go b/internal/guts_cli/guts_cli.go index 30e5566..891ddb7 100644 --- a/internal/guts_cli/guts_cli.go +++ b/internal/guts_cli/guts_cli.go @@ -2,14 +2,13 @@ package guts_cli // Low level access to pages / data-structures of the bbolt file. -// TODO(ptab): Merge with bbolt/page file that should get ported to internal. - import ( "errors" "fmt" "io" "os" - "unsafe" + + "go.etcd.io/bbolt/internal/common" ) var ( @@ -17,231 +16,9 @@ var ( ErrCorrupt = errors.New("invalid value") ) -// PageHeaderSize represents the size of the bolt.Page header. -const PageHeaderSize = 16 - -// Represents a marker value to indicate that a file (Meta Page) is a Bolt DB. -const magic uint32 = 0xED0CDAED - -// DO NOT EDIT. Copied from the "bolt" package. -const maxAllocSize = 0xFFFFFFF - -// DO NOT EDIT. Copied from the "bolt" package. -const ( - branchPageFlag = 0x01 - leafPageFlag = 0x02 - metaPageFlag = 0x04 - freelistPageFlag = 0x10 -) - -// DO NOT EDIT. Copied from the "bolt" package. -const bucketLeafFlag = 0x01 - -// DO NOT EDIT. Copied from the "bolt" package. -type Pgid uint64 - -// DO NOT EDIT. Copied from the "bolt" package. -type txid uint64 - -// DO NOT EDIT. Copied from the "bolt" package. 
-type Meta struct {
- magic uint32
- version uint32
- pageSize uint32
- flags uint32
- root Bucket
- freelist Pgid
- pgid Pgid // High Water Mark (id of next added Page if the file growths)
- txid txid
- checksum uint64
-}
-
-func LoadPageMeta(buf []byte) *Meta {
- return (*Meta)(unsafe.Pointer(&buf[PageHeaderSize]))
-}
-
-func (m *Meta) RootBucket() *Bucket {
- return &m.root
-}
-
-func (m *Meta) Txid() uint64 {
- return uint64(m.txid)
-}
-
-func (m *Meta) Print(w io.Writer) {
- fmt.Fprintf(w, "Version: %d\n", m.version)
- fmt.Fprintf(w, "Page Size: %d bytes\n", m.pageSize)
- fmt.Fprintf(w, "Flags: %08x\n", m.flags)
- fmt.Fprintf(w, "Root: <pgid=%d>\n", m.root.root)
- fmt.Fprintf(w, "Freelist: <pgid=%d>\n", m.freelist)
- fmt.Fprintf(w, "HWM: <pgid=%d>\n", m.pgid)
- fmt.Fprintf(w, "Txn ID: %d\n", m.txid)
- fmt.Fprintf(w, "Checksum: %016x\n", m.checksum)
- fmt.Fprintf(w, "\n")
-}
-
-// DO NOT EDIT. Copied from the "bolt" package.
-type Bucket struct {
- root Pgid
- sequence uint64
-}
-
-const bucketHeaderSize = int(unsafe.Sizeof(Bucket{}))
-
-func LoadBucket(buf []byte) *Bucket {
- return (*Bucket)(unsafe.Pointer(&buf[0]))
-}
-
-func (b *Bucket) String() string {
- return fmt.Sprintf("<pgid=%d,seq=%d>", b.root, b.sequence)
-}
-
-func (b *Bucket) RootPage() Pgid {
- return b.root
-}
-
-func (b *Bucket) InlinePage(v []byte) *Page {
- return (*Page)(unsafe.Pointer(&v[bucketHeaderSize]))
-}
-
-// DO NOT EDIT. Copied from the "bolt" package.
-type Page struct {
- id Pgid
- flags uint16
- count uint16
- overflow uint32
- ptr uintptr
-}
-
-func LoadPage(buf []byte) *Page {
- return (*Page)(unsafe.Pointer(&buf[0]))
-}
-
-func (p *Page) FreelistPageCount() int {
- // Check for overflow and, if present, adjust actual element count.
- if p.count == 0xFFFF {
- return int(((*[maxAllocSize]Pgid)(unsafe.Pointer(&p.ptr)))[0])
- } else {
- return int(p.count)
- }
-}
-
-func (p *Page) FreelistPagePages() []Pgid {
- // Check for overflow and, if present, adjust starting index.
- idx := 0
- if p.count == 0xFFFF {
- idx = 1
- }
- return (*[maxAllocSize]Pgid)(unsafe.Pointer(&p.ptr))[idx:p.FreelistPageCount()]
-}
-
-func (p *Page) Overflow() uint32 {
- return p.overflow
-}
-
-func (p *Page) String() string {
- return fmt.Sprintf("ID: %d, Type: %s, count: %d, overflow: %d", p.id, p.Type(), p.count, p.overflow)
-}
-
-// DO NOT EDIT. Copied from the "bolt" package.
-
-// TODO(ptabor): Make the page-types an enum.
-func (p *Page) Type() string {
- if (p.flags & branchPageFlag) != 0 {
- return "branch"
- } else if (p.flags & leafPageFlag) != 0 {
- return "leaf"
- } else if (p.flags & metaPageFlag) != 0 {
- return "meta"
- } else if (p.flags & freelistPageFlag) != 0 {
- return "freelist"
- }
- return fmt.Sprintf("unknown<%02x>", p.flags)
-}
-
-func (p *Page) Count() uint16 {
- return p.count
-}
-
-func (p *Page) Id() Pgid {
- return p.id
-}
-
-// DO NOT EDIT. Copied from the "bolt" package.
-func (p *Page) LeafPageElement(index uint16) *LeafPageElement {
- n := &((*[0x7FFFFFF]LeafPageElement)(unsafe.Pointer(&p.ptr)))[index]
- return n
-}
-
-// DO NOT EDIT. Copied from the "bolt" package.
-func (p *Page) BranchPageElement(index uint16) *BranchPageElement {
- return &((*[0x7FFFFFF]BranchPageElement)(unsafe.Pointer(&p.ptr)))[index]
-}
-
-func (p *Page) SetId(target Pgid) {
- p.id = target
-}
-
-func (p *Page) SetCount(target uint16) {
- p.count = target
-}
-
-func (p *Page) SetOverflow(target uint32) {
- p.overflow = target
-}
-
-// DO NOT EDIT. Copied from the "bolt" package.
-type BranchPageElement struct { - pos uint32 - ksize uint32 - pgid Pgid -} - -// DO NOT EDIT. Copied from the "bolt" package. -func (n *BranchPageElement) Key() []byte { - buf := (*[maxAllocSize]byte)(unsafe.Pointer(n)) - return buf[n.pos : n.pos+n.ksize] -} - -func (n *BranchPageElement) PgId() Pgid { - return n.pgid -} - -// DO NOT EDIT. Copied from the "bolt" package. -type LeafPageElement struct { - flags uint32 - pos uint32 - ksize uint32 - vsize uint32 -} - -// DO NOT EDIT. Copied from the "bolt" package. -func (n *LeafPageElement) Key() []byte { - buf := (*[maxAllocSize]byte)(unsafe.Pointer(n)) - return buf[n.pos : n.pos+n.ksize] -} - -// DO NOT EDIT. Copied from the "bolt" package. -func (n *LeafPageElement) Value() []byte { - buf := (*[maxAllocSize]byte)(unsafe.Pointer(n)) - return buf[n.pos+n.ksize : n.pos+n.ksize+n.vsize] -} - -func (n *LeafPageElement) IsBucketEntry() bool { - return n.flags&uint32(bucketLeafFlag) != 0 -} - -func (n *LeafPageElement) Bucket() *Bucket { - if n.IsBucketEntry() { - return LoadBucket(n.Value()) - } else { - return nil - } -} - // ReadPage reads Page info & full Page data from a path. // This is not transactionally safe. -func ReadPage(path string, pageID uint64) (*Page, []byte, error) { +func ReadPage(path string, pageID uint64) (*common.Page, []byte, error) { // Find Page size. pageSize, hwm, err := ReadPageAndHWMSize(path) if err != nil { @@ -264,11 +41,11 @@ func ReadPage(path string, pageID uint64) (*Page, []byte, error) { } // Determine total number of blocks. - p := LoadPage(buf) - if p.id != Pgid(pageID) { - return nil, nil, fmt.Errorf("error: %w due to unexpected Page id: %d != %d", ErrCorrupt, p.id, pageID) + p := common.LoadPage(buf) + if p.Id() != common.Pgid(pageID) { + return nil, nil, fmt.Errorf("error: %w due to unexpected Page id: %d != %d", ErrCorrupt, p.Id(), pageID) } - overflowN := p.overflow + overflowN := p.Overflow() if overflowN >= uint32(hwm)-3 { // we exclude 2 Meta pages and the current Page. return nil, nil, fmt.Errorf("error: %w, Page claims to have %d overflow pages (>=hwm=%d). Interrupting to avoid risky OOM", ErrCorrupt, overflowN, hwm) } @@ -280,16 +57,16 @@ func ReadPage(path string, pageID uint64) (*Page, []byte, error) { } else if n != len(buf) { return nil, nil, io.ErrUnexpectedEOF } - p = LoadPage(buf) - if p.id != Pgid(pageID) { - return nil, nil, fmt.Errorf("error: %w due to unexpected Page id: %d != %d", ErrCorrupt, p.id, pageID) + p = common.LoadPage(buf) + if p.Id() != common.Pgid(pageID) { + return nil, nil, fmt.Errorf("error: %w due to unexpected Page id: %d != %d", ErrCorrupt, p.Id(), pageID) } return p, buf, nil } func WritePage(path string, pageBuf []byte) error { - page := LoadPage(pageBuf) + page := common.LoadPage(pageBuf) pageSize, _, err := ReadPageAndHWMSize(path) if err != nil { return err @@ -309,7 +86,7 @@ func WritePage(path string, pageBuf []byte) error { // ReadPageAndHWMSize reads Page size and HWM (id of the last+1 Page). // This is not transactionally safe. -func ReadPageAndHWMSize(path string) (uint64, Pgid, error) { +func ReadPageAndHWMSize(path string) (uint64, common.Pgid, error) { // Open database file. f, err := os.Open(path) if err != nil { @@ -324,28 +101,28 @@ func ReadPageAndHWMSize(path string) (uint64, Pgid, error) { } // Read Page size from metadata. 
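// Editor's note (illustrative sketch, not part of the patch): what
// ReadPageAndHWMSize extracts at this step, as a stdlib-only helper. The
// offsets assume the meta layout shown earlier in this patch (16-byte page
// header, then magic, version, pageSize, ...) and a little-endian machine;
// requires "encoding/binary", "io", "os":
func sketchReadPageSize(path string) (uint32, error) {
	f, err := os.Open(path)
	if err != nil {
		return 0, err
	}
	defer f.Close()
	buf := make([]byte, 0x1000)
	if _, err := io.ReadFull(f, buf); err != nil {
		return 0, err
	}
	// meta.pageSize sits 8 bytes into the meta struct: magic (4) + version (4).
	return binary.LittleEndian.Uint32(buf[16+8:]), nil
}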
- m := LoadPageMeta(buf) - if m.magic != magic { + m := common.LoadPageMeta(buf) + if m.Magic() != common.Magic { return 0, 0, fmt.Errorf("the Meta Page has wrong (unexpected) magic") } - return uint64(m.pageSize), Pgid(m.pgid), nil + return uint64(m.PageSize()), common.Pgid(m.Pgid()), nil } // GetRootPage returns the root-page (according to the most recent transaction). -func GetRootPage(path string) (root Pgid, activeMeta Pgid, err error) { +func GetRootPage(path string) (root common.Pgid, activeMeta common.Pgid, err error) { _, buf0, err0 := ReadPage(path, 0) if err0 != nil { return 0, 0, err0 } - m0 := LoadPageMeta(buf0) + m0 := common.LoadPageMeta(buf0) _, buf1, err1 := ReadPage(path, 1) if err1 != nil { return 0, 1, err1 } - m1 := LoadPageMeta(buf1) - if m0.txid < m1.txid { - return m1.root.root, 1, nil + m1 := common.LoadPageMeta(buf1) + if m0.Txid() < m1.Txid() { + return m1.RootBucket().RootPage(), 1, nil } else { - return m0.root.root, 0, nil + return m0.RootBucket().RootPage(), 0, nil } } diff --git a/internal/surgeon/surgeon.go b/internal/surgeon/surgeon.go index 7635837..d2220a2 100644 --- a/internal/surgeon/surgeon.go +++ b/internal/surgeon/surgeon.go @@ -2,10 +2,11 @@ package surgeon import ( "fmt" + "go.etcd.io/bbolt/internal/common" "go.etcd.io/bbolt/internal/guts_cli" ) -func CopyPage(path string, srcPage guts_cli.Pgid, target guts_cli.Pgid) error { +func CopyPage(path string, srcPage common.Pgid, target common.Pgid) error { p1, d1, err1 := guts_cli.ReadPage(path, uint64(srcPage)) if err1 != nil { return err1 @@ -14,7 +15,7 @@ func CopyPage(path string, srcPage guts_cli.Pgid, target guts_cli.Pgid) error { return guts_cli.WritePage(path, d1) } -func ClearPage(path string, pgId guts_cli.Pgid) error { +func ClearPage(path string, pgId common.Pgid) error { // Read the page p, buf, err := guts_cli.ReadPage(path, uint64(pgId)) if err != nil { diff --git a/internal/surgeon/xray.go b/internal/surgeon/xray.go index 4469341..b3f4e2b 100644 --- a/internal/surgeon/xray.go +++ b/internal/surgeon/xray.go @@ -9,6 +9,7 @@ import ( "bytes" "fmt" + "go.etcd.io/bbolt/internal/common" "go.etcd.io/bbolt/internal/guts_cli" ) @@ -20,7 +21,7 @@ func NewXRay(path string) XRay { return XRay{path} } -func (n XRay) traverse(stack []guts_cli.Pgid, callback func(page *guts_cli.Page, stack []guts_cli.Pgid) error) error { +func (n XRay) traverse(stack []common.Pgid, callback func(page *common.Page, stack []common.Pgid) error) error { p, data, err := guts_cli.ReadPage(n.path, uint64(stack[len(stack)-1])) if err != nil { return fmt.Errorf("failed reading page (stack %v): %w", stack, err) @@ -29,10 +30,10 @@ func (n XRay) traverse(stack []guts_cli.Pgid, callback func(page *guts_cli.Page, if err != nil { return fmt.Errorf("failed callback for page (stack %v): %w", stack, err) } - switch p.Type() { + switch p.Typ() { case "meta": { - m := guts_cli.LoadPageMeta(data) + m := common.LoadPageMeta(data) r := m.RootBucket().RootPage() return n.traverse(append(stack, r), callback) } @@ -40,7 +41,7 @@ func (n XRay) traverse(stack []guts_cli.Pgid, callback func(page *guts_cli.Page, { for i := uint16(0); i < p.Count(); i++ { bpe := p.BranchPageElement(i) - if err := n.traverse(append(stack, bpe.PgId()), callback); err != nil { + if err := n.traverse(append(stack, bpe.Pgid()), callback); err != nil { return err } } @@ -73,19 +74,19 @@ func (n XRay) traverse(stack []guts_cli.Pgid, callback func(page *guts_cli.Page, // As it traverses multiple buckets, so in theory there might be multiple keys with the given name. 
// Note: For simplicity it's currently implemented as traversing of the whole reachable tree. // If key is a bucket name, a page-path referencing the key will be returned as well. -func (n XRay) FindPathsToKey(key []byte) ([][]guts_cli.Pgid, error) { - var found [][]guts_cli.Pgid +func (n XRay) FindPathsToKey(key []byte) ([][]common.Pgid, error) { + var found [][]common.Pgid rootPage, _, err := guts_cli.GetRootPage(n.path) if err != nil { return nil, err } - err = n.traverse([]guts_cli.Pgid{rootPage}, - func(page *guts_cli.Page, stack []guts_cli.Pgid) error { - if page.Type() == "leaf" { + err = n.traverse([]common.Pgid{rootPage}, + func(page *common.Page, stack []common.Pgid) error { + if page.Typ() == "leaf" { for i := uint16(0); i < page.Count(); i++ { if bytes.Equal(page.LeafPageElement(i).Key(), key) { - var copyPath []guts_cli.Pgid + var copyPath []common.Pgid copyPath = append(copyPath, stack...) found = append(found, copyPath) } diff --git a/node.go b/node.go index 9c56150..9769345 100644 --- a/node.go +++ b/node.go @@ -5,6 +5,8 @@ import ( "fmt" "sort" "unsafe" + + "go.etcd.io/bbolt/internal/common" ) // node represents an in-memory, deserialized page. @@ -14,7 +16,7 @@ type node struct { unbalanced bool spilled bool key []byte - pgid pgid + pgid common.Pgid parent *node children nodes inodes inodes @@ -38,7 +40,7 @@ func (n *node) minKeys() int { // size returns the size of the node after serialization. func (n *node) size() int { - sz, elsz := pageHeaderSize, n.pageElementSize() + sz, elsz := common.PageHeaderSize, n.pageElementSize() for i := 0; i < len(n.inodes); i++ { item := &n.inodes[i] sz += elsz + uintptr(len(item.key)) + uintptr(len(item.value)) @@ -50,7 +52,7 @@ func (n *node) size() int { // This is an optimization to avoid calculating a large node when we only need // to know if it fits inside a certain page size. func (n *node) sizeLessThan(v uintptr) bool { - sz, elsz := pageHeaderSize, n.pageElementSize() + sz, elsz := common.PageHeaderSize, n.pageElementSize() for i := 0; i < len(n.inodes); i++ { item := &n.inodes[i] sz += elsz + uintptr(len(item.key)) + uintptr(len(item.value)) @@ -64,9 +66,9 @@ func (n *node) sizeLessThan(v uintptr) bool { // pageElementSize returns the size of each page element based on the type of node. func (n *node) pageElementSize() uintptr { if n.isLeaf { - return leafPageElementSize + return common.LeafPageElementSize } - return branchPageElementSize + return common.BranchPageElementSize } // childAt returns the child node at a given index. @@ -113,9 +115,9 @@ func (n *node) prevSibling() *node { } // put inserts a key/value. -func (n *node) put(oldKey, newKey, value []byte, pgId pgid, flags uint32) { - if pgId >= n.bucket.tx.meta.pgid { - panic(fmt.Sprintf("pgId (%d) above high water mark (%d)", pgId, n.bucket.tx.meta.pgid)) +func (n *node) put(oldKey, newKey, value []byte, pgId common.Pgid, flags uint32) { + if pgId >= n.bucket.tx.meta.Pgid() { + panic(fmt.Sprintf("pgId (%d) above high water mark (%d)", pgId, n.bucket.tx.meta.Pgid())) } else if len(oldKey) <= 0 { panic("put: zero-length old key") } else if len(newKey) <= 0 { @@ -126,7 +128,7 @@ func (n *node) put(oldKey, newKey, value []byte, pgId pgid, flags uint32) { index := sort.Search(len(n.inodes), func(i int) bool { return bytes.Compare(n.inodes[i].key, oldKey) != -1 }) // Add capacity and shift nodes if we don't have an exact match and need to insert. 
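// Editor's note (illustrative sketch, not part of the patch): node.put's
// insert-or-replace step in miniature — binary-search for the slot, then
// either overwrite in place or grow the slice and shift the tail right.
// Requires "bytes" and "sort":
func sketchPut(keys [][]byte, key []byte) [][]byte {
	i := sort.Search(len(keys), func(j int) bool { return bytes.Compare(keys[j], key) != -1 })
	exact := i < len(keys) && bytes.Equal(keys[i], key)
	if !exact {
		keys = append(keys, nil)   // grow by one slot
		copy(keys[i+1:], keys[i:]) // shift the tail right
	}
	keys[i] = key
	return keys
}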
- exact := (len(n.inodes) > 0 && index < len(n.inodes) && bytes.Equal(n.inodes[index].key, oldKey)) + exact := len(n.inodes) > 0 && index < len(n.inodes) && bytes.Equal(n.inodes[index].key, oldKey) if !exact { n.inodes = append(n.inodes, inode{}) copy(n.inodes[index+1:], n.inodes[index:]) @@ -137,7 +139,7 @@ func (n *node) put(oldKey, newKey, value []byte, pgId pgid, flags uint32) { inode.key = newKey inode.value = value inode.pgid = pgId - _assert(len(inode.key) > 0, "put: zero-length inode key") + common.Assert(len(inode.key) > 0, "put: zero-length inode key") } // del removes a key from the node. @@ -158,30 +160,30 @@ func (n *node) del(key []byte) { } // read initializes the node from a page. -func (n *node) read(p *page) { - n.pgid = p.id - n.isLeaf = ((p.flags & leafPageFlag) != 0) - n.inodes = make(inodes, int(p.count)) +func (n *node) read(p *common.Page) { + n.pgid = p.Id() + n.isLeaf = (p.Flags() & common.LeafPageFlag) != 0 + n.inodes = make(inodes, int(p.Count())) - for i := 0; i < int(p.count); i++ { + for i := 0; i < int(p.Count()); i++ { inode := &n.inodes[i] if n.isLeaf { - elem := p.leafPageElement(uint16(i)) - inode.flags = elem.flags - inode.key = elem.key() - inode.value = elem.value() + elem := p.LeafPageElement(uint16(i)) + inode.flags = elem.Flags() + inode.key = elem.Key() + inode.value = elem.Value() } else { - elem := p.branchPageElement(uint16(i)) - inode.pgid = elem.pgid - inode.key = elem.key() + elem := p.BranchPageElement(uint16(i)) + inode.pgid = elem.Pgid() + inode.key = elem.Key() } - _assert(len(inode.key) > 0, "read: zero-length inode key") + common.Assert(len(inode.key) > 0, "read: zero-length inode key") } - // Save first key so we can find the node in the parent when we spill. + // Save first key, so we can find the node in the parent when we spill. if len(n.inodes) > 0 { n.key = n.inodes[0].key - _assert(len(n.key) > 0, "read: zero-length node key") + common.Assert(len(n.key) > 0, "read: zero-length node key") } else { n.key = nil } @@ -190,23 +192,23 @@ func (n *node) read(p *page) { // write writes the items onto one or more pages. // The page should have p.id (might be 0 for meta or bucket-inline page) and p.overflow set // and the rest should be zeroed. -func (n *node) write(p *page) { - _assert(p.count == 0 && p.flags == 0, "node cannot be written into a not empty page") +func (n *node) write(p *common.Page) { + common.Assert(p.Count() == 0 && p.Flags() == 0, "node cannot be written into a not empty page") // Initialize page. if n.isLeaf { - p.flags = leafPageFlag + p.SetFlags(common.LeafPageFlag) } else { - p.flags = branchPageFlag + p.SetFlags(common.BranchPageFlag) } if len(n.inodes) >= 0xFFFF { - panic(fmt.Sprintf("inode overflow: %d (pgid=%d)", len(n.inodes), p.id)) + panic(fmt.Sprintf("inode overflow: %d (pgid=%d)", len(n.inodes), p.Id())) } - p.count = uint16(len(n.inodes)) + p.SetCount(uint16(len(n.inodes))) // Stop here if there are no items to write. - if p.count == 0 { + if p.Count() == 0 { return } @@ -214,27 +216,27 @@ func (n *node) write(p *page) { // off tracks the offset into p of the start of the next data. off := unsafe.Sizeof(*p) + n.pageElementSize()*uintptr(len(n.inodes)) for i, item := range n.inodes { - _assert(len(item.key) > 0, "write: zero-length inode key") + common.Assert(len(item.key) > 0, "write: zero-length inode key") // Create a slice to write into of needed size and advance // byte pointer for next iteration. 
sz := len(item.key) + len(item.value)
- b := unsafeByteSlice(unsafe.Pointer(p), off, 0, sz)
+ b := common.UnsafeByteSlice(unsafe.Pointer(p), off, 0, sz)
off += uintptr(sz)

// Write the page element.
if n.isLeaf {
- elem := p.leafPageElement(uint16(i))
- elem.pos = uint32(uintptr(unsafe.Pointer(&b[0])) - uintptr(unsafe.Pointer(elem)))
- elem.flags = item.flags
- elem.ksize = uint32(len(item.key))
- elem.vsize = uint32(len(item.value))
+ elem := p.LeafPageElement(uint16(i))
+ elem.SetPos(uint32(uintptr(unsafe.Pointer(&b[0])) - uintptr(unsafe.Pointer(elem))))
+ elem.SetFlags(item.flags)
+ elem.SetKsize(uint32(len(item.key)))
+ elem.SetVsize(uint32(len(item.value)))
} else {
- elem := p.branchPageElement(uint16(i))
- elem.pos = uint32(uintptr(unsafe.Pointer(&b[0])) - uintptr(unsafe.Pointer(elem)))
- elem.ksize = uint32(len(item.key))
- elem.pgid = item.pgid
- _assert(elem.pgid != p.id, "write: circular dependency occurred")
+ elem := p.BranchPageElement(uint16(i))
+ elem.SetPos(uint32(uintptr(unsafe.Pointer(&b[0])) - uintptr(unsafe.Pointer(elem))))
+ elem.SetKsize(uint32(len(item.key)))
+ elem.SetPgid(item.pgid)
+ common.Assert(elem.Pgid() != p.Id(), "write: circular dependency occurred")
}

// Write data for the element to the end of the page.
@@ -273,7 +275,7 @@ func (n *node) split(pageSize uintptr) []*node {
func (n *node) splitTwo(pageSize uintptr) (*node, *node) {
// Ignore the split if the page doesn't have at least enough nodes for
// two pages or if the nodes can fit in a single page.
- if len(n.inodes) <= (minKeysPerPage*2) || n.sizeLessThan(pageSize) {
+ if len(n.inodes) <= (common.MinKeysPerPage*2) || n.sizeLessThan(pageSize) {
return n, nil
}

@@ -313,17 +315,17 @@ func (n *node) splitTwo(pageSize uintptr) (*node, *node) {
// It returns the index as well as the size of the first page.
// This is only called from split().
func (n *node) splitIndex(threshold int) (index, sz uintptr) {
- sz = pageHeaderSize
+ sz = common.PageHeaderSize

// Loop until we only have the minimum number of keys required for the second page.
- for i := 0; i < len(n.inodes)-minKeysPerPage; i++ {
+ for i := 0; i < len(n.inodes)-common.MinKeysPerPage; i++ {
index = uintptr(i)
inode := n.inodes[i]
elsize := n.pageElementSize() + uintptr(len(inode.key)) + uintptr(len(inode.value))

// If we have at least the minimum number of keys and adding another
// node would put us over the threshold then exit and return.
- if index >= minKeysPerPage && sz+elsize > uintptr(threshold) {
+ if index >= common.MinKeysPerPage && sz+elsize > uintptr(threshold) {
break
}

@@ -360,7 +362,7 @@ func (n *node) spill() error {
for _, node := range nodes {
// Add node's page to the freelist if it's not new.
if node.pgid > 0 {
- tx.db.freelist.free(tx.meta.txid, tx.page(node.pgid))
+ tx.db.freelist.free(tx.meta.Txid(), tx.page(node.pgid))
node.pgid = 0
}

@@ -371,10 +373,10 @@ func (n *node) spill() error {
}

// Write the node.
- if p.id >= tx.meta.pgid {
- panic(fmt.Sprintf("pgid (%d) above high water mark (%d)", p.id, tx.meta.pgid))
+ if p.Id() >= tx.meta.Pgid() {
+ panic(fmt.Sprintf("pgid (%d) above high water mark (%d)", p.Id(), tx.meta.Pgid()))
}
- node.pgid = p.id
+ node.pgid = p.Id()
node.write(p)
node.spilled = true

@@ -387,7 +389,7 @@ func (n *node) spill() error {
node.parent.put(key, node.inodes[0].key, nil, node.pgid, 0)
node.key = node.inodes[0].key
- _assert(len(node.key) > 0, "spill: zero-length node key")
+ common.Assert(len(node.key) > 0, "spill: zero-length node key")
}

// Update the statistics.
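
// A minimal sketch (not part of this patch) of the accessor style the
// node.write hunk above now relies on: direct field writes such as
// elem.pos = ... become setters on the element types that moved into
// internal/common. The unexported field names are assumed from the old
// page.go, which is deleted further down in this patch; the actual
// definitions in go.etcd.io/bbolt/internal/common may differ in detail.
package common

// LeafPageElement represents a node on a leaf page.
type LeafPageElement struct {
	flags uint32
	pos   uint32
	ksize uint32
	vsize uint32
}

// Getters/setters replace the direct field access used before the refactor.
func (n *LeafPageElement) Flags() uint32     { return n.flags }
func (n *LeafPageElement) SetFlags(v uint32) { n.flags = v }
func (n *LeafPageElement) SetPos(v uint32)   { n.pos = v }
func (n *LeafPageElement) SetKsize(v uint32) { n.ksize = v }
func (n *LeafPageElement) SetVsize(v uint32) { n.vsize = v }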
@@ -457,11 +459,11 @@ func (n *node) rebalance() { return } - _assert(n.parent.numChildren() > 1, "parent must have at least 2 children") + common.Assert(n.parent.numChildren() > 1, "parent must have at least 2 children") // Destination node is right sibling if idx == 0, otherwise left sibling. var target *node - var useNextSibling = (n.parent.childIndex(n) == 0) + var useNextSibling = n.parent.childIndex(n) == 0 if useNextSibling { target = n.nextSibling() } else { @@ -525,7 +527,7 @@ func (n *node) dereference() { key := make([]byte, len(n.key)) copy(key, n.key) n.key = key - _assert(n.pgid == 0 || len(n.key) > 0, "dereference: zero-length node key on existing node") + common.Assert(n.pgid == 0 || len(n.key) > 0, "dereference: zero-length node key on existing node") } for i := range n.inodes { @@ -534,7 +536,7 @@ func (n *node) dereference() { key := make([]byte, len(inode.key)) copy(key, inode.key) inode.key = key - _assert(len(inode.key) > 0, "dereference: zero-length inode key") + common.Assert(len(inode.key) > 0, "dereference: zero-length inode key") value := make([]byte, len(inode.value)) copy(value, inode.value) @@ -553,7 +555,7 @@ func (n *node) dereference() { // free adds the node's underlying page to the freelist. func (n *node) free() { if n.pgid != 0 { - n.bucket.tx.db.freelist.free(n.bucket.tx.meta.txid, n.bucket.tx.page(n.pgid)) + n.bucket.tx.db.freelist.free(n.bucket.tx.meta.Txid(), n.bucket.tx.page(n.pgid)) n.pgid = 0 } } @@ -602,7 +604,7 @@ func (s nodes) Less(i, j int) bool { // to an element which hasn't been added to a page yet. type inode struct { flags uint32 - pgid pgid + pgid common.Pgid key []byte value []byte } diff --git a/node_test.go b/node_test.go index eea4d25..6d286e9 100644 --- a/node_test.go +++ b/node_test.go @@ -3,15 +3,19 @@ package bbolt import ( "testing" "unsafe" + + "go.etcd.io/bbolt/internal/common" ) // Ensure that a node can insert a key/value. func TestNode_put(t *testing.T) { - n := &node{inodes: make(inodes, 0), bucket: &Bucket{tx: &Tx{meta: &meta{pgid: 1}}}} + m := &common.Meta{} + m.SetPgid(1) + n := &node{inodes: make(inodes, 0), bucket: &Bucket{tx: &Tx{meta: m}}} n.put([]byte("baz"), []byte("baz"), []byte("2"), 0, 0) n.put([]byte("foo"), []byte("foo"), []byte("0"), 0, 0) n.put([]byte("bar"), []byte("bar"), []byte("1"), 0, 0) - n.put([]byte("foo"), []byte("foo"), []byte("3"), 0, leafPageFlag) + n.put([]byte("foo"), []byte("foo"), []byte("3"), 0, common.LeafPageFlag) if len(n.inodes) != 3 { t.Fatalf("exp=3; got=%d", len(n.inodes)) @@ -25,7 +29,7 @@ func TestNode_put(t *testing.T) { if k, v := n.inodes[2].key, n.inodes[2].value; string(k) != "foo" || string(v) != "3" { t.Fatalf("exp=; got=<%s,%s>", k, v) } - if n.inodes[2].flags != uint32(leafPageFlag) { + if n.inodes[2].flags != uint32(common.LeafPageFlag) { t.Fatalf("not a leaf: %d", n.inodes[2].flags) } } @@ -34,18 +38,19 @@ func TestNode_put(t *testing.T) { func TestNode_read_LeafPage(t *testing.T) { // Create a page. var buf [4096]byte - page := (*page)(unsafe.Pointer(&buf[0])) - page.flags = leafPageFlag - page.count = 2 + page := (*common.Page)(unsafe.Pointer(&buf[0])) + page.SetFlags(common.LeafPageFlag) + page.SetCount(2) // Insert 2 elements at the beginning. 
sizeof(leafPageElement) == 16 - nodes := (*[3]leafPageElement)(unsafe.Pointer(uintptr(unsafe.Pointer(page)) + unsafe.Sizeof(*page))) - nodes[0] = leafPageElement{flags: 0, pos: 32, ksize: 3, vsize: 4} // pos = sizeof(leafPageElement) * 2 - nodes[1] = leafPageElement{flags: 0, pos: 23, ksize: 10, vsize: 3} // pos = sizeof(leafPageElement) + 3 + 4 + nodes := page.LeafPageElements() + //nodes := (*[3]leafPageElement)(unsafe.Pointer(uintptr(unsafe.Pointer(page)) + unsafe.Sizeof(*page))) + nodes[0] = *common.NewLeafPageElement(0, 32, 3, 4) // pos = sizeof(leafPageElement) * 2 + nodes[1] = *common.NewLeafPageElement(0, 23, 10, 3) // pos = sizeof(leafPageElement) + 3 + 4 // Write data for the nodes at the end. const s = "barfoozhelloworldbye" - data := unsafeByteSlice(unsafe.Pointer(&nodes[2]), 0, 0, len(s)) + data := common.UnsafeByteSlice(unsafe.Pointer(uintptr(unsafe.Pointer(page))+unsafe.Sizeof(*page)+common.LeafPageElementSize*2), 0, 0, len(s)) copy(data, s) // Deserialize page into a leaf. @@ -70,14 +75,16 @@ func TestNode_read_LeafPage(t *testing.T) { // Ensure that a node can serialize into a leaf page. func TestNode_write_LeafPage(t *testing.T) { // Create a node. - n := &node{isLeaf: true, inodes: make(inodes, 0), bucket: &Bucket{tx: &Tx{db: &DB{}, meta: &meta{pgid: 1}}}} + m := &common.Meta{} + m.SetPgid(1) + n := &node{isLeaf: true, inodes: make(inodes, 0), bucket: &Bucket{tx: &Tx{db: &DB{}, meta: m}}} n.put([]byte("susy"), []byte("susy"), []byte("que"), 0, 0) n.put([]byte("ricki"), []byte("ricki"), []byte("lake"), 0, 0) n.put([]byte("john"), []byte("john"), []byte("johnson"), 0, 0) // Write it to a page. var buf [4096]byte - p := (*page)(unsafe.Pointer(&buf[0])) + p := (*common.Page)(unsafe.Pointer(&buf[0])) n.write(p) // Read the page back in. @@ -102,7 +109,9 @@ func TestNode_write_LeafPage(t *testing.T) { // Ensure that a node can split into appropriate subgroups. func TestNode_split(t *testing.T) { // Create a node. - n := &node{inodes: make(inodes, 0), bucket: &Bucket{tx: &Tx{db: &DB{}, meta: &meta{pgid: 1}}}} + m := &common.Meta{} + m.SetPgid(1) + n := &node{inodes: make(inodes, 0), bucket: &Bucket{tx: &Tx{db: &DB{}, meta: m}}} n.put([]byte("00000001"), []byte("00000001"), []byte("0123456701234567"), 0, 0) n.put([]byte("00000002"), []byte("00000002"), []byte("0123456701234567"), 0, 0) n.put([]byte("00000003"), []byte("00000003"), []byte("0123456701234567"), 0, 0) @@ -127,7 +136,9 @@ func TestNode_split(t *testing.T) { // Ensure that a page with the minimum number of inodes just returns a single node. func TestNode_split_MinKeys(t *testing.T) { // Create a node. - n := &node{inodes: make(inodes, 0), bucket: &Bucket{tx: &Tx{db: &DB{}, meta: &meta{pgid: 1}}}} + m := &common.Meta{} + m.SetPgid(1) + n := &node{inodes: make(inodes, 0), bucket: &Bucket{tx: &Tx{db: &DB{}, meta: m}}} n.put([]byte("00000001"), []byte("00000001"), []byte("0123456701234567"), 0, 0) n.put([]byte("00000002"), []byte("00000002"), []byte("0123456701234567"), 0, 0) @@ -141,7 +152,9 @@ func TestNode_split_MinKeys(t *testing.T) { // Ensure that a node that has keys that all fit on a page just returns one leaf. func TestNode_split_SinglePage(t *testing.T) { // Create a node. 
- n := &node{inodes: make(inodes, 0), bucket: &Bucket{tx: &Tx{db: &DB{}, meta: &meta{pgid: 1}}}} + m := &common.Meta{} + m.SetPgid(1) + n := &node{inodes: make(inodes, 0), bucket: &Bucket{tx: &Tx{db: &DB{}, meta: m}}} n.put([]byte("00000001"), []byte("00000001"), []byte("0123456701234567"), 0, 0) n.put([]byte("00000002"), []byte("00000002"), []byte("0123456701234567"), 0, 0) n.put([]byte("00000003"), []byte("00000003"), []byte("0123456701234567"), 0, 0) diff --git a/page.go b/page.go deleted file mode 100644 index 379645c..0000000 --- a/page.go +++ /dev/null @@ -1,214 +0,0 @@ -package bbolt - -import ( - "fmt" - "os" - "sort" - "unsafe" -) - -const pageHeaderSize = unsafe.Sizeof(page{}) - -const minKeysPerPage = 2 - -const branchPageElementSize = unsafe.Sizeof(branchPageElement{}) -const leafPageElementSize = unsafe.Sizeof(leafPageElement{}) - -const ( - branchPageFlag = 0x01 - leafPageFlag = 0x02 - metaPageFlag = 0x04 - freelistPageFlag = 0x10 -) - -const ( - bucketLeafFlag = 0x01 -) - -type pgid uint64 - -type page struct { - id pgid - flags uint16 - count uint16 - overflow uint32 -} - -// typ returns a human readable page type string used for debugging. -func (p *page) typ() string { - if (p.flags & branchPageFlag) != 0 { - return "branch" - } else if (p.flags & leafPageFlag) != 0 { - return "leaf" - } else if (p.flags & metaPageFlag) != 0 { - return "meta" - } else if (p.flags & freelistPageFlag) != 0 { - return "freelist" - } - return fmt.Sprintf("unknown<%02x>", p.flags) -} - -// meta returns a pointer to the metadata section of the page. -func (p *page) meta() *meta { - return (*meta)(unsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p))) -} - -func (p *page) fastCheck(id pgid) { - _assert(p.id == id, "Page expected to be: %v, but self identifies as %v", id, p.id) - // Only one flag of page-type can be set. - _assert(p.flags == branchPageFlag || - p.flags == leafPageFlag || - p.flags == metaPageFlag || - p.flags == freelistPageFlag, - "page %v: has unexpected type/flags: %x", p.id, p.flags) -} - -// leafPageElement retrieves the leaf node by index -func (p *page) leafPageElement(index uint16) *leafPageElement { - return (*leafPageElement)(unsafeIndex(unsafe.Pointer(p), unsafe.Sizeof(*p), - leafPageElementSize, int(index))) -} - -// leafPageElements retrieves a list of leaf nodes. -func (p *page) leafPageElements() []leafPageElement { - if p.count == 0 { - return nil - } - var elems []leafPageElement - data := unsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p)) - unsafeSlice(unsafe.Pointer(&elems), data, int(p.count)) - return elems -} - -// branchPageElement retrieves the branch node by index -func (p *page) branchPageElement(index uint16) *branchPageElement { - return (*branchPageElement)(unsafeIndex(unsafe.Pointer(p), unsafe.Sizeof(*p), - unsafe.Sizeof(branchPageElement{}), int(index))) -} - -// branchPageElements retrieves a list of branch nodes. -func (p *page) branchPageElements() []branchPageElement { - if p.count == 0 { - return nil - } - var elems []branchPageElement - data := unsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p)) - unsafeSlice(unsafe.Pointer(&elems), data, int(p.count)) - return elems -} - -// dump writes n bytes of the page to STDERR as hex output. 
-func (p *page) hexdump(n int) { - buf := unsafeByteSlice(unsafe.Pointer(p), 0, 0, n) - fmt.Fprintf(os.Stderr, "%x\n", buf) -} - -type pages []*page - -func (s pages) Len() int { return len(s) } -func (s pages) Swap(i, j int) { s[i], s[j] = s[j], s[i] } -func (s pages) Less(i, j int) bool { return s[i].id < s[j].id } - -// branchPageElement represents a node on a branch page. -type branchPageElement struct { - pos uint32 - ksize uint32 - pgid pgid -} - -// key returns a byte slice of the node key. -func (n *branchPageElement) key() []byte { - return unsafeByteSlice(unsafe.Pointer(n), 0, int(n.pos), int(n.pos)+int(n.ksize)) -} - -// leafPageElement represents a node on a leaf page. -type leafPageElement struct { - flags uint32 - pos uint32 - ksize uint32 - vsize uint32 -} - -// key returns a byte slice of the node key. -func (n *leafPageElement) key() []byte { - i := int(n.pos) - j := i + int(n.ksize) - return unsafeByteSlice(unsafe.Pointer(n), 0, i, j) -} - -// value returns a byte slice of the node value. -func (n *leafPageElement) value() []byte { - i := int(n.pos) + int(n.ksize) - j := i + int(n.vsize) - return unsafeByteSlice(unsafe.Pointer(n), 0, i, j) -} - -// PageInfo represents human readable information about a page. -type PageInfo struct { - ID int - Type string - Count int - OverflowCount int -} - -type pgids []pgid - -func (s pgids) Len() int { return len(s) } -func (s pgids) Swap(i, j int) { s[i], s[j] = s[j], s[i] } -func (s pgids) Less(i, j int) bool { return s[i] < s[j] } - -// merge returns the sorted union of a and b. -func (a pgids) merge(b pgids) pgids { - // Return the opposite slice if one is nil. - if len(a) == 0 { - return b - } - if len(b) == 0 { - return a - } - merged := make(pgids, len(a)+len(b)) - mergepgids(merged, a, b) - return merged -} - -// mergepgids copies the sorted union of a and b into dst. -// If dst is too small, it panics. -func mergepgids(dst, a, b pgids) { - if len(dst) < len(a)+len(b) { - panic(fmt.Errorf("mergepgids bad len %d < %d + %d", len(dst), len(a), len(b))) - } - // Copy in the opposite slice if one is nil. - if len(a) == 0 { - copy(dst, b) - return - } - if len(b) == 0 { - copy(dst, a) - return - } - - // Merged will hold all elements from both lists. - merged := dst[:0] - - // Assign lead to the slice with a lower starting value, follow to the higher value. - lead, follow := a, b - if b[0] < a[0] { - lead, follow = b, a - } - - // Continue while there are elements in the lead. - for len(lead) > 0 { - // Merge largest prefix of lead that is ahead of follow[0]. - n := sort.Search(len(lead), func(i int) bool { return lead[i] > follow[0] }) - merged = append(merged, lead[:n]...) - if n >= len(lead) { - break - } - - // Swap lead and follow. - lead, follow = follow, lead[n:] - } - - // Append what's left in follow. - _ = append(merged, follow...) -} diff --git a/page_test.go b/page_test.go deleted file mode 100644 index 9f5b7c0..0000000 --- a/page_test.go +++ /dev/null @@ -1,72 +0,0 @@ -package bbolt - -import ( - "reflect" - "sort" - "testing" - "testing/quick" -) - -// Ensure that the page type can be returned in human readable format. 
-func TestPage_typ(t *testing.T) { - if typ := (&page{flags: branchPageFlag}).typ(); typ != "branch" { - t.Fatalf("exp=branch; got=%v", typ) - } - if typ := (&page{flags: leafPageFlag}).typ(); typ != "leaf" { - t.Fatalf("exp=leaf; got=%v", typ) - } - if typ := (&page{flags: metaPageFlag}).typ(); typ != "meta" { - t.Fatalf("exp=meta; got=%v", typ) - } - if typ := (&page{flags: freelistPageFlag}).typ(); typ != "freelist" { - t.Fatalf("exp=freelist; got=%v", typ) - } - if typ := (&page{flags: 20000}).typ(); typ != "unknown<4e20>" { - t.Fatalf("exp=unknown<4e20>; got=%v", typ) - } -} - -// Ensure that the hexdump debugging function doesn't blow up. -func TestPage_dump(t *testing.T) { - (&page{id: 256}).hexdump(16) -} - -func TestPgids_merge(t *testing.T) { - a := pgids{4, 5, 6, 10, 11, 12, 13, 27} - b := pgids{1, 3, 8, 9, 25, 30} - c := a.merge(b) - if !reflect.DeepEqual(c, pgids{1, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 25, 27, 30}) { - t.Errorf("mismatch: %v", c) - } - - a = pgids{4, 5, 6, 10, 11, 12, 13, 27, 35, 36} - b = pgids{8, 9, 25, 30} - c = a.merge(b) - if !reflect.DeepEqual(c, pgids{4, 5, 6, 8, 9, 10, 11, 12, 13, 25, 27, 30, 35, 36}) { - t.Errorf("mismatch: %v", c) - } -} - -func TestPgids_merge_quick(t *testing.T) { - if err := quick.Check(func(a, b pgids) bool { - // Sort incoming lists. - sort.Sort(a) - sort.Sort(b) - - // Merge the two lists together. - got := a.merge(b) - - // The expected value should be the two lists combined and sorted. - exp := append(a, b...) - sort.Sort(exp) - - if !reflect.DeepEqual(exp, got) { - t.Errorf("\nexp=%+v\ngot=%+v\n", exp, got) - return false - } - - return true - }, nil); err != nil { - t.Fatal(err) - } -} diff --git a/tx.go b/tx.go index 2fac8c0..3436442 100644 --- a/tx.go +++ b/tx.go @@ -9,10 +9,9 @@ import ( "sync/atomic" "time" "unsafe" -) -// txid represents the internal transaction identifier. -type txid uint64 + "go.etcd.io/bbolt/internal/common" +) // Tx represents a read-only or read/write transaction on the database. // Read-only transactions can be used for retrieving values for keys and creating cursors. @@ -26,9 +25,9 @@ type Tx struct { writable bool managed bool db *DB - meta *meta + meta *common.Meta root Bucket - pages map[pgid]*page + pages map[common.Pgid]*common.Page stats TxStats commitHandlers []func() @@ -47,24 +46,24 @@ func (tx *Tx) init(db *DB) { tx.pages = nil // Copy the meta page since it can be changed by the writer. - tx.meta = &meta{} - db.meta().copy(tx.meta) + tx.meta = &common.Meta{} + db.meta().Copy(tx.meta) // Copy over the root bucket. tx.root = newBucket(tx) - tx.root.bucket = &bucket{} - *tx.root.bucket = tx.meta.root + tx.root.InBucket = &common.InBucket{} + *tx.root.InBucket = *(tx.meta.RootBucket()) // Increment the transaction id and add a page cache for writable transactions. if tx.writable { - tx.pages = make(map[pgid]*page) - tx.meta.txid += txid(1) + tx.pages = make(map[common.Pgid]*common.Page) + tx.meta.IncTxid() } } // ID returns the transaction id. func (tx *Tx) ID() int { - return int(tx.meta.txid) + return int(tx.meta.Txid()) } // DB returns a reference to the database that created the transaction. @@ -74,7 +73,7 @@ func (tx *Tx) DB() *DB { // Size returns current database size in bytes as seen by this transaction. func (tx *Tx) Size() int64 { - return int64(tx.meta.pgid) * int64(tx.db.pageSize) + return int64(tx.meta.Pgid()) * int64(tx.db.pageSize) } // Writable returns whether the transaction can perform write operations. 
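
// A rough sketch (also not part of the patch) of the common.Meta surface
// implied by the tx.go hunks around this point: meta fields that used to be
// read and written directly (tx.meta.txid, tx.meta.pgid, tx.meta.root.root,
// tx.meta.freelist, ...) are now unexported and reached through accessors.
// The unexported field names below are assumptions carried over from the
// pre-refactor structs visible elsewhere in this diff.
package common

type Txid uint64

type Pgid uint64

// InBucket holds the bucket header that the refactor moved out of bucket.go.
type InBucket struct {
	root     Pgid
	sequence uint64
}

func (b *InBucket) RootPage() Pgid      { return b.root }
func (b *InBucket) SetRootPage(id Pgid) { b.root = id }

// Meta mirrors the old meta page layout behind getters and setters.
type Meta struct {
	magic    uint32
	pageSize uint32
	root     InBucket
	freelist Pgid
	pgid     Pgid
	txid     Txid
	checksum uint64
}

func (m *Meta) Txid() Txid            { return m.txid }
func (m *Meta) IncTxid()              { m.txid++ }
func (m *Meta) DecTxid()              { m.txid-- }
func (m *Meta) Pgid() Pgid            { return m.pgid }
func (m *Meta) SetPgid(id Pgid)       { m.pgid = id }
func (m *Meta) Freelist() Pgid        { return m.freelist }
func (m *Meta) SetFreelist(id Pgid)   { m.freelist = id }
func (m *Meta) RootBucket() *InBucket { return &m.root }
func (m *Meta) Copy(dest *Meta)       { *dest = *m }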
@@ -140,11 +139,11 @@ func (tx *Tx) OnCommit(fn func()) { // Returns an error if a disk write error occurs, or if Commit is // called on a read-only transaction. func (tx *Tx) Commit() error { - _assert(!tx.managed, "managed tx commit not allowed") + common.Assert(!tx.managed, "managed tx commit not allowed") if tx.db == nil { - return ErrTxClosed + return common.ErrTxClosed } else if !tx.writable { - return ErrTxNotWritable + return common.ErrTxNotWritable } // TODO(benbjohnson): Use vectorized I/O to write out dirty pages. @@ -156,7 +155,7 @@ func (tx *Tx) Commit() error { tx.stats.IncRebalanceTime(time.Since(startTime)) } - opgid := tx.meta.pgid + opgid := tx.meta.Pgid() // spill data onto dirty pages. startTime = time.Now() @@ -167,11 +166,11 @@ func (tx *Tx) Commit() error { tx.stats.IncSpillTime(time.Since(startTime)) // Free the old root bucket. - tx.meta.root.root = tx.root.root + tx.meta.RootBucket().SetRootPage(tx.root.RootPage()) // Free the old freelist because commit writes out a fresh freelist. - if tx.meta.freelist != pgidNoFreelist { - tx.db.freelist.free(tx.meta.txid, tx.db.page(tx.meta.freelist)) + if tx.meta.Freelist() != common.PgidNoFreelist { + tx.db.freelist.free(tx.meta.Txid(), tx.db.page(tx.meta.Freelist())) } if !tx.db.NoFreelistSync { @@ -180,12 +179,12 @@ func (tx *Tx) Commit() error { return err } } else { - tx.meta.freelist = pgidNoFreelist + tx.meta.SetFreelist(common.PgidNoFreelist) } // If the high water mark has moved up then attempt to grow the database. - if tx.meta.pgid > opgid { - if err := tx.db.grow(int(tx.meta.pgid+1) * tx.db.pageSize); err != nil { + if tx.meta.Pgid() > opgid { + if err := tx.db.grow(int(tx.meta.Pgid()+1) * tx.db.pageSize); err != nil { tx.rollback() return err } @@ -244,7 +243,7 @@ func (tx *Tx) commitFreelist() error { tx.rollback() return err } - tx.meta.freelist = p.id + tx.meta.SetFreelist(p.Id()) return nil } @@ -252,9 +251,9 @@ func (tx *Tx) commitFreelist() error { // Rollback closes the transaction and ignores all previous updates. Read-only // transactions must be rolled back and not committed. func (tx *Tx) Rollback() error { - _assert(!tx.managed, "managed tx rollback not allowed") + common.Assert(!tx.managed, "managed tx rollback not allowed") if tx.db == nil { - return ErrTxClosed + return common.ErrTxClosed } tx.nonPhysicalRollback() return nil @@ -266,7 +265,7 @@ func (tx *Tx) nonPhysicalRollback() { return } if tx.writable { - tx.db.freelist.rollback(tx.meta.txid) + tx.db.freelist.rollback(tx.meta.Txid()) } tx.close() } @@ -277,7 +276,7 @@ func (tx *Tx) rollback() { return } if tx.writable { - tx.db.freelist.rollback(tx.meta.txid) + tx.db.freelist.rollback(tx.meta.Txid()) // When mmap fails, the `data`, `dataref` and `datasz` may be reset to // zero values, and there is no way to reload free page IDs in this case. if tx.db.data != nil { @@ -287,7 +286,7 @@ func (tx *Tx) rollback() { tx.db.freelist.noSyncReload(tx.db.freepages()) } else { // Read free page list from freelist page. - tx.db.freelist.reload(tx.db.page(tx.db.meta().freelist)) + tx.db.freelist.reload(tx.db.page(tx.db.meta().Freelist())) } } } @@ -352,13 +351,13 @@ func (tx *Tx) WriteTo(w io.Writer) (n int64, err error) { // Generate a meta page. We use the same page data for both meta pages. buf := make([]byte, tx.db.pageSize) - page := (*page)(unsafe.Pointer(&buf[0])) - page.flags = metaPageFlag - *page.meta() = *tx.meta + page := (*common.Page)(unsafe.Pointer(&buf[0])) + page.SetFlags(common.MetaPageFlag) + *page.Meta() = *tx.meta // Write meta 0. 
- page.id = 0 - page.meta().checksum = page.meta().sum64() + page.SetId(0) + page.Meta().SetChecksum(page.Meta().Sum64()) nn, err := w.Write(buf) n += int64(nn) if err != nil { @@ -366,9 +365,9 @@ func (tx *Tx) WriteTo(w io.Writer) (n int64, err error) { } // Write meta 1 with a lower transaction id. - page.id = 1 - page.meta().txid -= 1 - page.meta().checksum = page.meta().sum64() + page.SetId(1) + page.Meta().DecTxid() + page.Meta().SetChecksum(page.Meta().Sum64()) nn, err = w.Write(buf) n += int64(nn) if err != nil { @@ -408,14 +407,14 @@ func (tx *Tx) CopyFile(path string, mode os.FileMode) error { } // allocate returns a contiguous block of memory starting at a given page. -func (tx *Tx) allocate(count int) (*page, error) { - p, err := tx.db.allocate(tx.meta.txid, count) +func (tx *Tx) allocate(count int) (*common.Page, error) { + p, err := tx.db.allocate(tx.meta.Txid(), count) if err != nil { return nil, err } // Save to our page cache. - tx.pages[p.id] = p + tx.pages[p.Id()] = p // Update statistics. tx.stats.IncPageCount(int64(count)) @@ -427,18 +426,18 @@ func (tx *Tx) allocate(count int) (*page, error) { // write writes any dirty pages to disk. func (tx *Tx) write() error { // Sort pages by id. - pages := make(pages, 0, len(tx.pages)) + pages := make(common.Pages, 0, len(tx.pages)) for _, p := range tx.pages { pages = append(pages, p) } // Clear out page cache early. - tx.pages = make(map[pgid]*page) + tx.pages = make(map[common.Pgid]*common.Page) sort.Sort(pages) // Write pages to disk in order. for _, p := range pages { - rem := (uint64(p.overflow) + 1) * uint64(tx.db.pageSize) - offset := int64(p.id) * int64(tx.db.pageSize) + rem := (uint64(p.Overflow()) + 1) * uint64(tx.db.pageSize) + offset := int64(p.Id()) * int64(tx.db.pageSize) var written uintptr // Write out page in "max allocation" sized chunks. @@ -447,7 +446,7 @@ func (tx *Tx) write() error { if sz > maxAllocSize-1 { sz = maxAllocSize - 1 } - buf := unsafeByteSlice(unsafe.Pointer(p), written, 0, int(sz)) + buf := common.UnsafeByteSlice(unsafe.Pointer(p), written, 0, int(sz)) if _, err := tx.db.ops.writeAt(buf, offset); err != nil { return err @@ -469,7 +468,7 @@ func (tx *Tx) write() error { } // Ignore file sync if flag is set on DB. - if !tx.db.NoSync || IgnoreNoSync { + if !tx.db.NoSync || common.IgnoreNoSync { if err := fdatasync(tx.db); err != nil { return err } @@ -479,11 +478,11 @@ func (tx *Tx) write() error { for _, p := range pages { // Ignore page sizes over 1 page. // These are allocated using make() instead of the page pool. - if int(p.overflow) != 0 { + if int(p.Overflow()) != 0 { continue } - buf := unsafeByteSlice(unsafe.Pointer(p), 0, 0, tx.db.pageSize) + buf := common.UnsafeByteSlice(unsafe.Pointer(p), 0, 0, tx.db.pageSize) // See https://go.googlesource.com/go/+/f03c9202c43e0abb130669852082117ca50aa9b1 for i := range buf { @@ -500,13 +499,13 @@ func (tx *Tx) writeMeta() error { // Create a temporary buffer for the meta page. buf := make([]byte, tx.db.pageSize) p := tx.db.pageInBuffer(buf, 0) - tx.meta.write(p) + tx.meta.Write(p) // Write the meta page to file. - if _, err := tx.db.ops.writeAt(buf, int64(p.id)*int64(tx.db.pageSize)); err != nil { + if _, err := tx.db.ops.writeAt(buf, int64(p.Id())*int64(tx.db.pageSize)); err != nil { return err } - if !tx.db.NoSync || IgnoreNoSync { + if !tx.db.NoSync || common.IgnoreNoSync { if err := fdatasync(tx.db); err != nil { return err } @@ -520,69 +519,69 @@ func (tx *Tx) writeMeta() error { // page returns a reference to the page with a given id. 
// If page has been written to then a temporary buffered page is returned. -func (tx *Tx) page(id pgid) *page { +func (tx *Tx) page(id common.Pgid) *common.Page { // Check the dirty pages first. if tx.pages != nil { if p, ok := tx.pages[id]; ok { - p.fastCheck(id) + p.FastCheck(id) return p } } // Otherwise return directly from the mmap. p := tx.db.page(id) - p.fastCheck(id) + p.FastCheck(id) return p } // forEachPage iterates over every page within a given page and executes a function. -func (tx *Tx) forEachPage(pgidnum pgid, fn func(*page, int, []pgid)) { - stack := make([]pgid, 10) +func (tx *Tx) forEachPage(pgidnum common.Pgid, fn func(*common.Page, int, []common.Pgid)) { + stack := make([]common.Pgid, 10) stack[0] = pgidnum tx.forEachPageInternal(stack[:1], fn) } -func (tx *Tx) forEachPageInternal(pgidstack []pgid, fn func(*page, int, []pgid)) { +func (tx *Tx) forEachPageInternal(pgidstack []common.Pgid, fn func(*common.Page, int, []common.Pgid)) { p := tx.page(pgidstack[len(pgidstack)-1]) // Execute function. fn(p, len(pgidstack)-1, pgidstack) // Recursively loop over children. - if (p.flags & branchPageFlag) != 0 { - for i := 0; i < int(p.count); i++ { - elem := p.branchPageElement(uint16(i)) - tx.forEachPageInternal(append(pgidstack, elem.pgid), fn) + if (p.Flags() & common.BranchPageFlag) != 0 { + for i := 0; i < int(p.Count()); i++ { + elem := p.BranchPageElement(uint16(i)) + tx.forEachPageInternal(append(pgidstack, elem.Pgid()), fn) } } } // Page returns page information for a given page number. // This is only safe for concurrent use when used by a writable transaction. -func (tx *Tx) Page(id int) (*PageInfo, error) { +func (tx *Tx) Page(id int) (*common.PageInfo, error) { if tx.db == nil { - return nil, ErrTxClosed - } else if pgid(id) >= tx.meta.pgid { + return nil, common.ErrTxClosed + } else if common.Pgid(id) >= tx.meta.Pgid() { return nil, nil } if tx.db.freelist == nil { - return nil, ErrFreePagesNotLoaded + return nil, common.ErrFreePagesNotLoaded } // Build the page info. - p := tx.db.page(pgid(id)) - info := &PageInfo{ + p := tx.db.page(common.Pgid(id)) + info := &common.PageInfo{ ID: id, - Count: int(p.count), - OverflowCount: int(p.overflow), + Count: int(p.Count()), + OverflowCount: int(p.Overflow()), } // Determine the type (or if it's free). - if tx.db.freelist.freed(pgid(id)) { + if tx.db.freelist.freed(common.Pgid(id)) { info.Type = "free" } else { - info.Type = p.typ() + info.Type = p.Typ() } return info, nil diff --git a/tx_check.go b/tx_check.go index 75c7c08..2ee03ec 100644 --- a/tx_check.go +++ b/tx_check.go @@ -3,6 +3,8 @@ package bbolt import ( "encoding/hex" "fmt" + + "go.etcd.io/bbolt/internal/common" ) // Check performs several consistency checks on the database for this transaction. @@ -37,8 +39,8 @@ func (tx *Tx) check(kvStringer KVStringer, ch chan error) { tx.db.loadFreelist() // Check if any pages are double freed. - freed := make(map[pgid]bool) - all := make([]pgid, tx.db.freelist.count()) + freed := make(map[common.Pgid]bool) + all := make([]common.Pgid, tx.db.freelist.count()) tx.db.freelist.copyall(all) for _, id := range all { if freed[id] { @@ -48,12 +50,12 @@ func (tx *Tx) check(kvStringer KVStringer, ch chan error) { } // Track every reachable page. 
- reachable := make(map[pgid]*page)
+ reachable := make(map[common.Pgid]*common.Page)
reachable[0] = tx.page(0) // meta0
reachable[1] = tx.page(1) // meta1
- if tx.meta.freelist != pgidNoFreelist {
- for i := uint32(0); i <= tx.page(tx.meta.freelist).overflow; i++ {
- reachable[tx.meta.freelist+pgid(i)] = tx.page(tx.meta.freelist)
+ if tx.meta.Freelist() != common.PgidNoFreelist {
+ for i := uint32(0); i <= tx.page(tx.meta.Freelist()).Overflow(); i++ {
+ reachable[tx.meta.Freelist()+common.Pgid(i)] = tx.page(tx.meta.Freelist())
}
}

@@ -61,7 +63,7 @@ func (tx *Tx) check(kvStringer KVStringer, ch chan error) {
tx.checkBucket(&tx.root, reachable, freed, kvStringer, ch)

// Ensure all pages below high water mark are either reachable or freed.
- for i := pgid(0); i < tx.meta.pgid; i++ {
+ for i := common.Pgid(0); i < tx.meta.Pgid(); i++ {
_, isReachable := reachable[i]
if !isReachable && !freed[i] {
ch <- fmt.Errorf("page %d: unreachable unfreed", int(i))
@@ -72,22 +74,22 @@ func (tx *Tx) check(kvStringer KVStringer, ch chan error) {
close(ch)
}

-func (tx *Tx) checkBucket(b *Bucket, reachable map[pgid]*page, freed map[pgid]bool,
+func (tx *Tx) checkBucket(b *Bucket, reachable map[common.Pgid]*common.Page, freed map[common.Pgid]bool,
kvStringer KVStringer, ch chan error) {
// Ignore inline buckets.
- if b.root == 0 {
+ if b.RootPage() == 0 {
return
}

// Check every page used by this bucket.
- b.tx.forEachPage(b.root, func(p *page, _ int, stack []pgid) {
- if p.id > tx.meta.pgid {
- ch <- fmt.Errorf("page %d: out of bounds: %d (stack: %v)", int(p.id), int(b.tx.meta.pgid), stack)
+ b.tx.forEachPage(b.RootPage(), func(p *common.Page, _ int, stack []common.Pgid) {
+ if p.Id() > tx.meta.Pgid() {
+ ch <- fmt.Errorf("page %d: out of bounds: %d (stack: %v)", int(p.Id()), int(b.tx.meta.Pgid()), stack)
}

// Ensure each page is only referenced once.
- for i := pgid(0); i <= pgid(p.overflow); i++ {
- var id = p.id + i
+ for i := common.Pgid(0); i <= common.Pgid(p.Overflow()); i++ {
+ var id = p.Id() + i
if _, ok := reachable[id]; ok {
ch <- fmt.Errorf("page %d: multiple references (stack: %v)", int(id), stack)
}
@@ -95,14 +97,14 @@ func (tx *Tx) checkBucket(b *Bucket, reachable map[pgid]*page, freed map[pgid]bo
}

// We should only encounter un-freed leaf and branch pages.
- if freed[p.id] {
- ch <- fmt.Errorf("page %d: reachable freed", int(p.id))
- } else if (p.flags&branchPageFlag) == 0 && (p.flags&leafPageFlag) == 0 {
- ch <- fmt.Errorf("page %d: invalid type: %s (stack: %v)", int(p.id), p.typ(), stack)
+ if freed[p.Id()] {
+ ch <- fmt.Errorf("page %d: reachable freed", int(p.Id()))
+ } else if (p.Flags()&common.BranchPageFlag) == 0 && (p.Flags()&common.LeafPageFlag) == 0 {
+ ch <- fmt.Errorf("page %d: invalid type: %s (stack: %v)", int(p.Id()), p.Typ(), stack)
}
})

- tx.recursivelyCheckPages(b.root, kvStringer.KeyToString, ch)
+ tx.recursivelyCheckPages(b.RootPage(), kvStringer.KeyToString, ch)

// Check each bucket within this bucket.
_ = b.ForEachBucket(func(k []byte) error {
@@ -117,7 +119,7 @@ func (tx *Tx) checkBucket(b *Bucket, reachable map[pgid]*page, freed map[pgid]bo
// key order constraints:
// - keys on pages must be sorted
// - keys on children pages are between 2 consecutive keys on the parent's branch page.
-func (tx *Tx) recursivelyCheckPages(pgId pgid, keyToString func([]byte) string, ch chan error) { +func (tx *Tx) recursivelyCheckPages(pgId common.Pgid, keyToString func([]byte) string, ch chan error) { tx.recursivelyCheckPagesInternal(pgId, nil, nil, nil, keyToString, ch) } @@ -127,36 +129,36 @@ func (tx *Tx) recursivelyCheckPages(pgId pgid, keyToString func([]byte) string, // - Are in right ordering relationship to their parents. // `pagesStack` is expected to contain IDs of pages from the tree root to `pgid` for the clean debugging message. func (tx *Tx) recursivelyCheckPagesInternal( - pgId pgid, minKeyClosed, maxKeyOpen []byte, pagesStack []pgid, + pgId common.Pgid, minKeyClosed, maxKeyOpen []byte, pagesStack []common.Pgid, keyToString func([]byte) string, ch chan error) (maxKeyInSubtree []byte) { p := tx.page(pgId) pagesStack = append(pagesStack, pgId) switch { - case p.flags&branchPageFlag != 0: + case p.Flags()&common.BranchPageFlag != 0: // For branch page we navigate ranges of all subpages. runningMin := minKeyClosed - for i := range p.branchPageElements() { - elem := p.branchPageElement(uint16(i)) - verifyKeyOrder(elem.pgid, "branch", i, elem.key(), runningMin, maxKeyOpen, ch, keyToString, pagesStack) + for i := range p.BranchPageElements() { + elem := p.BranchPageElement(uint16(i)) + verifyKeyOrder(elem.Pgid(), "branch", i, elem.Key(), runningMin, maxKeyOpen, ch, keyToString, pagesStack) maxKey := maxKeyOpen - if i < len(p.branchPageElements())-1 { - maxKey = p.branchPageElement(uint16(i + 1)).key() + if i < len(p.BranchPageElements())-1 { + maxKey = p.BranchPageElement(uint16(i + 1)).Key() } - maxKeyInSubtree = tx.recursivelyCheckPagesInternal(elem.pgid, elem.key(), maxKey, pagesStack, keyToString, ch) + maxKeyInSubtree = tx.recursivelyCheckPagesInternal(elem.Pgid(), elem.Key(), maxKey, pagesStack, keyToString, ch) runningMin = maxKeyInSubtree } return maxKeyInSubtree - case p.flags&leafPageFlag != 0: + case p.Flags()&common.LeafPageFlag != 0: runningMin := minKeyClosed - for i := range p.leafPageElements() { - elem := p.leafPageElement(uint16(i)) - verifyKeyOrder(pgId, "leaf", i, elem.key(), runningMin, maxKeyOpen, ch, keyToString, pagesStack) - runningMin = elem.key() + for i := range p.LeafPageElements() { + elem := p.LeafPageElement(uint16(i)) + verifyKeyOrder(pgId, "leaf", i, elem.Key(), runningMin, maxKeyOpen, ch, keyToString, pagesStack) + runningMin = elem.Key() } - if p.count > 0 { - return p.leafPageElement(p.count - 1).key() + if p.Count() > 0 { + return p.LeafPageElement(p.Count() - 1).Key() } default: ch <- fmt.Errorf("unexpected page type for pgId:%d", pgId) @@ -168,7 +170,7 @@ func (tx *Tx) recursivelyCheckPagesInternal( * verifyKeyOrder checks whether an entry with given #index on pgId (pageType: "branch|leaf") that has given "key", * is within range determined by (previousKey..maxKeyOpen) and reports found violations to the channel (ch). */ -func verifyKeyOrder(pgId pgid, pageType string, index int, key []byte, previousKey []byte, maxKeyOpen []byte, ch chan error, keyToString func([]byte) string, pagesStack []pgid) { +func verifyKeyOrder(pgId common.Pgid, pageType string, index int, key []byte, previousKey []byte, maxKeyOpen []byte, ch chan error, keyToString func([]byte) string, pagesStack []common.Pgid) { if index == 0 && previousKey != nil && compareKeys(previousKey, key) > 0 { ch <- fmt.Errorf("the first key[%d]=(hex)%s on %s page(%d) needs to be >= the key in the ancestor (%s). 
Stack: %v", index, keyToString(key), pageType, pgId, keyToString(previousKey), pagesStack) diff --git a/tx_test.go b/tx_test.go index fa8302d..44cbbf1 100644 --- a/tx_test.go +++ b/tx_test.go @@ -15,6 +15,7 @@ import ( bolt "go.etcd.io/bbolt" "go.etcd.io/bbolt/internal/btesting" + "go.etcd.io/bbolt/internal/common" ) // TestTx_Check_ReadOnly tests consistency checking on a ReadOnly database. @@ -84,7 +85,7 @@ func TestTx_Commit_ErrTxClosed(t *testing.T) { t.Fatal(err) } - if err := tx.Commit(); err != bolt.ErrTxClosed { + if err := tx.Commit(); err != common.ErrTxClosed { t.Fatalf("unexpected error: %s", err) } } @@ -101,7 +102,7 @@ func TestTx_Rollback_ErrTxClosed(t *testing.T) { if err := tx.Rollback(); err != nil { t.Fatal(err) } - if err := tx.Rollback(); err != bolt.ErrTxClosed { + if err := tx.Rollback(); err != common.ErrTxClosed { t.Fatalf("unexpected error: %s", err) } } @@ -113,7 +114,7 @@ func TestTx_Commit_ErrTxNotWritable(t *testing.T) { if err != nil { t.Fatal(err) } - if err := tx.Commit(); err != bolt.ErrTxNotWritable { + if err := tx.Commit(); err != common.ErrTxNotWritable { t.Fatal(err) } // Close the view transaction @@ -165,7 +166,7 @@ func TestTx_CreateBucket_ErrTxNotWritable(t *testing.T) { db := btesting.MustCreateDB(t) if err := db.View(func(tx *bolt.Tx) error { _, err := tx.CreateBucket([]byte("foo")) - if err != bolt.ErrTxNotWritable { + if err != common.ErrTxNotWritable { t.Fatalf("unexpected error: %s", err) } return nil @@ -185,7 +186,7 @@ func TestTx_CreateBucket_ErrTxClosed(t *testing.T) { t.Fatal(err) } - if _, err := tx.CreateBucket([]byte("foo")); err != bolt.ErrTxClosed { + if _, err := tx.CreateBucket([]byte("foo")); err != common.ErrTxClosed { t.Fatalf("unexpected error: %s", err) } } @@ -293,11 +294,11 @@ func TestTx_CreateBucketIfNotExists(t *testing.T) { func TestTx_CreateBucketIfNotExists_ErrBucketNameRequired(t *testing.T) { db := btesting.MustCreateDB(t) if err := db.Update(func(tx *bolt.Tx) error { - if _, err := tx.CreateBucketIfNotExists([]byte{}); err != bolt.ErrBucketNameRequired { + if _, err := tx.CreateBucketIfNotExists([]byte{}); err != common.ErrBucketNameRequired { t.Fatalf("unexpected error: %s", err) } - if _, err := tx.CreateBucketIfNotExists(nil); err != bolt.ErrBucketNameRequired { + if _, err := tx.CreateBucketIfNotExists(nil); err != common.ErrBucketNameRequired { t.Fatalf("unexpected error: %s", err) } @@ -323,7 +324,7 @@ func TestTx_CreateBucket_ErrBucketExists(t *testing.T) { // Create the same bucket again. 
if err := db.Update(func(tx *bolt.Tx) error { - if _, err := tx.CreateBucket([]byte("widgets")); err != bolt.ErrBucketExists { + if _, err := tx.CreateBucket([]byte("widgets")); err != common.ErrBucketExists { t.Fatalf("unexpected error: %s", err) } return nil @@ -336,7 +337,7 @@ func TestTx_CreateBucket_ErrBucketExists(t *testing.T) { func TestTx_CreateBucket_ErrBucketNameRequired(t *testing.T) { db := btesting.MustCreateDB(t) if err := db.Update(func(tx *bolt.Tx) error { - if _, err := tx.CreateBucket(nil); err != bolt.ErrBucketNameRequired { + if _, err := tx.CreateBucket(nil); err != common.ErrBucketNameRequired { t.Fatalf("unexpected error: %s", err) } return nil @@ -401,7 +402,7 @@ func TestTx_DeleteBucket_ErrTxClosed(t *testing.T) { if err := tx.Commit(); err != nil { t.Fatal(err) } - if err := tx.DeleteBucket([]byte("foo")); err != bolt.ErrTxClosed { + if err := tx.DeleteBucket([]byte("foo")); err != common.ErrTxClosed { t.Fatalf("unexpected error: %s", err) } } @@ -410,7 +411,7 @@ func TestTx_DeleteBucket_ErrTxClosed(t *testing.T) { func TestTx_DeleteBucket_ReadOnly(t *testing.T) { db := btesting.MustCreateDB(t) if err := db.View(func(tx *bolt.Tx) error { - if err := tx.DeleteBucket([]byte("foo")); err != bolt.ErrTxNotWritable { + if err := tx.DeleteBucket([]byte("foo")); err != common.ErrTxNotWritable { t.Fatalf("unexpected error: %s", err) } return nil @@ -423,7 +424,7 @@ func TestTx_DeleteBucket_ReadOnly(t *testing.T) { func TestTx_DeleteBucket_NotFound(t *testing.T) { db := btesting.MustCreateDB(t) if err := db.Update(func(tx *bolt.Tx) error { - if err := tx.DeleteBucket([]byte("widgets")); err != bolt.ErrBucketNotFound { + if err := tx.DeleteBucket([]byte("widgets")); err != common.ErrBucketNotFound { t.Fatalf("unexpected error: %s", err) } return nil diff --git a/unsafe.go b/unsafe.go deleted file mode 100644 index c0e5037..0000000 --- a/unsafe.go +++ /dev/null @@ -1,39 +0,0 @@ -package bbolt - -import ( - "reflect" - "unsafe" -) - -func unsafeAdd(base unsafe.Pointer, offset uintptr) unsafe.Pointer { - return unsafe.Pointer(uintptr(base) + offset) -} - -func unsafeIndex(base unsafe.Pointer, offset uintptr, elemsz uintptr, n int) unsafe.Pointer { - return unsafe.Pointer(uintptr(base) + offset + uintptr(n)*elemsz) -} - -func unsafeByteSlice(base unsafe.Pointer, offset uintptr, i, j int) []byte { - // See: https://github.com/golang/go/wiki/cgo#turning-c-arrays-into-go-slices - // - // This memory is not allocated from C, but it is unmanaged by Go's - // garbage collector and should behave similarly, and the compiler - // should produce similar code. Note that this conversion allows a - // subslice to begin after the base address, with an optional offset, - // while the URL above does not cover this case and only slices from - // index 0. However, the wiki never says that the address must be to - // the beginning of a C allocation (or even that malloc was used at - // all), so this is believed to be correct. - return (*[maxAllocSize]byte)(unsafeAdd(base, offset))[i:j:j] -} - -// unsafeSlice modifies the data, len, and cap of a slice variable pointed to by -// the slice parameter. This helper should be used over other direct -// manipulation of reflect.SliceHeader to prevent misuse, namely, converting -// from reflect.SliceHeader to a Go slice type. -func unsafeSlice(slice, data unsafe.Pointer, len int) { - s := (*reflect.SliceHeader)(slice) - s.Data = uintptr(data) - s.Cap = len - s.Len = len -}
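
For reviewers who want to see the net effect of the signature changes, here is a hypothetical snippet (not part of the patch) showing how the refactored internal helpers fit together after this change. The database path and page ids are invented, and the internal/... packages are only importable from within the bbolt module itself.

// Hypothetical caller illustrating the post-refactor signatures.
package main

import (
	"fmt"
	"log"

	"go.etcd.io/bbolt/internal/common"
	"go.etcd.io/bbolt/internal/guts_cli"
	"go.etcd.io/bbolt/internal/surgeon"
)

func main() {
	// Root page and active meta page now come back as common.Pgid
	// instead of guts_cli.Pgid.
	root, activeMeta, err := guts_cli.GetRootPage("my.db")
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("root=%d activeMeta=%d\n", root, activeMeta)

	// Page-level surgery also takes common.Pgid now.
	if err := surgeon.CopyPage("my.db", common.Pgid(4), common.Pgid(5)); err != nil {
		log.Fatal(err)
	}

	// XRay paths are reported as [][]common.Pgid after the refactor.
	paths, err := surgeon.NewXRay("my.db").FindPathsToKey([]byte("some-key"))
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println("paths to key:", paths)
}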