mirror of https://github.com/etcd-io/bbolt.git
refactor both bolt and guts_cli based on the common package
Signed-off-by: Benjamin Wang <wachao@vmware.com>pull/407/head
parent
34595e7231
commit
ea511567eb
|
@ -2,20 +2,22 @@ package bbolt
|
|||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"go.etcd.io/bbolt/internal/common"
|
||||
)
|
||||
|
||||
func TestTx_allocatePageStats(t *testing.T) {
|
||||
f := newTestFreelist()
|
||||
ids := []pgid{2, 3}
|
||||
ids := []common.Pgid{2, 3}
|
||||
f.readIDs(ids)
|
||||
|
||||
tx := &Tx{
|
||||
db: &DB{
|
||||
freelist: f,
|
||||
pageSize: defaultPageSize,
|
||||
pageSize: common.DefaultPageSize,
|
||||
},
|
||||
meta: &meta{},
|
||||
pages: make(map[pgid]*page),
|
||||
meta: &common.Meta{},
|
||||
pages: make(map[common.Pgid]*common.Page),
|
||||
}
|
||||
|
||||
txStats := tx.Stats()
|
||||
|
|
|
@ -10,6 +10,8 @@ import (
|
|||
"unsafe"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
"go.etcd.io/bbolt/internal/common"
|
||||
)
|
||||
|
||||
// flock acquires an advisory lock on a file descriptor.
|
||||
|
@ -36,7 +38,7 @@ func flock(db *DB, exclusive bool, timeout time.Duration) error {
|
|||
|
||||
// If we timed out then return an error.
|
||||
if timeout != 0 && time.Since(t) > timeout-flockRetryTimeout {
|
||||
return ErrTimeout
|
||||
return common.ErrTimeout
|
||||
}
|
||||
|
||||
// Wait for a bit and try again.
|
||||
|
|
|
@ -8,6 +8,8 @@ import (
|
|||
"unsafe"
|
||||
|
||||
"golang.org/x/sys/windows"
|
||||
|
||||
"go.etcd.io/bbolt/internal/common"
|
||||
)
|
||||
|
||||
// fdatasync flushes written data to a file descriptor.
|
||||
|
@ -42,7 +44,7 @@ func flock(db *DB, exclusive bool, timeout time.Duration) error {
|
|||
|
||||
// If we timed out then return an error.
|
||||
if timeout != 0 && time.Since(t) > timeout-flockRetryTimeout {
|
||||
return ErrTimeout
|
||||
return common.ErrTimeout
|
||||
}
|
||||
|
||||
// Wait for a bit and try again.
|
||||
|
@ -93,7 +95,7 @@ func mmap(db *DB, sz int) error {
|
|||
}
|
||||
|
||||
// Convert to a byte array.
|
||||
db.data = ((*[maxMapSize]byte)(unsafe.Pointer(addr)))
|
||||
db.data = (*[maxMapSize]byte)(unsafe.Pointer(addr))
|
||||
db.datasz = sz
|
||||
|
||||
return nil
|
||||
|
|
237
bucket.go
237
bucket.go
|
@ -4,6 +4,8 @@ import (
|
|||
"bytes"
|
||||
"fmt"
|
||||
"unsafe"
|
||||
|
||||
"go.etcd.io/bbolt/internal/common"
|
||||
)
|
||||
|
||||
const (
|
||||
|
@ -14,8 +16,6 @@ const (
|
|||
MaxValueSize = (1 << 31) - 2
|
||||
)
|
||||
|
||||
const bucketHeaderSize = int(unsafe.Sizeof(bucket{}))
|
||||
|
||||
const (
|
||||
minFillPercent = 0.1
|
||||
maxFillPercent = 1.0
|
||||
|
@ -27,12 +27,12 @@ const DefaultFillPercent = 0.5
|
|||
|
||||
// Bucket represents a collection of key/value pairs inside the database.
|
||||
type Bucket struct {
|
||||
*bucket
|
||||
tx *Tx // the associated transaction
|
||||
buckets map[string]*Bucket // subbucket cache
|
||||
page *page // inline page reference
|
||||
rootNode *node // materialized node for the root page.
|
||||
nodes map[pgid]*node // node cache
|
||||
*common.InBucket
|
||||
tx *Tx // the associated transaction
|
||||
buckets map[string]*Bucket // subbucket cache
|
||||
page *common.Page // inline page reference
|
||||
rootNode *node // materialized node for the root page.
|
||||
nodes map[common.Pgid]*node // node cache
|
||||
|
||||
// Sets the threshold for filling nodes when they split. By default,
|
||||
// the bucket will fill to 50% but it can be useful to increase this
|
||||
|
@ -42,21 +42,12 @@ type Bucket struct {
|
|||
FillPercent float64
|
||||
}
|
||||
|
||||
// bucket represents the on-file representation of a bucket.
|
||||
// This is stored as the "value" of a bucket key. If the bucket is small enough,
|
||||
// then its root page can be stored inline in the "value", after the bucket
|
||||
// header. In the case of inline buckets, the "root" will be 0.
|
||||
type bucket struct {
|
||||
root pgid // page id of the bucket's root-level page
|
||||
sequence uint64 // monotonically incrementing, used by NextSequence()
|
||||
}
|
||||
|
||||
// newBucket returns a new bucket associated with a transaction.
|
||||
func newBucket(tx *Tx) Bucket {
|
||||
var b = Bucket{tx: tx, FillPercent: DefaultFillPercent}
|
||||
if tx.writable {
|
||||
b.buckets = make(map[string]*Bucket)
|
||||
b.nodes = make(map[pgid]*node)
|
||||
b.nodes = make(map[common.Pgid]*node)
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
@ -67,8 +58,8 @@ func (b *Bucket) Tx() *Tx {
|
|||
}
|
||||
|
||||
// Root returns the root of the bucket.
|
||||
func (b *Bucket) Root() pgid {
|
||||
return b.root
|
||||
func (b *Bucket) Root() common.Pgid {
|
||||
return b.RootPage()
|
||||
}
|
||||
|
||||
// Writable returns whether the bucket is writable.
|
||||
|
@ -105,7 +96,7 @@ func (b *Bucket) Bucket(name []byte) *Bucket {
|
|||
k, v, flags := c.seek(name)
|
||||
|
||||
// Return nil if the key doesn't exist or it is not a bucket.
|
||||
if !bytes.Equal(name, k) || (flags&bucketLeafFlag) == 0 {
|
||||
if !bytes.Equal(name, k) || (flags&common.BucketLeafFlag) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@ -125,8 +116,8 @@ func (b *Bucket) openBucket(value []byte) *Bucket {
|
|||
|
||||
// Unaligned access requires a copy to be made.
|
||||
const unalignedMask = unsafe.Alignof(struct {
|
||||
bucket
|
||||
page
|
||||
common.InBucket
|
||||
common.Page
|
||||
}{}) - 1
|
||||
unaligned := uintptr(unsafe.Pointer(&value[0]))&unalignedMask != 0
|
||||
if unaligned {
|
||||
|
@ -136,15 +127,15 @@ func (b *Bucket) openBucket(value []byte) *Bucket {
|
|||
// If this is a writable transaction then we need to copy the bucket entry.
|
||||
// Read-only transactions can point directly at the mmap entry.
|
||||
if b.tx.writable && !unaligned {
|
||||
child.bucket = &bucket{}
|
||||
*child.bucket = *(*bucket)(unsafe.Pointer(&value[0]))
|
||||
child.InBucket = &common.InBucket{}
|
||||
*child.InBucket = *(*common.InBucket)(unsafe.Pointer(&value[0]))
|
||||
} else {
|
||||
child.bucket = (*bucket)(unsafe.Pointer(&value[0]))
|
||||
child.InBucket = (*common.InBucket)(unsafe.Pointer(&value[0]))
|
||||
}
|
||||
|
||||
// Save a reference to the inline page if the bucket is inline.
|
||||
if child.root == 0 {
|
||||
child.page = (*page)(unsafe.Pointer(&value[bucketHeaderSize]))
|
||||
if child.RootPage() == 0 {
|
||||
child.page = (*common.Page)(unsafe.Pointer(&value[common.BucketHeaderSize]))
|
||||
}
|
||||
|
||||
return &child
|
||||
|
@ -155,11 +146,11 @@ func (b *Bucket) openBucket(value []byte) *Bucket {
|
|||
// The bucket instance is only valid for the lifetime of the transaction.
|
||||
func (b *Bucket) CreateBucket(key []byte) (*Bucket, error) {
|
||||
if b.tx.db == nil {
|
||||
return nil, ErrTxClosed
|
||||
return nil, common.ErrTxClosed
|
||||
} else if !b.tx.writable {
|
||||
return nil, ErrTxNotWritable
|
||||
return nil, common.ErrTxNotWritable
|
||||
} else if len(key) == 0 {
|
||||
return nil, ErrBucketNameRequired
|
||||
return nil, common.ErrBucketNameRequired
|
||||
}
|
||||
|
||||
// Move cursor to correct position.
|
||||
|
@ -168,15 +159,15 @@ func (b *Bucket) CreateBucket(key []byte) (*Bucket, error) {
|
|||
|
||||
// Return an error if there is an existing key.
|
||||
if bytes.Equal(key, k) {
|
||||
if (flags & bucketLeafFlag) != 0 {
|
||||
return nil, ErrBucketExists
|
||||
if (flags & common.BucketLeafFlag) != 0 {
|
||||
return nil, common.ErrBucketExists
|
||||
}
|
||||
return nil, ErrIncompatibleValue
|
||||
return nil, common.ErrIncompatibleValue
|
||||
}
|
||||
|
||||
// Create empty, inline bucket.
|
||||
var bucket = Bucket{
|
||||
bucket: &bucket{},
|
||||
InBucket: &common.InBucket{},
|
||||
rootNode: &node{isLeaf: true},
|
||||
FillPercent: DefaultFillPercent,
|
||||
}
|
||||
|
@ -184,7 +175,7 @@ func (b *Bucket) CreateBucket(key []byte) (*Bucket, error) {
|
|||
|
||||
// Insert into node.
|
||||
key = cloneBytes(key)
|
||||
c.node().put(key, key, value, 0, bucketLeafFlag)
|
||||
c.node().put(key, key, value, 0, common.BucketLeafFlag)
|
||||
|
||||
// Since subbuckets are not allowed on inline buckets, we need to
|
||||
// dereference the inline page, if it exists. This will cause the bucket
|
||||
|
@ -199,7 +190,7 @@ func (b *Bucket) CreateBucket(key []byte) (*Bucket, error) {
|
|||
// The bucket instance is only valid for the lifetime of the transaction.
|
||||
func (b *Bucket) CreateBucketIfNotExists(key []byte) (*Bucket, error) {
|
||||
child, err := b.CreateBucket(key)
|
||||
if err == ErrBucketExists {
|
||||
if err == common.ErrBucketExists {
|
||||
return b.Bucket(key), nil
|
||||
} else if err != nil {
|
||||
return nil, err
|
||||
|
@ -211,9 +202,9 @@ func (b *Bucket) CreateBucketIfNotExists(key []byte) (*Bucket, error) {
|
|||
// Returns an error if the bucket does not exist, or if the key represents a non-bucket value.
|
||||
func (b *Bucket) DeleteBucket(key []byte) error {
|
||||
if b.tx.db == nil {
|
||||
return ErrTxClosed
|
||||
return common.ErrTxClosed
|
||||
} else if !b.Writable() {
|
||||
return ErrTxNotWritable
|
||||
return common.ErrTxNotWritable
|
||||
}
|
||||
|
||||
// Move cursor to correct position.
|
||||
|
@ -222,9 +213,9 @@ func (b *Bucket) DeleteBucket(key []byte) error {
|
|||
|
||||
// Return an error if bucket doesn't exist or is not a bucket.
|
||||
if !bytes.Equal(key, k) {
|
||||
return ErrBucketNotFound
|
||||
} else if (flags & bucketLeafFlag) == 0 {
|
||||
return ErrIncompatibleValue
|
||||
return common.ErrBucketNotFound
|
||||
} else if (flags & common.BucketLeafFlag) == 0 {
|
||||
return common.ErrIncompatibleValue
|
||||
}
|
||||
|
||||
// Recursively delete all child buckets.
|
||||
|
@ -260,7 +251,7 @@ func (b *Bucket) Get(key []byte) []byte {
|
|||
k, v, flags := b.Cursor().seek(key)
|
||||
|
||||
// Return nil if this is a bucket.
|
||||
if (flags & bucketLeafFlag) != 0 {
|
||||
if (flags & common.BucketLeafFlag) != 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@ -277,15 +268,15 @@ func (b *Bucket) Get(key []byte) []byte {
|
|||
// Returns an error if the bucket was created from a read-only transaction, if the key is blank, if the key is too large, or if the value is too large.
|
||||
func (b *Bucket) Put(key []byte, value []byte) error {
|
||||
if b.tx.db == nil {
|
||||
return ErrTxClosed
|
||||
return common.ErrTxClosed
|
||||
} else if !b.Writable() {
|
||||
return ErrTxNotWritable
|
||||
return common.ErrTxNotWritable
|
||||
} else if len(key) == 0 {
|
||||
return ErrKeyRequired
|
||||
return common.ErrKeyRequired
|
||||
} else if len(key) > MaxKeySize {
|
||||
return ErrKeyTooLarge
|
||||
return common.ErrKeyTooLarge
|
||||
} else if int64(len(value)) > MaxValueSize {
|
||||
return ErrValueTooLarge
|
||||
return common.ErrValueTooLarge
|
||||
}
|
||||
|
||||
// Move cursor to correct position.
|
||||
|
@ -293,8 +284,8 @@ func (b *Bucket) Put(key []byte, value []byte) error {
|
|||
k, _, flags := c.seek(key)
|
||||
|
||||
// Return an error if there is an existing key with a bucket value.
|
||||
if bytes.Equal(key, k) && (flags&bucketLeafFlag) != 0 {
|
||||
return ErrIncompatibleValue
|
||||
if bytes.Equal(key, k) && (flags&common.BucketLeafFlag) != 0 {
|
||||
return common.ErrIncompatibleValue
|
||||
}
|
||||
|
||||
// Insert into node.
|
||||
|
@ -309,9 +300,9 @@ func (b *Bucket) Put(key []byte, value []byte) error {
|
|||
// Returns an error if the bucket was created from a read-only transaction.
|
||||
func (b *Bucket) Delete(key []byte) error {
|
||||
if b.tx.db == nil {
|
||||
return ErrTxClosed
|
||||
return common.ErrTxClosed
|
||||
} else if !b.Writable() {
|
||||
return ErrTxNotWritable
|
||||
return common.ErrTxNotWritable
|
||||
}
|
||||
|
||||
// Move cursor to correct position.
|
||||
|
@ -324,8 +315,8 @@ func (b *Bucket) Delete(key []byte) error {
|
|||
}
|
||||
|
||||
// Return an error if there is already an existing bucket value.
|
||||
if (flags & bucketLeafFlag) != 0 {
|
||||
return ErrIncompatibleValue
|
||||
if (flags & common.BucketLeafFlag) != 0 {
|
||||
return common.ErrIncompatibleValue
|
||||
}
|
||||
|
||||
// Delete the node if we have a matching key.
|
||||
|
@ -335,44 +326,46 @@ func (b *Bucket) Delete(key []byte) error {
|
|||
}
|
||||
|
||||
// Sequence returns the current integer for the bucket without incrementing it.
|
||||
func (b *Bucket) Sequence() uint64 { return b.bucket.sequence }
|
||||
func (b *Bucket) Sequence() uint64 {
|
||||
return b.InSequence()
|
||||
}
|
||||
|
||||
// SetSequence updates the sequence number for the bucket.
|
||||
func (b *Bucket) SetSequence(v uint64) error {
|
||||
if b.tx.db == nil {
|
||||
return ErrTxClosed
|
||||
return common.ErrTxClosed
|
||||
} else if !b.Writable() {
|
||||
return ErrTxNotWritable
|
||||
return common.ErrTxNotWritable
|
||||
}
|
||||
|
||||
// Materialize the root node if it hasn't been already so that the
|
||||
// bucket will be saved during commit.
|
||||
if b.rootNode == nil {
|
||||
_ = b.node(b.root, nil)
|
||||
_ = b.node(b.RootPage(), nil)
|
||||
}
|
||||
|
||||
// Set the sequence.
|
||||
b.bucket.sequence = v
|
||||
b.SetInSequence(v)
|
||||
return nil
|
||||
}
|
||||
|
||||
// NextSequence returns an autoincrementing integer for the bucket.
|
||||
func (b *Bucket) NextSequence() (uint64, error) {
|
||||
if b.tx.db == nil {
|
||||
return 0, ErrTxClosed
|
||||
return 0, common.ErrTxClosed
|
||||
} else if !b.Writable() {
|
||||
return 0, ErrTxNotWritable
|
||||
return 0, common.ErrTxNotWritable
|
||||
}
|
||||
|
||||
// Materialize the root node if it hasn't been already so that the
|
||||
// bucket will be saved during commit.
|
||||
if b.rootNode == nil {
|
||||
_ = b.node(b.root, nil)
|
||||
_ = b.node(b.RootPage(), nil)
|
||||
}
|
||||
|
||||
// Increment and return the sequence.
|
||||
b.bucket.sequence++
|
||||
return b.bucket.sequence, nil
|
||||
b.IncSequence()
|
||||
return b.Sequence(), nil
|
||||
}
|
||||
|
||||
// ForEach executes a function for each key/value pair in a bucket.
|
||||
|
@ -382,7 +375,7 @@ func (b *Bucket) NextSequence() (uint64, error) {
|
|||
// the bucket; this will result in undefined behavior.
|
||||
func (b *Bucket) ForEach(fn func(k, v []byte) error) error {
|
||||
if b.tx.db == nil {
|
||||
return ErrTxClosed
|
||||
return common.ErrTxClosed
|
||||
}
|
||||
c := b.Cursor()
|
||||
for k, v := c.First(); k != nil; k, v = c.Next() {
|
||||
|
@ -395,11 +388,11 @@ func (b *Bucket) ForEach(fn func(k, v []byte) error) error {
|
|||
|
||||
func (b *Bucket) ForEachBucket(fn func(k []byte) error) error {
|
||||
if b.tx.db == nil {
|
||||
return ErrTxClosed
|
||||
return common.ErrTxClosed
|
||||
}
|
||||
c := b.Cursor()
|
||||
for k, _, flags := c.first(); k != nil; k, _, flags = c.next() {
|
||||
if flags&bucketLeafFlag != 0 {
|
||||
if flags&common.BucketLeafFlag != 0 {
|
||||
if err := fn(k); err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -413,64 +406,64 @@ func (b *Bucket) Stats() BucketStats {
|
|||
var s, subStats BucketStats
|
||||
pageSize := b.tx.db.pageSize
|
||||
s.BucketN += 1
|
||||
if b.root == 0 {
|
||||
if b.RootPage() == 0 {
|
||||
s.InlineBucketN += 1
|
||||
}
|
||||
b.forEachPage(func(p *page, depth int, pgstack []pgid) {
|
||||
if (p.flags & leafPageFlag) != 0 {
|
||||
s.KeyN += int(p.count)
|
||||
b.forEachPage(func(p *common.Page, depth int, pgstack []common.Pgid) {
|
||||
if (p.Flags() & common.LeafPageFlag) != 0 {
|
||||
s.KeyN += int(p.Count())
|
||||
|
||||
// used totals the used bytes for the page
|
||||
used := pageHeaderSize
|
||||
used := common.PageHeaderSize
|
||||
|
||||
if p.count != 0 {
|
||||
if p.Count() != 0 {
|
||||
// If page has any elements, add all element headers.
|
||||
used += leafPageElementSize * uintptr(p.count-1)
|
||||
used += common.LeafPageElementSize * uintptr(p.Count()-1)
|
||||
|
||||
// Add all element key, value sizes.
|
||||
// The computation takes advantage of the fact that the position
|
||||
// of the last element's key/value equals the total of the sizes
|
||||
// of all previous elements' keys and values.
|
||||
// It also includes the last element's header.
|
||||
lastElement := p.leafPageElement(p.count - 1)
|
||||
used += uintptr(lastElement.pos + lastElement.ksize + lastElement.vsize)
|
||||
lastElement := p.LeafPageElement(p.Count() - 1)
|
||||
used += uintptr(lastElement.Pos() + lastElement.Ksize() + lastElement.Vsize())
|
||||
}
|
||||
|
||||
if b.root == 0 {
|
||||
if b.RootPage() == 0 {
|
||||
// For inlined bucket just update the inline stats
|
||||
s.InlineBucketInuse += int(used)
|
||||
} else {
|
||||
// For non-inlined bucket update all the leaf stats
|
||||
s.LeafPageN++
|
||||
s.LeafInuse += int(used)
|
||||
s.LeafOverflowN += int(p.overflow)
|
||||
s.LeafOverflowN += int(p.Overflow())
|
||||
|
||||
// Collect stats from sub-buckets.
|
||||
// Do that by iterating over all element headers
|
||||
// looking for the ones with the bucketLeafFlag.
|
||||
for i := uint16(0); i < p.count; i++ {
|
||||
e := p.leafPageElement(i)
|
||||
if (e.flags & bucketLeafFlag) != 0 {
|
||||
for i := uint16(0); i < p.Count(); i++ {
|
||||
e := p.LeafPageElement(i)
|
||||
if (e.Flags() & common.BucketLeafFlag) != 0 {
|
||||
// For any bucket element, open the element value
|
||||
// and recursively call Stats on the contained bucket.
|
||||
subStats.Add(b.openBucket(e.value()).Stats())
|
||||
subStats.Add(b.openBucket(e.Value()).Stats())
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (p.flags & branchPageFlag) != 0 {
|
||||
} else if (p.Flags() & common.BranchPageFlag) != 0 {
|
||||
s.BranchPageN++
|
||||
lastElement := p.branchPageElement(p.count - 1)
|
||||
lastElement := p.BranchPageElement(p.Count() - 1)
|
||||
|
||||
// used totals the used bytes for the page
|
||||
// Add header and all element headers.
|
||||
used := pageHeaderSize + (branchPageElementSize * uintptr(p.count-1))
|
||||
used := common.PageHeaderSize + (common.BranchPageElementSize * uintptr(p.Count()-1))
|
||||
|
||||
// Add size of all keys and values.
|
||||
// Again, use the fact that the last element's position equals
|
||||
// the total of key, value sizes of all previous elements.
|
||||
used += uintptr(lastElement.pos + lastElement.ksize)
|
||||
used += uintptr(lastElement.Pos() + lastElement.Ksize())
|
||||
s.BranchInuse += int(used)
|
||||
s.BranchOverflowN += int(p.overflow)
|
||||
s.BranchOverflowN += int(p.Overflow())
|
||||
}
|
||||
|
||||
// Keep track of maximum page depth.
|
||||
|
@ -491,29 +484,29 @@ func (b *Bucket) Stats() BucketStats {
|
|||
}
|
||||
|
||||
// forEachPage iterates over every page in a bucket, including inline pages.
|
||||
func (b *Bucket) forEachPage(fn func(*page, int, []pgid)) {
|
||||
func (b *Bucket) forEachPage(fn func(*common.Page, int, []common.Pgid)) {
|
||||
// If we have an inline page then just use that.
|
||||
if b.page != nil {
|
||||
fn(b.page, 0, []pgid{b.root})
|
||||
fn(b.page, 0, []common.Pgid{b.RootPage()})
|
||||
return
|
||||
}
|
||||
|
||||
// Otherwise traverse the page hierarchy.
|
||||
b.tx.forEachPage(b.root, fn)
|
||||
b.tx.forEachPage(b.RootPage(), fn)
|
||||
}
|
||||
|
||||
// forEachPageNode iterates over every page (or node) in a bucket.
|
||||
// This also includes inline pages.
|
||||
func (b *Bucket) forEachPageNode(fn func(*page, *node, int)) {
|
||||
func (b *Bucket) forEachPageNode(fn func(*common.Page, *node, int)) {
|
||||
// If we have an inline page or root node then just use that.
|
||||
if b.page != nil {
|
||||
fn(b.page, nil, 0)
|
||||
return
|
||||
}
|
||||
b._forEachPageNode(b.root, 0, fn)
|
||||
b._forEachPageNode(b.RootPage(), 0, fn)
|
||||
}
|
||||
|
||||
func (b *Bucket) _forEachPageNode(pgId pgid, depth int, fn func(*page, *node, int)) {
|
||||
func (b *Bucket) _forEachPageNode(pgId common.Pgid, depth int, fn func(*common.Page, *node, int)) {
|
||||
var p, n = b.pageNode(pgId)
|
||||
|
||||
// Execute function.
|
||||
|
@ -521,10 +514,10 @@ func (b *Bucket) _forEachPageNode(pgId pgid, depth int, fn func(*page, *node, in
|
|||
|
||||
// Recursively loop over children.
|
||||
if p != nil {
|
||||
if (p.flags & branchPageFlag) != 0 {
|
||||
for i := 0; i < int(p.count); i++ {
|
||||
elem := p.branchPageElement(uint16(i))
|
||||
b._forEachPageNode(elem.pgid, depth+1, fn)
|
||||
if (p.Flags() & common.BranchPageFlag) != 0 {
|
||||
for i := 0; i < int(p.Count()); i++ {
|
||||
elem := p.BranchPageElement(uint16(i))
|
||||
b._forEachPageNode(elem.Pgid(), depth+1, fn)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
|
@ -553,9 +546,9 @@ func (b *Bucket) spill() error {
|
|||
}
|
||||
|
||||
// Update the child bucket header in this bucket.
|
||||
value = make([]byte, unsafe.Sizeof(bucket{}))
|
||||
var bucket = (*bucket)(unsafe.Pointer(&value[0]))
|
||||
*bucket = *child.bucket
|
||||
value = make([]byte, unsafe.Sizeof(common.InBucket{}))
|
||||
var bucket = (*common.InBucket)(unsafe.Pointer(&value[0]))
|
||||
*bucket = *child.InBucket
|
||||
}
|
||||
|
||||
// Skip writing the bucket if there are no materialized nodes.
|
||||
|
@ -569,10 +562,10 @@ func (b *Bucket) spill() error {
|
|||
if !bytes.Equal([]byte(name), k) {
|
||||
panic(fmt.Sprintf("misplaced bucket header: %x -> %x", []byte(name), k))
|
||||
}
|
||||
if flags&bucketLeafFlag == 0 {
|
||||
if flags&common.BucketLeafFlag == 0 {
|
||||
panic(fmt.Sprintf("unexpected bucket header flag: %x", flags))
|
||||
}
|
||||
c.node().put([]byte(name), []byte(name), value, 0, bucketLeafFlag)
|
||||
c.node().put([]byte(name), []byte(name), value, 0, common.BucketLeafFlag)
|
||||
}
|
||||
|
||||
// Ignore if there's not a materialized root node.
|
||||
|
@ -587,16 +580,16 @@ func (b *Bucket) spill() error {
|
|||
b.rootNode = b.rootNode.root()
|
||||
|
||||
// Update the root node for this bucket.
|
||||
if b.rootNode.pgid >= b.tx.meta.pgid {
|
||||
panic(fmt.Sprintf("pgid (%d) above high water mark (%d)", b.rootNode.pgid, b.tx.meta.pgid))
|
||||
if b.rootNode.pgid >= b.tx.meta.Pgid() {
|
||||
panic(fmt.Sprintf("pgid (%d) above high water mark (%d)", b.rootNode.pgid, b.tx.meta.Pgid()))
|
||||
}
|
||||
b.root = b.rootNode.pgid
|
||||
b.SetRootPage(b.rootNode.pgid)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// inlineable returns true if a bucket is small enough to be written inline
|
||||
// and if it contains no subbuckets. Otherwise returns false.
|
||||
// and if it contains no subbuckets. Otherwise, returns false.
|
||||
func (b *Bucket) inlineable() bool {
|
||||
var n = b.rootNode
|
||||
|
||||
|
@ -607,11 +600,11 @@ func (b *Bucket) inlineable() bool {
|
|||
|
||||
// Bucket is not inlineable if it contains subbuckets or if it goes beyond
|
||||
// our threshold for inline bucket size.
|
||||
var size = pageHeaderSize
|
||||
var size = common.PageHeaderSize
|
||||
for _, inode := range n.inodes {
|
||||
size += leafPageElementSize + uintptr(len(inode.key)) + uintptr(len(inode.value))
|
||||
size += common.LeafPageElementSize + uintptr(len(inode.key)) + uintptr(len(inode.value))
|
||||
|
||||
if inode.flags&bucketLeafFlag != 0 {
|
||||
if inode.flags&common.BucketLeafFlag != 0 {
|
||||
return false
|
||||
} else if size > b.maxInlineBucketSize() {
|
||||
return false
|
||||
|
@ -630,14 +623,14 @@ func (b *Bucket) maxInlineBucketSize() uintptr {
|
|||
func (b *Bucket) write() []byte {
|
||||
// Allocate the appropriate size.
|
||||
var n = b.rootNode
|
||||
var value = make([]byte, bucketHeaderSize+n.size())
|
||||
var value = make([]byte, common.BucketHeaderSize+n.size())
|
||||
|
||||
// Write a bucket header.
|
||||
var bucket = (*bucket)(unsafe.Pointer(&value[0]))
|
||||
*bucket = *b.bucket
|
||||
var bucket = (*common.InBucket)(unsafe.Pointer(&value[0]))
|
||||
*bucket = *b.InBucket
|
||||
|
||||
// Convert byte slice to a fake page and write the root node.
|
||||
var p = (*page)(unsafe.Pointer(&value[bucketHeaderSize]))
|
||||
var p = (*common.Page)(unsafe.Pointer(&value[common.BucketHeaderSize]))
|
||||
n.write(p)
|
||||
|
||||
return value
|
||||
|
@ -654,8 +647,8 @@ func (b *Bucket) rebalance() {
|
|||
}
|
||||
|
||||
// node creates a node from a page and associates it with a given parent.
|
||||
func (b *Bucket) node(pgId pgid, parent *node) *node {
|
||||
_assert(b.nodes != nil, "nodes map expected")
|
||||
func (b *Bucket) node(pgId common.Pgid, parent *node) *node {
|
||||
common.Assert(b.nodes != nil, "nodes map expected")
|
||||
|
||||
// Retrieve node if it's already been created.
|
||||
if n := b.nodes[pgId]; n != nil {
|
||||
|
@ -688,19 +681,19 @@ func (b *Bucket) node(pgId pgid, parent *node) *node {
|
|||
|
||||
// free recursively frees all pages in the bucket.
|
||||
func (b *Bucket) free() {
|
||||
if b.root == 0 {
|
||||
if b.RootPage() == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
var tx = b.tx
|
||||
b.forEachPageNode(func(p *page, n *node, _ int) {
|
||||
b.forEachPageNode(func(p *common.Page, n *node, _ int) {
|
||||
if p != nil {
|
||||
tx.db.freelist.free(tx.meta.txid, p)
|
||||
tx.db.freelist.free(tx.meta.Txid(), p)
|
||||
} else {
|
||||
n.free()
|
||||
}
|
||||
})
|
||||
b.root = 0
|
||||
b.SetRootPage(0)
|
||||
}
|
||||
|
||||
// dereference removes all references to the old mmap.
|
||||
|
@ -715,11 +708,11 @@ func (b *Bucket) dereference() {
|
|||
}
|
||||
|
||||
// pageNode returns the in-memory node, if it exists.
|
||||
// Otherwise returns the underlying page.
|
||||
func (b *Bucket) pageNode(id pgid) (*page, *node) {
|
||||
// Otherwise, returns the underlying page.
|
||||
func (b *Bucket) pageNode(id common.Pgid) (*common.Page, *node) {
|
||||
// Inline buckets have a fake page embedded in their value so treat them
|
||||
// differently. We'll return the rootNode (if available) or the fake page.
|
||||
if b.root == 0 {
|
||||
if b.RootPage() == 0 {
|
||||
if id != 0 {
|
||||
panic(fmt.Sprintf("inline bucket non-zero page access(2): %d != 0", id))
|
||||
}
|
||||
|
|
|
@ -18,6 +18,7 @@ import (
|
|||
|
||||
bolt "go.etcd.io/bbolt"
|
||||
"go.etcd.io/bbolt/internal/btesting"
|
||||
"go.etcd.io/bbolt/internal/common"
|
||||
)
|
||||
|
||||
// Ensure that a bucket that gets a non-existent key returns nil.
|
||||
|
@ -246,7 +247,7 @@ func TestBucket_Put_IncompatibleValue(t *testing.T) {
|
|||
if _, err := tx.Bucket([]byte("widgets")).CreateBucket([]byte("foo")); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err := b0.Put([]byte("foo"), []byte("bar")); err != bolt.ErrIncompatibleValue {
|
||||
if err := b0.Put([]byte("foo"), []byte("bar")); err != common.ErrIncompatibleValue {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
return nil
|
||||
|
@ -272,7 +273,7 @@ func TestBucket_Put_Closed(t *testing.T) {
|
|||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err := b.Put([]byte("foo"), []byte("bar")); err != bolt.ErrTxClosed {
|
||||
if err := b.Put([]byte("foo"), []byte("bar")); err != common.ErrTxClosed {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
}
|
||||
|
@ -292,7 +293,7 @@ func TestBucket_Put_ReadOnly(t *testing.T) {
|
|||
|
||||
if err := db.View(func(tx *bolt.Tx) error {
|
||||
b := tx.Bucket([]byte("widgets"))
|
||||
if err := b.Put([]byte("foo"), []byte("bar")); err != bolt.ErrTxNotWritable {
|
||||
if err := b.Put([]byte("foo"), []byte("bar")); err != common.ErrTxNotWritable {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
return nil
|
||||
|
@ -560,7 +561,7 @@ func TestBucket_Delete_Bucket(t *testing.T) {
|
|||
if _, err := b.CreateBucket([]byte("foo")); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err := b.Delete([]byte("foo")); err != bolt.ErrIncompatibleValue {
|
||||
if err := b.Delete([]byte("foo")); err != common.ErrIncompatibleValue {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
return nil
|
||||
|
@ -583,7 +584,7 @@ func TestBucket_Delete_ReadOnly(t *testing.T) {
|
|||
}
|
||||
|
||||
if err := db.View(func(tx *bolt.Tx) error {
|
||||
if err := tx.Bucket([]byte("widgets")).Delete([]byte("foo")); err != bolt.ErrTxNotWritable {
|
||||
if err := tx.Bucket([]byte("widgets")).Delete([]byte("foo")); err != common.ErrTxNotWritable {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
return nil
|
||||
|
@ -609,7 +610,7 @@ func TestBucket_Delete_Closed(t *testing.T) {
|
|||
if err := tx.Rollback(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err := b.Delete([]byte("foo")); err != bolt.ErrTxClosed {
|
||||
if err := b.Delete([]byte("foo")); err != common.ErrTxClosed {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
}
|
||||
|
@ -780,7 +781,7 @@ func TestBucket_CreateBucket_IncompatibleValue(t *testing.T) {
|
|||
if err := widgets.Put([]byte("foo"), []byte("bar")); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if _, err := widgets.CreateBucket([]byte("foo")); err != bolt.ErrIncompatibleValue {
|
||||
if _, err := widgets.CreateBucket([]byte("foo")); err != common.ErrIncompatibleValue {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
return nil
|
||||
|
@ -801,7 +802,7 @@ func TestBucket_DeleteBucket_IncompatibleValue(t *testing.T) {
|
|||
if err := widgets.Put([]byte("foo"), []byte("bar")); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err := tx.Bucket([]byte("widgets")).DeleteBucket([]byte("foo")); err != bolt.ErrIncompatibleValue {
|
||||
if err := tx.Bucket([]byte("widgets")).DeleteBucket([]byte("foo")); err != common.ErrIncompatibleValue {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
return nil
|
||||
|
@ -943,7 +944,7 @@ func TestBucket_NextSequence_ReadOnly(t *testing.T) {
|
|||
|
||||
if err := db.View(func(tx *bolt.Tx) error {
|
||||
_, err := tx.Bucket([]byte("widgets")).NextSequence()
|
||||
if err != bolt.ErrTxNotWritable {
|
||||
if err != common.ErrTxNotWritable {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
return nil
|
||||
|
@ -966,7 +967,7 @@ func TestBucket_NextSequence_Closed(t *testing.T) {
|
|||
if err := tx.Rollback(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if _, err := b.NextSequence(); err != bolt.ErrTxClosed {
|
||||
if _, err := b.NextSequence(); err != common.ErrTxClosed {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
@ -1158,7 +1159,7 @@ func TestBucket_ForEach_Closed(t *testing.T) {
|
|||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err := b.ForEach(func(k, v []byte) error { return nil }); err != bolt.ErrTxClosed {
|
||||
if err := b.ForEach(func(k, v []byte) error { return nil }); err != common.ErrTxClosed {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
}
|
||||
|
@ -1172,10 +1173,10 @@ func TestBucket_Put_EmptyKey(t *testing.T) {
|
|||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err := b.Put([]byte(""), []byte("bar")); err != bolt.ErrKeyRequired {
|
||||
if err := b.Put([]byte(""), []byte("bar")); err != common.ErrKeyRequired {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
if err := b.Put(nil, []byte("bar")); err != bolt.ErrKeyRequired {
|
||||
if err := b.Put(nil, []byte("bar")); err != common.ErrKeyRequired {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
return nil
|
||||
|
@ -1192,7 +1193,7 @@ func TestBucket_Put_KeyTooLarge(t *testing.T) {
|
|||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err := b.Put(make([]byte, 32769), []byte("bar")); err != bolt.ErrKeyTooLarge {
|
||||
if err := b.Put(make([]byte, 32769), []byte("bar")); err != common.ErrKeyTooLarge {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
return nil
|
||||
|
@ -1215,7 +1216,7 @@ func TestBucket_Put_ValueTooLarge(t *testing.T) {
|
|||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err := b.Put([]byte("foo"), make([]byte, bolt.MaxValueSize+1)); err != bolt.ErrValueTooLarge {
|
||||
if err := b.Put([]byte("foo"), make([]byte, bolt.MaxValueSize+1)); err != common.ErrValueTooLarge {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
return nil
|
||||
|
|
|
@ -18,11 +18,10 @@ import (
|
|||
"time"
|
||||
"unicode"
|
||||
"unicode/utf8"
|
||||
"unsafe"
|
||||
|
||||
"go.etcd.io/bbolt/internal/guts_cli"
|
||||
|
||||
bolt "go.etcd.io/bbolt"
|
||||
"go.etcd.io/bbolt/internal/common"
|
||||
"go.etcd.io/bbolt/internal/guts_cli"
|
||||
)
|
||||
|
||||
var (
|
||||
|
@ -52,12 +51,6 @@ var (
|
|||
// ErrBucketRequired is returned when a bucket is not specified.
|
||||
ErrBucketRequired = errors.New("bucket required")
|
||||
|
||||
// ErrBucketNotFound is returned when a bucket is not found.
|
||||
ErrBucketNotFound = errors.New("bucket not found")
|
||||
|
||||
// ErrKeyRequired is returned when a key is not specified.
|
||||
ErrKeyRequired = errors.New("key required")
|
||||
|
||||
// ErrKeyNotFound is returned when a key is not found.
|
||||
ErrKeyNotFound = errors.New("key not found")
|
||||
)
|
||||
|
@ -509,16 +502,16 @@ func (cmd *pageItemCommand) Run(args ...string) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
// leafPageElement retrieves a leaf page element.
|
||||
func (cmd *pageItemCommand) leafPageElement(pageBytes []byte, index uint16) (*guts_cli.LeafPageElement, error) {
|
||||
p := (*guts_cli.Page)(unsafe.Pointer(&pageBytes[0]))
|
||||
func (cmd *pageItemCommand) validateLeafPage(pageBytes []byte, index uint16) (*common.Page, error) {
|
||||
p := common.LoadPage(pageBytes)
|
||||
if index >= p.Count() {
|
||||
return nil, fmt.Errorf("leafPageElement: expected item index less than %d, but got %d.", p.Count(), index)
|
||||
return nil, fmt.Errorf("leafPageElement: expected item index less than %d, but got %d", p.Count(), index)
|
||||
}
|
||||
if p.Type() != "leaf" {
|
||||
return nil, fmt.Errorf("leafPageElement: expected page type of 'leaf', but got '%s'", p.Type())
|
||||
if p.Typ() != "leaf" {
|
||||
return nil, fmt.Errorf("leafPageElement: expected page type of 'leaf', but got '%s'", p.Typ())
|
||||
}
|
||||
return p.LeafPageElement(index), nil
|
||||
|
||||
return p, nil
|
||||
}
|
||||
|
||||
const FORMAT_MODES = "auto|ascii-encoded|hex|bytes|redacted"
|
||||
|
@ -568,19 +561,21 @@ func writelnBytes(w io.Writer, b []byte, format string) error {
|
|||
|
||||
// PrintLeafItemKey writes the bytes of a leaf element's key.
|
||||
func (cmd *pageItemCommand) PrintLeafItemKey(w io.Writer, pageBytes []byte, index uint16, format string) error {
|
||||
e, err := cmd.leafPageElement(pageBytes, index)
|
||||
p, err := cmd.validateLeafPage(pageBytes, index)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
e := p.LeafPageElement(index)
|
||||
return writelnBytes(w, e.Key(), format)
|
||||
}
|
||||
|
||||
// PrintLeafItemKey writes the bytes of a leaf element's value.
|
||||
// PrintLeafItemValue writes the bytes of a leaf element's value.
|
||||
func (cmd *pageItemCommand) PrintLeafItemValue(w io.Writer, pageBytes []byte, index uint16, format string) error {
|
||||
e, err := cmd.leafPageElement(pageBytes, index)
|
||||
p, err := cmd.validateLeafPage(pageBytes, index)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
e := p.LeafPageElement(index)
|
||||
return writelnBytes(w, e.Value(), format)
|
||||
}
|
||||
|
||||
|
@ -931,12 +926,12 @@ func (cmd *keysCommand) Run(args ...string) error {
|
|||
// Find bucket.
|
||||
var lastbucket *bolt.Bucket = tx.Bucket([]byte(buckets[0]))
|
||||
if lastbucket == nil {
|
||||
return ErrBucketNotFound
|
||||
return common.ErrBucketNotFound
|
||||
}
|
||||
for _, bucket := range buckets[1:] {
|
||||
lastbucket = lastbucket.Bucket([]byte(bucket))
|
||||
if lastbucket == nil {
|
||||
return ErrBucketNotFound
|
||||
return common.ErrBucketNotFound
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1007,7 +1002,7 @@ func (cmd *getCommand) Run(args ...string) error {
|
|||
} else if len(buckets) == 0 {
|
||||
return ErrBucketRequired
|
||||
} else if len(key) == 0 {
|
||||
return ErrKeyRequired
|
||||
return common.ErrKeyRequired
|
||||
}
|
||||
|
||||
// Open database.
|
||||
|
@ -1022,12 +1017,12 @@ func (cmd *getCommand) Run(args ...string) error {
|
|||
// Find bucket.
|
||||
var lastbucket *bolt.Bucket = tx.Bucket([]byte(buckets[0]))
|
||||
if lastbucket == nil {
|
||||
return ErrBucketNotFound
|
||||
return common.ErrBucketNotFound
|
||||
}
|
||||
for _, bucket := range buckets[1:] {
|
||||
lastbucket = lastbucket.Bucket([]byte(bucket))
|
||||
if lastbucket == nil {
|
||||
return ErrBucketNotFound
|
||||
return common.ErrBucketNotFound
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -8,6 +8,7 @@ import (
|
|||
"os"
|
||||
"strings"
|
||||
|
||||
"go.etcd.io/bbolt/internal/common"
|
||||
"go.etcd.io/bbolt/internal/guts_cli"
|
||||
)
|
||||
|
||||
|
@ -113,12 +114,12 @@ func (cmd *pageCommand) printPage(path string, pageID uint64, formatValue string
|
|||
|
||||
// Print basic page info.
|
||||
fmt.Fprintf(cmd.Stdout, "Page ID: %d\n", p.Id())
|
||||
fmt.Fprintf(cmd.Stdout, "Page Type: %s\n", p.Type())
|
||||
fmt.Fprintf(cmd.Stdout, "Page Type: %s\n", p.Typ())
|
||||
fmt.Fprintf(cmd.Stdout, "Total Size: %d bytes\n", len(buf))
|
||||
fmt.Fprintf(cmd.Stdout, "Overflow pages: %d\n", p.Overflow())
|
||||
|
||||
// Print type-specific data.
|
||||
switch p.Type() {
|
||||
switch p.Typ() {
|
||||
case "meta":
|
||||
err = cmd.PrintMeta(cmd.Stdout, buf)
|
||||
case "leaf":
|
||||
|
@ -136,14 +137,14 @@ func (cmd *pageCommand) printPage(path string, pageID uint64, formatValue string
|
|||
|
||||
// PrintMeta prints the data from the meta page.
|
||||
func (cmd *pageCommand) PrintMeta(w io.Writer, buf []byte) error {
|
||||
m := guts_cli.LoadPageMeta(buf)
|
||||
m := common.LoadPageMeta(buf)
|
||||
m.Print(w)
|
||||
return nil
|
||||
}
|
||||
|
||||
// PrintLeaf prints the data for a leaf page.
|
||||
func (cmd *pageCommand) PrintLeaf(w io.Writer, buf []byte, formatValue string) error {
|
||||
p := guts_cli.LoadPage(buf)
|
||||
p := common.LoadPage(buf)
|
||||
|
||||
// Print number of items.
|
||||
fmt.Fprintf(w, "Item Count: %d\n", p.Count())
|
||||
|
@ -182,7 +183,7 @@ func (cmd *pageCommand) PrintLeaf(w io.Writer, buf []byte, formatValue string) e
|
|||
|
||||
// PrintBranch prints the data for a branch page.
|
||||
func (cmd *pageCommand) PrintBranch(w io.Writer, buf []byte) error {
|
||||
p := guts_cli.LoadPage(buf)
|
||||
p := common.LoadPage(buf)
|
||||
|
||||
// Print number of items.
|
||||
fmt.Fprintf(w, "Item Count: %d\n", p.Count())
|
||||
|
@ -200,7 +201,7 @@ func (cmd *pageCommand) PrintBranch(w io.Writer, buf []byte) error {
|
|||
k = fmt.Sprintf("%x", string(e.Key()))
|
||||
}
|
||||
|
||||
fmt.Fprintf(w, "%s: <pgid=%d>\n", k, e.PgId())
|
||||
fmt.Fprintf(w, "%s: <pgid=%d>\n", k, e.Pgid())
|
||||
}
|
||||
fmt.Fprintf(w, "\n")
|
||||
return nil
|
||||
|
@ -208,16 +209,17 @@ func (cmd *pageCommand) PrintBranch(w io.Writer, buf []byte) error {
|
|||
|
||||
// PrintFreelist prints the data for a freelist page.
|
||||
func (cmd *pageCommand) PrintFreelist(w io.Writer, buf []byte) error {
|
||||
p := guts_cli.LoadPage(buf)
|
||||
p := common.LoadPage(buf)
|
||||
|
||||
// Print number of items.
|
||||
fmt.Fprintf(w, "Item Count: %d\n", p.FreelistPageCount())
|
||||
_, cnt := p.FreelistPageCount()
|
||||
fmt.Fprintf(w, "Item Count: %d\n", cnt)
|
||||
fmt.Fprintf(w, "Overflow: %d\n", p.Overflow())
|
||||
|
||||
fmt.Fprintf(w, "\n")
|
||||
|
||||
// Print each page in the freelist.
|
||||
ids := p.FreelistPagePages()
|
||||
ids := p.FreelistPageIds()
|
||||
for _, ids := range ids {
|
||||
fmt.Fprintf(w, "%d\n", ids)
|
||||
}
|
||||
|
@ -244,7 +246,7 @@ func (cmd *pageCommand) PrintPage(w io.Writer, r io.ReaderAt, pageID int, pageSi
|
|||
for offset := 0; offset < pageSize; offset += bytesPerLineN {
|
||||
// Retrieve current 16-byte line.
|
||||
line := buf[offset : offset+bytesPerLineN]
|
||||
isLastLine := (offset == (pageSize - bytesPerLineN))
|
||||
isLastLine := offset == (pageSize - bytesPerLineN)
|
||||
|
||||
// If it's the same as the previous line then print a skip.
|
||||
if bytes.Equal(line, prev) && !isLastLine {
|
||||
|
|
|
@ -9,7 +9,7 @@ import (
|
|||
"strconv"
|
||||
"strings"
|
||||
|
||||
"go.etcd.io/bbolt/internal/guts_cli"
|
||||
"go.etcd.io/bbolt/internal/common"
|
||||
"go.etcd.io/bbolt/internal/surgeon"
|
||||
)
|
||||
|
||||
|
@ -224,7 +224,7 @@ func (cmd *copyPageCommand) Run(args ...string) error {
|
|||
}
|
||||
|
||||
// copy the page
|
||||
if err := surgeon.CopyPage(cmd.dstPath, guts_cli.Pgid(srcPageId), guts_cli.Pgid(dstPageId)); err != nil {
|
||||
if err := surgeon.CopyPage(cmd.dstPath, common.Pgid(srcPageId), common.Pgid(dstPageId)); err != nil {
|
||||
return fmt.Errorf("copyPageCommand failed: %w", err)
|
||||
}
|
||||
|
||||
|
@ -279,7 +279,7 @@ func (cmd *clearPageCommand) Run(args ...string) error {
|
|||
return err
|
||||
}
|
||||
|
||||
if err := surgeon.ClearPage(cmd.dstPath, guts_cli.Pgid(pageId)); err != nil {
|
||||
if err := surgeon.ClearPage(cmd.dstPath, common.Pgid(pageId)); err != nil {
|
||||
return fmt.Errorf("clearPageCommand failed: %w", err)
|
||||
}
|
||||
|
||||
|
|
|
@ -11,7 +11,7 @@ import (
|
|||
|
||||
bolt "go.etcd.io/bbolt"
|
||||
"go.etcd.io/bbolt/internal/btesting"
|
||||
"go.etcd.io/bbolt/internal/guts_cli"
|
||||
"go.etcd.io/bbolt/internal/common"
|
||||
)
|
||||
|
||||
func TestSurgery_RevertMetaPage(t *testing.T) {
|
||||
|
@ -28,8 +28,8 @@ func TestSurgery_RevertMetaPage(t *testing.T) {
|
|||
// Read both meta0 and meta1 from srcFile
|
||||
srcBuf0 := readPage(t, srcPath, 0, pageSize)
|
||||
srcBuf1 := readPage(t, srcPath, 1, pageSize)
|
||||
meta0Page := guts_cli.LoadPageMeta(srcBuf0)
|
||||
meta1Page := guts_cli.LoadPageMeta(srcBuf1)
|
||||
meta0Page := common.LoadPageMeta(srcBuf0)
|
||||
meta1Page := common.LoadPageMeta(srcBuf1)
|
||||
|
||||
// Get the non-active meta page
|
||||
nonActiveSrcBuf := srcBuf0
|
||||
|
@ -115,7 +115,7 @@ func TestSurgery_ClearPage(t *testing.T) {
|
|||
t.Log("Verify result")
|
||||
dstPageId3Data := readPage(t, dstPath, 3, pageSize)
|
||||
|
||||
p := guts_cli.LoadPage(dstPageId3Data)
|
||||
p := common.LoadPage(dstPageId3Data)
|
||||
assert.Equal(t, uint16(0), p.Count())
|
||||
assert.Equal(t, uint32(0), p.Overflow())
|
||||
}
|
||||
|
|
86
cursor.go
86
cursor.go
|
@ -4,6 +4,8 @@ import (
|
|||
"bytes"
|
||||
"fmt"
|
||||
"sort"
|
||||
|
||||
"go.etcd.io/bbolt/internal/common"
|
||||
)
|
||||
|
||||
// Cursor represents an iterator that can traverse over all key/value pairs in a bucket
|
||||
|
@ -30,9 +32,9 @@ func (c *Cursor) Bucket() *Bucket {
|
|||
// If the bucket is empty then a nil key and value are returned.
|
||||
// The returned key and value are only valid for the life of the transaction.
|
||||
func (c *Cursor) First() (key []byte, value []byte) {
|
||||
_assert(c.bucket.tx.db != nil, "tx closed")
|
||||
common.Assert(c.bucket.tx.db != nil, "tx closed")
|
||||
k, v, flags := c.first()
|
||||
if (flags & uint32(bucketLeafFlag)) != 0 {
|
||||
if (flags & uint32(common.BucketLeafFlag)) != 0 {
|
||||
return k, nil
|
||||
}
|
||||
return k, v
|
||||
|
@ -40,7 +42,7 @@ func (c *Cursor) First() (key []byte, value []byte) {
|
|||
|
||||
func (c *Cursor) first() (key []byte, value []byte, flags uint32) {
|
||||
c.stack = c.stack[:0]
|
||||
p, n := c.bucket.pageNode(c.bucket.root)
|
||||
p, n := c.bucket.pageNode(c.bucket.RootPage())
|
||||
c.stack = append(c.stack, elemRef{page: p, node: n, index: 0})
|
||||
c.goToFirstElementOnTheStack()
|
||||
|
||||
|
@ -51,7 +53,7 @@ func (c *Cursor) first() (key []byte, value []byte, flags uint32) {
|
|||
}
|
||||
|
||||
k, v, flags := c.keyValue()
|
||||
if (flags & uint32(bucketLeafFlag)) != 0 {
|
||||
if (flags & uint32(common.BucketLeafFlag)) != 0 {
|
||||
return k, nil, flags
|
||||
}
|
||||
return k, v, flags
|
||||
|
@ -61,9 +63,9 @@ func (c *Cursor) first() (key []byte, value []byte, flags uint32) {
|
|||
// If the bucket is empty then a nil key and value are returned.
|
||||
// The returned key and value are only valid for the life of the transaction.
|
||||
func (c *Cursor) Last() (key []byte, value []byte) {
|
||||
_assert(c.bucket.tx.db != nil, "tx closed")
|
||||
common.Assert(c.bucket.tx.db != nil, "tx closed")
|
||||
c.stack = c.stack[:0]
|
||||
p, n := c.bucket.pageNode(c.bucket.root)
|
||||
p, n := c.bucket.pageNode(c.bucket.RootPage())
|
||||
ref := elemRef{page: p, node: n}
|
||||
ref.index = ref.count() - 1
|
||||
c.stack = append(c.stack, ref)
|
||||
|
@ -80,7 +82,7 @@ func (c *Cursor) Last() (key []byte, value []byte) {
|
|||
}
|
||||
|
||||
k, v, flags := c.keyValue()
|
||||
if (flags & uint32(bucketLeafFlag)) != 0 {
|
||||
if (flags & uint32(common.BucketLeafFlag)) != 0 {
|
||||
return k, nil
|
||||
}
|
||||
return k, v
|
||||
|
@ -90,9 +92,9 @@ func (c *Cursor) Last() (key []byte, value []byte) {
|
|||
// If the cursor is at the end of the bucket then a nil key and value are returned.
|
||||
// The returned key and value are only valid for the life of the transaction.
|
||||
func (c *Cursor) Next() (key []byte, value []byte) {
|
||||
_assert(c.bucket.tx.db != nil, "tx closed")
|
||||
common.Assert(c.bucket.tx.db != nil, "tx closed")
|
||||
k, v, flags := c.next()
|
||||
if (flags & uint32(bucketLeafFlag)) != 0 {
|
||||
if (flags & uint32(common.BucketLeafFlag)) != 0 {
|
||||
return k, nil
|
||||
}
|
||||
return k, v
|
||||
|
@ -102,9 +104,9 @@ func (c *Cursor) Next() (key []byte, value []byte) {
|
|||
// If the cursor is at the beginning of the bucket then a nil key and value are returned.
|
||||
// The returned key and value are only valid for the life of the transaction.
|
||||
func (c *Cursor) Prev() (key []byte, value []byte) {
|
||||
_assert(c.bucket.tx.db != nil, "tx closed")
|
||||
common.Assert(c.bucket.tx.db != nil, "tx closed")
|
||||
k, v, flags := c.prev()
|
||||
if (flags & uint32(bucketLeafFlag)) != 0 {
|
||||
if (flags & uint32(common.BucketLeafFlag)) != 0 {
|
||||
return k, nil
|
||||
}
|
||||
return k, v
|
||||
|
@ -115,7 +117,7 @@ func (c *Cursor) Prev() (key []byte, value []byte) {
|
|||
// follow, a nil key is returned.
|
||||
// The returned key and value are only valid for the life of the transaction.
|
||||
func (c *Cursor) Seek(seek []byte) (key []byte, value []byte) {
|
||||
_assert(c.bucket.tx.db != nil, "tx closed")
|
||||
common.Assert(c.bucket.tx.db != nil, "tx closed")
|
||||
|
||||
k, v, flags := c.seek(seek)
|
||||
|
||||
|
@ -126,7 +128,7 @@ func (c *Cursor) Seek(seek []byte) (key []byte, value []byte) {
|
|||
|
||||
if k == nil {
|
||||
return nil, nil
|
||||
} else if (flags & uint32(bucketLeafFlag)) != 0 {
|
||||
} else if (flags & uint32(common.BucketLeafFlag)) != 0 {
|
||||
return k, nil
|
||||
}
|
||||
return k, v
|
||||
|
@ -136,15 +138,15 @@ func (c *Cursor) Seek(seek []byte) (key []byte, value []byte) {
|
|||
// Delete fails if current key/value is a bucket or if the transaction is not writable.
|
||||
func (c *Cursor) Delete() error {
|
||||
if c.bucket.tx.db == nil {
|
||||
return ErrTxClosed
|
||||
return common.ErrTxClosed
|
||||
} else if !c.bucket.Writable() {
|
||||
return ErrTxNotWritable
|
||||
return common.ErrTxNotWritable
|
||||
}
|
||||
|
||||
key, _, flags := c.keyValue()
|
||||
// Return an error if current value is a bucket.
|
||||
if (flags & bucketLeafFlag) != 0 {
|
||||
return ErrIncompatibleValue
|
||||
if (flags & common.BucketLeafFlag) != 0 {
|
||||
return common.ErrIncompatibleValue
|
||||
}
|
||||
c.node().del(key)
|
||||
|
||||
|
@ -156,7 +158,7 @@ func (c *Cursor) Delete() error {
|
|||
func (c *Cursor) seek(seek []byte) (key []byte, value []byte, flags uint32) {
|
||||
// Start from root page/node and traverse to correct page.
|
||||
c.stack = c.stack[:0]
|
||||
c.search(seek, c.bucket.root)
|
||||
c.search(seek, c.bucket.RootPage())
|
||||
|
||||
// If this is a bucket then return a nil value.
|
||||
return c.keyValue()
|
||||
|
@ -172,11 +174,11 @@ func (c *Cursor) goToFirstElementOnTheStack() {
|
|||
}
|
||||
|
||||
// Keep adding pages pointing to the first element to the stack.
|
||||
var pgId pgid
|
||||
var pgId common.Pgid
|
||||
if ref.node != nil {
|
||||
pgId = ref.node.inodes[ref.index].pgid
|
||||
} else {
|
||||
pgId = ref.page.branchPageElement(uint16(ref.index)).pgid
|
||||
pgId = ref.page.BranchPageElement(uint16(ref.index)).Pgid()
|
||||
}
|
||||
p, n := c.bucket.pageNode(pgId)
|
||||
c.stack = append(c.stack, elemRef{page: p, node: n, index: 0})
|
||||
|
@ -193,11 +195,11 @@ func (c *Cursor) last() {
|
|||
}
|
||||
|
||||
// Keep adding pages pointing to the last element in the stack.
|
||||
var pgId pgid
|
||||
var pgId common.Pgid
|
||||
if ref.node != nil {
|
||||
pgId = ref.node.inodes[ref.index].pgid
|
||||
} else {
|
||||
pgId = ref.page.branchPageElement(uint16(ref.index)).pgid
|
||||
pgId = ref.page.BranchPageElement(uint16(ref.index)).Pgid()
|
||||
}
|
||||
p, n := c.bucket.pageNode(pgId)
|
||||
|
||||
|
@ -268,10 +270,10 @@ func (c *Cursor) prev() (key []byte, value []byte, flags uint32) {
|
|||
}
|
||||
|
||||
// search recursively performs a binary search against a given page/node until it finds a given key.
|
||||
func (c *Cursor) search(key []byte, pgId pgid) {
|
||||
func (c *Cursor) search(key []byte, pgId common.Pgid) {
|
||||
p, n := c.bucket.pageNode(pgId)
|
||||
if p != nil && (p.flags&(branchPageFlag|leafPageFlag)) == 0 {
|
||||
panic(fmt.Sprintf("invalid page type: %d: %x", p.id, p.flags))
|
||||
if p != nil && (p.Flags()&(common.BranchPageFlag|common.LeafPageFlag)) == 0 {
|
||||
panic(fmt.Sprintf("invalid page type: %d: %x", p.Id(), p.Flags()))
|
||||
}
|
||||
e := elemRef{page: p, node: n}
|
||||
c.stack = append(c.stack, e)
|
||||
|
@ -309,15 +311,15 @@ func (c *Cursor) searchNode(key []byte, n *node) {
|
|||
c.search(key, n.inodes[index].pgid)
|
||||
}
|
||||
|
||||
func (c *Cursor) searchPage(key []byte, p *page) {
|
||||
func (c *Cursor) searchPage(key []byte, p *common.Page) {
|
||||
// Binary search for the correct range.
|
||||
inodes := p.branchPageElements()
|
||||
inodes := p.BranchPageElements()
|
||||
|
||||
var exact bool
|
||||
index := sort.Search(int(p.count), func(i int) bool {
|
||||
index := sort.Search(int(p.Count()), func(i int) bool {
|
||||
// TODO(benbjohnson): Optimize this range search. It's a bit hacky right now.
|
||||
// sort.Search() finds the lowest index where f() != -1 but we need the highest index.
|
||||
ret := bytes.Compare(inodes[i].key(), key)
|
||||
ret := bytes.Compare(inodes[i].Key(), key)
|
||||
if ret == 0 {
|
||||
exact = true
|
||||
}
|
||||
|
@ -329,7 +331,7 @@ func (c *Cursor) searchPage(key []byte, p *page) {
|
|||
c.stack[len(c.stack)-1].index = index
|
||||
|
||||
// Recursively search to the next page.
|
||||
c.search(key, inodes[index].pgid)
|
||||
c.search(key, inodes[index].Pgid())
|
||||
}
|
||||
|
||||
// nsearch searches the leaf node on the top of the stack for a key.
|
||||
|
@ -347,9 +349,9 @@ func (c *Cursor) nsearch(key []byte) {
|
|||
}
|
||||
|
||||
// If we have a page then search its leaf elements.
|
||||
inodes := p.leafPageElements()
|
||||
index := sort.Search(int(p.count), func(i int) bool {
|
||||
return bytes.Compare(inodes[i].key(), key) != -1
|
||||
inodes := p.LeafPageElements()
|
||||
index := sort.Search(int(p.Count()), func(i int) bool {
|
||||
return bytes.Compare(inodes[i].Key(), key) != -1
|
||||
})
|
||||
e.index = index
|
||||
}
|
||||
|
@ -370,13 +372,13 @@ func (c *Cursor) keyValue() ([]byte, []byte, uint32) {
|
|||
}
|
||||
|
||||
// Or retrieve value from page.
|
||||
elem := ref.page.leafPageElement(uint16(ref.index))
|
||||
return elem.key(), elem.value(), elem.flags
|
||||
elem := ref.page.LeafPageElement(uint16(ref.index))
|
||||
return elem.Key(), elem.Value(), elem.Flags()
|
||||
}
|
||||
|
||||
// node returns the node that the cursor is currently positioned on.
|
||||
func (c *Cursor) node() *node {
|
||||
_assert(len(c.stack) > 0, "accessing a node with a zero-length cursor stack")
|
||||
common.Assert(len(c.stack) > 0, "accessing a node with a zero-length cursor stack")
|
||||
|
||||
// If the top of the stack is a leaf node then just return it.
|
||||
if ref := &c.stack[len(c.stack)-1]; ref.node != nil && ref.isLeaf() {
|
||||
|
@ -386,19 +388,19 @@ func (c *Cursor) node() *node {
|
|||
// Start from root and traverse down the hierarchy.
|
||||
var n = c.stack[0].node
|
||||
if n == nil {
|
||||
n = c.bucket.node(c.stack[0].page.id, nil)
|
||||
n = c.bucket.node(c.stack[0].page.Id(), nil)
|
||||
}
|
||||
for _, ref := range c.stack[:len(c.stack)-1] {
|
||||
_assert(!n.isLeaf, "expected branch node")
|
||||
common.Assert(!n.isLeaf, "expected branch node")
|
||||
n = n.childAt(ref.index)
|
||||
}
|
||||
_assert(n.isLeaf, "expected leaf node")
|
||||
common.Assert(n.isLeaf, "expected leaf node")
|
||||
return n
|
||||
}
|
||||
|
||||
// elemRef represents a reference to an element on a given page/node.
|
||||
type elemRef struct {
|
||||
page *page
|
||||
page *common.Page
|
||||
node *node
|
||||
index int
|
||||
}
|
||||
|
@ -408,7 +410,7 @@ func (r *elemRef) isLeaf() bool {
|
|||
if r.node != nil {
|
||||
return r.node.isLeaf
|
||||
}
|
||||
return (r.page.flags & leafPageFlag) != 0
|
||||
return (r.page.Flags() & common.LeafPageFlag) != 0
|
||||
}
|
||||
|
||||
// count returns the number of inodes or page elements.
|
||||
|
@ -416,5 +418,5 @@ func (r *elemRef) count() int {
|
|||
if r.node != nil {
|
||||
return len(r.node.inodes)
|
||||
}
|
||||
return int(r.page.count)
|
||||
return int(r.page.Count())
|
||||
}
|
||||
|
|
|
@ -13,6 +13,7 @@ import (
|
|||
|
||||
bolt "go.etcd.io/bbolt"
|
||||
"go.etcd.io/bbolt/internal/btesting"
|
||||
"go.etcd.io/bbolt/internal/common"
|
||||
)
|
||||
|
||||
// Ensure that a cursor can return a reference to the bucket that created it.
|
||||
|
@ -139,7 +140,7 @@ func TestCursor_Delete(t *testing.T) {
|
|||
}
|
||||
|
||||
c.Seek([]byte("sub"))
|
||||
if err := c.Delete(); err != bolt.ErrIncompatibleValue {
|
||||
if err := c.Delete(); err != common.ErrIncompatibleValue {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
|
||||
|
|
260
db.go
260
db.go
|
@ -3,7 +3,6 @@ package bbolt
|
|||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"hash/fnv"
|
||||
"io"
|
||||
"os"
|
||||
"runtime"
|
||||
|
@ -11,48 +10,13 @@ import (
|
|||
"sync"
|
||||
"time"
|
||||
"unsafe"
|
||||
|
||||
"go.etcd.io/bbolt/internal/common"
|
||||
)
|
||||
|
||||
// The largest step that can be taken when remapping the mmap.
|
||||
const maxMmapStep = 1 << 30 // 1GB
|
||||
|
||||
// The data file format version.
|
||||
const version = 2
|
||||
|
||||
// Represents a marker value to indicate that a file is a Bolt DB.
|
||||
const magic uint32 = 0xED0CDAED
|
||||
|
||||
const pgidNoFreelist pgid = 0xffffffffffffffff
|
||||
|
||||
// IgnoreNoSync specifies whether the NoSync field of a DB is ignored when
|
||||
// syncing changes to a file. This is required as some operating systems,
|
||||
// such as OpenBSD, do not have a unified buffer cache (UBC) and writes
|
||||
// must be synchronized using the msync(2) syscall.
|
||||
const IgnoreNoSync = runtime.GOOS == "openbsd"
|
||||
|
||||
// Default values if not set in a DB instance.
|
||||
const (
|
||||
DefaultMaxBatchSize int = 1000
|
||||
DefaultMaxBatchDelay = 10 * time.Millisecond
|
||||
DefaultAllocSize = 16 * 1024 * 1024
|
||||
)
|
||||
|
||||
// default page size for db is set to the OS page size.
|
||||
var defaultPageSize = os.Getpagesize()
|
||||
|
||||
// The time elapsed between consecutive file locking attempts.
|
||||
const flockRetryTimeout = 50 * time.Millisecond
|
||||
|
||||
// FreelistType is the type of the freelist backend
|
||||
type FreelistType string
|
||||
|
||||
const (
|
||||
// FreelistArrayType indicates backend freelist type is array
|
||||
FreelistArrayType = FreelistType("array")
|
||||
// FreelistMapType indicates backend freelist type is hashmap
|
||||
FreelistMapType = FreelistType("hashmap")
|
||||
)
|
||||
|
||||
// DB represents a collection of buckets persisted to a file on disk.
|
||||
// All data access is performed through transactions which can be obtained through the DB.
|
||||
// All the functions on DB will return ErrDatabaseNotOpen if accessed before Open() is called.
|
||||
|
@ -85,7 +49,7 @@ type DB struct {
|
|||
// The alternative one is using hashmap, it is faster in almost all circumstances
|
||||
// but it doesn't guarantee that it offers the smallest page id available. In normal case it is safe.
|
||||
// The default type is array
|
||||
FreelistType FreelistType
|
||||
FreelistType common.FreelistType
|
||||
|
||||
// When true, skips the truncate call when growing the database.
|
||||
// Setting this to true is only safe on non-ext3/ext4 systems.
|
||||
|
@ -141,8 +105,8 @@ type DB struct {
|
|||
data *[maxMapSize]byte
|
||||
datasz int
|
||||
filesz int // current on disk file size
|
||||
meta0 *meta
|
||||
meta1 *meta
|
||||
meta0 *common.Meta
|
||||
meta1 *common.Meta
|
||||
pageSize int
|
||||
opened bool
|
||||
rwtx *Tx
|
||||
|
@ -206,9 +170,9 @@ func Open(path string, mode os.FileMode, options *Options) (*DB, error) {
|
|||
db.Mlock = options.Mlock
|
||||
|
||||
// Set default values for later DB operations.
|
||||
db.MaxBatchSize = DefaultMaxBatchSize
|
||||
db.MaxBatchDelay = DefaultMaxBatchDelay
|
||||
db.AllocSize = DefaultAllocSize
|
||||
db.MaxBatchSize = common.DefaultMaxBatchSize
|
||||
db.MaxBatchDelay = common.DefaultMaxBatchDelay
|
||||
db.AllocSize = common.DefaultAllocSize
|
||||
|
||||
flag := os.O_RDWR
|
||||
if options.ReadOnly {
|
||||
|
@ -249,7 +213,7 @@ func Open(path string, mode os.FileMode, options *Options) (*DB, error) {
|
|||
|
||||
if db.pageSize = options.PageSize; db.pageSize == 0 {
|
||||
// Set the default page size to the OS page size.
|
||||
db.pageSize = defaultPageSize
|
||||
db.pageSize = common.DefaultPageSize
|
||||
}
|
||||
|
||||
// Initialize the database if it doesn't exist.
|
||||
|
@ -269,7 +233,7 @@ func Open(path string, mode os.FileMode, options *Options) (*DB, error) {
|
|||
db.pageSize = pgSize
|
||||
} else {
|
||||
_ = db.close()
|
||||
return nil, ErrInvalid
|
||||
return nil, common.ErrInvalid
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -347,7 +311,7 @@ func (db *DB) getPageSize() (int, error) {
|
|||
return db.pageSize, nil
|
||||
}
|
||||
|
||||
return 0, ErrInvalid
|
||||
return 0, common.ErrInvalid
|
||||
}
|
||||
|
||||
// getPageSizeFromFirstMeta reads the pageSize from the first meta page
|
||||
|
@ -356,11 +320,11 @@ func (db *DB) getPageSizeFromFirstMeta() (int, bool, error) {
|
|||
var metaCanRead bool
|
||||
if bw, err := db.file.ReadAt(buf[:], 0); err == nil && bw == len(buf) {
|
||||
metaCanRead = true
|
||||
if m := db.pageInBuffer(buf[:], 0).meta(); m.validate() == nil {
|
||||
return int(m.pageSize), metaCanRead, nil
|
||||
if m := db.pageInBuffer(buf[:], 0).Meta(); m.Validate() == nil {
|
||||
return int(m.PageSize()), metaCanRead, nil
|
||||
}
|
||||
}
|
||||
return 0, metaCanRead, ErrInvalid
|
||||
return 0, metaCanRead, common.ErrInvalid
|
||||
}
|
||||
|
||||
// getPageSizeFromSecondMeta reads the pageSize from the second meta page
|
||||
|
@ -392,13 +356,13 @@ func (db *DB) getPageSizeFromSecondMeta() (int, bool, error) {
|
|||
bw, err := db.file.ReadAt(buf[:], pos)
|
||||
if (err == nil && bw == len(buf)) || (err == io.EOF && int64(bw) == (fileSize-pos)) {
|
||||
metaCanRead = true
|
||||
if m := db.pageInBuffer(buf[:], 0).meta(); m.validate() == nil {
|
||||
return int(m.pageSize), metaCanRead, nil
|
||||
if m := db.pageInBuffer(buf[:], 0).Meta(); m.Validate() == nil {
|
||||
return int(m.PageSize()), metaCanRead, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0, metaCanRead, ErrInvalid
|
||||
return 0, metaCanRead, common.ErrInvalid
|
||||
}
|
||||
|
||||
// loadFreelist reads the freelist if it is synced, or reconstructs it
|
||||
|
@ -412,14 +376,14 @@ func (db *DB) loadFreelist() {
|
|||
db.freelist.readIDs(db.freepages())
|
||||
} else {
|
||||
// Read free list from freelist page.
|
||||
db.freelist.read(db.page(db.meta().freelist))
|
||||
db.freelist.read(db.page(db.meta().Freelist()))
|
||||
}
|
||||
db.stats.FreePageN = db.freelist.free_count()
|
||||
})
|
||||
}
|
||||
|
||||
func (db *DB) hasSyncedFreelist() bool {
|
||||
return db.meta().freelist != pgidNoFreelist
|
||||
return db.meta().Freelist() != common.PgidNoFreelist
|
||||
}
|
||||
|
||||
// mmap opens the underlying memory-mapped file and initializes the meta references.
|
||||
|
@ -478,14 +442,14 @@ func (db *DB) mmap(minsz int) error {
|
|||
}
|
||||
|
||||
// Save references to the meta pages.
|
||||
db.meta0 = db.page(0).meta()
|
||||
db.meta1 = db.page(1).meta()
|
||||
db.meta0 = db.page(0).Meta()
|
||||
db.meta1 = db.page(1).Meta()
|
||||
|
||||
// Validate the meta pages. We only return an error if both meta pages fail
|
||||
// validation, since meta0 failing validation means that it wasn't saved
|
||||
// properly -- but we can recover using meta1. And vice-versa.
|
||||
err0 := db.meta0.validate()
|
||||
err1 := db.meta1.validate()
|
||||
err0 := db.meta0.Validate()
|
||||
err1 := db.meta1.Validate()
|
||||
if err0 != nil && err1 != nil {
|
||||
return err0
|
||||
}
|
||||
|
@ -533,8 +497,8 @@ func (db *DB) mmapSize(size int) (int, error) {
|
|||
|
||||
// If larger than 1GB then grow by 1GB at a time.
|
||||
sz := int64(size)
|
||||
if remainder := sz % int64(maxMmapStep); remainder > 0 {
|
||||
sz += int64(maxMmapStep) - remainder
|
||||
if remainder := sz % int64(common.MaxMmapStep); remainder > 0 {
|
||||
sz += int64(common.MaxMmapStep) - remainder
|
||||
}
|
||||
|
||||
// Ensure that the mmap size is a multiple of the page size.
|
||||
|
@ -581,33 +545,33 @@ func (db *DB) init() error {
|
|||
// Create two meta pages on a buffer.
|
||||
buf := make([]byte, db.pageSize*4)
|
||||
for i := 0; i < 2; i++ {
|
||||
p := db.pageInBuffer(buf, pgid(i))
|
||||
p.id = pgid(i)
|
||||
p.flags = metaPageFlag
|
||||
p := db.pageInBuffer(buf, common.Pgid(i))
|
||||
p.SetId(common.Pgid(i))
|
||||
p.SetFlags(common.MetaPageFlag)
|
||||
|
||||
// Initialize the meta page.
|
||||
m := p.meta()
|
||||
m.magic = magic
|
||||
m.version = version
|
||||
m.pageSize = uint32(db.pageSize)
|
||||
m.freelist = 2
|
||||
m.root = bucket{root: 3}
|
||||
m.pgid = 4
|
||||
m.txid = txid(i)
|
||||
m.checksum = m.sum64()
|
||||
m := p.Meta()
|
||||
m.SetMagic(common.Magic)
|
||||
m.SetVersion(common.Version)
|
||||
m.SetPageSize(uint32(db.pageSize))
|
||||
m.SetFreelist(2)
|
||||
m.SetRootBucket(common.NewInBucket(3, 0))
|
||||
m.SetPgid(4)
|
||||
m.SetTxid(common.Txid(i))
|
||||
m.SetChecksum(m.Sum64())
|
||||
}
|
||||
|
||||
// Write an empty freelist at page 3 (page id 2).
|
||||
p := db.pageInBuffer(buf, pgid(2))
|
||||
p.id = pgid(2)
|
||||
p.flags = freelistPageFlag
|
||||
p.count = 0
|
||||
p := db.pageInBuffer(buf, common.Pgid(2))
|
||||
p.SetId(2)
|
||||
p.SetFlags(common.FreelistPageFlag)
|
||||
p.SetCount(0)
|
||||
|
||||
// Write an empty leaf page at page 4 (page id 3).
|
||||
p = db.pageInBuffer(buf, pgid(3))
|
||||
p.id = pgid(3)
|
||||
p.flags = leafPageFlag
|
||||
p.count = 0
|
||||
p = db.pageInBuffer(buf, common.Pgid(3))
|
||||
p.SetId(3)
|
||||
p.SetFlags(common.LeafPageFlag)
|
||||
p.SetCount(0)
|
||||
|
||||
// Write the buffer to our data file.
|
||||
if _, err := db.ops.writeAt(buf, 0); err != nil {
|
||||
|
@ -719,14 +683,14 @@ func (db *DB) beginTx() (*Tx, error) {
|
|||
if !db.opened {
|
||||
db.mmaplock.RUnlock()
|
||||
db.metalock.Unlock()
|
||||
return nil, ErrDatabaseNotOpen
|
||||
return nil, common.ErrDatabaseNotOpen
|
||||
}
|
||||
|
||||
// Exit if the database is not correctly mapped.
|
||||
if db.data == nil {
|
||||
db.mmaplock.RUnlock()
|
||||
db.metalock.Unlock()
|
||||
return nil, ErrInvalidMapping
|
||||
return nil, common.ErrInvalidMapping
|
||||
}
|
||||
|
||||
// Create a transaction associated with the database.
|
||||
|
@ -752,7 +716,7 @@ func (db *DB) beginTx() (*Tx, error) {
|
|||
func (db *DB) beginRWTx() (*Tx, error) {
|
||||
// If the database was opened with Options.ReadOnly, return an error.
|
||||
if db.readOnly {
|
||||
return nil, ErrDatabaseReadOnly
|
||||
return nil, common.ErrDatabaseReadOnly
|
||||
}
|
||||
|
||||
// Obtain writer lock. This is released by the transaction when it closes.
|
||||
|
@ -767,13 +731,13 @@ func (db *DB) beginRWTx() (*Tx, error) {
|
|||
// Exit if the database is not open yet.
|
||||
if !db.opened {
|
||||
db.rwlock.Unlock()
|
||||
return nil, ErrDatabaseNotOpen
|
||||
return nil, common.ErrDatabaseNotOpen
|
||||
}
|
||||
|
||||
// Exit if the database is not correctly mapped.
|
||||
if db.data == nil {
|
||||
db.rwlock.Unlock()
|
||||
return nil, ErrInvalidMapping
|
||||
return nil, common.ErrInvalidMapping
|
||||
}
|
||||
|
||||
// Create a transaction associated with the database.
|
||||
|
@ -788,19 +752,19 @@ func (db *DB) beginRWTx() (*Tx, error) {
|
|||
func (db *DB) freePages() {
|
||||
// Free all pending pages prior to earliest open transaction.
|
||||
sort.Sort(txsById(db.txs))
|
||||
minid := txid(0xFFFFFFFFFFFFFFFF)
|
||||
minid := common.Txid(0xFFFFFFFFFFFFFFFF)
|
||||
if len(db.txs) > 0 {
|
||||
minid = db.txs[0].meta.txid
|
||||
minid = db.txs[0].meta.Txid()
|
||||
}
|
||||
if minid > 0 {
|
||||
db.freelist.release(minid - 1)
|
||||
}
|
||||
// Release unused txid extents.
|
||||
for _, t := range db.txs {
|
||||
db.freelist.releaseRange(minid, t.meta.txid-1)
|
||||
minid = t.meta.txid + 1
|
||||
db.freelist.releaseRange(minid, t.meta.Txid()-1)
|
||||
minid = t.meta.Txid() + 1
|
||||
}
|
||||
db.freelist.releaseRange(minid, txid(0xFFFFFFFFFFFFFFFF))
|
||||
db.freelist.releaseRange(minid, common.Txid(0xFFFFFFFFFFFFFFFF))
|
||||
// Any page both allocated and freed in an extent is safe to release.
|
||||
}
|
||||
|
||||
|
@ -808,7 +772,7 @@ type txsById []*Tx
|
|||
|
||||
func (t txsById) Len() int { return len(t) }
|
||||
func (t txsById) Swap(i, j int) { t[i], t[j] = t[j], t[i] }
|
||||
func (t txsById) Less(i, j int) bool { return t[i].meta.txid < t[j].meta.txid }
|
||||
func (t txsById) Less(i, j int) bool { return t[i].meta.Txid() < t[j].meta.Txid() }
|
||||
|
||||
// removeTx removes a transaction from the database.
|
||||
func (db *DB) removeTx(tx *Tx) {
|
||||
|
@ -1050,37 +1014,37 @@ func (db *DB) Stats() Stats {
|
|||
// This is for internal access to the raw data bytes from the C cursor, use
|
||||
// carefully, or not at all.
|
||||
func (db *DB) Info() *Info {
|
||||
_assert(db.data != nil, "database file isn't correctly mapped")
|
||||
common.Assert(db.data != nil, "database file isn't correctly mapped")
|
||||
return &Info{uintptr(unsafe.Pointer(&db.data[0])), db.pageSize}
|
||||
}
|
||||
|
||||
// page retrieves a page reference from the mmap based on the current page size.
|
||||
func (db *DB) page(id pgid) *page {
|
||||
pos := id * pgid(db.pageSize)
|
||||
return (*page)(unsafe.Pointer(&db.data[pos]))
|
||||
func (db *DB) page(id common.Pgid) *common.Page {
|
||||
pos := id * common.Pgid(db.pageSize)
|
||||
return (*common.Page)(unsafe.Pointer(&db.data[pos]))
|
||||
}
|
||||
|
||||
// pageInBuffer retrieves a page reference from a given byte array based on the current page size.
|
||||
func (db *DB) pageInBuffer(b []byte, id pgid) *page {
|
||||
return (*page)(unsafe.Pointer(&b[id*pgid(db.pageSize)]))
|
||||
func (db *DB) pageInBuffer(b []byte, id common.Pgid) *common.Page {
|
||||
return (*common.Page)(unsafe.Pointer(&b[id*common.Pgid(db.pageSize)]))
|
||||
}
|
||||
|
||||
// meta retrieves the current meta page reference.
|
||||
func (db *DB) meta() *meta {
|
||||
func (db *DB) meta() *common.Meta {
|
||||
// We have to return the meta with the highest txid which doesn't fail
|
||||
// validation. Otherwise, we can cause errors when in fact the database is
|
||||
// in a consistent state. metaA is the one with the higher txid.
|
||||
metaA := db.meta0
|
||||
metaB := db.meta1
|
||||
if db.meta1.txid > db.meta0.txid {
|
||||
if db.meta1.Txid() > db.meta0.Txid() {
|
||||
metaA = db.meta1
|
||||
metaB = db.meta0
|
||||
}
|
||||
|
||||
// Use higher meta page if valid. Otherwise, fallback to previous, if valid.
|
||||
if err := metaA.validate(); err == nil {
|
||||
if err := metaA.Validate(); err == nil {
|
||||
return metaA
|
||||
} else if err := metaB.validate(); err == nil {
|
||||
} else if err := metaB.Validate(); err == nil {
|
||||
return metaB
|
||||
}
|
||||
|
||||
|
@ -1090,7 +1054,7 @@ func (db *DB) meta() *meta {
|
|||
}
|
||||
|
||||
// allocate returns a contiguous block of memory starting at a given page.
|
||||
func (db *DB) allocate(txid txid, count int) (*page, error) {
|
||||
func (db *DB) allocate(txid common.Txid, count int) (*common.Page, error) {
|
||||
// Allocate a temporary buffer for the page.
|
||||
var buf []byte
|
||||
if count == 1 {
|
||||
|
@ -1098,17 +1062,18 @@ func (db *DB) allocate(txid txid, count int) (*page, error) {
|
|||
} else {
|
||||
buf = make([]byte, count*db.pageSize)
|
||||
}
|
||||
p := (*page)(unsafe.Pointer(&buf[0]))
|
||||
p.overflow = uint32(count - 1)
|
||||
p := (*common.Page)(unsafe.Pointer(&buf[0]))
|
||||
p.SetOverflow(uint32(count - 1))
|
||||
|
||||
// Use pages from the freelist if they are available.
|
||||
if p.id = db.freelist.allocate(txid, count); p.id != 0 {
|
||||
p.SetId(db.freelist.allocate(txid, count))
|
||||
if p.Id() != 0 {
|
||||
return p, nil
|
||||
}
|
||||
|
||||
// Resize mmap() if we're at the end.
|
||||
p.id = db.rwtx.meta.pgid
|
||||
var minsz = int((p.id+pgid(count))+1) * db.pageSize
|
||||
p.SetId(db.rwtx.meta.Pgid())
|
||||
var minsz = int((p.Id()+common.Pgid(count))+1) * db.pageSize
|
||||
if minsz >= db.datasz {
|
||||
if err := db.mmap(minsz); err != nil {
|
||||
return nil, fmt.Errorf("mmap allocate error: %s", err)
|
||||
|
@ -1116,7 +1081,8 @@ func (db *DB) allocate(txid txid, count int) (*page, error) {
|
|||
}
|
||||
|
||||
// Move the page id high water mark.
|
||||
db.rwtx.meta.pgid += pgid(count)
|
||||
curPgid := db.rwtx.meta.Pgid()
|
||||
db.rwtx.meta.SetPgid(curPgid + common.Pgid(count))
|
||||
|
||||
return p, nil
|
||||
}
|
||||
|
@ -1163,7 +1129,7 @@ func (db *DB) IsReadOnly() bool {
|
|||
return db.readOnly
|
||||
}
|
||||
|
||||
func (db *DB) freepages() []pgid {
|
||||
func (db *DB) freepages() []common.Pgid {
|
||||
tx, err := db.beginTx()
|
||||
defer func() {
|
||||
err = tx.Rollback()
|
||||
|
@ -1175,8 +1141,8 @@ func (db *DB) freepages() []pgid {
|
|||
panic("freepages: failed to open read only tx")
|
||||
}
|
||||
|
||||
reachable := make(map[pgid]*page)
|
||||
nofreed := make(map[pgid]bool)
|
||||
reachable := make(map[common.Pgid]*common.Page)
|
||||
nofreed := make(map[common.Pgid]bool)
|
||||
ech := make(chan error)
|
||||
go func() {
|
||||
for e := range ech {
|
||||
|
@ -1188,8 +1154,8 @@ func (db *DB) freepages() []pgid {
|
|||
|
||||
// TODO: If check bucket reported any corruptions (ech) we shouldn't proceed to freeing the pages.
|
||||
|
||||
var fids []pgid
|
||||
for i := pgid(2); i < db.meta().pgid; i++ {
|
||||
var fids []common.Pgid
|
||||
for i := common.Pgid(2); i < db.meta().Pgid(); i++ {
|
||||
if _, ok := reachable[i]; !ok {
|
||||
fids = append(fids, i)
|
||||
}
|
||||
|
@ -1221,7 +1187,7 @@ type Options struct {
|
|||
// The alternative one is using hashmap, it is faster in almost all circumstances
|
||||
// but it doesn't guarantee that it offers the smallest page id available. In the normal case it is safe.
|
||||
// The default type is array
|
||||
FreelistType FreelistType
|
||||
FreelistType common.FreelistType
|
||||
|
||||
// Open database in read-only mode. Uses flock(..., LOCK_SH |LOCK_NB) to
|
||||
// grab a shared lock (UNIX).
|
||||
|
@ -1263,7 +1229,7 @@ type Options struct {
|
|||
var DefaultOptions = &Options{
|
||||
Timeout: 0,
|
||||
NoGrowSync: false,
|
||||
FreelistType: FreelistArrayType,
|
||||
FreelistType: common.FreelistArrayType,
|
||||
}
|
||||
|
||||
// Stats represents statistics about the database.
|
||||
|
@ -1302,65 +1268,3 @@ type Info struct {
|
|||
Data uintptr
|
||||
PageSize int
|
||||
}
|
||||
|
||||
type meta struct {
|
||||
magic uint32
|
||||
version uint32
|
||||
pageSize uint32
|
||||
flags uint32
|
||||
root bucket
|
||||
freelist pgid
|
||||
pgid pgid
|
||||
txid txid
|
||||
checksum uint64
|
||||
}
|
||||
|
||||
// validate checks the marker bytes and version of the meta page to ensure it matches this binary.
|
||||
func (m *meta) validate() error {
|
||||
if m.magic != magic {
|
||||
return ErrInvalid
|
||||
} else if m.version != version {
|
||||
return ErrVersionMismatch
|
||||
} else if m.checksum != m.sum64() {
|
||||
return ErrChecksum
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// copy copies one meta object to another.
|
||||
func (m *meta) copy(dest *meta) {
|
||||
*dest = *m
|
||||
}
|
||||
|
||||
// write writes the meta onto a page.
|
||||
func (m *meta) write(p *page) {
|
||||
if m.root.root >= m.pgid {
|
||||
panic(fmt.Sprintf("root bucket pgid (%d) above high water mark (%d)", m.root.root, m.pgid))
|
||||
} else if m.freelist >= m.pgid && m.freelist != pgidNoFreelist {
|
||||
// TODO: reject pgidNoFreeList if !NoFreelistSync
|
||||
panic(fmt.Sprintf("freelist pgid (%d) above high water mark (%d)", m.freelist, m.pgid))
|
||||
}
|
||||
|
||||
// Page id is either going to be 0 or 1 which we can determine by the transaction ID.
|
||||
p.id = pgid(m.txid % 2)
|
||||
p.flags |= metaPageFlag
|
||||
|
||||
// Calculate the checksum.
|
||||
m.checksum = m.sum64()
|
||||
|
||||
m.copy(p.meta())
|
||||
}
|
||||
|
||||
// generates the checksum for the meta.
|
||||
func (m *meta) sum64() uint64 {
|
||||
var h = fnv.New64a()
|
||||
_, _ = h.Write((*[unsafe.Offsetof(meta{}.checksum)]byte)(unsafe.Pointer(m))[:])
|
||||
return h.Sum64()
|
||||
}
|
||||
|
||||
// _assert will panic with a given formatted message if the given condition is false.
|
||||
func _assert(condition bool, msg string, v ...interface{}) {
|
||||
if !condition {
|
||||
panic(fmt.Sprintf("assertion failed: "+msg, v...))
|
||||
}
|
||||
}
|
||||
|
|
15
db_test.go
15
db_test.go
|
@ -21,6 +21,7 @@ import (
|
|||
|
||||
bolt "go.etcd.io/bbolt"
|
||||
"go.etcd.io/bbolt/internal/btesting"
|
||||
"go.etcd.io/bbolt/internal/common"
|
||||
)
|
||||
|
||||
// pageSize is the size of one page in the data file.
|
||||
|
@ -136,7 +137,7 @@ func TestOpen_ErrInvalid(t *testing.T) {
|
|||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if _, err := bolt.Open(path, 0666, nil); err != bolt.ErrInvalid {
|
||||
if _, err := bolt.Open(path, 0666, nil); err != common.ErrInvalid {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
}
|
||||
|
@ -172,7 +173,7 @@ func TestOpen_ErrVersionMismatch(t *testing.T) {
|
|||
}
|
||||
|
||||
// Reopen data file.
|
||||
if _, err := bolt.Open(path, 0666, nil); err != bolt.ErrVersionMismatch {
|
||||
if _, err := bolt.Open(path, 0666, nil); err != common.ErrVersionMismatch {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
}
|
||||
|
@ -208,7 +209,7 @@ func TestOpen_ErrChecksum(t *testing.T) {
|
|||
}
|
||||
|
||||
// Reopen data file.
|
||||
if _, err := bolt.Open(path, 0666, nil); err != bolt.ErrChecksum {
|
||||
if _, err := bolt.Open(path, 0666, nil); err != common.ErrChecksum {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
}
|
||||
|
@ -552,7 +553,7 @@ func TestDB_Open_ReadOnly(t *testing.T) {
|
|||
}
|
||||
|
||||
// Can't launch read-write transaction.
|
||||
if _, err := readOnlyDB.Begin(true); err != bolt.ErrDatabaseReadOnly {
|
||||
if _, err := readOnlyDB.Begin(true); err != common.ErrDatabaseReadOnly {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
|
||||
|
@ -641,7 +642,7 @@ func TestOpen_RecoverFreeList(t *testing.T) {
|
|||
// Ensure that a database cannot open a transaction when it's not open.
|
||||
func TestDB_Begin_ErrDatabaseNotOpen(t *testing.T) {
|
||||
var db bolt.DB
|
||||
if _, err := db.Begin(false); err != bolt.ErrDatabaseNotOpen {
|
||||
if _, err := db.Begin(false); err != common.ErrDatabaseNotOpen {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
}
|
||||
|
@ -727,7 +728,7 @@ func TestDB_Concurrent_WriteTo(t *testing.T) {
|
|||
// Ensure that opening a transaction while the DB is closed returns an error.
|
||||
func TestDB_BeginRW_Closed(t *testing.T) {
|
||||
var db bolt.DB
|
||||
if _, err := db.Begin(true); err != bolt.ErrDatabaseNotOpen {
|
||||
if _, err := db.Begin(true); err != common.ErrDatabaseNotOpen {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
}
|
||||
|
@ -828,7 +829,7 @@ func TestDB_Update_Closed(t *testing.T) {
|
|||
t.Fatal(err)
|
||||
}
|
||||
return nil
|
||||
}); err != bolt.ErrDatabaseNotOpen {
|
||||
}); err != common.ErrDatabaseNotOpen {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -6,6 +6,8 @@ import (
|
|||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"go.etcd.io/bbolt/internal/common"
|
||||
)
|
||||
|
||||
func TestOpenWithPreLoadFreelist(t *testing.T) {
|
||||
|
@ -76,7 +78,7 @@ func TestMethodPage(t *testing.T) {
|
|||
name: "readonly mode without preloading free pages",
|
||||
readonly: true,
|
||||
preLoadFreePage: false,
|
||||
expectedError: ErrFreePagesNotLoaded,
|
||||
expectedError: common.ErrFreePagesNotLoaded,
|
||||
},
|
||||
}
|
||||
|
||||
|
|
78
errors.go
78
errors.go
|
@ -1,78 +0,0 @@
|
|||
package bbolt
|
||||
|
||||
import "errors"
|
||||
|
||||
// These errors can be returned when opening or calling methods on a DB.
|
||||
var (
|
||||
// ErrDatabaseNotOpen is returned when a DB instance is accessed before it
|
||||
// is opened or after it is closed.
|
||||
ErrDatabaseNotOpen = errors.New("database not open")
|
||||
|
||||
// ErrDatabaseOpen is returned when opening a database that is
|
||||
// already open.
|
||||
ErrDatabaseOpen = errors.New("database already open")
|
||||
|
||||
// ErrInvalid is returned when both meta pages on a database are invalid.
|
||||
// This typically occurs when a file is not a bolt database.
|
||||
ErrInvalid = errors.New("invalid database")
|
||||
|
||||
// ErrInvalidMapping is returned when the database file fails to get mapped.
|
||||
ErrInvalidMapping = errors.New("database isn't correctly mapped")
|
||||
|
||||
// ErrVersionMismatch is returned when the data file was created with a
|
||||
// different version of Bolt.
|
||||
ErrVersionMismatch = errors.New("version mismatch")
|
||||
|
||||
// ErrChecksum is returned when either meta page checksum does not match.
|
||||
ErrChecksum = errors.New("checksum error")
|
||||
|
||||
// ErrTimeout is returned when a database cannot obtain an exclusive lock
|
||||
// on the data file after the timeout passed to Open().
|
||||
ErrTimeout = errors.New("timeout")
|
||||
)
|
||||
|
||||
// These errors can occur when beginning or committing a Tx.
|
||||
var (
|
||||
// ErrTxNotWritable is returned when performing a write operation on a
|
||||
// read-only transaction.
|
||||
ErrTxNotWritable = errors.New("tx not writable")
|
||||
|
||||
// ErrTxClosed is returned when committing or rolling back a transaction
|
||||
// that has already been committed or rolled back.
|
||||
ErrTxClosed = errors.New("tx closed")
|
||||
|
||||
// ErrDatabaseReadOnly is returned when a mutating transaction is started on a
|
||||
// read-only database.
|
||||
ErrDatabaseReadOnly = errors.New("database is in read-only mode")
|
||||
|
||||
// ErrFreePagesNotLoaded is returned when a readonly transaction without
|
||||
// preloading the free pages is trying to access the free pages.
|
||||
ErrFreePagesNotLoaded = errors.New("free pages are not pre-loaded")
|
||||
)
|
||||
|
||||
// These errors can occur when putting or deleting a value or a bucket.
|
||||
var (
|
||||
// ErrBucketNotFound is returned when trying to access a bucket that has
|
||||
// not been created yet.
|
||||
ErrBucketNotFound = errors.New("bucket not found")
|
||||
|
||||
// ErrBucketExists is returned when creating a bucket that already exists.
|
||||
ErrBucketExists = errors.New("bucket already exists")
|
||||
|
||||
// ErrBucketNameRequired is returned when creating a bucket with a blank name.
|
||||
ErrBucketNameRequired = errors.New("bucket name required")
|
||||
|
||||
// ErrKeyRequired is returned when inserting a zero-length key.
|
||||
ErrKeyRequired = errors.New("key required")
|
||||
|
||||
// ErrKeyTooLarge is returned when inserting a key that is larger than MaxKeySize.
|
||||
ErrKeyTooLarge = errors.New("key too large")
|
||||
|
||||
// ErrValueTooLarge is returned when inserting a value that is larger than MaxValueSize.
|
||||
ErrValueTooLarge = errors.New("value too large")
|
||||
|
||||
// ErrIncompatibleValue is returned when trying to create or delete a bucket
|
||||
// on an existing non-bucket key or when trying to create or delete a
|
||||
// non-bucket key on an existing bucket key.
|
||||
ErrIncompatibleValue = errors.New("incompatible value")
|
||||
)
|
173
freelist.go
173
freelist.go
|
@ -4,50 +4,52 @@ import (
|
|||
"fmt"
|
||||
"sort"
|
||||
"unsafe"
|
||||
|
||||
"go.etcd.io/bbolt/internal/common"
|
||||
)
|
||||
|
||||
// txPending holds a list of pgids and corresponding allocation txns
|
||||
// that are pending to be freed.
|
||||
type txPending struct {
|
||||
ids []pgid
|
||||
alloctx []txid // txids allocating the ids
|
||||
lastReleaseBegin txid // beginning txid of last matching releaseRange
|
||||
ids []common.Pgid
|
||||
alloctx []common.Txid // txids allocating the ids
|
||||
lastReleaseBegin common.Txid // beginning txid of last matching releaseRange
|
||||
}
|
||||
|
||||
// pidSet holds the set of starting pgids which have the same span size
|
||||
type pidSet map[pgid]struct{}
|
||||
type pidSet map[common.Pgid]struct{}
|
||||
|
||||
// freelist represents a list of all pages that are available for allocation.
|
||||
// It also tracks pages that have been freed but are still in use by open transactions.
|
||||
type freelist struct {
|
||||
freelistType FreelistType // freelist type
|
||||
ids []pgid // all free and available free page ids.
|
||||
allocs map[pgid]txid // mapping of txid that allocated a pgid.
|
||||
pending map[txid]*txPending // mapping of soon-to-be free page ids by tx.
|
||||
cache map[pgid]struct{} // fast lookup of all free and pending page ids.
|
||||
freemaps map[uint64]pidSet // key is the size of continuous pages(span), value is a set which contains the starting pgids of same size
|
||||
forwardMap map[pgid]uint64 // key is start pgid, value is its span size
|
||||
backwardMap map[pgid]uint64 // key is end pgid, value is its span size
|
||||
allocate func(txid txid, n int) pgid // the freelist allocate func
|
||||
free_count func() int // the function which gives you free page number
|
||||
mergeSpans func(ids pgids) // the mergeSpan func
|
||||
getFreePageIDs func() []pgid // get free pgids func
|
||||
readIDs func(pgids []pgid) // readIDs func reads list of pages and init the freelist
|
||||
freelistType common.FreelistType // freelist type
|
||||
ids []common.Pgid // all free and available free page ids.
|
||||
allocs map[common.Pgid]common.Txid // mapping of Txid that allocated a pgid.
|
||||
pending map[common.Txid]*txPending // mapping of soon-to-be free page ids by tx.
|
||||
cache map[common.Pgid]struct{} // fast lookup of all free and pending page ids.
|
||||
freemaps map[uint64]pidSet // key is the size of continuous pages(span), value is a set which contains the starting pgids of same size
|
||||
forwardMap map[common.Pgid]uint64 // key is start pgid, value is its span size
|
||||
backwardMap map[common.Pgid]uint64 // key is end pgid, value is its span size
|
||||
allocate func(txid common.Txid, n int) common.Pgid // the freelist allocate func
|
||||
free_count func() int // the function which gives you free page number
|
||||
mergeSpans func(ids common.Pgids) // the mergeSpan func
|
||||
getFreePageIDs func() []common.Pgid // get free pgids func
|
||||
readIDs func(pgids []common.Pgid) // readIDs func reads list of pages and init the freelist
|
||||
}
|
||||
|
||||
// newFreelist returns an empty, initialized freelist.
|
||||
func newFreelist(freelistType FreelistType) *freelist {
|
||||
func newFreelist(freelistType common.FreelistType) *freelist {
|
||||
f := &freelist{
|
||||
freelistType: freelistType,
|
||||
allocs: make(map[pgid]txid),
|
||||
pending: make(map[txid]*txPending),
|
||||
cache: make(map[pgid]struct{}),
|
||||
allocs: make(map[common.Pgid]common.Txid),
|
||||
pending: make(map[common.Txid]*txPending),
|
||||
cache: make(map[common.Pgid]struct{}),
|
||||
freemaps: make(map[uint64]pidSet),
|
||||
forwardMap: make(map[pgid]uint64),
|
||||
backwardMap: make(map[pgid]uint64),
|
||||
forwardMap: make(map[common.Pgid]uint64),
|
||||
backwardMap: make(map[common.Pgid]uint64),
|
||||
}
|
||||
|
||||
if freelistType == FreelistMapType {
|
||||
if freelistType == common.FreelistMapType {
|
||||
f.allocate = f.hashmapAllocate
|
||||
f.free_count = f.hashmapFreeCount
|
||||
f.mergeSpans = f.hashmapMergeSpans
|
||||
|
@ -71,7 +73,7 @@ func (f *freelist) size() int {
|
|||
// The first element will be used to store the count. See freelist.write.
|
||||
n++
|
||||
}
|
||||
return int(pageHeaderSize) + (int(unsafe.Sizeof(pgid(0))) * n)
|
||||
return int(common.PageHeaderSize) + (int(unsafe.Sizeof(common.Pgid(0))) * n)
|
||||
}
|
||||
|
||||
// count returns count of pages on the freelist
|
||||
|
@ -95,23 +97,23 @@ func (f *freelist) pending_count() int {
|
|||
|
||||
// copyall copies a list of all free ids and all pending ids in one sorted list.
|
||||
// f.count returns the minimum length required for dst.
|
||||
func (f *freelist) copyall(dst []pgid) {
|
||||
m := make(pgids, 0, f.pending_count())
|
||||
func (f *freelist) copyall(dst []common.Pgid) {
|
||||
m := make(common.Pgids, 0, f.pending_count())
|
||||
for _, txp := range f.pending {
|
||||
m = append(m, txp.ids...)
|
||||
}
|
||||
sort.Sort(m)
|
||||
mergepgids(dst, f.getFreePageIDs(), m)
|
||||
common.Mergepgids(dst, f.getFreePageIDs(), m)
|
||||
}
|
||||
|
||||
// arrayAllocate returns the starting page id of a contiguous list of pages of a given size.
|
||||
// If a contiguous block cannot be found then 0 is returned.
|
||||
func (f *freelist) arrayAllocate(txid txid, n int) pgid {
|
||||
func (f *freelist) arrayAllocate(txid common.Txid, n int) common.Pgid {
|
||||
if len(f.ids) == 0 {
|
||||
return 0
|
||||
}
|
||||
|
||||
var initial, previd pgid
|
||||
var initial, previd common.Pgid
|
||||
for i, id := range f.ids {
|
||||
if id <= 1 {
|
||||
panic(fmt.Sprintf("invalid page allocation: %d", id))
|
||||
|
@ -123,7 +125,7 @@ func (f *freelist) arrayAllocate(txid txid, n int) pgid {
|
|||
}
|
||||
|
||||
// If we found a contiguous block then remove it and return it.
|
||||
if (id-initial)+1 == pgid(n) {
|
||||
if (id-initial)+1 == common.Pgid(n) {
|
||||
// If we're allocating off the beginning then take the fast path
|
||||
// and just adjust the existing slice. This will use extra memory
|
||||
// temporarily but the append() in free() will realloc the slice
|
||||
|
@ -136,7 +138,7 @@ func (f *freelist) arrayAllocate(txid txid, n int) pgid {
|
|||
}
|
||||
|
||||
// Remove from the free cache.
|
||||
for i := pgid(0); i < pgid(n); i++ {
|
||||
for i := common.Pgid(0); i < common.Pgid(n); i++ {
|
||||
delete(f.cache, initial+i)
|
||||
}
|
||||
f.allocs[initial] = txid
|
||||
|
@ -150,9 +152,9 @@ func (f *freelist) arrayAllocate(txid txid, n int) pgid {
|
|||
|
||||
// free releases a page and its overflow for a given transaction id.
|
||||
// If the page is already free then a panic will occur.
|
||||
func (f *freelist) free(txid txid, p *page) {
|
||||
if p.id <= 1 {
|
||||
panic(fmt.Sprintf("cannot free page 0 or 1: %d", p.id))
|
||||
func (f *freelist) free(txid common.Txid, p *common.Page) {
|
||||
if p.Id() <= 1 {
|
||||
panic(fmt.Sprintf("cannot free page 0 or 1: %d", p.Id()))
|
||||
}
|
||||
|
||||
// Free page and all its overflow pages.
|
||||
|
@ -161,15 +163,15 @@ func (f *freelist) free(txid txid, p *page) {
|
|||
txp = &txPending{}
|
||||
f.pending[txid] = txp
|
||||
}
|
||||
allocTxid, ok := f.allocs[p.id]
|
||||
allocTxid, ok := f.allocs[p.Id()]
|
||||
if ok {
|
||||
delete(f.allocs, p.id)
|
||||
} else if (p.flags & freelistPageFlag) != 0 {
|
||||
delete(f.allocs, p.Id())
|
||||
} else if (p.Flags() & common.FreelistPageFlag) != 0 {
|
||||
// Freelist is always allocated by prior tx.
|
||||
allocTxid = txid - 1
|
||||
}
|
||||
|
||||
for id := p.id; id <= p.id+pgid(p.overflow); id++ {
|
||||
for id := p.Id(); id <= p.Id()+common.Pgid(p.Overflow()); id++ {
|
||||
// Verify that page is not already free.
|
||||
if _, ok := f.cache[id]; ok {
|
||||
panic(fmt.Sprintf("page %d already freed", id))
|
||||
|
@ -182,8 +184,8 @@ func (f *freelist) free(txid txid, p *page) {
|
|||
}
|
||||
|
||||
// release moves all page ids for a transaction id (or older) to the freelist.
|
||||
func (f *freelist) release(txid txid) {
|
||||
m := make(pgids, 0)
|
||||
func (f *freelist) release(txid common.Txid) {
|
||||
m := make(common.Pgids, 0)
|
||||
for tid, txp := range f.pending {
|
||||
if tid <= txid {
|
||||
// Move transaction's pending pages to the available freelist.
|
||||
|
@ -196,11 +198,11 @@ func (f *freelist) release(txid txid) {
|
|||
}
|
||||
|
||||
// releaseRange moves pending pages allocated within an extent [begin,end] to the free list.
|
||||
func (f *freelist) releaseRange(begin, end txid) {
|
||||
func (f *freelist) releaseRange(begin, end common.Txid) {
|
||||
if begin > end {
|
||||
return
|
||||
}
|
||||
var m pgids
|
||||
var m common.Pgids
|
||||
for tid, txp := range f.pending {
|
||||
if tid < begin || tid > end {
|
||||
continue
|
||||
|
@ -229,13 +231,13 @@ func (f *freelist) releaseRange(begin, end txid) {
|
|||
}
|
||||
|
||||
// rollback removes the pages from a given pending tx.
|
||||
func (f *freelist) rollback(txid txid) {
|
||||
func (f *freelist) rollback(txid common.Txid) {
|
||||
// Remove page ids from cache.
|
||||
txp := f.pending[txid]
|
||||
if txp == nil {
|
||||
return
|
||||
}
|
||||
var m pgids
|
||||
var m common.Pgids
|
||||
for i, pgid := range txp.ids {
|
||||
delete(f.cache, pgid)
|
||||
tx := txp.alloctx[i]
|
||||
|
@ -256,82 +258,69 @@ func (f *freelist) rollback(txid txid) {
|
|||
}
|
||||
|
||||
// freed returns whether a given page is in the free list.
|
||||
func (f *freelist) freed(pgId pgid) bool {
|
||||
func (f *freelist) freed(pgId common.Pgid) bool {
|
||||
_, ok := f.cache[pgId]
|
||||
return ok
|
||||
}
|
||||
|
||||
// read initializes the freelist from a freelist page.
|
||||
func (f *freelist) read(p *page) {
|
||||
if (p.flags & freelistPageFlag) == 0 {
|
||||
panic(fmt.Sprintf("invalid freelist page: %d, page type is %s", p.id, p.typ()))
|
||||
}
|
||||
// If the page.count is at the max uint16 value (64k) then it's considered
|
||||
// an overflow and the size of the freelist is stored as the first element.
|
||||
var idx, count = 0, int(p.count)
|
||||
if count == 0xFFFF {
|
||||
idx = 1
|
||||
c := *(*pgid)(unsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p)))
|
||||
count = int(c)
|
||||
if count < 0 {
|
||||
panic(fmt.Sprintf("leading element count %d overflows int", c))
|
||||
}
|
||||
func (f *freelist) read(p *common.Page) {
|
||||
if (p.Flags() & common.FreelistPageFlag) == 0 {
|
||||
panic(fmt.Sprintf("invalid freelist page: %d, page type is %s", p.Id(), p.Typ()))
|
||||
}
|
||||
|
||||
ids := p.FreelistPageIds()
|
||||
|
||||
// Copy the list of page ids from the freelist.
|
||||
if count == 0 {
|
||||
if len(ids) == 0 {
|
||||
f.ids = nil
|
||||
} else {
|
||||
var ids []pgid
|
||||
data := unsafeIndex(unsafe.Pointer(p), unsafe.Sizeof(*p), unsafe.Sizeof(ids[0]), idx)
|
||||
unsafeSlice(unsafe.Pointer(&ids), data, count)
|
||||
|
||||
// copy the ids, so we don't modify on the freelist page directly
|
||||
idsCopy := make([]pgid, count)
|
||||
idsCopy := make([]common.Pgid, len(ids))
|
||||
copy(idsCopy, ids)
|
||||
// Make sure they're sorted.
|
||||
sort.Sort(pgids(idsCopy))
|
||||
sort.Sort(common.Pgids(idsCopy))
|
||||
|
||||
f.readIDs(idsCopy)
|
||||
}
|
||||
}
|
||||
|
||||
// arrayReadIDs initializes the freelist from a given list of ids.
|
||||
func (f *freelist) arrayReadIDs(ids []pgid) {
|
||||
func (f *freelist) arrayReadIDs(ids []common.Pgid) {
|
||||
f.ids = ids
|
||||
f.reindex()
|
||||
}
|
||||
|
||||
func (f *freelist) arrayGetFreePageIDs() []pgid {
|
||||
func (f *freelist) arrayGetFreePageIDs() []common.Pgid {
|
||||
return f.ids
|
||||
}
|
||||
|
||||
// write writes the page ids onto a freelist page. All free and pending ids are
|
||||
// saved to disk since in the event of a program crash, all pending ids will
|
||||
// become free.
|
||||
func (f *freelist) write(p *page) error {
|
||||
func (f *freelist) write(p *common.Page) error {
|
||||
// Combine the old free pgids and pgids waiting on an open transaction.
|
||||
|
||||
// Update the header flag.
|
||||
p.flags |= freelistPageFlag
|
||||
p.FlagsXOR(common.FreelistPageFlag)
|
||||
|
||||
// The page.count can only hold up to 64k elements so if we overflow that
|
||||
// number then we handle it by putting the size in the first element.
|
||||
l := f.count()
|
||||
if l == 0 {
|
||||
p.count = uint16(l)
|
||||
p.SetCount(uint16(l))
|
||||
} else if l < 0xFFFF {
|
||||
p.count = uint16(l)
|
||||
var ids []pgid
|
||||
data := unsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p))
|
||||
unsafeSlice(unsafe.Pointer(&ids), data, l)
|
||||
p.SetCount(uint16(l))
|
||||
var ids []common.Pgid
|
||||
data := common.UnsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p))
|
||||
common.UnsafeSlice(unsafe.Pointer(&ids), data, l)
|
||||
f.copyall(ids)
|
||||
} else {
|
||||
p.count = 0xFFFF
|
||||
var ids []pgid
|
||||
data := unsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p))
|
||||
unsafeSlice(unsafe.Pointer(&ids), data, l+1)
|
||||
ids[0] = pgid(l)
|
||||
p.SetCount(0xFFFF)
|
||||
var ids []common.Pgid
|
||||
data := common.UnsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p))
|
||||
common.UnsafeSlice(unsafe.Pointer(&ids), data, l+1)
|
||||
ids[0] = common.Pgid(l)
|
||||
f.copyall(ids[1:])
|
||||
}
|
||||
|
||||
|
@ -339,11 +328,11 @@ func (f *freelist) write(p *page) error {
|
|||
}
|
||||
|
||||
// reload reads the freelist from a page and filters out pending items.
|
||||
func (f *freelist) reload(p *page) {
|
||||
func (f *freelist) reload(p *common.Page) {
|
||||
f.read(p)
|
||||
|
||||
// Build a cache of only pending pages.
|
||||
pcache := make(map[pgid]bool)
|
||||
pcache := make(map[common.Pgid]bool)
|
||||
for _, txp := range f.pending {
|
||||
for _, pendingID := range txp.ids {
|
||||
pcache[pendingID] = true
|
||||
|
@ -352,7 +341,7 @@ func (f *freelist) reload(p *page) {
|
|||
|
||||
// Check each page in the freelist and build a new available freelist
|
||||
// with any pages not in the pending lists.
|
||||
var a []pgid
|
||||
var a []common.Pgid
|
||||
for _, id := range f.getFreePageIDs() {
|
||||
if !pcache[id] {
|
||||
a = append(a, id)
|
||||
|
@ -362,10 +351,10 @@ func (f *freelist) reload(p *page) {
|
|||
f.readIDs(a)
|
||||
}
|
||||
|
||||
// noSyncReload reads the freelist from pgids and filters out pending items.
|
||||
func (f *freelist) noSyncReload(pgids []pgid) {
|
||||
// noSyncReload reads the freelist from Pgids and filters out pending items.
|
||||
func (f *freelist) noSyncReload(Pgids []common.Pgid) {
|
||||
// Build a cache of only pending pages.
|
||||
pcache := make(map[pgid]bool)
|
||||
pcache := make(map[common.Pgid]bool)
|
||||
for _, txp := range f.pending {
|
||||
for _, pendingID := range txp.ids {
|
||||
pcache[pendingID] = true
|
||||
|
@ -374,8 +363,8 @@ func (f *freelist) noSyncReload(pgids []pgid) {
|
|||
|
||||
// Check each page in the freelist and build a new available freelist
|
||||
// with any pages not in the pending lists.
|
||||
var a []pgid
|
||||
for _, id := range pgids {
|
||||
var a []common.Pgid
|
||||
for _, id := range Pgids {
|
||||
if !pcache[id] {
|
||||
a = append(a, id)
|
||||
}
|
||||
|
@ -387,7 +376,7 @@ func (f *freelist) noSyncReload(pgids []pgid) {
|
|||
// reindex rebuilds the free cache based on available and pending free lists.
|
||||
func (f *freelist) reindex() {
|
||||
ids := f.getFreePageIDs()
|
||||
f.cache = make(map[pgid]struct{}, len(ids))
|
||||
f.cache = make(map[common.Pgid]struct{}, len(ids))
|
||||
for _, id := range ids {
|
||||
f.cache[id] = struct{}{}
|
||||
}
|
||||
|
@ -399,7 +388,7 @@ func (f *freelist) reindex() {
|
|||
}
|
||||
|
||||
// arrayMergeSpans tries to merge a list of pages (represented by pgids) with the existing spans, using the array version.
|
||||
func (f *freelist) arrayMergeSpans(ids pgids) {
|
||||
func (f *freelist) arrayMergeSpans(ids common.Pgids) {
|
||||
sort.Sort(ids)
|
||||
f.ids = pgids(f.ids).merge(ids)
|
||||
f.ids = common.Pgids(f.ids).Merge(ids)
|
||||
}
|
||||
|
|
|
@ -1,6 +1,10 @@
|
|||
package bbolt
|
||||
|
||||
import "sort"
|
||||
import (
|
||||
"sort"
|
||||
|
||||
"go.etcd.io/bbolt/internal/common"
|
||||
)
|
||||
|
||||
// hashmapFreeCount returns count of free pages(hashmap version)
|
||||
func (f *freelist) hashmapFreeCount() int {
|
||||
|
@ -13,7 +17,7 @@ func (f *freelist) hashmapFreeCount() int {
|
|||
}
|
||||
|
||||
// hashmapAllocate serves the same purpose as arrayAllocate, but uses a hashmap as the backend.
|
||||
func (f *freelist) hashmapAllocate(txid txid, n int) pgid {
|
||||
func (f *freelist) hashmapAllocate(txid common.Txid, n int) common.Pgid {
|
||||
if n == 0 {
|
||||
return 0
|
||||
}
|
||||
|
@ -26,7 +30,7 @@ func (f *freelist) hashmapAllocate(txid txid, n int) pgid {
|
|||
|
||||
f.allocs[pid] = txid
|
||||
|
||||
for i := pgid(0); i < pgid(n); i++ {
|
||||
for i := common.Pgid(0); i < common.Pgid(n); i++ {
|
||||
delete(f.cache, pid+i)
|
||||
}
|
||||
return pid
|
||||
|
@ -48,9 +52,9 @@ func (f *freelist) hashmapAllocate(txid txid, n int) pgid {
|
|||
remain := size - uint64(n)
|
||||
|
||||
// add remain span
|
||||
f.addSpan(pid+pgid(n), remain)
|
||||
f.addSpan(pid+common.Pgid(n), remain)
|
||||
|
||||
for i := pgid(0); i < pgid(n); i++ {
|
||||
for i := common.Pgid(0); i < common.Pgid(n); i++ {
|
||||
delete(f.cache, pid+i)
|
||||
}
|
||||
return pid
|
||||
|
@ -61,7 +65,7 @@ func (f *freelist) hashmapAllocate(txid txid, n int) pgid {
|
|||
}
|
||||
|
||||
// hashmapReadIDs takes pgids as input and initializes the freelist (hashmap version).
|
||||
func (f *freelist) hashmapReadIDs(pgids []pgid) {
|
||||
func (f *freelist) hashmapReadIDs(pgids []common.Pgid) {
|
||||
f.init(pgids)
|
||||
|
||||
// Rebuild the page cache.
|
||||
|
@ -69,25 +73,25 @@ func (f *freelist) hashmapReadIDs(pgids []pgid) {
|
|||
}
|
||||
|
||||
// hashmapGetFreePageIDs returns the sorted free page ids
|
||||
func (f *freelist) hashmapGetFreePageIDs() []pgid {
|
||||
func (f *freelist) hashmapGetFreePageIDs() []common.Pgid {
|
||||
count := f.free_count()
|
||||
if count == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
m := make([]pgid, 0, count)
|
||||
m := make([]common.Pgid, 0, count)
|
||||
for start, size := range f.forwardMap {
|
||||
for i := 0; i < int(size); i++ {
|
||||
m = append(m, start+pgid(i))
|
||||
m = append(m, start+common.Pgid(i))
|
||||
}
|
||||
}
|
||||
sort.Sort(pgids(m))
|
||||
sort.Sort(common.Pgids(m))
|
||||
|
||||
return m
|
||||
}
|
||||
|
||||
// hashmapMergeSpans tries to merge a list of pages (represented by pgids) with the existing spans.
|
||||
func (f *freelist) hashmapMergeSpans(ids pgids) {
|
||||
func (f *freelist) hashmapMergeSpans(ids common.Pgids) {
|
||||
for _, id := range ids {
|
||||
// try to see if we can merge and update
|
||||
f.mergeWithExistingSpan(id)
|
||||
|
@ -95,7 +99,7 @@ func (f *freelist) hashmapMergeSpans(ids pgids) {
|
|||
}
|
||||
|
||||
// mergeWithExistingSpan merges pid into the existing free spans, trying to merge it both backward and forward.
|
||||
func (f *freelist) mergeWithExistingSpan(pid pgid) {
|
||||
func (f *freelist) mergeWithExistingSpan(pid common.Pgid) {
|
||||
prev := pid - 1
|
||||
next := pid + 1
|
||||
|
||||
|
@ -106,10 +110,10 @@ func (f *freelist) mergeWithExistingSpan(pid pgid) {
|
|||
|
||||
if mergeWithPrev {
|
||||
//merge with previous span
|
||||
start := prev + 1 - pgid(preSize)
|
||||
start := prev + 1 - common.Pgid(preSize)
|
||||
f.delSpan(start, preSize)
|
||||
|
||||
newStart -= pgid(preSize)
|
||||
newStart -= common.Pgid(preSize)
|
||||
newSize += preSize
|
||||
}
|
||||
|
||||
|
@ -122,19 +126,19 @@ func (f *freelist) mergeWithExistingSpan(pid pgid) {
|
|||
f.addSpan(newStart, newSize)
|
||||
}
|
||||
|
||||
func (f *freelist) addSpan(start pgid, size uint64) {
|
||||
f.backwardMap[start-1+pgid(size)] = size
|
||||
func (f *freelist) addSpan(start common.Pgid, size uint64) {
|
||||
f.backwardMap[start-1+common.Pgid(size)] = size
|
||||
f.forwardMap[start] = size
|
||||
if _, ok := f.freemaps[size]; !ok {
|
||||
f.freemaps[size] = make(map[pgid]struct{})
|
||||
f.freemaps[size] = make(map[common.Pgid]struct{})
|
||||
}
|
||||
|
||||
f.freemaps[size][start] = struct{}{}
|
||||
}
|
||||
|
||||
func (f *freelist) delSpan(start pgid, size uint64) {
|
||||
func (f *freelist) delSpan(start common.Pgid, size uint64) {
|
||||
delete(f.forwardMap, start)
|
||||
delete(f.backwardMap, start+pgid(size-1))
|
||||
delete(f.backwardMap, start+common.Pgid(size-1))
|
||||
delete(f.freemaps[size], start)
|
||||
if len(f.freemaps[size]) == 0 {
|
||||
delete(f.freemaps, size)
|
||||
|
@ -143,7 +147,7 @@ func (f *freelist) delSpan(start pgid, size uint64) {
|
|||
|
||||
// init initializes the freelist from pgids; it is used by the hashmap version.
|
||||
// pgids must be sorted
|
||||
func (f *freelist) init(pgids []pgid) {
|
||||
func (f *freelist) init(pgids []common.Pgid) {
|
||||
if len(pgids) == 0 {
|
||||
return
|
||||
}
|
||||
|
@ -151,13 +155,13 @@ func (f *freelist) init(pgids []pgid) {
|
|||
size := uint64(1)
|
||||
start := pgids[0]
|
||||
|
||||
if !sort.SliceIsSorted([]pgid(pgids), func(i, j int) bool { return pgids[i] < pgids[j] }) {
|
||||
if !sort.SliceIsSorted([]common.Pgid(pgids), func(i, j int) bool { return pgids[i] < pgids[j] }) {
|
||||
panic("pgids not sorted")
|
||||
}
|
||||
|
||||
f.freemaps = make(map[uint64]pidSet)
|
||||
f.forwardMap = make(map[pgid]uint64)
|
||||
f.backwardMap = make(map[pgid]uint64)
|
||||
f.forwardMap = make(map[common.Pgid]uint64)
|
||||
f.backwardMap = make(map[common.Pgid]uint64)
|
||||
|
||||
for i := 1; i < len(pgids); i++ {
|
||||
// continuous page
|
||||
|
|
142
freelist_test.go
142
freelist_test.go
|
@ -7,6 +7,8 @@ import (
|
|||
"sort"
|
||||
"testing"
|
||||
"unsafe"
|
||||
|
||||
"go.etcd.io/bbolt/internal/common"
|
||||
)
|
||||
|
||||
// TestFreelistType is used as an env variable for tests to indicate the backend type.
|
||||
|
@ -15,17 +17,17 @@ const TestFreelistType = "TEST_FREELIST_TYPE"
|
|||
// Ensure that a page is added to a transaction's freelist.
|
||||
func TestFreelist_free(t *testing.T) {
|
||||
f := newTestFreelist()
|
||||
f.free(100, &page{id: 12})
|
||||
if !reflect.DeepEqual([]pgid{12}, f.pending[100].ids) {
|
||||
t.Fatalf("exp=%v; got=%v", []pgid{12}, f.pending[100].ids)
|
||||
f.free(100, common.NewPage(12, 0, 0, 0))
|
||||
if !reflect.DeepEqual([]common.Pgid{12}, f.pending[100].ids) {
|
||||
t.Fatalf("exp=%v; got=%v", []common.Pgid{12}, f.pending[100].ids)
|
||||
}
|
||||
}
|
||||
|
||||
// Ensure that a page and its overflow is added to a transaction's freelist.
|
||||
func TestFreelist_free_overflow(t *testing.T) {
|
||||
f := newTestFreelist()
|
||||
f.free(100, &page{id: 12, overflow: 3})
|
||||
if exp := []pgid{12, 13, 14, 15}; !reflect.DeepEqual(exp, f.pending[100].ids) {
|
||||
f.free(100, common.NewPage(12, 0, 0, 3))
|
||||
if exp := []common.Pgid{12, 13, 14, 15}; !reflect.DeepEqual(exp, f.pending[100].ids) {
|
||||
t.Fatalf("exp=%v; got=%v", exp, f.pending[100].ids)
|
||||
}
|
||||
}
|
||||
|
@ -33,17 +35,17 @@ func TestFreelist_free_overflow(t *testing.T) {
|
|||
// Ensure that a transaction's free pages can be released.
|
||||
func TestFreelist_release(t *testing.T) {
|
||||
f := newTestFreelist()
|
||||
f.free(100, &page{id: 12, overflow: 1})
|
||||
f.free(100, &page{id: 9})
|
||||
f.free(102, &page{id: 39})
|
||||
f.free(100, common.NewPage(12, 0, 0, 1))
|
||||
f.free(100, common.NewPage(9, 0, 0, 0))
|
||||
f.free(102, common.NewPage(39, 0, 0, 0))
|
||||
f.release(100)
|
||||
f.release(101)
|
||||
if exp := []pgid{9, 12, 13}; !reflect.DeepEqual(exp, f.getFreePageIDs()) {
|
||||
if exp := []common.Pgid{9, 12, 13}; !reflect.DeepEqual(exp, f.getFreePageIDs()) {
|
||||
t.Fatalf("exp=%v; got=%v", exp, f.getFreePageIDs())
|
||||
}
|
||||
|
||||
f.release(102)
|
||||
if exp := []pgid{9, 12, 13, 39}; !reflect.DeepEqual(exp, f.getFreePageIDs()) {
|
||||
if exp := []common.Pgid{9, 12, 13, 39}; !reflect.DeepEqual(exp, f.getFreePageIDs()) {
|
||||
t.Fatalf("exp=%v; got=%v", exp, f.getFreePageIDs())
|
||||
}
|
||||
}
|
||||
|
@ -51,33 +53,33 @@ func TestFreelist_release(t *testing.T) {
|
|||
// Ensure that releaseRange handles boundary conditions correctly
|
||||
func TestFreelist_releaseRange(t *testing.T) {
|
||||
type testRange struct {
|
||||
begin, end txid
|
||||
begin, end common.Txid
|
||||
}
|
||||
|
||||
type testPage struct {
|
||||
id pgid
|
||||
id common.Pgid
|
||||
n int
|
||||
allocTxn txid
|
||||
freeTxn txid
|
||||
allocTxn common.Txid
|
||||
freeTxn common.Txid
|
||||
}
|
||||
|
||||
var releaseRangeTests = []struct {
|
||||
title string
|
||||
pagesIn []testPage
|
||||
releaseRanges []testRange
|
||||
wantFree []pgid
|
||||
wantFree []common.Pgid
|
||||
}{
|
||||
{
|
||||
title: "Single pending in range",
|
||||
pagesIn: []testPage{{id: 3, n: 1, allocTxn: 100, freeTxn: 200}},
|
||||
releaseRanges: []testRange{{1, 300}},
|
||||
wantFree: []pgid{3},
|
||||
wantFree: []common.Pgid{3},
|
||||
},
|
||||
{
|
||||
title: "Single pending with minimum end range",
|
||||
pagesIn: []testPage{{id: 3, n: 1, allocTxn: 100, freeTxn: 200}},
|
||||
releaseRanges: []testRange{{1, 200}},
|
||||
wantFree: []pgid{3},
|
||||
wantFree: []common.Pgid{3},
|
||||
},
|
||||
{
|
||||
title: "Single pending outsize minimum end range",
|
||||
|
@ -89,7 +91,7 @@ func TestFreelist_releaseRange(t *testing.T) {
|
|||
title: "Single pending with minimum begin range",
|
||||
pagesIn: []testPage{{id: 3, n: 1, allocTxn: 100, freeTxn: 200}},
|
||||
releaseRanges: []testRange{{100, 300}},
|
||||
wantFree: []pgid{3},
|
||||
wantFree: []common.Pgid{3},
|
||||
},
|
||||
{
|
||||
title: "Single pending outside minimum begin range",
|
||||
|
@ -101,7 +103,7 @@ func TestFreelist_releaseRange(t *testing.T) {
|
|||
title: "Single pending in minimum range",
|
||||
pagesIn: []testPage{{id: 3, n: 1, allocTxn: 199, freeTxn: 200}},
|
||||
releaseRanges: []testRange{{199, 200}},
|
||||
wantFree: []pgid{3},
|
||||
wantFree: []common.Pgid{3},
|
||||
},
|
||||
{
|
||||
title: "Single pending and read transaction at 199",
|
||||
|
@ -146,16 +148,16 @@ func TestFreelist_releaseRange(t *testing.T) {
|
|||
{id: 9, n: 2, allocTxn: 175, freeTxn: 200},
|
||||
},
|
||||
releaseRanges: []testRange{{50, 149}, {151, 300}},
|
||||
wantFree: []pgid{4, 9, 10},
|
||||
wantFree: []common.Pgid{4, 9, 10},
|
||||
},
|
||||
}
|
||||
|
||||
for _, c := range releaseRangeTests {
|
||||
f := newTestFreelist()
|
||||
var ids []pgid
|
||||
var ids []common.Pgid
|
||||
for _, p := range c.pagesIn {
|
||||
for i := uint64(0); i < uint64(p.n); i++ {
|
||||
ids = append(ids, pgid(uint64(p.id)+i))
|
||||
ids = append(ids, common.Pgid(uint64(p.id)+i))
|
||||
}
|
||||
}
|
||||
f.readIDs(ids)
|
||||
|
@ -164,7 +166,7 @@ func TestFreelist_releaseRange(t *testing.T) {
|
|||
}
|
||||
|
||||
for _, p := range c.pagesIn {
|
||||
f.free(p.freeTxn, &page{id: p.id, overflow: uint32(p.n - 1)})
|
||||
f.free(p.freeTxn, common.NewPage(p.id, 0, 0, uint32(p.n-1)))
|
||||
}
|
||||
|
||||
for _, r := range c.releaseRanges {
|
||||
|
@ -179,11 +181,11 @@ func TestFreelist_releaseRange(t *testing.T) {
|
|||
|
||||
func TestFreelistHashmap_allocate(t *testing.T) {
|
||||
f := newTestFreelist()
|
||||
if f.freelistType != FreelistMapType {
|
||||
if f.freelistType != common.FreelistMapType {
|
||||
t.Skip()
|
||||
}
|
||||
|
||||
ids := []pgid{3, 4, 5, 6, 7, 9, 12, 13, 18}
|
||||
ids := []common.Pgid{3, 4, 5, 6, 7, 9, 12, 13, 18}
|
||||
f.readIDs(ids)
|
||||
|
||||
f.allocate(1, 3)
|
||||
|
@ -209,10 +211,10 @@ func TestFreelistHashmap_allocate(t *testing.T) {
|
|||
// Ensure that a freelist can find contiguous blocks of pages.
|
||||
func TestFreelistArray_allocate(t *testing.T) {
|
||||
f := newTestFreelist()
|
||||
if f.freelistType != FreelistArrayType {
|
||||
if f.freelistType != common.FreelistArrayType {
|
||||
t.Skip()
|
||||
}
|
||||
ids := []pgid{3, 4, 5, 6, 7, 9, 12, 13, 18}
|
||||
ids := []common.Pgid{3, 4, 5, 6, 7, 9, 12, 13, 18}
|
||||
f.readIDs(ids)
|
||||
if id := int(f.allocate(1, 3)); id != 3 {
|
||||
t.Fatalf("exp=3; got=%v", id)
|
||||
|
@ -235,7 +237,7 @@ func TestFreelistArray_allocate(t *testing.T) {
|
|||
if id := int(f.allocate(1, 0)); id != 0 {
|
||||
t.Fatalf("exp=0; got=%v", id)
|
||||
}
|
||||
if exp := []pgid{9, 18}; !reflect.DeepEqual(exp, f.getFreePageIDs()) {
|
||||
if exp := []common.Pgid{9, 18}; !reflect.DeepEqual(exp, f.getFreePageIDs()) {
|
||||
t.Fatalf("exp=%v; got=%v", exp, f.getFreePageIDs())
|
||||
}
|
||||
|
||||
|
@ -248,7 +250,7 @@ func TestFreelistArray_allocate(t *testing.T) {
|
|||
if id := int(f.allocate(1, 1)); id != 0 {
|
||||
t.Fatalf("exp=0; got=%v", id)
|
||||
}
|
||||
if exp := []pgid{}; !reflect.DeepEqual(exp, f.getFreePageIDs()) {
|
||||
if exp := []common.Pgid{}; !reflect.DeepEqual(exp, f.getFreePageIDs()) {
|
||||
t.Fatalf("exp=%v; got=%v", exp, f.getFreePageIDs())
|
||||
}
|
||||
}
|
||||
|
@ -257,12 +259,12 @@ func TestFreelistArray_allocate(t *testing.T) {
|
|||
func TestFreelist_read(t *testing.T) {
|
||||
// Create a page.
|
||||
var buf [4096]byte
|
||||
page := (*page)(unsafe.Pointer(&buf[0]))
|
||||
page.flags = freelistPageFlag
|
||||
page.count = 2
|
||||
page := (*common.Page)(unsafe.Pointer(&buf[0]))
|
||||
page.SetFlags(common.FreelistPageFlag)
|
||||
page.SetCount(2)
|
||||
|
||||
// Insert 2 page ids.
|
||||
ids := (*[3]pgid)(unsafe.Pointer(uintptr(unsafe.Pointer(page)) + unsafe.Sizeof(*page)))
|
||||
ids := (*[3]common.Pgid)(unsafe.Pointer(uintptr(unsafe.Pointer(page)) + unsafe.Sizeof(*page)))
|
||||
ids[0] = 23
|
||||
ids[1] = 50
|
||||
|
||||
|
@ -271,7 +273,7 @@ func TestFreelist_read(t *testing.T) {
|
|||
f.read(page)
|
||||
|
||||
// Ensure that there are two page ids in the freelist.
|
||||
if exp := []pgid{23, 50}; !reflect.DeepEqual(exp, f.getFreePageIDs()) {
|
||||
if exp := []common.Pgid{23, 50}; !reflect.DeepEqual(exp, f.getFreePageIDs()) {
|
||||
t.Fatalf("exp=%v; got=%v", exp, f.getFreePageIDs())
|
||||
}
|
||||
}
|
||||
|
@ -282,10 +284,10 @@ func TestFreelist_write(t *testing.T) {
|
|||
var buf [4096]byte
|
||||
f := newTestFreelist()
|
||||
|
||||
f.readIDs([]pgid{12, 39})
|
||||
f.pending[100] = &txPending{ids: []pgid{28, 11}}
|
||||
f.pending[101] = &txPending{ids: []pgid{3}}
|
||||
p := (*page)(unsafe.Pointer(&buf[0]))
|
||||
f.readIDs([]common.Pgid{12, 39})
|
||||
f.pending[100] = &txPending{ids: []common.Pgid{28, 11}}
|
||||
f.pending[101] = &txPending{ids: []common.Pgid{3}}
|
||||
p := (*common.Page)(unsafe.Pointer(&buf[0]))
|
||||
if err := f.write(p); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
@ -296,7 +298,7 @@ func TestFreelist_write(t *testing.T) {
|
|||
|
||||
// Ensure that the freelist is correct.
|
||||
// All pages should be present and in reverse order.
|
||||
if exp := []pgid{3, 11, 12, 28, 39}; !reflect.DeepEqual(exp, f2.getFreePageIDs()) {
|
||||
if exp := []common.Pgid{3, 11, 12, 28, 39}; !reflect.DeepEqual(exp, f2.getFreePageIDs()) {
|
||||
t.Fatalf("exp=%v; got=%v", exp, f2.getFreePageIDs())
|
||||
}
|
||||
}
|
||||
|
@ -313,17 +315,17 @@ func benchmark_FreelistRelease(b *testing.B, size int) {
|
|||
for i := 0; i < b.N; i++ {
|
||||
txp := &txPending{ids: pending}
|
||||
f := newTestFreelist()
|
||||
f.pending = map[txid]*txPending{1: txp}
|
||||
f.pending = map[common.Txid]*txPending{1: txp}
|
||||
f.readIDs(ids)
|
||||
f.release(1)
|
||||
}
|
||||
}
|
||||
|
||||
func randomPgids(n int) []pgid {
|
||||
func randomPgids(n int) []common.Pgid {
|
||||
rand.Seed(42)
|
||||
pgids := make(pgids, n)
|
||||
pgids := make(common.Pgids, n)
|
||||
for i := range pgids {
|
||||
pgids[i] = pgid(rand.Int63())
|
||||
pgids[i] = common.Pgid(rand.Int63())
|
||||
}
|
||||
sort.Sort(pgids)
|
||||
return pgids
|
||||
|
@ -331,7 +333,7 @@ func randomPgids(n int) []pgid {
|
|||
|
||||
func Test_freelist_ReadIDs_and_getFreePageIDs(t *testing.T) {
|
||||
f := newTestFreelist()
|
||||
exp := []pgid{3, 4, 5, 6, 7, 9, 12, 13, 18}
|
||||
exp := []common.Pgid{3, 4, 5, 6, 7, 9, 12, 13, 18}
|
||||
|
||||
f.readIDs(exp)
|
||||
|
||||
|
@ -340,7 +342,7 @@ func Test_freelist_ReadIDs_and_getFreePageIDs(t *testing.T) {
|
|||
}
|
||||
|
||||
f2 := newTestFreelist()
|
||||
var exp2 []pgid
|
||||
var exp2 []common.Pgid
|
||||
f2.readIDs(exp2)
|
||||
|
||||
if got2 := f2.getFreePageIDs(); !reflect.DeepEqual(got2, exp2) {
|
||||
|
@ -355,53 +357,53 @@ func Test_freelist_mergeWithExist(t *testing.T) {
|
|||
bm2 := pidSet{5: struct{}{}}
|
||||
tests := []struct {
|
||||
name string
|
||||
ids []pgid
|
||||
pgid pgid
|
||||
want []pgid
|
||||
wantForwardmap map[pgid]uint64
|
||||
wantBackwardmap map[pgid]uint64
|
||||
ids []common.Pgid
|
||||
pgid common.Pgid
|
||||
want []common.Pgid
|
||||
wantForwardmap map[common.Pgid]uint64
|
||||
wantBackwardmap map[common.Pgid]uint64
|
||||
wantfreemap map[uint64]pidSet
|
||||
}{
|
||||
{
|
||||
name: "test1",
|
||||
ids: []pgid{1, 2, 4, 5, 6},
|
||||
ids: []common.Pgid{1, 2, 4, 5, 6},
|
||||
pgid: 3,
|
||||
want: []pgid{1, 2, 3, 4, 5, 6},
|
||||
wantForwardmap: map[pgid]uint64{1: 6},
|
||||
wantBackwardmap: map[pgid]uint64{6: 6},
|
||||
want: []common.Pgid{1, 2, 3, 4, 5, 6},
|
||||
wantForwardmap: map[common.Pgid]uint64{1: 6},
|
||||
wantBackwardmap: map[common.Pgid]uint64{6: 6},
|
||||
wantfreemap: map[uint64]pidSet{6: bm1},
|
||||
},
|
||||
{
|
||||
name: "test2",
|
||||
ids: []pgid{1, 2, 5, 6},
|
||||
ids: []common.Pgid{1, 2, 5, 6},
|
||||
pgid: 3,
|
||||
want: []pgid{1, 2, 3, 5, 6},
|
||||
wantForwardmap: map[pgid]uint64{1: 3, 5: 2},
|
||||
wantBackwardmap: map[pgid]uint64{6: 2, 3: 3},
|
||||
want: []common.Pgid{1, 2, 3, 5, 6},
|
||||
wantForwardmap: map[common.Pgid]uint64{1: 3, 5: 2},
|
||||
wantBackwardmap: map[common.Pgid]uint64{6: 2, 3: 3},
|
||||
wantfreemap: map[uint64]pidSet{3: bm1, 2: bm2},
|
||||
},
|
||||
{
|
||||
name: "test3",
|
||||
ids: []pgid{1, 2},
|
||||
ids: []common.Pgid{1, 2},
|
||||
pgid: 3,
|
||||
want: []pgid{1, 2, 3},
|
||||
wantForwardmap: map[pgid]uint64{1: 3},
|
||||
wantBackwardmap: map[pgid]uint64{3: 3},
|
||||
want: []common.Pgid{1, 2, 3},
|
||||
wantForwardmap: map[common.Pgid]uint64{1: 3},
|
||||
wantBackwardmap: map[common.Pgid]uint64{3: 3},
|
||||
wantfreemap: map[uint64]pidSet{3: bm1},
|
||||
},
|
||||
{
|
||||
name: "test4",
|
||||
ids: []pgid{2, 3},
|
||||
ids: []common.Pgid{2, 3},
|
||||
pgid: 1,
|
||||
want: []pgid{1, 2, 3},
|
||||
wantForwardmap: map[pgid]uint64{1: 3},
|
||||
wantBackwardmap: map[pgid]uint64{3: 3},
|
||||
want: []common.Pgid{1, 2, 3},
|
||||
wantForwardmap: map[common.Pgid]uint64{1: 3},
|
||||
wantBackwardmap: map[common.Pgid]uint64{3: 3},
|
||||
wantfreemap: map[uint64]pidSet{3: bm1},
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
f := newTestFreelist()
|
||||
if f.freelistType == FreelistArrayType {
|
||||
if f.freelistType == common.FreelistArrayType {
|
||||
t.Skip()
|
||||
}
|
||||
f.readIDs(tt.ids)
|
||||
|
@ -425,9 +427,9 @@ func Test_freelist_mergeWithExist(t *testing.T) {
|
|||
|
||||
// newTestFreelist gets the freelist type from the env and initializes the freelist.
|
||||
func newTestFreelist() *freelist {
|
||||
freelistType := FreelistArrayType
|
||||
if env := os.Getenv(TestFreelistType); env == string(FreelistMapType) {
|
||||
freelistType = FreelistMapType
|
||||
freelistType := common.FreelistArrayType
|
||||
if env := os.Getenv(TestFreelistType); env == string(common.FreelistMapType) {
|
||||
freelistType = common.FreelistMapType
|
||||
}
|
||||
|
||||
return newFreelist(freelistType)
|
||||
|
|
|
@ -12,6 +12,7 @@ import (
|
|||
"github.com/stretchr/testify/require"
|
||||
|
||||
bolt "go.etcd.io/bbolt"
|
||||
"go.etcd.io/bbolt/internal/common"
|
||||
)
|
||||
|
||||
var statsFlag = flag.Bool("stats", false, "show performance stats")
|
||||
|
@ -44,9 +45,9 @@ func MustOpenDBWithOption(t testing.TB, f string, o *bolt.Options) *DB {
|
|||
o = bolt.DefaultOptions
|
||||
}
|
||||
|
||||
freelistType := bolt.FreelistArrayType
|
||||
if env := os.Getenv(TestFreelistType); env == string(bolt.FreelistMapType) {
|
||||
freelistType = bolt.FreelistMapType
|
||||
freelistType := common.FreelistArrayType
|
||||
if env := os.Getenv(TestFreelistType); env == string(common.FreelistMapType) {
|
||||
freelistType = common.FreelistMapType
|
||||
}
|
||||
|
||||
o.FreelistType = freelistType
|
||||
|
|
|
@ -2,14 +2,13 @@ package guts_cli
|
|||
|
||||
// Low level access to pages / data-structures of the bbolt file.
|
||||
|
||||
// TODO(ptab): Merge with bbolt/page file that should get ported to internal.
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"unsafe"
|
||||
|
||||
"go.etcd.io/bbolt/internal/common"
|
||||
)
|
||||
|
||||
var (
|
||||
|
@ -17,231 +16,9 @@ var (
|
|||
ErrCorrupt = errors.New("invalid value")
|
||||
)
|
||||
|
||||
// PageHeaderSize represents the size of the bolt.Page header.
|
||||
const PageHeaderSize = 16
|
||||
|
||||
// Represents a marker value to indicate that a file (Meta Page) is a Bolt DB.
|
||||
const magic uint32 = 0xED0CDAED
|
||||
|
||||
// DO NOT EDIT. Copied from the "bolt" package.
|
||||
const maxAllocSize = 0xFFFFFFF
|
||||
|
||||
// DO NOT EDIT. Copied from the "bolt" package.
|
||||
const (
|
||||
branchPageFlag = 0x01
|
||||
leafPageFlag = 0x02
|
||||
metaPageFlag = 0x04
|
||||
freelistPageFlag = 0x10
|
||||
)
|
||||
|
||||
// DO NOT EDIT. Copied from the "bolt" package.
|
||||
const bucketLeafFlag = 0x01
|
||||
|
||||
// DO NOT EDIT. Copied from the "bolt" package.
|
||||
type Pgid uint64
|
||||
|
||||
// DO NOT EDIT. Copied from the "bolt" package.
|
||||
type txid uint64
|
||||
|
||||
// DO NOT EDIT. Copied from the "bolt" package.
|
||||
type Meta struct {
|
||||
magic uint32
|
||||
version uint32
|
||||
pageSize uint32
|
||||
flags uint32
|
||||
root Bucket
|
||||
freelist Pgid
|
||||
pgid Pgid // High Water Mark (id of next added Page if the file growths)
|
||||
txid txid
|
||||
checksum uint64
|
||||
}
|
||||
|
||||
func LoadPageMeta(buf []byte) *Meta {
|
||||
return (*Meta)(unsafe.Pointer(&buf[PageHeaderSize]))
|
||||
}
|
||||
|
||||
func (m *Meta) RootBucket() *Bucket {
|
||||
return &m.root
|
||||
}
|
||||
|
||||
func (m *Meta) Txid() uint64 {
|
||||
return uint64(m.txid)
|
||||
}
|
||||
|
||||
func (m *Meta) Print(w io.Writer) {
|
||||
fmt.Fprintf(w, "Version: %d\n", m.version)
|
||||
fmt.Fprintf(w, "Page Size: %d bytes\n", m.pageSize)
|
||||
fmt.Fprintf(w, "Flags: %08x\n", m.flags)
|
||||
fmt.Fprintf(w, "Root: <pgid=%d>\n", m.root.root)
|
||||
fmt.Fprintf(w, "Freelist: <pgid=%d>\n", m.freelist)
|
||||
fmt.Fprintf(w, "HWM: <pgid=%d>\n", m.pgid)
|
||||
fmt.Fprintf(w, "Txn ID: %d\n", m.txid)
|
||||
fmt.Fprintf(w, "Checksum: %016x\n", m.checksum)
|
||||
fmt.Fprintf(w, "\n")
|
||||
}
|
||||
|
||||
// DO NOT EDIT. Copied from the "bolt" package.
|
||||
type Bucket struct {
|
||||
root Pgid
|
||||
sequence uint64
|
||||
}
|
||||
|
||||
const bucketHeaderSize = int(unsafe.Sizeof(Bucket{}))
|
||||
|
||||
func LoadBucket(buf []byte) *Bucket {
|
||||
return (*Bucket)(unsafe.Pointer(&buf[0]))
|
||||
}
|
||||
|
||||
func (b *Bucket) String() string {
|
||||
return fmt.Sprintf("<pgid=%d,seq=%d>", b.root, b.sequence)
|
||||
}
|
||||
|
||||
func (b *Bucket) RootPage() Pgid {
|
||||
return b.root
|
||||
}
|
||||
|
||||
func (b *Bucket) InlinePage(v []byte) *Page {
|
||||
return (*Page)(unsafe.Pointer(&v[bucketHeaderSize]))
|
||||
}
|
||||
|
||||
// DO NOT EDIT. Copied from the "bolt" package.
|
||||
type Page struct {
|
||||
id Pgid
|
||||
flags uint16
|
||||
count uint16
|
||||
overflow uint32
|
||||
ptr uintptr
|
||||
}
|
||||
|
||||
func LoadPage(buf []byte) *Page {
|
||||
return (*Page)(unsafe.Pointer(&buf[0]))
|
||||
}
|
||||
|
||||
func (p *Page) FreelistPageCount() int {
|
||||
// Check for overflow and, if present, adjust actual element count.
|
||||
if p.count == 0xFFFF {
|
||||
return int(((*[maxAllocSize]Pgid)(unsafe.Pointer(&p.ptr)))[0])
|
||||
} else {
|
||||
return int(p.count)
|
||||
}
|
||||
}
|
||||
|
||||
func (p *Page) FreelistPagePages() []Pgid {
|
||||
// Check for overflow and, if present, adjust starting index.
|
||||
idx := 0
|
||||
if p.count == 0xFFFF {
|
||||
idx = 1
|
||||
}
|
||||
return (*[maxAllocSize]Pgid)(unsafe.Pointer(&p.ptr))[idx:p.FreelistPageCount()]
|
||||
}
|
||||
|
||||
func (p *Page) Overflow() uint32 {
|
||||
return p.overflow
|
||||
}
|
||||
|
||||
func (p *Page) String() string {
|
||||
return fmt.Sprintf("ID: %d, Type: %s, count: %d, overflow: %d", p.id, p.Type(), p.count, p.overflow)
|
||||
}
|
||||
|
||||
// DO NOT EDIT. Copied from the "bolt" package.
|
||||
|
||||
// TODO(ptabor): Make the page-types an enum.
|
||||
func (p *Page) Type() string {
|
||||
if (p.flags & branchPageFlag) != 0 {
|
||||
return "branch"
|
||||
} else if (p.flags & leafPageFlag) != 0 {
|
||||
return "leaf"
|
||||
} else if (p.flags & metaPageFlag) != 0 {
|
||||
return "meta"
|
||||
} else if (p.flags & freelistPageFlag) != 0 {
|
||||
return "freelist"
|
||||
}
|
||||
return fmt.Sprintf("unknown<%02x>", p.flags)
|
||||
}
|
||||
|
||||
func (p *Page) Count() uint16 {
|
||||
return p.count
|
||||
}
|
||||
|
||||
func (p *Page) Id() Pgid {
|
||||
return p.id
|
||||
}
|
||||
|
||||
// DO NOT EDIT. Copied from the "bolt" package.
|
||||
func (p *Page) LeafPageElement(index uint16) *LeafPageElement {
|
||||
n := &((*[0x7FFFFFF]LeafPageElement)(unsafe.Pointer(&p.ptr)))[index]
|
||||
return n
|
||||
}
|
||||
|
||||
// DO NOT EDIT. Copied from the "bolt" package.
|
||||
func (p *Page) BranchPageElement(index uint16) *BranchPageElement {
|
||||
return &((*[0x7FFFFFF]BranchPageElement)(unsafe.Pointer(&p.ptr)))[index]
|
||||
}
|
||||
|
||||
func (p *Page) SetId(target Pgid) {
|
||||
p.id = target
|
||||
}
|
||||
|
||||
func (p *Page) SetCount(target uint16) {
|
||||
p.count = target
|
||||
}
|
||||
|
||||
func (p *Page) SetOverflow(target uint32) {
|
||||
p.overflow = target
|
||||
}
|
||||
|
||||
// DO NOT EDIT. Copied from the "bolt" package.
|
||||
type BranchPageElement struct {
|
||||
pos uint32
|
||||
ksize uint32
|
||||
pgid Pgid
|
||||
}
|
||||
|
||||
// DO NOT EDIT. Copied from the "bolt" package.
|
||||
func (n *BranchPageElement) Key() []byte {
|
||||
buf := (*[maxAllocSize]byte)(unsafe.Pointer(n))
|
||||
return buf[n.pos : n.pos+n.ksize]
|
||||
}
|
||||
|
||||
func (n *BranchPageElement) PgId() Pgid {
|
||||
return n.pgid
|
||||
}
|
||||
|
||||
// DO NOT EDIT. Copied from the "bolt" package.
|
||||
type LeafPageElement struct {
|
||||
flags uint32
|
||||
pos uint32
|
||||
ksize uint32
|
||||
vsize uint32
|
||||
}
|
||||
|
||||
// DO NOT EDIT. Copied from the "bolt" package.
|
||||
func (n *LeafPageElement) Key() []byte {
|
||||
buf := (*[maxAllocSize]byte)(unsafe.Pointer(n))
|
||||
return buf[n.pos : n.pos+n.ksize]
|
||||
}
|
||||
|
||||
// DO NOT EDIT. Copied from the "bolt" package.
|
||||
func (n *LeafPageElement) Value() []byte {
|
||||
buf := (*[maxAllocSize]byte)(unsafe.Pointer(n))
|
||||
return buf[n.pos+n.ksize : n.pos+n.ksize+n.vsize]
|
||||
}
|
||||
|
||||
func (n *LeafPageElement) IsBucketEntry() bool {
|
||||
return n.flags&uint32(bucketLeafFlag) != 0
|
||||
}
|
||||
|
||||
func (n *LeafPageElement) Bucket() *Bucket {
|
||||
if n.IsBucketEntry() {
|
||||
return LoadBucket(n.Value())
|
||||
} else {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// ReadPage reads Page info & full Page data from a path.
|
||||
// This is not transactionally safe.
|
||||
func ReadPage(path string, pageID uint64) (*Page, []byte, error) {
|
||||
func ReadPage(path string, pageID uint64) (*common.Page, []byte, error) {
|
||||
// Find Page size.
|
||||
pageSize, hwm, err := ReadPageAndHWMSize(path)
|
||||
if err != nil {
|
||||
|
@ -264,11 +41,11 @@ func ReadPage(path string, pageID uint64) (*Page, []byte, error) {
|
|||
}
|
||||
|
||||
// Determine total number of blocks.
|
||||
p := LoadPage(buf)
|
||||
if p.id != Pgid(pageID) {
|
||||
return nil, nil, fmt.Errorf("error: %w due to unexpected Page id: %d != %d", ErrCorrupt, p.id, pageID)
|
||||
p := common.LoadPage(buf)
|
||||
if p.Id() != common.Pgid(pageID) {
|
||||
return nil, nil, fmt.Errorf("error: %w due to unexpected Page id: %d != %d", ErrCorrupt, p.Id(), pageID)
|
||||
}
|
||||
overflowN := p.overflow
|
||||
overflowN := p.Overflow()
|
||||
if overflowN >= uint32(hwm)-3 { // we exclude 2 Meta pages and the current Page.
|
||||
return nil, nil, fmt.Errorf("error: %w, Page claims to have %d overflow pages (>=hwm=%d). Interrupting to avoid risky OOM", ErrCorrupt, overflowN, hwm)
|
||||
}
|
||||
|
@ -280,16 +57,16 @@ func ReadPage(path string, pageID uint64) (*Page, []byte, error) {
|
|||
} else if n != len(buf) {
|
||||
return nil, nil, io.ErrUnexpectedEOF
|
||||
}
|
||||
p = LoadPage(buf)
|
||||
if p.id != Pgid(pageID) {
|
||||
return nil, nil, fmt.Errorf("error: %w due to unexpected Page id: %d != %d", ErrCorrupt, p.id, pageID)
|
||||
p = common.LoadPage(buf)
|
||||
if p.Id() != common.Pgid(pageID) {
|
||||
return nil, nil, fmt.Errorf("error: %w due to unexpected Page id: %d != %d", ErrCorrupt, p.Id(), pageID)
|
||||
}
|
||||
|
||||
return p, buf, nil
|
||||
}
|
||||
|
||||
func WritePage(path string, pageBuf []byte) error {
|
||||
page := LoadPage(pageBuf)
|
||||
page := common.LoadPage(pageBuf)
|
||||
pageSize, _, err := ReadPageAndHWMSize(path)
|
||||
if err != nil {
|
||||
return err
|
||||
|
@ -309,7 +86,7 @@ func WritePage(path string, pageBuf []byte) error {
|
|||
|
||||
// ReadPageAndHWMSize reads Page size and HWM (id of the last+1 Page).
|
||||
// This is not transactionally safe.
|
||||
func ReadPageAndHWMSize(path string) (uint64, Pgid, error) {
|
||||
func ReadPageAndHWMSize(path string) (uint64, common.Pgid, error) {
|
||||
// Open database file.
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
|
@ -324,28 +101,28 @@ func ReadPageAndHWMSize(path string) (uint64, Pgid, error) {
|
|||
}
|
||||
|
||||
// Read Page size from metadata.
|
||||
m := LoadPageMeta(buf)
|
||||
if m.magic != magic {
|
||||
m := common.LoadPageMeta(buf)
|
||||
if m.Magic() != common.Magic {
|
||||
return 0, 0, fmt.Errorf("the Meta Page has wrong (unexpected) magic")
|
||||
}
|
||||
return uint64(m.pageSize), Pgid(m.pgid), nil
|
||||
return uint64(m.PageSize()), common.Pgid(m.Pgid()), nil
|
||||
}
|
||||
|
||||
// GetRootPage returns the root-page (according to the most recent transaction).
|
||||
func GetRootPage(path string) (root Pgid, activeMeta Pgid, err error) {
|
||||
func GetRootPage(path string) (root common.Pgid, activeMeta common.Pgid, err error) {
|
||||
_, buf0, err0 := ReadPage(path, 0)
|
||||
if err0 != nil {
|
||||
return 0, 0, err0
|
||||
}
|
||||
m0 := LoadPageMeta(buf0)
|
||||
m0 := common.LoadPageMeta(buf0)
|
||||
_, buf1, err1 := ReadPage(path, 1)
|
||||
if err1 != nil {
|
||||
return 0, 1, err1
|
||||
}
|
||||
m1 := LoadPageMeta(buf1)
|
||||
if m0.txid < m1.txid {
|
||||
return m1.root.root, 1, nil
|
||||
m1 := common.LoadPageMeta(buf1)
|
||||
if m0.Txid() < m1.Txid() {
|
||||
return m1.RootBucket().RootPage(), 1, nil
|
||||
} else {
|
||||
return m0.root.root, 0, nil
|
||||
return m0.RootBucket().RootPage(), 0, nil
|
||||
}
|
||||
}
|
||||
|
|
|
@ -2,10 +2,11 @@ package surgeon
|
|||
|
||||
import (
|
||||
"fmt"
|
||||
"go.etcd.io/bbolt/internal/common"
|
||||
"go.etcd.io/bbolt/internal/guts_cli"
|
||||
)
|
||||
|
||||
func CopyPage(path string, srcPage guts_cli.Pgid, target guts_cli.Pgid) error {
|
||||
func CopyPage(path string, srcPage common.Pgid, target common.Pgid) error {
|
||||
p1, d1, err1 := guts_cli.ReadPage(path, uint64(srcPage))
|
||||
if err1 != nil {
|
||||
return err1
|
||||
|
@ -14,7 +15,7 @@ func CopyPage(path string, srcPage guts_cli.Pgid, target guts_cli.Pgid) error {
|
|||
return guts_cli.WritePage(path, d1)
|
||||
}
|
||||
|
||||
func ClearPage(path string, pgId guts_cli.Pgid) error {
|
||||
func ClearPage(path string, pgId common.Pgid) error {
|
||||
// Read the page
|
||||
p, buf, err := guts_cli.ReadPage(path, uint64(pgId))
|
||||
if err != nil {
|
||||
|
|
|
@ -9,6 +9,7 @@ import (
|
|||
"bytes"
|
||||
"fmt"
|
||||
|
||||
"go.etcd.io/bbolt/internal/common"
|
||||
"go.etcd.io/bbolt/internal/guts_cli"
|
||||
)
|
||||
|
||||
|
@ -20,7 +21,7 @@ func NewXRay(path string) XRay {
|
|||
return XRay{path}
|
||||
}
|
||||
|
||||
func (n XRay) traverse(stack []guts_cli.Pgid, callback func(page *guts_cli.Page, stack []guts_cli.Pgid) error) error {
|
||||
func (n XRay) traverse(stack []common.Pgid, callback func(page *common.Page, stack []common.Pgid) error) error {
|
||||
p, data, err := guts_cli.ReadPage(n.path, uint64(stack[len(stack)-1]))
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed reading page (stack %v): %w", stack, err)
|
||||
|
@ -29,10 +30,10 @@ func (n XRay) traverse(stack []guts_cli.Pgid, callback func(page *guts_cli.Page,
|
|||
if err != nil {
|
||||
return fmt.Errorf("failed callback for page (stack %v): %w", stack, err)
|
||||
}
|
||||
switch p.Type() {
|
||||
switch p.Typ() {
|
||||
case "meta":
|
||||
{
|
||||
m := guts_cli.LoadPageMeta(data)
|
||||
m := common.LoadPageMeta(data)
|
||||
r := m.RootBucket().RootPage()
|
||||
return n.traverse(append(stack, r), callback)
|
||||
}
|
||||
|
@ -40,7 +41,7 @@ func (n XRay) traverse(stack []guts_cli.Pgid, callback func(page *guts_cli.Page,
|
|||
{
|
||||
for i := uint16(0); i < p.Count(); i++ {
|
||||
bpe := p.BranchPageElement(i)
|
||||
if err := n.traverse(append(stack, bpe.PgId()), callback); err != nil {
|
||||
if err := n.traverse(append(stack, bpe.Pgid()), callback); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
@ -73,19 +74,19 @@ func (n XRay) traverse(stack []guts_cli.Pgid, callback func(page *guts_cli.Page,
|
|||
// Because it traverses multiple buckets, in theory there might be multiple keys with the given name.
|
||||
// Note: For simplicity it's currently implemented as a traversal of the whole reachable tree.
|
||||
// If key is a bucket name, a page-path referencing the key will be returned as well.
|
||||
func (n XRay) FindPathsToKey(key []byte) ([][]guts_cli.Pgid, error) {
|
||||
var found [][]guts_cli.Pgid
|
||||
func (n XRay) FindPathsToKey(key []byte) ([][]common.Pgid, error) {
|
||||
var found [][]common.Pgid
|
||||
|
||||
rootPage, _, err := guts_cli.GetRootPage(n.path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
err = n.traverse([]guts_cli.Pgid{rootPage},
|
||||
func(page *guts_cli.Page, stack []guts_cli.Pgid) error {
|
||||
if page.Type() == "leaf" {
|
||||
err = n.traverse([]common.Pgid{rootPage},
|
||||
func(page *common.Page, stack []common.Pgid) error {
|
||||
if page.Typ() == "leaf" {
|
||||
for i := uint16(0); i < page.Count(); i++ {
|
||||
if bytes.Equal(page.LeafPageElement(i).Key(), key) {
|
||||
var copyPath []guts_cli.Pgid
|
||||
var copyPath []common.Pgid
|
||||
copyPath = append(copyPath, stack...)
|
||||
found = append(found, copyPath)
|
||||
}
|
||||
|
|
120
node.go
120
node.go
|
@ -5,6 +5,8 @@ import (
|
|||
"fmt"
|
||||
"sort"
|
||||
"unsafe"
|
||||
|
||||
"go.etcd.io/bbolt/internal/common"
|
||||
)
|
||||
|
||||
// node represents an in-memory, deserialized page.
|
||||
|
@ -14,7 +16,7 @@ type node struct {
|
|||
unbalanced bool
|
||||
spilled bool
|
||||
key []byte
|
||||
pgid pgid
|
||||
pgid common.Pgid
|
||||
parent *node
|
||||
children nodes
|
||||
inodes inodes
|
||||
|
@ -38,7 +40,7 @@ func (n *node) minKeys() int {
|
|||
|
||||
// size returns the size of the node after serialization.
|
||||
func (n *node) size() int {
|
||||
sz, elsz := pageHeaderSize, n.pageElementSize()
|
||||
sz, elsz := common.PageHeaderSize, n.pageElementSize()
|
||||
for i := 0; i < len(n.inodes); i++ {
|
||||
item := &n.inodes[i]
|
||||
sz += elsz + uintptr(len(item.key)) + uintptr(len(item.value))
|
||||
|
@ -50,7 +52,7 @@ func (n *node) size() int {
|
|||
// This is an optimization to avoid calculating a large node when we only need
|
||||
// to know if it fits inside a certain page size.
|
||||
func (n *node) sizeLessThan(v uintptr) bool {
|
||||
sz, elsz := pageHeaderSize, n.pageElementSize()
|
||||
sz, elsz := common.PageHeaderSize, n.pageElementSize()
|
||||
for i := 0; i < len(n.inodes); i++ {
|
||||
item := &n.inodes[i]
|
||||
sz += elsz + uintptr(len(item.key)) + uintptr(len(item.value))
|
||||
|
@ -64,9 +66,9 @@ func (n *node) sizeLessThan(v uintptr) bool {
|
|||
// pageElementSize returns the size of each page element based on the type of node.
|
||||
func (n *node) pageElementSize() uintptr {
|
||||
if n.isLeaf {
|
||||
return leafPageElementSize
|
||||
return common.LeafPageElementSize
|
||||
}
|
||||
return branchPageElementSize
|
||||
return common.BranchPageElementSize
|
||||
}
|
||||
|
||||
// childAt returns the child node at a given index.
|
||||
|
@ -113,9 +115,9 @@ func (n *node) prevSibling() *node {
|
|||
}
|
||||
|
||||
// put inserts a key/value.
|
||||
func (n *node) put(oldKey, newKey, value []byte, pgId pgid, flags uint32) {
|
||||
if pgId >= n.bucket.tx.meta.pgid {
|
||||
panic(fmt.Sprintf("pgId (%d) above high water mark (%d)", pgId, n.bucket.tx.meta.pgid))
|
||||
func (n *node) put(oldKey, newKey, value []byte, pgId common.Pgid, flags uint32) {
|
||||
if pgId >= n.bucket.tx.meta.Pgid() {
|
||||
panic(fmt.Sprintf("pgId (%d) above high water mark (%d)", pgId, n.bucket.tx.meta.Pgid()))
|
||||
} else if len(oldKey) <= 0 {
|
||||
panic("put: zero-length old key")
|
||||
} else if len(newKey) <= 0 {
|
||||
|
@ -126,7 +128,7 @@ func (n *node) put(oldKey, newKey, value []byte, pgId pgid, flags uint32) {
|
|||
index := sort.Search(len(n.inodes), func(i int) bool { return bytes.Compare(n.inodes[i].key, oldKey) != -1 })
|
||||
|
||||
// Add capacity and shift nodes if we don't have an exact match and need to insert.
|
||||
exact := (len(n.inodes) > 0 && index < len(n.inodes) && bytes.Equal(n.inodes[index].key, oldKey))
|
||||
exact := len(n.inodes) > 0 && index < len(n.inodes) && bytes.Equal(n.inodes[index].key, oldKey)
|
||||
if !exact {
|
||||
n.inodes = append(n.inodes, inode{})
|
||||
copy(n.inodes[index+1:], n.inodes[index:])
|
||||
|
@ -137,7 +139,7 @@ func (n *node) put(oldKey, newKey, value []byte, pgId pgid, flags uint32) {
|
|||
inode.key = newKey
|
||||
inode.value = value
|
||||
inode.pgid = pgId
|
||||
_assert(len(inode.key) > 0, "put: zero-length inode key")
|
||||
common.Assert(len(inode.key) > 0, "put: zero-length inode key")
|
||||
}
|
||||
|
||||
// del removes a key from the node.
|
||||
|
@ -158,30 +160,30 @@ func (n *node) del(key []byte) {
|
|||
}
|
||||
|
||||
// read initializes the node from a page.
|
||||
func (n *node) read(p *page) {
|
||||
n.pgid = p.id
|
||||
n.isLeaf = ((p.flags & leafPageFlag) != 0)
|
||||
n.inodes = make(inodes, int(p.count))
|
||||
func (n *node) read(p *common.Page) {
|
||||
n.pgid = p.Id()
|
||||
n.isLeaf = (p.Flags() & common.LeafPageFlag) != 0
|
||||
n.inodes = make(inodes, int(p.Count()))
|
||||
|
||||
for i := 0; i < int(p.count); i++ {
|
||||
for i := 0; i < int(p.Count()); i++ {
|
||||
inode := &n.inodes[i]
|
||||
if n.isLeaf {
|
||||
elem := p.leafPageElement(uint16(i))
|
||||
inode.flags = elem.flags
|
||||
inode.key = elem.key()
|
||||
inode.value = elem.value()
|
||||
elem := p.LeafPageElement(uint16(i))
|
||||
inode.flags = elem.Flags()
|
||||
inode.key = elem.Key()
|
||||
inode.value = elem.Value()
|
||||
} else {
|
||||
elem := p.branchPageElement(uint16(i))
|
||||
inode.pgid = elem.pgid
|
||||
inode.key = elem.key()
|
||||
elem := p.BranchPageElement(uint16(i))
|
||||
inode.pgid = elem.Pgid()
|
||||
inode.key = elem.Key()
|
||||
}
|
||||
_assert(len(inode.key) > 0, "read: zero-length inode key")
|
||||
common.Assert(len(inode.key) > 0, "read: zero-length inode key")
|
||||
}
|
||||
|
||||
// Save first key so we can find the node in the parent when we spill.
|
||||
// Save first key, so we can find the node in the parent when we spill.
|
||||
if len(n.inodes) > 0 {
|
||||
n.key = n.inodes[0].key
|
||||
_assert(len(n.key) > 0, "read: zero-length node key")
|
||||
common.Assert(len(n.key) > 0, "read: zero-length node key")
|
||||
} else {
|
||||
n.key = nil
|
||||
}
|
||||
|
@ -190,23 +192,23 @@ func (n *node) read(p *page) {
|
|||
// write writes the items onto one or more pages.
|
||||
// The page should have p.id (might be 0 for meta or bucket-inline page) and p.overflow set
|
||||
// and the rest should be zeroed.
|
||||
func (n *node) write(p *page) {
|
||||
_assert(p.count == 0 && p.flags == 0, "node cannot be written into a not empty page")
|
||||
func (n *node) write(p *common.Page) {
|
||||
common.Assert(p.Count() == 0 && p.Flags() == 0, "node cannot be written into a not empty page")
|
||||
|
||||
// Initialize page.
|
||||
if n.isLeaf {
|
||||
p.flags = leafPageFlag
|
||||
p.SetFlags(common.LeafPageFlag)
|
||||
} else {
|
||||
p.flags = branchPageFlag
|
||||
p.SetFlags(common.BranchPageFlag)
|
||||
}
|
||||
|
||||
if len(n.inodes) >= 0xFFFF {
|
||||
panic(fmt.Sprintf("inode overflow: %d (pgid=%d)", len(n.inodes), p.id))
|
||||
panic(fmt.Sprintf("inode overflow: %d (pgid=%d)", len(n.inodes), p.Id()))
|
||||
}
|
||||
p.count = uint16(len(n.inodes))
|
||||
p.SetCount(uint16(len(n.inodes)))
|
||||
|
||||
// Stop here if there are no items to write.
|
||||
if p.count == 0 {
|
||||
if p.Count() == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
|
@ -214,27 +216,27 @@ func (n *node) write(p *page) {
|
|||
// off tracks the offset into p of the start of the next data.
|
||||
off := unsafe.Sizeof(*p) + n.pageElementSize()*uintptr(len(n.inodes))
|
||||
for i, item := range n.inodes {
|
||||
_assert(len(item.key) > 0, "write: zero-length inode key")
|
||||
common.Assert(len(item.key) > 0, "write: zero-length inode key")
|
||||
|
||||
// Create a slice to write into of needed size and advance
|
||||
// byte pointer for next iteration.
|
||||
sz := len(item.key) + len(item.value)
|
||||
b := unsafeByteSlice(unsafe.Pointer(p), off, 0, sz)
|
||||
b := common.UnsafeByteSlice(unsafe.Pointer(p), off, 0, sz)
|
||||
off += uintptr(sz)
|
||||
|
||||
// Write the page element.
|
||||
if n.isLeaf {
|
||||
elem := p.leafPageElement(uint16(i))
|
||||
elem.pos = uint32(uintptr(unsafe.Pointer(&b[0])) - uintptr(unsafe.Pointer(elem)))
|
||||
elem.flags = item.flags
|
||||
elem.ksize = uint32(len(item.key))
|
||||
elem.vsize = uint32(len(item.value))
|
||||
elem := p.LeafPageElement(uint16(i))
|
||||
elem.SetPos(uint32(uintptr(unsafe.Pointer(&b[0])) - uintptr(unsafe.Pointer(elem))))
|
||||
elem.SetFlags(item.flags)
|
||||
elem.SetKsize(uint32(len(item.key)))
|
||||
elem.SetVsize(uint32(len(item.value)))
|
||||
} else {
|
||||
elem := p.branchPageElement(uint16(i))
|
||||
elem.pos = uint32(uintptr(unsafe.Pointer(&b[0])) - uintptr(unsafe.Pointer(elem)))
|
||||
elem.ksize = uint32(len(item.key))
|
||||
elem.pgid = item.pgid
|
||||
_assert(elem.pgid != p.id, "write: circular dependency occurred")
|
||||
elem := p.BranchPageElement(uint16(i))
|
||||
elem.SetPos(uint32(uintptr(unsafe.Pointer(&b[0])) - uintptr(unsafe.Pointer(elem))))
|
||||
elem.SetKsize(uint32(len(item.key)))
|
||||
elem.SetPgid(item.pgid)
|
||||
common.Assert(elem.Pgid() != p.Id(), "write: circular dependency occurred")
|
||||
}
|
||||
|
||||
// Write data for the element to the end of the page.
|
||||
|
@ -273,7 +275,7 @@ func (n *node) split(pageSize uintptr) []*node {
|
|||
func (n *node) splitTwo(pageSize uintptr) (*node, *node) {
|
||||
// Ignore the split if the page doesn't have at least enough nodes for
|
||||
// two pages or if the nodes can fit in a single page.
|
||||
if len(n.inodes) <= (minKeysPerPage*2) || n.sizeLessThan(pageSize) {
|
||||
if len(n.inodes) <= (common.MinKeysPerPage*2) || n.sizeLessThan(pageSize) {
|
||||
return n, nil
|
||||
}
|
||||
|
||||
|
@ -313,17 +315,17 @@ func (n *node) splitTwo(pageSize uintptr) (*node, *node) {
|
|||
// It returns the index as well as the size of the first page.
|
||||
// This is only called from split().
|
||||
func (n *node) splitIndex(threshold int) (index, sz uintptr) {
|
||||
sz = pageHeaderSize
|
||||
sz = common.PageHeaderSize
|
||||
|
||||
// Loop until we only have the minimum number of keys required for the second page.
|
||||
for i := 0; i < len(n.inodes)-minKeysPerPage; i++ {
|
||||
for i := 0; i < len(n.inodes)-common.MinKeysPerPage; i++ {
|
||||
index = uintptr(i)
|
||||
inode := n.inodes[i]
|
||||
elsize := n.pageElementSize() + uintptr(len(inode.key)) + uintptr(len(inode.value))
|
||||
|
||||
// If we have at least the minimum number of keys and adding another
|
||||
// node would put us over the threshold then exit and return.
|
||||
if index >= minKeysPerPage && sz+elsize > uintptr(threshold) {
|
||||
if index >= common.MinKeysPerPage && sz+elsize > uintptr(threshold) {
|
||||
break
|
||||
}
|
||||
|
||||
|
@ -360,7 +362,7 @@ func (n *node) spill() error {
|
|||
for _, node := range nodes {
|
||||
// Add node's page to the freelist if it's not new.
|
||||
if node.pgid > 0 {
|
||||
tx.db.freelist.free(tx.meta.txid, tx.page(node.pgid))
|
||||
tx.db.freelist.free(tx.meta.Txid(), tx.page(node.pgid))
|
||||
node.pgid = 0
|
||||
}
|
||||
|
||||
|
@ -371,10 +373,10 @@ func (n *node) spill() error {
|
|||
}
|
||||
|
||||
// Write the node.
|
||||
if p.id >= tx.meta.pgid {
|
||||
panic(fmt.Sprintf("pgid (%d) above high water mark (%d)", p.id, tx.meta.pgid))
|
||||
if p.Id() >= tx.meta.Pgid() {
|
||||
panic(fmt.Sprintf("pgid (%d) above high water mark (%d)", p.Id(), tx.meta.Pgid()))
|
||||
}
|
||||
node.pgid = p.id
|
||||
node.pgid = p.Id()
|
||||
node.write(p)
|
||||
node.spilled = true
|
||||
|
||||
|
@ -387,7 +389,7 @@ func (n *node) spill() error {
|
|||
|
||||
node.parent.put(key, node.inodes[0].key, nil, node.pgid, 0)
|
||||
node.key = node.inodes[0].key
|
||||
_assert(len(node.key) > 0, "spill: zero-length node key")
|
||||
common.Assert(len(node.key) > 0, "spill: zero-length node key")
|
||||
}
|
||||
|
||||
// Update the statistics.
|
||||
|
@ -457,11 +459,11 @@ func (n *node) rebalance() {
|
|||
return
|
||||
}
|
||||
|
||||
_assert(n.parent.numChildren() > 1, "parent must have at least 2 children")
|
||||
common.Assert(n.parent.numChildren() > 1, "parent must have at least 2 children")
|
||||
|
||||
// Destination node is right sibling if idx == 0, otherwise left sibling.
|
||||
var target *node
|
||||
var useNextSibling = (n.parent.childIndex(n) == 0)
|
||||
var useNextSibling = n.parent.childIndex(n) == 0
|
||||
if useNextSibling {
|
||||
target = n.nextSibling()
|
||||
} else {
|
||||
|
@ -525,7 +527,7 @@ func (n *node) dereference() {
|
|||
key := make([]byte, len(n.key))
|
||||
copy(key, n.key)
|
||||
n.key = key
|
||||
_assert(n.pgid == 0 || len(n.key) > 0, "dereference: zero-length node key on existing node")
|
||||
common.Assert(n.pgid == 0 || len(n.key) > 0, "dereference: zero-length node key on existing node")
|
||||
}
|
||||
|
||||
for i := range n.inodes {
|
||||
|
@ -534,7 +536,7 @@ func (n *node) dereference() {
|
|||
key := make([]byte, len(inode.key))
|
||||
copy(key, inode.key)
|
||||
inode.key = key
|
||||
_assert(len(inode.key) > 0, "dereference: zero-length inode key")
|
||||
common.Assert(len(inode.key) > 0, "dereference: zero-length inode key")
|
||||
|
||||
value := make([]byte, len(inode.value))
|
||||
copy(value, inode.value)
|
||||
|
@ -553,7 +555,7 @@ func (n *node) dereference() {
|
|||
// free adds the node's underlying page to the freelist.
|
||||
func (n *node) free() {
|
||||
if n.pgid != 0 {
|
||||
n.bucket.tx.db.freelist.free(n.bucket.tx.meta.txid, n.bucket.tx.page(n.pgid))
|
||||
n.bucket.tx.db.freelist.free(n.bucket.tx.meta.Txid(), n.bucket.tx.page(n.pgid))
|
||||
n.pgid = 0
|
||||
}
|
||||
}
|
||||
|
@ -602,7 +604,7 @@ func (s nodes) Less(i, j int) bool {
|
|||
// to an element which hasn't been added to a page yet.
|
||||
type inode struct {
|
||||
flags uint32
|
||||
pgid pgid
|
||||
pgid common.Pgid
|
||||
key []byte
|
||||
value []byte
|
||||
}
|
||||
|
|
43
node_test.go
43
node_test.go
|
@ -3,15 +3,19 @@ package bbolt
|
|||
import (
|
||||
"testing"
|
||||
"unsafe"
|
||||
|
||||
"go.etcd.io/bbolt/internal/common"
|
||||
)
|
||||
|
||||
// Ensure that a node can insert a key/value.
|
||||
func TestNode_put(t *testing.T) {
|
||||
n := &node{inodes: make(inodes, 0), bucket: &Bucket{tx: &Tx{meta: &meta{pgid: 1}}}}
|
||||
m := &common.Meta{}
|
||||
m.SetPgid(1)
|
||||
n := &node{inodes: make(inodes, 0), bucket: &Bucket{tx: &Tx{meta: m}}}
|
||||
n.put([]byte("baz"), []byte("baz"), []byte("2"), 0, 0)
|
||||
n.put([]byte("foo"), []byte("foo"), []byte("0"), 0, 0)
|
||||
n.put([]byte("bar"), []byte("bar"), []byte("1"), 0, 0)
|
||||
n.put([]byte("foo"), []byte("foo"), []byte("3"), 0, leafPageFlag)
|
||||
n.put([]byte("foo"), []byte("foo"), []byte("3"), 0, common.LeafPageFlag)
|
||||
|
||||
if len(n.inodes) != 3 {
|
||||
t.Fatalf("exp=3; got=%d", len(n.inodes))
|
||||
|
@ -25,7 +29,7 @@ func TestNode_put(t *testing.T) {
|
|||
if k, v := n.inodes[2].key, n.inodes[2].value; string(k) != "foo" || string(v) != "3" {
|
||||
t.Fatalf("exp=<foo,3>; got=<%s,%s>", k, v)
|
||||
}
|
||||
if n.inodes[2].flags != uint32(leafPageFlag) {
|
||||
if n.inodes[2].flags != uint32(common.LeafPageFlag) {
|
||||
t.Fatalf("not a leaf: %d", n.inodes[2].flags)
|
||||
}
|
||||
}
|
||||
|
@ -34,18 +38,19 @@ func TestNode_put(t *testing.T) {
|
|||
func TestNode_read_LeafPage(t *testing.T) {
|
||||
// Create a page.
|
||||
var buf [4096]byte
|
||||
page := (*page)(unsafe.Pointer(&buf[0]))
|
||||
page.flags = leafPageFlag
|
||||
page.count = 2
|
||||
page := (*common.Page)(unsafe.Pointer(&buf[0]))
|
||||
page.SetFlags(common.LeafPageFlag)
|
||||
page.SetCount(2)
|
||||
|
||||
// Insert 2 elements at the beginning. sizeof(leafPageElement) == 16
|
||||
nodes := (*[3]leafPageElement)(unsafe.Pointer(uintptr(unsafe.Pointer(page)) + unsafe.Sizeof(*page)))
|
||||
nodes[0] = leafPageElement{flags: 0, pos: 32, ksize: 3, vsize: 4} // pos = sizeof(leafPageElement) * 2
|
||||
nodes[1] = leafPageElement{flags: 0, pos: 23, ksize: 10, vsize: 3} // pos = sizeof(leafPageElement) + 3 + 4
|
||||
nodes := page.LeafPageElements()
|
||||
//nodes := (*[3]leafPageElement)(unsafe.Pointer(uintptr(unsafe.Pointer(page)) + unsafe.Sizeof(*page)))
|
||||
nodes[0] = *common.NewLeafPageElement(0, 32, 3, 4) // pos = sizeof(leafPageElement) * 2
|
||||
nodes[1] = *common.NewLeafPageElement(0, 23, 10, 3) // pos = sizeof(leafPageElement) + 3 + 4
|
||||
|
||||
// Write data for the nodes at the end.
|
||||
const s = "barfoozhelloworldbye"
|
||||
data := unsafeByteSlice(unsafe.Pointer(&nodes[2]), 0, 0, len(s))
|
||||
data := common.UnsafeByteSlice(unsafe.Pointer(uintptr(unsafe.Pointer(page))+unsafe.Sizeof(*page)+common.LeafPageElementSize*2), 0, 0, len(s))
|
||||
copy(data, s)
|
||||
|
||||
// Deserialize page into a leaf.
|
||||
|
@ -70,14 +75,16 @@ func TestNode_read_LeafPage(t *testing.T) {
|
|||
// Ensure that a node can serialize into a leaf page.
|
||||
func TestNode_write_LeafPage(t *testing.T) {
|
||||
// Create a node.
|
||||
n := &node{isLeaf: true, inodes: make(inodes, 0), bucket: &Bucket{tx: &Tx{db: &DB{}, meta: &meta{pgid: 1}}}}
|
||||
m := &common.Meta{}
|
||||
m.SetPgid(1)
|
||||
n := &node{isLeaf: true, inodes: make(inodes, 0), bucket: &Bucket{tx: &Tx{db: &DB{}, meta: m}}}
|
||||
n.put([]byte("susy"), []byte("susy"), []byte("que"), 0, 0)
|
||||
n.put([]byte("ricki"), []byte("ricki"), []byte("lake"), 0, 0)
|
||||
n.put([]byte("john"), []byte("john"), []byte("johnson"), 0, 0)
|
||||
|
||||
// Write it to a page.
|
||||
var buf [4096]byte
|
||||
p := (*page)(unsafe.Pointer(&buf[0]))
|
||||
p := (*common.Page)(unsafe.Pointer(&buf[0]))
|
||||
n.write(p)
|
||||
|
||||
// Read the page back in.
|
||||
|
@ -102,7 +109,9 @@ func TestNode_write_LeafPage(t *testing.T) {
|
|||
// Ensure that a node can split into appropriate subgroups.
|
||||
func TestNode_split(t *testing.T) {
|
||||
// Create a node.
|
||||
n := &node{inodes: make(inodes, 0), bucket: &Bucket{tx: &Tx{db: &DB{}, meta: &meta{pgid: 1}}}}
|
||||
m := &common.Meta{}
|
||||
m.SetPgid(1)
|
||||
n := &node{inodes: make(inodes, 0), bucket: &Bucket{tx: &Tx{db: &DB{}, meta: m}}}
|
||||
n.put([]byte("00000001"), []byte("00000001"), []byte("0123456701234567"), 0, 0)
|
||||
n.put([]byte("00000002"), []byte("00000002"), []byte("0123456701234567"), 0, 0)
|
||||
n.put([]byte("00000003"), []byte("00000003"), []byte("0123456701234567"), 0, 0)
|
||||
|
@ -127,7 +136,9 @@ func TestNode_split(t *testing.T) {
|
|||
// Ensure that a page with the minimum number of inodes just returns a single node.
|
||||
func TestNode_split_MinKeys(t *testing.T) {
|
||||
// Create a node.
|
||||
n := &node{inodes: make(inodes, 0), bucket: &Bucket{tx: &Tx{db: &DB{}, meta: &meta{pgid: 1}}}}
|
||||
m := &common.Meta{}
|
||||
m.SetPgid(1)
|
||||
n := &node{inodes: make(inodes, 0), bucket: &Bucket{tx: &Tx{db: &DB{}, meta: m}}}
|
||||
n.put([]byte("00000001"), []byte("00000001"), []byte("0123456701234567"), 0, 0)
|
||||
n.put([]byte("00000002"), []byte("00000002"), []byte("0123456701234567"), 0, 0)
|
||||
|
||||
|
@ -141,7 +152,9 @@ func TestNode_split_MinKeys(t *testing.T) {
|
|||
// Ensure that a node that has keys that all fit on a page just returns one leaf.
|
||||
func TestNode_split_SinglePage(t *testing.T) {
|
||||
// Create a node.
|
||||
n := &node{inodes: make(inodes, 0), bucket: &Bucket{tx: &Tx{db: &DB{}, meta: &meta{pgid: 1}}}}
|
||||
m := &common.Meta{}
|
||||
m.SetPgid(1)
|
||||
n := &node{inodes: make(inodes, 0), bucket: &Bucket{tx: &Tx{db: &DB{}, meta: m}}}
|
||||
n.put([]byte("00000001"), []byte("00000001"), []byte("0123456701234567"), 0, 0)
|
||||
n.put([]byte("00000002"), []byte("00000002"), []byte("0123456701234567"), 0, 0)
|
||||
n.put([]byte("00000003"), []byte("00000003"), []byte("0123456701234567"), 0, 0)
|
||||
|
|
214
page.go
214
page.go
|
@ -1,214 +0,0 @@
|
|||
package bbolt
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"sort"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
const pageHeaderSize = unsafe.Sizeof(page{})
|
||||
|
||||
const minKeysPerPage = 2
|
||||
|
||||
const branchPageElementSize = unsafe.Sizeof(branchPageElement{})
|
||||
const leafPageElementSize = unsafe.Sizeof(leafPageElement{})
|
||||
|
||||
const (
|
||||
branchPageFlag = 0x01
|
||||
leafPageFlag = 0x02
|
||||
metaPageFlag = 0x04
|
||||
freelistPageFlag = 0x10
|
||||
)
|
||||
|
||||
const (
|
||||
bucketLeafFlag = 0x01
|
||||
)
|
||||
|
||||
// pgid is a page identifier.
type pgid uint64
|
||||
|
||||
// page is the fixed-size header found at the start of a page. The page's
// payload (meta struct or element headers) begins unsafe.Sizeof(page{}) bytes
// after the header's address.
type page struct {
|
||||
// id is the page's own identifier; fastCheck asserts it matches the expected id.
id pgid
|
||||
// flags holds the page-type flag (see typ and fastCheck).
flags uint16
|
||||
// count is the number of elements on the page.
count uint16
|
||||
// NOTE(review): overflow is presumably the number of additional contiguous
// pages that follow this one - confirm against the allocator.
overflow uint32
|
||||
}
|
||||
|
||||
// typ returns a human readable page type string used for debugging.
|
||||
func (p *page) typ() string {
|
||||
if (p.flags & branchPageFlag) != 0 {
|
||||
return "branch"
|
||||
} else if (p.flags & leafPageFlag) != 0 {
|
||||
return "leaf"
|
||||
} else if (p.flags & metaPageFlag) != 0 {
|
||||
return "meta"
|
||||
} else if (p.flags & freelistPageFlag) != 0 {
|
||||
return "freelist"
|
||||
}
|
||||
return fmt.Sprintf("unknown<%02x>", p.flags)
|
||||
}
|
||||
|
||||
// meta returns a pointer to the metadata section of the page.
|
||||
func (p *page) meta() *meta {
|
||||
return (*meta)(unsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p)))
|
||||
}
|
||||
|
||||
func (p *page) fastCheck(id pgid) {
|
||||
_assert(p.id == id, "Page expected to be: %v, but self identifies as %v", id, p.id)
|
||||
// Only one flag of page-type can be set.
|
||||
_assert(p.flags == branchPageFlag ||
|
||||
p.flags == leafPageFlag ||
|
||||
p.flags == metaPageFlag ||
|
||||
p.flags == freelistPageFlag,
|
||||
"page %v: has unexpected type/flags: %x", p.id, p.flags)
|
||||
}
|
||||
|
||||
// leafPageElement retrieves the leaf node by index
|
||||
func (p *page) leafPageElement(index uint16) *leafPageElement {
|
||||
return (*leafPageElement)(unsafeIndex(unsafe.Pointer(p), unsafe.Sizeof(*p),
|
||||
leafPageElementSize, int(index)))
|
||||
}
|
||||
|
||||
// leafPageElements retrieves a list of leaf nodes.
|
||||
func (p *page) leafPageElements() []leafPageElement {
|
||||
if p.count == 0 {
|
||||
return nil
|
||||
}
|
||||
var elems []leafPageElement
|
||||
data := unsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p))
|
||||
unsafeSlice(unsafe.Pointer(&elems), data, int(p.count))
|
||||
return elems
|
||||
}
|
||||
|
||||
// branchPageElement retrieves the branch node by index
|
||||
func (p *page) branchPageElement(index uint16) *branchPageElement {
|
||||
return (*branchPageElement)(unsafeIndex(unsafe.Pointer(p), unsafe.Sizeof(*p),
|
||||
unsafe.Sizeof(branchPageElement{}), int(index)))
|
||||
}
|
||||
|
||||
// branchPageElements retrieves a list of branch nodes.
|
||||
func (p *page) branchPageElements() []branchPageElement {
|
||||
if p.count == 0 {
|
||||
return nil
|
||||
}
|
||||
var elems []branchPageElement
|
||||
data := unsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p))
|
||||
unsafeSlice(unsafe.Pointer(&elems), data, int(p.count))
|
||||
return elems
|
||||
}
|
||||
|
||||
// dump writes n bytes of the page to STDERR as hex output.
|
||||
func (p *page) hexdump(n int) {
|
||||
buf := unsafeByteSlice(unsafe.Pointer(p), 0, 0, n)
|
||||
fmt.Fprintf(os.Stderr, "%x\n", buf)
|
||||
}
|
||||
|
||||
type pages []*page
|
||||
|
||||
func (s pages) Len() int { return len(s) }
|
||||
func (s pages) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
|
||||
func (s pages) Less(i, j int) bool { return s[i].id < s[j].id }
|
||||
|
||||
// branchPageElement represents a node on a branch page.
|
||||
type branchPageElement struct {
|
||||
pos uint32
|
||||
ksize uint32
|
||||
pgid pgid
|
||||
}
|
||||
|
||||
// key returns a byte slice of the node key.
|
||||
func (n *branchPageElement) key() []byte {
|
||||
return unsafeByteSlice(unsafe.Pointer(n), 0, int(n.pos), int(n.pos)+int(n.ksize))
|
||||
}
|
||||
|
||||
// leafPageElement represents a node on a leaf page.
|
||||
type leafPageElement struct {
|
||||
flags uint32
|
||||
pos uint32
|
||||
ksize uint32
|
||||
vsize uint32
|
||||
}
|
||||
|
||||
// key returns a byte slice of the node key.
|
||||
func (n *leafPageElement) key() []byte {
|
||||
i := int(n.pos)
|
||||
j := i + int(n.ksize)
|
||||
return unsafeByteSlice(unsafe.Pointer(n), 0, i, j)
|
||||
}
|
||||
|
||||
// value returns a byte slice of the node value.
|
||||
func (n *leafPageElement) value() []byte {
|
||||
i := int(n.pos) + int(n.ksize)
|
||||
j := i + int(n.vsize)
|
||||
return unsafeByteSlice(unsafe.Pointer(n), 0, i, j)
|
||||
}
|
||||
|
||||
// PageInfo represents human-readable information about a page.
// NOTE(review): the fields presumably mirror the page header (id, type name,
// element count, overflow) - confirm where PageInfo values are built.
|
||||
type PageInfo struct {
|
||||
ID int
|
||||
Type string
|
||||
Count int
|
||||
OverflowCount int
|
||||
}
|
||||
|
||||
type pgids []pgid
|
||||
|
||||
func (s pgids) Len() int { return len(s) }
|
||||
func (s pgids) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
|
||||
func (s pgids) Less(i, j int) bool { return s[i] < s[j] }
|
||||
|
||||
// merge returns the sorted union of a and b.
|
||||
func (a pgids) merge(b pgids) pgids {
|
||||
// Return the opposite slice if one is nil.
|
||||
if len(a) == 0 {
|
||||
return b
|
||||
}
|
||||
if len(b) == 0 {
|
||||
return a
|
||||
}
|
||||
merged := make(pgids, len(a)+len(b))
|
||||
mergepgids(merged, a, b)
|
||||
return merged
|
||||
}
|
||||
|
||||
// mergepgids copies the sorted union of a and b into dst.
|
||||
// If dst is too small, it panics.
|
||||
func mergepgids(dst, a, b pgids) {
|
||||
if len(dst) < len(a)+len(b) {
|
||||
panic(fmt.Errorf("mergepgids bad len %d < %d + %d", len(dst), len(a), len(b)))
|
||||
}
|
||||
// Copy in the opposite slice if one is nil.
|
||||
if len(a) == 0 {
|
||||
copy(dst, b)
|
||||
return
|
||||
}
|
||||
if len(b) == 0 {
|
||||
copy(dst, a)
|
||||
return
|
||||
}
|
||||
|
||||
// Merged will hold all elements from both lists.
|
||||
merged := dst[:0]
|
||||
|
||||
// Assign lead to the slice with a lower starting value, follow to the higher value.
|
||||
lead, follow := a, b
|
||||
if b[0] < a[0] {
|
||||
lead, follow = b, a
|
||||
}
|
||||
|
||||
// Continue while there are elements in the lead.
|
||||
for len(lead) > 0 {
|
||||
// Merge largest prefix of lead that is ahead of follow[0].
|
||||
n := sort.Search(len(lead), func(i int) bool { return lead[i] > follow[0] })
|
||||
merged = append(merged, lead[:n]...)
|
||||
if n >= len(lead) {
|
||||
break
|
||||
}
|
||||
|
||||
// Swap lead and follow.
|
||||
lead, follow = follow, lead[n:]
|
||||
}
|
||||
|
||||
// Append what's left in follow.
|
||||
_ = append(merged, follow...)
|
||||
}
|
72
page_test.go
72
page_test.go
|
@ -1,72 +0,0 @@
|
|||
package bbolt
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"sort"
|
||||
"testing"
|
||||
"testing/quick"
|
||||
)
|
||||
|
||||
// Ensure that the page type can be returned in human readable format.
|
||||
func TestPage_typ(t *testing.T) {
|
||||
if typ := (&page{flags: branchPageFlag}).typ(); typ != "branch" {
|
||||
t.Fatalf("exp=branch; got=%v", typ)
|
||||
}
|
||||
if typ := (&page{flags: leafPageFlag}).typ(); typ != "leaf" {
|
||||
t.Fatalf("exp=leaf; got=%v", typ)
|
||||
}
|
||||
if typ := (&page{flags: metaPageFlag}).typ(); typ != "meta" {
|
||||
t.Fatalf("exp=meta; got=%v", typ)
|
||||
}
|
||||
if typ := (&page{flags: freelistPageFlag}).typ(); typ != "freelist" {
|
||||
t.Fatalf("exp=freelist; got=%v", typ)
|
||||
}
|
||||
if typ := (&page{flags: 20000}).typ(); typ != "unknown<4e20>" {
|
||||
t.Fatalf("exp=unknown<4e20>; got=%v", typ)
|
||||
}
|
||||
}
|
||||
|
||||
// Ensure that the hexdump debugging function doesn't blow up.
|
||||
func TestPage_dump(t *testing.T) {
|
||||
(&page{id: 256}).hexdump(16)
|
||||
}
|
||||
|
||||
func TestPgids_merge(t *testing.T) {
|
||||
a := pgids{4, 5, 6, 10, 11, 12, 13, 27}
|
||||
b := pgids{1, 3, 8, 9, 25, 30}
|
||||
c := a.merge(b)
|
||||
if !reflect.DeepEqual(c, pgids{1, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 25, 27, 30}) {
|
||||
t.Errorf("mismatch: %v", c)
|
||||
}
|
||||
|
||||
a = pgids{4, 5, 6, 10, 11, 12, 13, 27, 35, 36}
|
||||
b = pgids{8, 9, 25, 30}
|
||||
c = a.merge(b)
|
||||
if !reflect.DeepEqual(c, pgids{4, 5, 6, 8, 9, 10, 11, 12, 13, 25, 27, 30, 35, 36}) {
|
||||
t.Errorf("mismatch: %v", c)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPgids_merge_quick(t *testing.T) {
|
||||
if err := quick.Check(func(a, b pgids) bool {
|
||||
// Sort incoming lists.
|
||||
sort.Sort(a)
|
||||
sort.Sort(b)
|
||||
|
||||
// Merge the two lists together.
|
||||
got := a.merge(b)
|
||||
|
||||
// The expected value should be the two lists combined and sorted.
|
||||
exp := append(a, b...)
|
||||
sort.Sort(exp)
|
||||
|
||||
if !reflect.DeepEqual(exp, got) {
|
||||
t.Errorf("\nexp=%+v\ngot=%+v\n", exp, got)
|
||||
return false
|
||||
}
|
||||
|
||||
return true
|
||||
}, nil); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
141
tx.go
141
tx.go
|
@ -9,10 +9,9 @@ import (
|
|||
"sync/atomic"
|
||||
"time"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// txid represents the internal transaction identifier.
|
||||
type txid uint64
|
||||
"go.etcd.io/bbolt/internal/common"
|
||||
)
|
||||
|
||||
// Tx represents a read-only or read/write transaction on the database.
|
||||
// Read-only transactions can be used for retrieving values for keys and creating cursors.
|
||||
|
@ -26,9 +25,9 @@ type Tx struct {
|
|||
writable bool
|
||||
managed bool
|
||||
db *DB
|
||||
meta *meta
|
||||
meta *common.Meta
|
||||
root Bucket
|
||||
pages map[pgid]*page
|
||||
pages map[common.Pgid]*common.Page
|
||||
stats TxStats
|
||||
commitHandlers []func()
|
||||
|
||||
|
@ -47,24 +46,24 @@ func (tx *Tx) init(db *DB) {
|
|||
tx.pages = nil
|
||||
|
||||
// Copy the meta page since it can be changed by the writer.
|
||||
tx.meta = &meta{}
|
||||
db.meta().copy(tx.meta)
|
||||
tx.meta = &common.Meta{}
|
||||
db.meta().Copy(tx.meta)
|
||||
|
||||
// Copy over the root bucket.
|
||||
tx.root = newBucket(tx)
|
||||
tx.root.bucket = &bucket{}
|
||||
*tx.root.bucket = tx.meta.root
|
||||
tx.root.InBucket = &common.InBucket{}
|
||||
*tx.root.InBucket = *(tx.meta.RootBucket())
|
||||
|
||||
// Increment the transaction id and add a page cache for writable transactions.
|
||||
if tx.writable {
|
||||
tx.pages = make(map[pgid]*page)
|
||||
tx.meta.txid += txid(1)
|
||||
tx.pages = make(map[common.Pgid]*common.Page)
|
||||
tx.meta.IncTxid()
|
||||
}
|
||||
}
|
||||
|
||||
// ID returns the transaction id.
|
||||
func (tx *Tx) ID() int {
|
||||
return int(tx.meta.txid)
|
||||
return int(tx.meta.Txid())
|
||||
}
|
||||
|
||||
// DB returns a reference to the database that created the transaction.
|
||||
|
@ -74,7 +73,7 @@ func (tx *Tx) DB() *DB {
|
|||
|
||||
// Size returns current database size in bytes as seen by this transaction.
|
||||
func (tx *Tx) Size() int64 {
|
||||
return int64(tx.meta.pgid) * int64(tx.db.pageSize)
|
||||
return int64(tx.meta.Pgid()) * int64(tx.db.pageSize)
|
||||
}
|
||||
|
||||
// Writable returns whether the transaction can perform write operations.
|
||||
|
@ -140,11 +139,11 @@ func (tx *Tx) OnCommit(fn func()) {
|
|||
// Returns an error if a disk write error occurs, or if Commit is
|
||||
// called on a read-only transaction.
|
||||
func (tx *Tx) Commit() error {
|
||||
_assert(!tx.managed, "managed tx commit not allowed")
|
||||
common.Assert(!tx.managed, "managed tx commit not allowed")
|
||||
if tx.db == nil {
|
||||
return ErrTxClosed
|
||||
return common.ErrTxClosed
|
||||
} else if !tx.writable {
|
||||
return ErrTxNotWritable
|
||||
return common.ErrTxNotWritable
|
||||
}
|
||||
|
||||
// TODO(benbjohnson): Use vectorized I/O to write out dirty pages.
|
||||
|
@ -156,7 +155,7 @@ func (tx *Tx) Commit() error {
|
|||
tx.stats.IncRebalanceTime(time.Since(startTime))
|
||||
}
|
||||
|
||||
opgid := tx.meta.pgid
|
||||
opgid := tx.meta.Pgid()
|
||||
|
||||
// spill data onto dirty pages.
|
||||
startTime = time.Now()
|
||||
|
@ -167,11 +166,11 @@ func (tx *Tx) Commit() error {
|
|||
tx.stats.IncSpillTime(time.Since(startTime))
|
||||
|
||||
// Free the old root bucket.
|
||||
tx.meta.root.root = tx.root.root
|
||||
tx.meta.RootBucket().SetRootPage(tx.root.RootPage())
|
||||
|
||||
// Free the old freelist because commit writes out a fresh freelist.
|
||||
if tx.meta.freelist != pgidNoFreelist {
|
||||
tx.db.freelist.free(tx.meta.txid, tx.db.page(tx.meta.freelist))
|
||||
if tx.meta.Freelist() != common.PgidNoFreelist {
|
||||
tx.db.freelist.free(tx.meta.Txid(), tx.db.page(tx.meta.Freelist()))
|
||||
}
|
||||
|
||||
if !tx.db.NoFreelistSync {
|
||||
|
@ -180,12 +179,12 @@ func (tx *Tx) Commit() error {
|
|||
return err
|
||||
}
|
||||
} else {
|
||||
tx.meta.freelist = pgidNoFreelist
|
||||
tx.meta.SetFreelist(common.PgidNoFreelist)
|
||||
}
|
||||
|
||||
// If the high water mark has moved up then attempt to grow the database.
|
||||
if tx.meta.pgid > opgid {
|
||||
if err := tx.db.grow(int(tx.meta.pgid+1) * tx.db.pageSize); err != nil {
|
||||
if tx.meta.Pgid() > opgid {
|
||||
if err := tx.db.grow(int(tx.meta.Pgid()+1) * tx.db.pageSize); err != nil {
|
||||
tx.rollback()
|
||||
return err
|
||||
}
|
||||
|
@ -244,7 +243,7 @@ func (tx *Tx) commitFreelist() error {
|
|||
tx.rollback()
|
||||
return err
|
||||
}
|
||||
tx.meta.freelist = p.id
|
||||
tx.meta.SetFreelist(p.Id())
|
||||
|
||||
return nil
|
||||
}
|
||||
|
@ -252,9 +251,9 @@ func (tx *Tx) commitFreelist() error {
|
|||
// Rollback closes the transaction and ignores all previous updates. Read-only
|
||||
// transactions must be rolled back and not committed.
|
||||
func (tx *Tx) Rollback() error {
|
||||
_assert(!tx.managed, "managed tx rollback not allowed")
|
||||
common.Assert(!tx.managed, "managed tx rollback not allowed")
|
||||
if tx.db == nil {
|
||||
return ErrTxClosed
|
||||
return common.ErrTxClosed
|
||||
}
|
||||
tx.nonPhysicalRollback()
|
||||
return nil
|
||||
|
@ -266,7 +265,7 @@ func (tx *Tx) nonPhysicalRollback() {
|
|||
return
|
||||
}
|
||||
if tx.writable {
|
||||
tx.db.freelist.rollback(tx.meta.txid)
|
||||
tx.db.freelist.rollback(tx.meta.Txid())
|
||||
}
|
||||
tx.close()
|
||||
}
|
||||
|
@ -277,7 +276,7 @@ func (tx *Tx) rollback() {
|
|||
return
|
||||
}
|
||||
if tx.writable {
|
||||
tx.db.freelist.rollback(tx.meta.txid)
|
||||
tx.db.freelist.rollback(tx.meta.Txid())
|
||||
// When mmap fails, the `data`, `dataref` and `datasz` may be reset to
|
||||
// zero values, and there is no way to reload free page IDs in this case.
|
||||
if tx.db.data != nil {
|
||||
|
@ -287,7 +286,7 @@ func (tx *Tx) rollback() {
|
|||
tx.db.freelist.noSyncReload(tx.db.freepages())
|
||||
} else {
|
||||
// Read free page list from freelist page.
|
||||
tx.db.freelist.reload(tx.db.page(tx.db.meta().freelist))
|
||||
tx.db.freelist.reload(tx.db.page(tx.db.meta().Freelist()))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -352,13 +351,13 @@ func (tx *Tx) WriteTo(w io.Writer) (n int64, err error) {
|
|||
|
||||
// Generate a meta page. We use the same page data for both meta pages.
|
||||
buf := make([]byte, tx.db.pageSize)
|
||||
page := (*page)(unsafe.Pointer(&buf[0]))
|
||||
page.flags = metaPageFlag
|
||||
*page.meta() = *tx.meta
|
||||
page := (*common.Page)(unsafe.Pointer(&buf[0]))
|
||||
page.SetFlags(common.MetaPageFlag)
|
||||
*page.Meta() = *tx.meta
|
||||
|
||||
// Write meta 0.
|
||||
page.id = 0
|
||||
page.meta().checksum = page.meta().sum64()
|
||||
page.SetId(0)
|
||||
page.Meta().SetChecksum(page.Meta().Sum64())
|
||||
nn, err := w.Write(buf)
|
||||
n += int64(nn)
|
||||
if err != nil {
|
||||
|
@ -366,9 +365,9 @@ func (tx *Tx) WriteTo(w io.Writer) (n int64, err error) {
|
|||
}
|
||||
|
||||
// Write meta 1 with a lower transaction id.
|
||||
page.id = 1
|
||||
page.meta().txid -= 1
|
||||
page.meta().checksum = page.meta().sum64()
|
||||
page.SetId(1)
|
||||
page.Meta().DecTxid()
|
||||
page.Meta().SetChecksum(page.Meta().Sum64())
|
||||
nn, err = w.Write(buf)
|
||||
n += int64(nn)
|
||||
if err != nil {
|
||||
|
@ -408,14 +407,14 @@ func (tx *Tx) CopyFile(path string, mode os.FileMode) error {
|
|||
}
|
||||
|
||||
// allocate returns a contiguous block of memory starting at a given page.
|
||||
func (tx *Tx) allocate(count int) (*page, error) {
|
||||
p, err := tx.db.allocate(tx.meta.txid, count)
|
||||
func (tx *Tx) allocate(count int) (*common.Page, error) {
|
||||
p, err := tx.db.allocate(tx.meta.Txid(), count)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Save to our page cache.
|
||||
tx.pages[p.id] = p
|
||||
tx.pages[p.Id()] = p
|
||||
|
||||
// Update statistics.
|
||||
tx.stats.IncPageCount(int64(count))
|
||||
|
@ -427,18 +426,18 @@ func (tx *Tx) allocate(count int) (*page, error) {
|
|||
// write writes any dirty pages to disk.
|
||||
func (tx *Tx) write() error {
|
||||
// Sort pages by id.
|
||||
pages := make(pages, 0, len(tx.pages))
|
||||
pages := make(common.Pages, 0, len(tx.pages))
|
||||
for _, p := range tx.pages {
|
||||
pages = append(pages, p)
|
||||
}
|
||||
// Clear out page cache early.
|
||||
tx.pages = make(map[pgid]*page)
|
||||
tx.pages = make(map[common.Pgid]*common.Page)
|
||||
sort.Sort(pages)
|
||||
|
||||
// Write pages to disk in order.
|
||||
for _, p := range pages {
|
||||
rem := (uint64(p.overflow) + 1) * uint64(tx.db.pageSize)
|
||||
offset := int64(p.id) * int64(tx.db.pageSize)
|
||||
rem := (uint64(p.Overflow()) + 1) * uint64(tx.db.pageSize)
|
||||
offset := int64(p.Id()) * int64(tx.db.pageSize)
|
||||
var written uintptr
|
||||
|
||||
// Write out page in "max allocation" sized chunks.
|
||||
|
@ -447,7 +446,7 @@ func (tx *Tx) write() error {
|
|||
if sz > maxAllocSize-1 {
|
||||
sz = maxAllocSize - 1
|
||||
}
|
||||
buf := unsafeByteSlice(unsafe.Pointer(p), written, 0, int(sz))
|
||||
buf := common.UnsafeByteSlice(unsafe.Pointer(p), written, 0, int(sz))
|
||||
|
||||
if _, err := tx.db.ops.writeAt(buf, offset); err != nil {
|
||||
return err
|
||||
|
@ -469,7 +468,7 @@ func (tx *Tx) write() error {
|
|||
}
|
||||
|
||||
// Ignore file sync if flag is set on DB.
|
||||
if !tx.db.NoSync || IgnoreNoSync {
|
||||
if !tx.db.NoSync || common.IgnoreNoSync {
|
||||
if err := fdatasync(tx.db); err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -479,11 +478,11 @@ func (tx *Tx) write() error {
|
|||
for _, p := range pages {
|
||||
// Ignore page sizes over 1 page.
|
||||
// These are allocated using make() instead of the page pool.
|
||||
if int(p.overflow) != 0 {
|
||||
if int(p.Overflow()) != 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
buf := unsafeByteSlice(unsafe.Pointer(p), 0, 0, tx.db.pageSize)
|
||||
buf := common.UnsafeByteSlice(unsafe.Pointer(p), 0, 0, tx.db.pageSize)
|
||||
|
||||
// See https://go.googlesource.com/go/+/f03c9202c43e0abb130669852082117ca50aa9b1
|
||||
for i := range buf {
|
||||
|
@ -500,13 +499,13 @@ func (tx *Tx) writeMeta() error {
|
|||
// Create a temporary buffer for the meta page.
|
||||
buf := make([]byte, tx.db.pageSize)
|
||||
p := tx.db.pageInBuffer(buf, 0)
|
||||
tx.meta.write(p)
|
||||
tx.meta.Write(p)
|
||||
|
||||
// Write the meta page to file.
|
||||
if _, err := tx.db.ops.writeAt(buf, int64(p.id)*int64(tx.db.pageSize)); err != nil {
|
||||
if _, err := tx.db.ops.writeAt(buf, int64(p.Id())*int64(tx.db.pageSize)); err != nil {
|
||||
return err
|
||||
}
|
||||
if !tx.db.NoSync || IgnoreNoSync {
|
||||
if !tx.db.NoSync || common.IgnoreNoSync {
|
||||
if err := fdatasync(tx.db); err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -520,69 +519,69 @@ func (tx *Tx) writeMeta() error {
|
|||
|
||||
// page returns a reference to the page with a given id.
|
||||
// If page has been written to then a temporary buffered page is returned.
|
||||
func (tx *Tx) page(id pgid) *page {
|
||||
func (tx *Tx) page(id common.Pgid) *common.Page {
|
||||
// Check the dirty pages first.
|
||||
if tx.pages != nil {
|
||||
if p, ok := tx.pages[id]; ok {
|
||||
p.fastCheck(id)
|
||||
p.FastCheck(id)
|
||||
return p
|
||||
}
|
||||
}
|
||||
|
||||
// Otherwise return directly from the mmap.
|
||||
p := tx.db.page(id)
|
||||
p.fastCheck(id)
|
||||
p.FastCheck(id)
|
||||
return p
|
||||
}
|
||||
|
||||
// forEachPage iterates over every page within a given page and executes a function.
|
||||
func (tx *Tx) forEachPage(pgidnum pgid, fn func(*page, int, []pgid)) {
|
||||
stack := make([]pgid, 10)
|
||||
func (tx *Tx) forEachPage(pgidnum common.Pgid, fn func(*common.Page, int, []common.Pgid)) {
|
||||
stack := make([]common.Pgid, 10)
|
||||
stack[0] = pgidnum
|
||||
tx.forEachPageInternal(stack[:1], fn)
|
||||
}
|
||||
|
||||
func (tx *Tx) forEachPageInternal(pgidstack []pgid, fn func(*page, int, []pgid)) {
|
||||
func (tx *Tx) forEachPageInternal(pgidstack []common.Pgid, fn func(*common.Page, int, []common.Pgid)) {
|
||||
p := tx.page(pgidstack[len(pgidstack)-1])
|
||||
|
||||
// Execute function.
|
||||
fn(p, len(pgidstack)-1, pgidstack)
|
||||
|
||||
// Recursively loop over children.
|
||||
if (p.flags & branchPageFlag) != 0 {
|
||||
for i := 0; i < int(p.count); i++ {
|
||||
elem := p.branchPageElement(uint16(i))
|
||||
tx.forEachPageInternal(append(pgidstack, elem.pgid), fn)
|
||||
if (p.Flags() & common.BranchPageFlag) != 0 {
|
||||
for i := 0; i < int(p.Count()); i++ {
|
||||
elem := p.BranchPageElement(uint16(i))
|
||||
tx.forEachPageInternal(append(pgidstack, elem.Pgid()), fn)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Page returns page information for a given page number.
|
||||
// This is only safe for concurrent use when used by a writable transaction.
|
||||
func (tx *Tx) Page(id int) (*PageInfo, error) {
|
||||
func (tx *Tx) Page(id int) (*common.PageInfo, error) {
|
||||
if tx.db == nil {
|
||||
return nil, ErrTxClosed
|
||||
} else if pgid(id) >= tx.meta.pgid {
|
||||
return nil, common.ErrTxClosed
|
||||
} else if common.Pgid(id) >= tx.meta.Pgid() {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
if tx.db.freelist == nil {
|
||||
return nil, ErrFreePagesNotLoaded
|
||||
return nil, common.ErrFreePagesNotLoaded
|
||||
}
|
||||
|
||||
// Build the page info.
|
||||
p := tx.db.page(pgid(id))
|
||||
info := &PageInfo{
|
||||
p := tx.db.page(common.Pgid(id))
|
||||
info := &common.PageInfo{
|
||||
ID: id,
|
||||
Count: int(p.count),
|
||||
OverflowCount: int(p.overflow),
|
||||
Count: int(p.Count()),
|
||||
OverflowCount: int(p.Overflow()),
|
||||
}
|
||||
|
||||
// Determine the type (or if it's free).
|
||||
if tx.db.freelist.freed(pgid(id)) {
|
||||
if tx.db.freelist.freed(common.Pgid(id)) {
|
||||
info.Type = "free"
|
||||
} else {
|
||||
info.Type = p.typ()
|
||||
info.Type = p.Typ()
|
||||
}
|
||||
|
||||
return info, nil
|
||||
|
|
74
tx_check.go
74
tx_check.go
|
@ -3,6 +3,8 @@ package bbolt
|
|||
import (
|
||||
"encoding/hex"
|
||||
"fmt"
|
||||
|
||||
"go.etcd.io/bbolt/internal/common"
|
||||
)
|
||||
|
||||
// Check performs several consistency checks on the database for this transaction.
|
||||
|
@ -37,8 +39,8 @@ func (tx *Tx) check(kvStringer KVStringer, ch chan error) {
|
|||
tx.db.loadFreelist()
|
||||
|
||||
// Check if any pages are double freed.
|
||||
freed := make(map[pgid]bool)
|
||||
all := make([]pgid, tx.db.freelist.count())
|
||||
freed := make(map[common.Pgid]bool)
|
||||
all := make([]common.Pgid, tx.db.freelist.count())
|
||||
tx.db.freelist.copyall(all)
|
||||
for _, id := range all {
|
||||
if freed[id] {
|
||||
|
@ -48,12 +50,12 @@ func (tx *Tx) check(kvStringer KVStringer, ch chan error) {
|
|||
}
|
||||
|
||||
// Track every reachable page.
|
||||
reachable := make(map[pgid]*page)
|
||||
reachable := make(map[common.Pgid]*common.Page)
|
||||
reachable[0] = tx.page(0) // meta0
|
||||
reachable[1] = tx.page(1) // meta1
|
||||
if tx.meta.freelist != pgidNoFreelist {
|
||||
for i := uint32(0); i <= tx.page(tx.meta.freelist).overflow; i++ {
|
||||
reachable[tx.meta.freelist+pgid(i)] = tx.page(tx.meta.freelist)
|
||||
if tx.meta.Freelist() != common.PgidNoFreelist {
|
||||
for i := uint32(0); i <= tx.page(tx.meta.Freelist()).Overflow(); i++ {
|
||||
reachable[tx.meta.Freelist()+common.Pgid(i)] = tx.page(tx.meta.Freelist())
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -61,7 +63,7 @@ func (tx *Tx) check(kvStringer KVStringer, ch chan error) {
|
|||
tx.checkBucket(&tx.root, reachable, freed, kvStringer, ch)
|
||||
|
||||
// Ensure all pages below high water mark are either reachable or freed.
|
||||
for i := pgid(0); i < tx.meta.pgid; i++ {
|
||||
for i := common.Pgid(0); i < tx.meta.Pgid(); i++ {
|
||||
_, isReachable := reachable[i]
|
||||
if !isReachable && !freed[i] {
|
||||
ch <- fmt.Errorf("page %d: unreachable unfreed", int(i))
|
||||
|
@ -72,22 +74,22 @@ func (tx *Tx) check(kvStringer KVStringer, ch chan error) {
|
|||
close(ch)
|
||||
}
|
||||
|
||||
func (tx *Tx) checkBucket(b *Bucket, reachable map[pgid]*page, freed map[pgid]bool,
|
||||
func (tx *Tx) checkBucket(b *Bucket, reachable map[common.Pgid]*common.Page, freed map[common.Pgid]bool,
|
||||
kvStringer KVStringer, ch chan error) {
|
||||
// Ignore inline buckets.
|
||||
if b.root == 0 {
|
||||
if b.RootPage() == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
// Check every page used by this bucket.
|
||||
b.tx.forEachPage(b.root, func(p *page, _ int, stack []pgid) {
|
||||
if p.id > tx.meta.pgid {
|
||||
ch <- fmt.Errorf("page %d: out of bounds: %d (stack: %v)", int(p.id), int(b.tx.meta.pgid), stack)
|
||||
b.tx.forEachPage(b.RootPage(), func(p *common.Page, _ int, stack []common.Pgid) {
|
||||
if p.Id() > tx.meta.Pgid() {
|
||||
ch <- fmt.Errorf("page %d: out of bounds: %d (stack: %v)", int(p.Id()), int(b.tx.meta.Pgid()), stack)
|
||||
}
|
||||
|
||||
// Ensure each page is only referenced once.
|
||||
for i := pgid(0); i <= pgid(p.overflow); i++ {
|
||||
var id = p.id + i
|
||||
for i := common.Pgid(0); i <= common.Pgid(p.Overflow()); i++ {
|
||||
var id = p.Id() + i
|
||||
if _, ok := reachable[id]; ok {
|
||||
ch <- fmt.Errorf("page %d: multiple references (stack: %v)", int(id), stack)
|
||||
}
|
||||
|
@ -95,14 +97,14 @@ func (tx *Tx) checkBucket(b *Bucket, reachable map[pgid]*page, freed map[pgid]bo
|
|||
}
|
||||
|
||||
// We should only encounter un-freed leaf and branch pages.
|
||||
if freed[p.id] {
|
||||
ch <- fmt.Errorf("page %d: reachable freed", int(p.id))
|
||||
} else if (p.flags&branchPageFlag) == 0 && (p.flags&leafPageFlag) == 0 {
|
||||
ch <- fmt.Errorf("page %d: invalid type: %s (stack: %v)", int(p.id), p.typ(), stack)
|
||||
if freed[p.Id()] {
|
||||
ch <- fmt.Errorf("page %d: reachable freed", int(p.Id()))
|
||||
} else if (p.Flags()&common.BranchPageFlag) == 0 && (p.Flags()&common.LeafPageFlag) == 0 {
|
||||
ch <- fmt.Errorf("page %d: invalid type: %s (stack: %v)", int(p.Id()), p.Typ(), stack)
|
||||
}
|
||||
})
|
||||
|
||||
tx.recursivelyCheckPages(b.root, kvStringer.KeyToString, ch)
|
||||
tx.recursivelyCheckPages(b.RootPage(), kvStringer.KeyToString, ch)
|
||||
|
||||
// Check each bucket within this bucket.
|
||||
_ = b.ForEachBucket(func(k []byte) error {
|
||||
|
@ -117,7 +119,7 @@ func (tx *Tx) checkBucket(b *Bucket, reachable map[pgid]*page, freed map[pgid]bo
|
|||
// key order constraints:
|
||||
// - keys on pages must be sorted
|
||||
// - keys on children pages are between 2 consecutive keys on the parent's branch page.
|
||||
func (tx *Tx) recursivelyCheckPages(pgId pgid, keyToString func([]byte) string, ch chan error) {
|
||||
func (tx *Tx) recursivelyCheckPages(pgId common.Pgid, keyToString func([]byte) string, ch chan error) {
|
||||
tx.recursivelyCheckPagesInternal(pgId, nil, nil, nil, keyToString, ch)
|
||||
}
|
||||
|
||||
|
@ -127,36 +129,36 @@ func (tx *Tx) recursivelyCheckPages(pgId pgid, keyToString func([]byte) string,
|
|||
// - Are in right ordering relationship to their parents.
|
||||
// `pagesStack` is expected to contain IDs of pages from the tree root to `pgid` for the clean debugging message.
|
||||
func (tx *Tx) recursivelyCheckPagesInternal(
|
||||
pgId pgid, minKeyClosed, maxKeyOpen []byte, pagesStack []pgid,
|
||||
pgId common.Pgid, minKeyClosed, maxKeyOpen []byte, pagesStack []common.Pgid,
|
||||
keyToString func([]byte) string, ch chan error) (maxKeyInSubtree []byte) {
|
||||
|
||||
p := tx.page(pgId)
|
||||
pagesStack = append(pagesStack, pgId)
|
||||
switch {
|
||||
case p.flags&branchPageFlag != 0:
|
||||
case p.Flags()&common.BranchPageFlag != 0:
|
||||
// For branch page we navigate ranges of all subpages.
|
||||
runningMin := minKeyClosed
|
||||
for i := range p.branchPageElements() {
|
||||
elem := p.branchPageElement(uint16(i))
|
||||
verifyKeyOrder(elem.pgid, "branch", i, elem.key(), runningMin, maxKeyOpen, ch, keyToString, pagesStack)
|
||||
for i := range p.BranchPageElements() {
|
||||
elem := p.BranchPageElement(uint16(i))
|
||||
verifyKeyOrder(elem.Pgid(), "branch", i, elem.Key(), runningMin, maxKeyOpen, ch, keyToString, pagesStack)
|
||||
|
||||
maxKey := maxKeyOpen
|
||||
if i < len(p.branchPageElements())-1 {
|
||||
maxKey = p.branchPageElement(uint16(i + 1)).key()
|
||||
if i < len(p.BranchPageElements())-1 {
|
||||
maxKey = p.BranchPageElement(uint16(i + 1)).Key()
|
||||
}
|
||||
maxKeyInSubtree = tx.recursivelyCheckPagesInternal(elem.pgid, elem.key(), maxKey, pagesStack, keyToString, ch)
|
||||
maxKeyInSubtree = tx.recursivelyCheckPagesInternal(elem.Pgid(), elem.Key(), maxKey, pagesStack, keyToString, ch)
|
||||
runningMin = maxKeyInSubtree
|
||||
}
|
||||
return maxKeyInSubtree
|
||||
case p.flags&leafPageFlag != 0:
|
||||
case p.Flags()&common.LeafPageFlag != 0:
|
||||
runningMin := minKeyClosed
|
||||
for i := range p.leafPageElements() {
|
||||
elem := p.leafPageElement(uint16(i))
|
||||
verifyKeyOrder(pgId, "leaf", i, elem.key(), runningMin, maxKeyOpen, ch, keyToString, pagesStack)
|
||||
runningMin = elem.key()
|
||||
for i := range p.LeafPageElements() {
|
||||
elem := p.LeafPageElement(uint16(i))
|
||||
verifyKeyOrder(pgId, "leaf", i, elem.Key(), runningMin, maxKeyOpen, ch, keyToString, pagesStack)
|
||||
runningMin = elem.Key()
|
||||
}
|
||||
if p.count > 0 {
|
||||
return p.leafPageElement(p.count - 1).key()
|
||||
if p.Count() > 0 {
|
||||
return p.LeafPageElement(p.Count() - 1).Key()
|
||||
}
|
||||
default:
|
||||
ch <- fmt.Errorf("unexpected page type for pgId:%d", pgId)
|
||||
|
@ -168,7 +170,7 @@ func (tx *Tx) recursivelyCheckPagesInternal(
|
|||
* verifyKeyOrder checks whether an entry with given #index on pgId (pageType: "branch|leaf") that has given "key",
|
||||
* is within range determined by (previousKey..maxKeyOpen) and reports found violations to the channel (ch).
|
||||
*/
|
||||
func verifyKeyOrder(pgId pgid, pageType string, index int, key []byte, previousKey []byte, maxKeyOpen []byte, ch chan error, keyToString func([]byte) string, pagesStack []pgid) {
|
||||
func verifyKeyOrder(pgId common.Pgid, pageType string, index int, key []byte, previousKey []byte, maxKeyOpen []byte, ch chan error, keyToString func([]byte) string, pagesStack []common.Pgid) {
|
||||
if index == 0 && previousKey != nil && compareKeys(previousKey, key) > 0 {
|
||||
ch <- fmt.Errorf("the first key[%d]=(hex)%s on %s page(%d) needs to be >= the key in the ancestor (%s). Stack: %v",
|
||||
index, keyToString(key), pageType, pgId, keyToString(previousKey), pagesStack)
|
||||
|
|
25
tx_test.go
25
tx_test.go
|
@ -15,6 +15,7 @@ import (
|
|||
|
||||
bolt "go.etcd.io/bbolt"
|
||||
"go.etcd.io/bbolt/internal/btesting"
|
||||
"go.etcd.io/bbolt/internal/common"
|
||||
)
|
||||
|
||||
// TestTx_Check_ReadOnly tests consistency checking on a ReadOnly database.
|
||||
|
@ -84,7 +85,7 @@ func TestTx_Commit_ErrTxClosed(t *testing.T) {
|
|||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err := tx.Commit(); err != bolt.ErrTxClosed {
|
||||
if err := tx.Commit(); err != common.ErrTxClosed {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
}
|
||||
|
@ -101,7 +102,7 @@ func TestTx_Rollback_ErrTxClosed(t *testing.T) {
|
|||
if err := tx.Rollback(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err := tx.Rollback(); err != bolt.ErrTxClosed {
|
||||
if err := tx.Rollback(); err != common.ErrTxClosed {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
}
|
||||
|
@ -113,7 +114,7 @@ func TestTx_Commit_ErrTxNotWritable(t *testing.T) {
|
|||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err := tx.Commit(); err != bolt.ErrTxNotWritable {
|
||||
if err := tx.Commit(); err != common.ErrTxNotWritable {
|
||||
t.Fatal(err)
|
||||
}
|
||||
// Close the view transaction
|
||||
|
@ -165,7 +166,7 @@ func TestTx_CreateBucket_ErrTxNotWritable(t *testing.T) {
|
|||
db := btesting.MustCreateDB(t)
|
||||
if err := db.View(func(tx *bolt.Tx) error {
|
||||
_, err := tx.CreateBucket([]byte("foo"))
|
||||
if err != bolt.ErrTxNotWritable {
|
||||
if err != common.ErrTxNotWritable {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
return nil
|
||||
|
@ -185,7 +186,7 @@ func TestTx_CreateBucket_ErrTxClosed(t *testing.T) {
|
|||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if _, err := tx.CreateBucket([]byte("foo")); err != bolt.ErrTxClosed {
|
||||
if _, err := tx.CreateBucket([]byte("foo")); err != common.ErrTxClosed {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
}
|
||||
|
@ -293,11 +294,11 @@ func TestTx_CreateBucketIfNotExists(t *testing.T) {
|
|||
func TestTx_CreateBucketIfNotExists_ErrBucketNameRequired(t *testing.T) {
|
||||
db := btesting.MustCreateDB(t)
|
||||
if err := db.Update(func(tx *bolt.Tx) error {
|
||||
if _, err := tx.CreateBucketIfNotExists([]byte{}); err != bolt.ErrBucketNameRequired {
|
||||
if _, err := tx.CreateBucketIfNotExists([]byte{}); err != common.ErrBucketNameRequired {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
|
||||
if _, err := tx.CreateBucketIfNotExists(nil); err != bolt.ErrBucketNameRequired {
|
||||
if _, err := tx.CreateBucketIfNotExists(nil); err != common.ErrBucketNameRequired {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
|
||||
|
@ -323,7 +324,7 @@ func TestTx_CreateBucket_ErrBucketExists(t *testing.T) {
|
|||
|
||||
// Create the same bucket again.
|
||||
if err := db.Update(func(tx *bolt.Tx) error {
|
||||
if _, err := tx.CreateBucket([]byte("widgets")); err != bolt.ErrBucketExists {
|
||||
if _, err := tx.CreateBucket([]byte("widgets")); err != common.ErrBucketExists {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
return nil
|
||||
|
@ -336,7 +337,7 @@ func TestTx_CreateBucket_ErrBucketExists(t *testing.T) {
|
|||
func TestTx_CreateBucket_ErrBucketNameRequired(t *testing.T) {
|
||||
db := btesting.MustCreateDB(t)
|
||||
if err := db.Update(func(tx *bolt.Tx) error {
|
||||
if _, err := tx.CreateBucket(nil); err != bolt.ErrBucketNameRequired {
|
||||
if _, err := tx.CreateBucket(nil); err != common.ErrBucketNameRequired {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
return nil
|
||||
|
@ -401,7 +402,7 @@ func TestTx_DeleteBucket_ErrTxClosed(t *testing.T) {
|
|||
if err := tx.Commit(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err := tx.DeleteBucket([]byte("foo")); err != bolt.ErrTxClosed {
|
||||
if err := tx.DeleteBucket([]byte("foo")); err != common.ErrTxClosed {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
}
|
||||
|
@ -410,7 +411,7 @@ func TestTx_DeleteBucket_ErrTxClosed(t *testing.T) {
|
|||
func TestTx_DeleteBucket_ReadOnly(t *testing.T) {
|
||||
db := btesting.MustCreateDB(t)
|
||||
if err := db.View(func(tx *bolt.Tx) error {
|
||||
if err := tx.DeleteBucket([]byte("foo")); err != bolt.ErrTxNotWritable {
|
||||
if err := tx.DeleteBucket([]byte("foo")); err != common.ErrTxNotWritable {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
return nil
|
||||
|
@ -423,7 +424,7 @@ func TestTx_DeleteBucket_ReadOnly(t *testing.T) {
|
|||
func TestTx_DeleteBucket_NotFound(t *testing.T) {
|
||||
db := btesting.MustCreateDB(t)
|
||||
if err := db.Update(func(tx *bolt.Tx) error {
|
||||
if err := tx.DeleteBucket([]byte("widgets")); err != bolt.ErrBucketNotFound {
|
||||
if err := tx.DeleteBucket([]byte("widgets")); err != common.ErrBucketNotFound {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
return nil
|
||||
|
|
39
unsafe.go
39
unsafe.go
|
@ -1,39 +0,0 @@
|
|||
package bbolt
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// unsafeAdd returns the pointer obtained by advancing base by offset bytes.
//
// The pointer-to-uintptr conversion, the addition, and the conversion back are
// deliberately kept in one expression, with the pointer operand on the left.
func unsafeAdd(base unsafe.Pointer, offset uintptr) unsafe.Pointer {
	return unsafe.Pointer(uintptr(base) + offset)
}
|
||||
|
||||
// unsafeIndex returns the address base + offset + n*elemsz, i.e. the n-th
// element of size elemsz in a sequence that begins offset bytes past base.
func unsafeIndex(base unsafe.Pointer, offset uintptr, elemsz uintptr, n int) unsafe.Pointer {
	// Plain integer displacement from base; no pointer is involved yet.
	delta := offset + uintptr(n)*elemsz
	// The pointer round-trip through uintptr stays within a single expression.
	return unsafe.Pointer(uintptr(base) + delta)
}
|
||||
|
||||
func unsafeByteSlice(base unsafe.Pointer, offset uintptr, i, j int) []byte {
|
||||
// See: https://github.com/golang/go/wiki/cgo#turning-c-arrays-into-go-slices
|
||||
//
|
||||
// This memory is not allocated from C, but it is unmanaged by Go's
|
||||
// garbage collector and should behave similarly, and the compiler
|
||||
// should produce similar code. Note that this conversion allows a
|
||||
// subslice to begin after the base address, with an optional offset,
|
||||
// while the URL above does not cover this case and only slices from
|
||||
// index 0. However, the wiki never says that the address must be to
|
||||
// the beginning of a C allocation (or even that malloc was used at
|
||||
// all), so this is believed to be correct.
|
||||
return (*[maxAllocSize]byte)(unsafeAdd(base, offset))[i:j:j]
|
||||
}
|
||||
|
||||
// unsafeSlice overwrites the header of the slice variable that the slice
// parameter points to: its data pointer becomes data, and both its capacity
// and length become len.
//
// Prefer this helper to manipulating reflect.SliceHeader directly; it avoids
// the misuse of converting a reflect.SliceHeader into a Go slice type.
func unsafeSlice(slice, data unsafe.Pointer, len int) {
	hdr := (*reflect.SliceHeader)(slice)
	hdr.Data = uintptr(data)
	hdr.Cap = len
	hdr.Len = len
}
|
Loading…
Reference in New Issue