create a common package

Points:
1. There are lots of duplicated definitions between bolt and
   guts_cli, which is definitely not good.
2. The implementation in guts_cli also has an issue; please
   refer to https://github.com/etcd-io/bbolt/issues/391.
   This refactoring fixes that issue.

Signed-off-by: Benjamin Wang <wachao@vmware.com>
pull/407/head
Benjamin Wang 2023-02-17 13:58:25 +08:00
parent 1273ac779e
commit 34595e7231
8 changed files with 839 additions and 0 deletions

54
internal/common/bucket.go Normal file
View File

@ -0,0 +1,54 @@
package common
import (
"fmt"
"unsafe"
)
// BucketHeaderSize is the in-memory size, in bytes, of the InBucket header as
// reported by unsafe.Sizeof.
const BucketHeaderSize = int(unsafe.Sizeof(InBucket{}))
// InBucket represents the on-file representation of a bucket.
// This is stored as the "value" of a bucket key. If the bucket is small enough,
// then its root page can be stored inline in the "value", after the bucket
// header. In the case of inline buckets, the "root" will be 0.
type InBucket struct {
root Pgid // page id of the bucket's root-level page
sequence uint64 // monotonically incrementing, used by NextSequence()
}
// NewInBucket returns an InBucket header whose root page id is root and whose
// sequence counter is seq.
func NewInBucket(root Pgid, seq uint64) InBucket {
	var hdr InBucket
	hdr.root = root
	hdr.sequence = seq
	return hdr
}
// RootPage returns the page id stored in the bucket header's root field.
func (b *InBucket) RootPage() Pgid {
return b.root
}
// SetRootPage overwrites the root page id stored in the bucket header.
func (b *InBucket) SetRootPage(id Pgid) {
b.root = id
}
// InSequence returns the sequence. The reason why not naming it `Sequence`
// is to avoid duplicated name as `(*Bucket) Sequence()`
func (b *InBucket) InSequence() uint64 {
return b.sequence
}
// SetInSequence overwrites the sequence counter with v.
func (b *InBucket) SetInSequence(v uint64) {
b.sequence = v
}
// IncSequence increments the sequence counter by one.
func (b *InBucket) IncSequence() {
b.sequence++
}
// InlinePage reinterprets the bytes of v that start at offset BucketHeaderSize
// as a Page and returns a pointer into v (no copy is made). The receiver is
// not read. Indexing panics if len(v) <= BucketHeaderSize.
func (b *InBucket) InlinePage(v []byte) *Page {
return (*Page)(unsafe.Pointer(&v[BucketHeaderSize]))
}
// String renders the bucket header as "<pgid=ROOT,seq=SEQUENCE>".
func (b *InBucket) String() string {
	root, seq := b.RootPage(), b.InSequence()
	return fmt.Sprintf("<pgid=%d,seq=%d>", root, seq)
}

78
internal/common/errors.go Normal file
View File

@ -0,0 +1,78 @@
package common
import "errors"
// These errors can be returned when opening or calling methods on a DB.
var (
// ErrDatabaseNotOpen is returned when a DB instance is accessed before it
// is opened or after it is closed.
ErrDatabaseNotOpen = errors.New("database not open")
// ErrDatabaseOpen is returned when opening a database that is
// already open.
ErrDatabaseOpen = errors.New("database already open")
// ErrInvalid is returned when both meta pages on a database are invalid.
// This typically occurs when a file is not a bolt database.
// Meta.Validate returns it when the magic marker does not match.
ErrInvalid = errors.New("invalid database")
// ErrInvalidMapping is returned when the database file fails to get mapped.
ErrInvalidMapping = errors.New("database isn't correctly mapped")
// ErrVersionMismatch is returned when the data file was created with a
// different version of Bolt.
ErrVersionMismatch = errors.New("version mismatch")
// ErrChecksum is returned when either meta page checksum does not match.
ErrChecksum = errors.New("checksum error")
// ErrTimeout is returned when a database cannot obtain an exclusive lock
// on the data file after the timeout passed to Open().
ErrTimeout = errors.New("timeout")
)
// These errors can occur when beginning or committing a Tx.
var (
// ErrTxNotWritable is returned when performing a write operation on a
// read-only transaction.
ErrTxNotWritable = errors.New("tx not writable")
// ErrTxClosed is returned when committing or rolling back a transaction
// that has already been committed or rolled back.
ErrTxClosed = errors.New("tx closed")
// ErrDatabaseReadOnly is returned when a mutating transaction is started on a
// read-only database.
ErrDatabaseReadOnly = errors.New("database is in read-only mode")
// ErrFreePagesNotLoaded is returned when a read-only transaction that did
// not preload the free pages tries to access them.
ErrFreePagesNotLoaded = errors.New("free pages are not pre-loaded")
)
// These errors can occur when putting or deleting a value or a bucket.
var (
// ErrBucketNotFound is returned when trying to access a bucket that has
// not been created yet.
ErrBucketNotFound = errors.New("bucket not found")
// ErrBucketExists is returned when creating a bucket that already exists.
ErrBucketExists = errors.New("bucket already exists")
// ErrBucketNameRequired is returned when creating a bucket with a blank name.
ErrBucketNameRequired = errors.New("bucket name required")
// ErrKeyRequired is returned when inserting a zero-length key.
ErrKeyRequired = errors.New("key required")
// ErrKeyTooLarge is returned when inserting a key that is larger than MaxKeySize.
ErrKeyTooLarge = errors.New("key too large")
// ErrValueTooLarge is returned when inserting a value that is larger than MaxValueSize.
ErrValueTooLarge = errors.New("value too large")
// ErrIncompatibleValue is returned when trying to create or delete a bucket
// on an existing non-bucket key or when trying to create or delete a
// non-bucket key on an existing bucket key.
ErrIncompatibleValue = errors.New("incompatible value")
)

147
internal/common/meta.go Normal file
View File

@ -0,0 +1,147 @@
package common
import (
"fmt"
"hash/fnv"
"io"
"unsafe"
)
// Meta is the metadata record that follows the page header on a meta page
// (see (*Page).Meta). Sum64 hashes every byte that precedes the checksum
// field, so the field order here is part of the checksum definition.
type Meta struct {
magic uint32 // marker compared against Magic by Validate
version uint32 // format version compared against Version by Validate
pageSize uint32 // page size in bytes
flags uint32
root InBucket // header of the root bucket
freelist Pgid // page id of the freelist, or PgidNoFreelist
pgid Pgid // high water mark: root and freelist page ids must be below it
txid Txid // transaction id; its parity selects meta page 0 or 1 in Write
checksum uint64 // FNV-1a hash of the preceding fields, see Sum64
}
// Validate checks, in order, the magic marker, the format version and the
// checksum of the meta against this binary. It returns ErrInvalid,
// ErrVersionMismatch or ErrChecksum for the first mismatch found, and nil
// when all three match.
func (m *Meta) Validate() error {
	switch {
	case m.magic != Magic:
		return ErrInvalid
	case m.version != Version:
		return ErrVersionMismatch
	case m.checksum != m.Sum64():
		return ErrChecksum
	default:
		return nil
	}
}
// Copy copies the receiver's fields, by value, into dest.
func (m *Meta) Copy(dest *Meta) {
*dest = *m
}
// Write stores the meta on page p: it sets the page id from the transaction
// id's parity, marks the page as a meta page, refreshes the checksum and
// copies the meta into the page's metadata area.
//
// It panics if the root bucket page id or the freelist page id (unless it is
// PgidNoFreelist) is not below the high water mark.
func (m *Meta) Write(p *Page) {
	switch {
	case m.root.root >= m.pgid:
		panic(fmt.Sprintf("root bucket pgid (%d) above high water mark (%d)", m.root.root, m.pgid))
	case m.freelist != PgidNoFreelist && m.freelist >= m.pgid:
		// TODO: reject PgidNoFreelist if !NoFreelistSync
		panic(fmt.Sprintf("freelist pgid (%d) above high water mark (%d)", m.freelist, m.pgid))
	}

	// The transaction id's parity decides whether this is meta page 0 or 1.
	p.id = Pgid(m.txid % 2)
	p.flags |= MetaPageFlag

	// The checksum must be up to date before the meta is copied out.
	m.checksum = m.Sum64()
	*p.Meta() = *m
}
// Sum64 returns the 64-bit FNV-1a hash of the in-memory bytes of the meta that
// precede the checksum field.
func (m *Meta) Sum64() uint64 {
	// Number of bytes covered by the checksum: everything before the field.
	const covered = unsafe.Offsetof(Meta{}.checksum)

	prefix := (*[covered]byte)(unsafe.Pointer(m))[:]
	hasher := fnv.New64a()
	_, _ = hasher.Write(prefix)
	return hasher.Sum64()
}
// Magic returns the stored magic marker.
func (m *Meta) Magic() uint32 {
return m.magic
}
// SetMagic overwrites the stored magic marker.
func (m *Meta) SetMagic(v uint32) {
m.magic = v
}
// SetVersion overwrites the stored format version.
func (m *Meta) SetVersion(v uint32) {
m.version = v
}
// PageSize returns the stored page size.
func (m *Meta) PageSize() uint32 {
return m.pageSize
}
// SetPageSize overwrites the stored page size.
func (m *Meta) SetPageSize(v uint32) {
m.pageSize = v
}
// Flags returns the stored flags.
func (m *Meta) Flags() uint32 {
return m.flags
}
// SetFlags overwrites the stored flags.
func (m *Meta) SetFlags(v uint32) {
m.flags = v
}
// SetRootBucket replaces the root bucket header with a copy of b.
func (m *Meta) SetRootBucket(b InBucket) {
m.root = b
}
// RootBucket returns a pointer to the root bucket header embedded in the
// meta; writes through the pointer modify the meta itself.
func (m *Meta) RootBucket() *InBucket {
return &m.root
}
// Freelist returns the stored freelist page id.
func (m *Meta) Freelist() Pgid {
return m.freelist
}
// SetFreelist overwrites the stored freelist page id.
func (m *Meta) SetFreelist(v Pgid) {
m.freelist = v
}
// Pgid returns the stored high water mark page id.
func (m *Meta) Pgid() Pgid {
return m.pgid
}
// SetPgid overwrites the stored high water mark page id.
func (m *Meta) SetPgid(id Pgid) {
m.pgid = id
}
// Txid returns the stored transaction id.
func (m *Meta) Txid() Txid {
return m.txid
}
// SetTxid overwrites the stored transaction id.
func (m *Meta) SetTxid(id Txid) {
m.txid = id
}
// IncTxid increments the stored transaction id by one.
func (m *Meta) IncTxid() {
m.txid += 1
}
// DecTxid decrements the stored transaction id by one.
func (m *Meta) DecTxid() {
m.txid -= 1
}
// SetChecksum overwrites the stored checksum without recomputing it.
func (m *Meta) SetChecksum(v uint64) {
m.checksum = v
}
// Print writes a human-readable dump of the meta to w, one field per line,
// followed by a blank line. Write errors are ignored.
func (m *Meta) Print(w io.Writer) {
	rows := []struct {
		format string
		value  interface{}
	}{
		{"Version: %d\n", m.version},
		{"Page Size: %d bytes\n", m.pageSize},
		{"Flags: %08x\n", m.flags},
		{"Root: <pgid=%d>\n", m.root.root},
		{"Freelist: <pgid=%d>\n", m.freelist},
		{"HWM: <pgid=%d>\n", m.pgid},
		{"Txn ID: %d\n", m.txid},
		{"Checksum: %016x\n", m.checksum},
	}
	for _, row := range rows {
		fmt.Fprintf(w, row.format, row.value)
	}
	fmt.Fprintf(w, "\n")
}

374
internal/common/page.go Normal file
View File

@ -0,0 +1,374 @@
package common
import (
"fmt"
"os"
"sort"
"unsafe"
)
// PageHeaderSize is the size in bytes of the Page header struct.
const PageHeaderSize = unsafe.Sizeof(Page{})
// MinKeysPerPage is not referenced in this package's visible code.
// NOTE(review): presumably the minimum number of keys a page must hold — confirm against callers.
const MinKeysPerPage = 2
// BranchPageElementSize is the size in bytes of one branchPageElement.
const BranchPageElementSize = unsafe.Sizeof(branchPageElement{})
// LeafPageElementSize is the size in bytes of one leafPageElement.
const LeafPageElementSize = unsafe.Sizeof(leafPageElement{})
// Page type flags stored in Page.flags; see (*Page).Typ and (*Page).FastCheck.
const (
BranchPageFlag = 0x01
LeafPageFlag = 0x02
MetaPageFlag = 0x04
FreelistPageFlag = 0x10
)
// Leaf element flags stored in leafPageElement.flags.
const (
// BucketLeafFlag marks a leaf element whose value is a bucket header;
// see (*leafPageElement).IsBucketEntry.
BucketLeafFlag = 0x01
)
// Pgid is a page identifier.
type Pgid uint64
// Page is the fixed-size header found at the start of every page. The page's
// payload (meta record, elements or freelist ids) follows it in memory.
type Page struct {
id Pgid // page id
flags uint16 // page type flag(s), e.g. BranchPageFlag
count uint16 // element count; 0xFFFF on a freelist page means the real count is the first element
overflow uint32 // NOTE(review): presumably the number of extra contiguous pages — confirm
}
// NewPage allocates a Page header populated with the given id, flags, element
// count and overflow value.
func NewPage(id Pgid, flags, count uint16, overflow uint32) *Page {
	p := new(Page)
	p.id = id
	p.flags = flags
	p.count = count
	p.overflow = overflow
	return p
}
// Typ returns a human-readable page type string used for debugging. The flag
// bits are tested in the order branch, leaf, meta, freelist and the first one
// that is set wins; if none is set the result is "unknown<FLAGS>" with the
// flags in hex.
func (p *Page) Typ() string {
	switch {
	case p.flags&BranchPageFlag != 0:
		return "branch"
	case p.flags&LeafPageFlag != 0:
		return "leaf"
	case p.flags&MetaPageFlag != 0:
		return "meta"
	case p.flags&FreelistPageFlag != 0:
		return "freelist"
	default:
		return fmt.Sprintf("unknown<%02x>", p.flags)
	}
}
// Meta returns a pointer to the metadata section of the page, i.e. the memory
// located immediately after the page header.
func (p *Page) Meta() *Meta {
	payload := UnsafeAdd(unsafe.Pointer(p), PageHeaderSize)
	return (*Meta)(payload)
}
// FastCheck asserts that the page identifies itself as id and that its flags
// equal exactly one of the known page-type flags. It panics (via Assert)
// otherwise.
func (p *Page) FastCheck(id Pgid) {
	Assert(p.id == id, "Page expected to be: %v, but self identifies as %v", id, p.id)

	// Only one flag of page-type can be set.
	knownType := false
	switch p.flags {
	case BranchPageFlag, LeafPageFlag, MetaPageFlag, FreelistPageFlag:
		knownType = true
	}
	Assert(knownType, "page %v: has unexpected type/flags: %x", p.id, p.flags)
}
// LeafPageElement returns a pointer to the index-th leaf element stored after
// the page header. No bounds check against the page's count is performed.
func (p *Page) LeafPageElement(index uint16) *leafPageElement {
	elem := UnsafeIndex(unsafe.Pointer(p), PageHeaderSize, LeafPageElementSize, int(index))
	return (*leafPageElement)(elem)
}
// LeafPageElements returns the page's leaf elements as a slice that aliases
// the page memory directly after the header. It returns nil when the page's
// count is zero.
func (p *Page) LeafPageElements() []leafPageElement {
	n := int(p.count)
	if n == 0 {
		return nil
	}
	var elems []leafPageElement
	first := UnsafeAdd(unsafe.Pointer(p), PageHeaderSize)
	UnsafeSlice(unsafe.Pointer(&elems), first, n)
	return elems
}
// BranchPageElement returns a pointer to the index-th branch element stored
// after the page header. No bounds check against the page's count is
// performed.
func (p *Page) BranchPageElement(index uint16) *branchPageElement {
	elem := UnsafeIndex(unsafe.Pointer(p), PageHeaderSize, BranchPageElementSize, int(index))
	return (*branchPageElement)(elem)
}
// BranchPageElements returns the page's branch elements as a slice that
// aliases the page memory directly after the header. It returns nil when the
// page's count is zero.
func (p *Page) BranchPageElements() []branchPageElement {
	n := int(p.count)
	if n == 0 {
		return nil
	}
	var elems []branchPageElement
	first := UnsafeAdd(unsafe.Pointer(p), PageHeaderSize)
	UnsafeSlice(unsafe.Pointer(&elems), first, n)
	return elems
}
// FreelistPageCount reports where the page ids start on a freelist page and
// how many there are. The first result is the index of the first id element
// (0 normally, 1 when the count is stored as the leading element); the second
// is the number of ids.
//
// It panics if the page is not a freelist page, or if the stored leading
// count does not fit in an int.
func (p *Page) FreelistPageCount() (int, int) {
	// Pass the format and its argument straight to Assert so that nothing is
	// formatted (or allocated) unless the assertion actually fails.
	Assert(p.flags == FreelistPageFlag, "can't get freelist page count from a non-freelist page: %2x", p.flags)

	// If the page.count is at the max uint16 value (64k) then it's considered
	// an overflow and the size of the freelist is stored as the first element.
	var idx, count = 0, int(p.count)
	if count == 0xFFFF {
		idx = 1
		c := *(*Pgid)(UnsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p)))
		count = int(c)
		if count < 0 {
			panic(fmt.Sprintf("leading element count %d overflows int", c))
		}
	}
	return idx, count
}
// FreelistPageIds returns the page ids stored on a freelist page as a slice
// that aliases the page memory. It returns nil when the freelist is empty and
// panics if the page is not a freelist page.
func (p *Page) FreelistPageIds() []Pgid {
	// Pass the format and its argument straight to Assert so that nothing is
	// formatted (or allocated) unless the assertion actually fails.
	Assert(p.flags == FreelistPageFlag, "can't get freelist page IDs from a non-freelist page: %2x", p.flags)

	idx, count := p.FreelistPageCount()
	if count == 0 {
		return nil
	}

	var ids []Pgid
	// Skip the leading count element, if any, to reach the first id.
	data := UnsafeIndex(unsafe.Pointer(p), unsafe.Sizeof(*p), unsafe.Sizeof(ids[0]), idx)
	UnsafeSlice(unsafe.Pointer(&ids), data, count)
	return ids
}
// hexdump writes the first n bytes of the page, starting at the page header,
// to STDERR as one line of hex output.
func (p *Page) hexdump(n int) {
	raw := UnsafeByteSlice(unsafe.Pointer(p), 0, 0, n)
	fmt.Fprintf(os.Stderr, "%x\n", raw)
}
// Id returns the page id stored in the header.
func (p *Page) Id() Pgid {
return p.id
}
// SetId overwrites the page id stored in the header.
func (p *Page) SetId(target Pgid) {
p.id = target
}
// Flags returns the page flags stored in the header.
func (p *Page) Flags() uint16 {
return p.flags
}
// SetFlags replaces the page flags stored in the header with v.
func (p *Page) SetFlags(v uint16) {
p.flags = v
}
// FlagsXOR sets the bits of v in the page flags using a bitwise OR.
// NOTE(review): despite the name, this does not XOR; bits already set are
// never cleared. Confirm with callers before changing either the name or the
// operator.
func (p *Page) FlagsXOR(v uint16) {
p.flags |= v
}
// Count returns the element count stored in the header.
func (p *Page) Count() uint16 {
return p.count
}
// SetCount overwrites the element count stored in the header.
func (p *Page) SetCount(target uint16) {
p.count = target
}
// Overflow returns the overflow value stored in the header.
func (p *Page) Overflow() uint32 {
return p.overflow
}
// SetOverflow overwrites the overflow value stored in the header.
func (p *Page) SetOverflow(target uint32) {
p.overflow = target
}
// String summarizes the page header: id, type (see Typ), element count and
// overflow value.
func (p *Page) String() string {
	kind := p.Typ()
	return fmt.Sprintf("ID: %d, Type: %s, count: %d, overflow: %d", p.id, kind, p.count, p.overflow)
}
// Pages is a slice of page pointers that implements sort.Interface, ordering
// pages by ascending page id.
type Pages []*Page
func (s Pages) Len() int { return len(s) }
func (s Pages) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
func (s Pages) Less(i, j int) bool { return s[i].id < s[j].id }
// branchPageElement represents a node on a branch page.
type branchPageElement struct {
pos uint32 // byte offset of the key, measured from the address of this element (see Key)
ksize uint32 // key length in bytes
pgid Pgid // page id carried by the element; NOTE(review): presumably the child page — confirm
}
// Pos returns the key offset stored in the element.
func (n *branchPageElement) Pos() uint32 {
return n.pos
}
// SetPos overwrites the key offset stored in the element.
func (n *branchPageElement) SetPos(v uint32) {
n.pos = v
}
// Ksize returns the key size stored in the element.
func (n *branchPageElement) Ksize() uint32 {
return n.ksize
}
// SetKsize overwrites the key size stored in the element.
func (n *branchPageElement) SetKsize(v uint32) {
n.ksize = v
}
// Pgid returns the page id stored in the element.
func (n *branchPageElement) Pgid() Pgid {
return n.pgid
}
// SetPgid overwrites the page id stored in the element.
func (n *branchPageElement) SetPgid(v Pgid) {
n.pgid = v
}
// Key returns the node key as a byte slice that aliases the page memory. The
// key occupies ksize bytes starting pos bytes past the address of the element
// itself.
func (n *branchPageElement) Key() []byte {
	start := int(n.pos)
	end := start + int(n.ksize)
	return UnsafeByteSlice(unsafe.Pointer(n), 0, start, end)
}
// leafPageElement represents a node on a leaf page.
type leafPageElement struct {
flags uint32 // element flags; BucketLeafFlag marks a bucket entry
pos uint32 // byte offset of the key, measured from the address of this element (see Key)
ksize uint32 // key length in bytes
vsize uint32 // value length in bytes; the value directly follows the key (see Value)
}
// NewLeafPageElement allocates a leaf element populated with the given flags,
// key offset, key size and value size.
func NewLeafPageElement(flags, pos, ksize, vsize uint32) *leafPageElement {
	elem := new(leafPageElement)
	elem.flags = flags
	elem.pos = pos
	elem.ksize = ksize
	elem.vsize = vsize
	return elem
}
// Flags returns the element flags.
func (n *leafPageElement) Flags() uint32 {
return n.flags
}
// SetFlags replaces the element flags with v.
func (n *leafPageElement) SetFlags(v uint32) {
n.flags = v
}
// Pos returns the key offset stored in the element.
func (n *leafPageElement) Pos() uint32 {
return n.pos
}
// SetPos overwrites the key offset stored in the element.
func (n *leafPageElement) SetPos(v uint32) {
n.pos = v
}
// Ksize returns the key size stored in the element.
func (n *leafPageElement) Ksize() uint32 {
return n.ksize
}
// SetKsize overwrites the key size stored in the element.
func (n *leafPageElement) SetKsize(v uint32) {
n.ksize = v
}
// Vsize returns the value size stored in the element.
func (n *leafPageElement) Vsize() uint32 {
return n.vsize
}
// SetVsize overwrites the value size stored in the element.
func (n *leafPageElement) SetVsize(v uint32) {
n.vsize = v
}
// Key returns the node key as a byte slice that aliases the page memory. The
// key occupies ksize bytes starting pos bytes past the address of the element
// itself.
func (n *leafPageElement) Key() []byte {
	return UnsafeByteSlice(unsafe.Pointer(n), 0, int(n.pos), int(n.pos)+int(n.ksize))
}
// Value returns the node value as a byte slice that aliases the page memory.
// The value occupies vsize bytes and starts immediately after the key.
func (n *leafPageElement) Value() []byte {
	start := int(n.pos) + int(n.ksize)
	return UnsafeByteSlice(unsafe.Pointer(n), 0, start, start+int(n.vsize))
}
// IsBucketEntry reports whether the BucketLeafFlag bit is set in the
// element's flags.
func (n *leafPageElement) IsBucketEntry() bool {
	const mask = uint32(BucketLeafFlag)
	return (n.flags & mask) != 0
}
// Bucket interprets the element's value as a bucket header and returns a
// pointer to it. It returns nil when the element is not a bucket entry.
func (n *leafPageElement) Bucket() *InBucket {
	if !n.IsBucketEntry() {
		return nil
	}
	return LoadBucket(n.Value())
}
// PageInfo represents human readable information about a page.
// It is a plain value type; nothing in this file populates it.
type PageInfo struct {
ID int
Type string
Count int
OverflowCount int
}
// Pgids is a slice of page ids that implements sort.Interface in ascending
// order.
type Pgids []Pgid
func (s Pgids) Len() int { return len(s) }
func (s Pgids) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
func (s Pgids) Less(i, j int) bool { return s[i] < s[j] }
// Merge returns the sorted union of a and b. Both inputs are expected to be
// sorted already. When either input is empty the other one is returned as-is
// (not copied); otherwise a freshly allocated slice is returned.
func (a Pgids) Merge(b Pgids) Pgids {
	switch {
	case len(a) == 0:
		return b
	case len(b) == 0:
		return a
	}
	out := make(Pgids, len(a)+len(b))
	Mergepgids(out, a, b)
	return out
}
// Mergepgids copies the sorted union of a and b into dst. Both a and b are
// expected to be sorted. If dst is too small, it panics.
func Mergepgids(dst, a, b Pgids) {
	if need := len(a) + len(b); len(dst) < need {
		panic(fmt.Errorf("mergepgids bad len %d < %d + %d", len(dst), len(a), len(b)))
	}

	// With one side empty the result is just a copy of the other side.
	switch {
	case len(a) == 0:
		copy(dst, b)
		return
	case len(b) == 0:
		copy(dst, a)
		return
	}

	// out accumulates the merged ids in dst's backing array.
	out := dst[:0]

	// front is the list whose head is currently the smaller one, back the other.
	front, back := a, b
	if b[0] < a[0] {
		front, back = b, a
	}

	for len(front) > 0 {
		// Emit the longest prefix of front that does not exceed back's head.
		pivot := back[0]
		cut := sort.Search(len(front), func(i int) bool { return front[i] > pivot })
		out = append(out, front[:cut]...)
		if cut >= len(front) {
			break
		}

		// The remainder of front now trails back, so the roles swap.
		front, back = back, front[cut:]
	}

	// Whatever is left in back is larger than everything emitted so far.
	_ = append(out, back...)
}

View File

@ -0,0 +1,72 @@
package common
import (
"reflect"
"sort"
"testing"
"testing/quick"
)
// Ensure that the page type can be returned in human readable format.
func TestPage_typ(t *testing.T) {
if typ := (&Page{flags: BranchPageFlag}).Typ(); typ != "branch" {
t.Fatalf("exp=branch; got=%v", typ)
}
if typ := (&Page{flags: LeafPageFlag}).Typ(); typ != "leaf" {
t.Fatalf("exp=leaf; got=%v", typ)
}
if typ := (&Page{flags: MetaPageFlag}).Typ(); typ != "meta" {
t.Fatalf("exp=meta; got=%v", typ)
}
if typ := (&Page{flags: FreelistPageFlag}).Typ(); typ != "freelist" {
t.Fatalf("exp=freelist; got=%v", typ)
}
// 20000 == 0x4e20 has none of the four known page-type bits set, so Typ
// must fall through to the "unknown" rendering.
if typ := (&Page{flags: 20000}).Typ(); typ != "unknown<4e20>" {
t.Fatalf("exp=unknown<4e20>; got=%v", typ)
}
}
// Ensure that the hexdump debugging function doesn't blow up.
// Only the absence of a panic is checked; the output written to STDERR is not
// inspected. 16 is the number of bytes dumped.
func TestPage_dump(t *testing.T) {
(&Page{id: 256}).hexdump(16)
}
// Ensure that merging two sorted, disjoint id lists yields their sorted union.
func TestPgids_merge(t *testing.T) {
// Case 1: b starts with the smaller id and ends with the larger one.
a := Pgids{4, 5, 6, 10, 11, 12, 13, 27}
b := Pgids{1, 3, 8, 9, 25, 30}
c := a.Merge(b)
if !reflect.DeepEqual(c, Pgids{1, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 25, 27, 30}) {
t.Errorf("mismatch: %v", c)
}
// Case 2: a both starts with the smaller id and ends with the larger ones.
a = Pgids{4, 5, 6, 10, 11, 12, 13, 27, 35, 36}
b = Pgids{8, 9, 25, 30}
c = a.Merge(b)
if !reflect.DeepEqual(c, Pgids{4, 5, 6, 8, 9, 10, 11, 12, 13, 25, 27, 30, 35, 36}) {
t.Errorf("mismatch: %v", c)
}
}
// Property test: for randomly generated id lists, Merge must agree with
// concatenating the two lists and sorting the result.
func TestPgids_merge_quick(t *testing.T) {
if err := quick.Check(func(a, b Pgids) bool {
// Sort incoming lists.
sort.Sort(a)
sort.Sort(b)
// Merge the two lists together.
got := a.Merge(b)
// The expected value should be the two lists combined and sorted.
exp := append(a, b...)
sort.Sort(exp)
if !reflect.DeepEqual(exp, got) {
t.Errorf("\nexp=%+v\ngot=%+v\n", exp, got)
return false
}
return true
}, nil); err != nil {
t.Fatal(err)
}
}

50
internal/common/types.go Normal file
View File

@ -0,0 +1,50 @@
package common
import (
"os"
"runtime"
"time"
)
// MaxMmapStep is the largest step that can be taken when remapping the mmap.
const MaxMmapStep = 1 << 30 // 1GB
// Version represents the data file format version.
const Version = 2
// Magic represents a marker value to indicate that a file is a Bolt DB.
const Magic uint32 = 0xED0CDAED
// PgidNoFreelist is the sentinel freelist page id (all bits set). Meta.Write
// exempts a freelist id equal to it from the high-water-mark check.
const PgidNoFreelist Pgid = 0xffffffffffffffff
// DO NOT EDIT. Copied from the "bolt" package.
// pageMaxAllocSize is the length of the array type that UnsafeByteSlice casts
// to, and therefore the upper bound for its slice indices.
const pageMaxAllocSize = 0xFFFFFFF
// IgnoreNoSync specifies whether the NoSync field of a DB is ignored when
// syncing changes to a file. This is required as some operating systems,
// such as OpenBSD, do not have a unified buffer cache (UBC) and writes
// must be synchronized using the msync(2) syscall.
const IgnoreNoSync = runtime.GOOS == "openbsd"
// Default values if not set in a DB instance.
const (
DefaultMaxBatchSize int = 1000
DefaultMaxBatchDelay = 10 * time.Millisecond
DefaultAllocSize = 16 * 1024 * 1024
)
// DefaultPageSize is the default page size for db which is set to the OS page size.
var DefaultPageSize = os.Getpagesize()
// FreelistType is the type of the freelist backend
type FreelistType string
const (
// FreelistArrayType indicates backend freelist type is array
FreelistArrayType = FreelistType("array")
// FreelistMapType indicates backend freelist type is hashmap
FreelistMapType = FreelistType("hashmap")
)
// Txid represents the internal transaction identifier.
type Txid uint64

39
internal/common/unsafe.go Normal file
View File

@ -0,0 +1,39 @@
package common
import (
"reflect"
"unsafe"
)
// UnsafeAdd returns the pointer located offset bytes past base.
// The conversion to uintptr, the addition and the conversion back happen in a
// single expression, which is the form the unsafe.Pointer rules require for
// pointer arithmetic.
func UnsafeAdd(base unsafe.Pointer, offset uintptr) unsafe.Pointer {
return unsafe.Pointer(uintptr(base) + offset)
}
// UnsafeIndex returns the address of the n-th element of an array of elemsz-byte
// elements that begins offset bytes past base, i.e. base + offset + n*elemsz.
// No bounds checking is performed.
func UnsafeIndex(base unsafe.Pointer, offset uintptr, elemsz uintptr, n int) unsafe.Pointer {
return unsafe.Pointer(uintptr(base) + offset + uintptr(n)*elemsz)
}
// UnsafeByteSlice returns the bytes [i, j) of the memory that starts offset
// bytes past base, as a slice with both length and capacity j-i. The slice
// aliases that memory; nothing is copied. The slice expression panics if the
// indices are out of order or j exceeds pageMaxAllocSize.
func UnsafeByteSlice(base unsafe.Pointer, offset uintptr, i, j int) []byte {
// See: https://github.com/golang/go/wiki/cgo#turning-c-arrays-into-go-slices
//
// This memory is not allocated from C, but it is unmanaged by Go's
// garbage collector and should behave similarly, and the compiler
// should produce similar code. Note that this conversion allows a
// subslice to begin after the base address, with an optional offset,
// while the URL above does not cover this case and only slices from
// index 0. However, the wiki never says that the address must be to
// the beginning of a C allocation (or even that malloc was used at
// all), so this is believed to be correct.
return (*[pageMaxAllocSize]byte)(UnsafeAdd(base, offset))[i:j:j]
}
// UnsafeSlice rewrites the slice variable that slice points to so that it
// refers to the memory at data, with both length and capacity set to len.
// Prefer this helper over ad-hoc manipulation of reflect.SliceHeader: it only
// ever writes into an existing slice variable and never converts a
// reflect.SliceHeader value into a Go slice.
func UnsafeSlice(slice, data unsafe.Pointer, len int) {
	hdr := (*reflect.SliceHeader)(slice)
	hdr.Data, hdr.Len, hdr.Cap = uintptr(data), len, len
}

25
internal/common/utils.go Normal file
View File

@ -0,0 +1,25 @@
package common
import (
"fmt"
"unsafe"
)
// Assert panics when condition is false. The panic value is the string
// "assertion failed: " followed by msg formatted with v in fmt.Sprintf style.
// When condition is true nothing is formatted.
func Assert(condition bool, msg string, v ...interface{}) {
	if condition {
		return
	}
	text := fmt.Sprintf("assertion failed: "+msg, v...)
	panic(text)
}
// LoadBucket reinterprets the start of buf as an InBucket header and returns
// a pointer into buf (no copy). It panics if buf is empty.
func LoadBucket(buf []byte) *InBucket {
return (*InBucket)(unsafe.Pointer(&buf[0]))
}
// LoadPage reinterprets the start of buf as a Page header and returns a
// pointer into buf (no copy). It panics if buf is empty.
func LoadPage(buf []byte) *Page {
return (*Page)(unsafe.Pointer(&buf[0]))
}
// LoadPageMeta reinterprets the bytes of buf that follow the page header as a
// Meta and returns a pointer into buf (no copy). It panics if
// len(buf) <= PageHeaderSize.
func LoadPageMeta(buf []byte) *Meta {
return (*Meta)(unsafe.Pointer(&buf[PageHeaderSize]))
}