mirror of https://github.com/etcd-io/bbolt.git
Add RWTransaction.Put().
parent
b17d078d4f
commit
1a17a2cf1e
|
@ -0,0 +1,17 @@
|
||||||
|
|
||||||
|
|
||||||
|
===0===
|
||||||
|
| d|g |
|
||||||
|
|1|2|3|
|
||||||
|
=======
|
||||||
|
| | |
|
||||||
|
------ | -------
|
||||||
|
| | |
|
||||||
|
===1=== ===2=== ===3===
|
||||||
|
|a|b|c| |d|e|f| |g|h|i|
|
||||||
|
|-|-|-| |-|-|-| |-|-|-|
|
||||||
|
|*|*|*| |*|*|*| |*|*|*|
|
||||||
|
|*|*|*| |*|*|*| |*|*|*|
|
||||||
|
|*|*|*| |*|*|*| |*|*|*|
|
||||||
|
|
||||||
|
|
2
TODO
2
TODO
|
@ -2,7 +2,7 @@ TODO
|
||||||
====
|
====
|
||||||
X Open DB.
|
X Open DB.
|
||||||
X Initialize transaction.
|
X Initialize transaction.
|
||||||
- Cursor First, Goto(key), Next
|
- Cursor First, Get(key), Next
|
||||||
- RWTransaction.insert()
|
- RWTransaction.insert()
|
||||||
- rebalance
|
- rebalance
|
||||||
- adjust cursors
|
- adjust cursors
|
||||||
|
|
20
bnode.go
20
bnode.go
|
@ -1,20 +0,0 @@
|
||||||
package bolt
|
|
||||||
|
|
||||||
import (
|
|
||||||
"unsafe"
|
|
||||||
)
|
|
||||||
|
|
||||||
const bnodeSize = int(unsafe.Sizeof(lnode{}))
|
|
||||||
|
|
||||||
// bnode represents a node on a branch page.
|
|
||||||
type bnode struct {
|
|
||||||
pos uint32
|
|
||||||
ksize uint32
|
|
||||||
pgid pgid
|
|
||||||
}
|
|
||||||
|
|
||||||
// key returns a byte slice of the node key.
|
|
||||||
func (n *bnode) key() []byte {
|
|
||||||
buf := (*[maxAllocSize]byte)(unsafe.Pointer(n))
|
|
||||||
return buf[n.pos : n.pos+n.ksize]
|
|
||||||
}
|
|
129
branch.go
129
branch.go
|
@ -1,129 +0,0 @@
|
||||||
package bolt
|
|
||||||
|
|
||||||
import (
|
|
||||||
"bytes"
|
|
||||||
"unsafe"
|
|
||||||
)
|
|
||||||
|
|
||||||
// branch represents a temporary in-memory branch page.
|
|
||||||
type branch struct {
|
|
||||||
pgid pgid
|
|
||||||
depth int
|
|
||||||
parent *branch
|
|
||||||
items branchItems
|
|
||||||
}
|
|
||||||
|
|
||||||
// size returns the size of the branch after serialization.
|
|
||||||
func (b *branch) size() int {
|
|
||||||
var size int = pageHeaderSize
|
|
||||||
for _, item := range b.items {
|
|
||||||
size += bnodeSize + len(item.key)
|
|
||||||
}
|
|
||||||
return size
|
|
||||||
}
|
|
||||||
|
|
||||||
// put adds a new node or replaces an existing node.
|
|
||||||
func (b *branch) put(id pgid, newid pgid, key []byte, replace bool) {
|
|
||||||
var index int
|
|
||||||
for ; index < len(b.items); index++ {
|
|
||||||
if b.items[index].pgid == id {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if !replace {
|
|
||||||
index++
|
|
||||||
b.items = append(b.items, branchItem{})
|
|
||||||
if index < len(b.items) {
|
|
||||||
copy(b.items[index+1:], b.items[index:])
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
b.items[index].pgid = newid
|
|
||||||
b.items[index].key = key
|
|
||||||
}
|
|
||||||
|
|
||||||
// read initializes the item data from an on-disk page.
|
|
||||||
func (b *branch) read(p *page) {
|
|
||||||
b.pgid = p.id
|
|
||||||
b.items = make(branchItems, int(p.count))
|
|
||||||
bnodes := (*[maxNodesPerPage]bnode)(unsafe.Pointer(&p.ptr))
|
|
||||||
for i := 0; i < int(p.count); i++ {
|
|
||||||
bnode := &bnodes[i]
|
|
||||||
item := &b.items[i]
|
|
||||||
item.pgid = bnode.pgid
|
|
||||||
item.key = bnode.key()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// write writes the items onto a branch page.
|
|
||||||
func (b *branch) write(p *page) {
|
|
||||||
// Initialize page.
|
|
||||||
p.flags |= p_branch
|
|
||||||
p.count = uint16(len(b.items))
|
|
||||||
|
|
||||||
// Loop over each item and write it to the page.
|
|
||||||
bnodes := (*[maxNodesPerPage]bnode)(unsafe.Pointer(&p.ptr))
|
|
||||||
buf := (*[maxAllocSize]byte)(unsafe.Pointer(&p.ptr))[lnodeSize*len(b.items):]
|
|
||||||
for index, item := range b.items {
|
|
||||||
// Write node.
|
|
||||||
bnode := &bnodes[index]
|
|
||||||
bnode.pgid = item.pgid
|
|
||||||
bnode.pos = uint32(uintptr(unsafe.Pointer(&buf[0])) - uintptr(unsafe.Pointer(bnode)))
|
|
||||||
bnode.ksize = uint32(len(item.key))
|
|
||||||
|
|
||||||
// Write key to the end of the page.
|
|
||||||
copy(buf[0:], item.key)
|
|
||||||
buf = buf[len(item.key):]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// split divides up the nodes in the branch into appropriately sized groups.
|
|
||||||
func (b *branch) split(pageSize int) []*branch {
|
|
||||||
// Ignore the split if the page doesn't have at least enough nodes for
|
|
||||||
// multiple pages or if the data can fit on a single page.
|
|
||||||
if len(b.items) <= (minKeysPerPage*2) || b.size() < pageSize {
|
|
||||||
return []*branch{b}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Set fill threshold to 50%.
|
|
||||||
threshold := pageSize / 2
|
|
||||||
|
|
||||||
// Otherwise group into smaller pages and target a given fill size.
|
|
||||||
size := 0
|
|
||||||
current := &branch{}
|
|
||||||
branches := make([]*branch, 0)
|
|
||||||
|
|
||||||
for index, item := range b.items {
|
|
||||||
nodeSize := bnodeSize + len(item.key)
|
|
||||||
|
|
||||||
if len(current.items) >= minKeysPerPage && index < len(b.items)-minKeysPerPage && size+nodeSize > threshold {
|
|
||||||
size = pageHeaderSize
|
|
||||||
branches = append(branches, current)
|
|
||||||
current = &branch{}
|
|
||||||
}
|
|
||||||
|
|
||||||
size += nodeSize
|
|
||||||
current.items = append(current.items, item)
|
|
||||||
}
|
|
||||||
branches = append(branches, current)
|
|
||||||
|
|
||||||
return branches
|
|
||||||
}
|
|
||||||
|
|
||||||
type branches []*branch
|
|
||||||
|
|
||||||
func (s branches) Len() int { return len(s) }
|
|
||||||
func (s branches) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
|
|
||||||
func (s branches) Less(i, j int) bool { return s[i].depth < s[j].depth }
|
|
||||||
|
|
||||||
type branchItems []branchItem
|
|
||||||
|
|
||||||
type branchItem struct {
|
|
||||||
pgid pgid
|
|
||||||
key []byte
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s branchItems) Len() int { return len(s) }
|
|
||||||
func (s branchItems) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
|
|
||||||
func (s branchItems) Less(i, j int) bool { return bytes.Compare(s[i].key, s[j].key) == -1 }
|
|
161
branch_test.go
161
branch_test.go
|
@ -1,161 +0,0 @@
|
||||||
package bolt
|
|
||||||
|
|
||||||
import (
|
|
||||||
"testing"
|
|
||||||
"unsafe"
|
|
||||||
|
|
||||||
"github.com/stretchr/testify/assert"
|
|
||||||
)
|
|
||||||
|
|
||||||
// Ensure that a branch can replace a key.
|
|
||||||
func TestBranchPutReplace(t *testing.T) {
|
|
||||||
b := &branch{
|
|
||||||
items: branchItems{
|
|
||||||
branchItem{pgid: 1, key: []byte("bar")},
|
|
||||||
branchItem{pgid: 2, key: []byte("baz")},
|
|
||||||
branchItem{pgid: 3, key: []byte("foo")},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
b.put(1, 4, []byte("bar"), true)
|
|
||||||
b.put(2, 5, []byte("boo"), true)
|
|
||||||
assert.Equal(t, len(b.items), 3)
|
|
||||||
assert.Equal(t, b.items[0].pgid, pgid(4))
|
|
||||||
assert.Equal(t, string(b.items[0].key), "bar")
|
|
||||||
assert.Equal(t, b.items[1].pgid, pgid(5))
|
|
||||||
assert.Equal(t, string(b.items[1].key), "boo")
|
|
||||||
assert.Equal(t, b.items[2].pgid, pgid(3))
|
|
||||||
assert.Equal(t, string(b.items[2].key), "foo")
|
|
||||||
}
|
|
||||||
|
|
||||||
// Ensure that a branch can insert a key.
|
|
||||||
func TestBranchPutInsert(t *testing.T) {
|
|
||||||
b := &branch{
|
|
||||||
items: branchItems{
|
|
||||||
branchItem{pgid: 1, key: []byte("bar")},
|
|
||||||
branchItem{pgid: 2, key: []byte("foo")},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
b.put(1, 4, []byte("baz"), false)
|
|
||||||
b.put(2, 5, []byte("zzz"), false)
|
|
||||||
assert.Equal(t, len(b.items), 4)
|
|
||||||
assert.Equal(t, b.items[0].pgid, pgid(1))
|
|
||||||
assert.Equal(t, string(b.items[0].key), "bar")
|
|
||||||
assert.Equal(t, b.items[1].pgid, pgid(4))
|
|
||||||
assert.Equal(t, string(b.items[1].key), "baz")
|
|
||||||
assert.Equal(t, b.items[2].pgid, pgid(2))
|
|
||||||
assert.Equal(t, string(b.items[2].key), "foo")
|
|
||||||
assert.Equal(t, b.items[3].pgid, pgid(5))
|
|
||||||
assert.Equal(t, string(b.items[3].key), "zzz")
|
|
||||||
}
|
|
||||||
|
|
||||||
// Ensure that a branch can deserialize from a page.
|
|
||||||
func TestBranchRead(t *testing.T) {
|
|
||||||
// Create a page.
|
|
||||||
var buf [4096]byte
|
|
||||||
page := (*page)(unsafe.Pointer(&buf[0]))
|
|
||||||
page.count = 2
|
|
||||||
|
|
||||||
// Insert 2 items at the beginning. sizeof(bnode) == 16
|
|
||||||
nodes := (*[3]bnode)(unsafe.Pointer(&page.ptr))
|
|
||||||
nodes[0] = bnode{pos: 32, ksize: 3, pgid: 100} // pos = sizeof(bnode) * 2
|
|
||||||
nodes[1] = bnode{pos: 19, ksize: 10, pgid: 101} // pos = sizeof(bnode) + 3
|
|
||||||
|
|
||||||
// Write data for the nodes at the end.
|
|
||||||
data := (*[4096]byte)(unsafe.Pointer(&nodes[2]))
|
|
||||||
copy(data[:], []byte("bar"))
|
|
||||||
copy(data[3:], []byte("helloworld"))
|
|
||||||
|
|
||||||
// Deserialize page into a branch.
|
|
||||||
b := &branch{}
|
|
||||||
b.read(page)
|
|
||||||
|
|
||||||
// Check that there are two items with correct data.
|
|
||||||
assert.Equal(t, len(b.items), 2)
|
|
||||||
assert.Equal(t, b.items[0].key, []byte("bar"))
|
|
||||||
assert.Equal(t, b.items[1].key, []byte("helloworld"))
|
|
||||||
}
|
|
||||||
|
|
||||||
// Ensure that a branch can serialize itself.
|
|
||||||
func TestBranchWrite(t *testing.T) {
|
|
||||||
b := &branch{
|
|
||||||
items: branchItems{
|
|
||||||
branchItem{pgid: 1, key: []byte("susy")},
|
|
||||||
branchItem{pgid: 2, key: []byte("ricki")},
|
|
||||||
branchItem{pgid: 3, key: []byte("john")},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
// Write it to a page.
|
|
||||||
var buf [4096]byte
|
|
||||||
p := (*page)(unsafe.Pointer(&buf[0]))
|
|
||||||
b.write(p)
|
|
||||||
|
|
||||||
// Read the page back in.
|
|
||||||
b2 := &branch{}
|
|
||||||
b2.read(p)
|
|
||||||
|
|
||||||
// Check that the two pages are the same.
|
|
||||||
assert.Equal(t, len(b2.items), 3)
|
|
||||||
assert.Equal(t, b2.items[0].pgid, pgid(1))
|
|
||||||
assert.Equal(t, b2.items[0].key, []byte("susy"))
|
|
||||||
assert.Equal(t, b2.items[1].pgid, pgid(2))
|
|
||||||
assert.Equal(t, b2.items[1].key, []byte("ricki"))
|
|
||||||
assert.Equal(t, b2.items[2].pgid, pgid(3))
|
|
||||||
assert.Equal(t, b2.items[2].key, []byte("john"))
|
|
||||||
}
|
|
||||||
|
|
||||||
// Ensure that a branch can split into appropriate subgroups.
|
|
||||||
func TestBranchSplit(t *testing.T) {
|
|
||||||
// Create a branch.
|
|
||||||
b := &branch{
|
|
||||||
items: branchItems{
|
|
||||||
branchItem{pgid: 1, key: []byte("00000001")},
|
|
||||||
branchItem{pgid: 2, key: []byte("00000002")},
|
|
||||||
branchItem{pgid: 3, key: []byte("00000003")},
|
|
||||||
branchItem{pgid: 4, key: []byte("00000004")},
|
|
||||||
branchItem{pgid: 5, key: []byte("00000005")},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
// Split between 3 & 4.
|
|
||||||
branches := b.split(100)
|
|
||||||
|
|
||||||
assert.Equal(t, len(branches), 2)
|
|
||||||
assert.Equal(t, len(branches[0].items), 2)
|
|
||||||
assert.Equal(t, len(branches[1].items), 3)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Ensure that a branch with the minimum number of items just returns a single branch.
|
|
||||||
func TestBranchSplitWithMinKeys(t *testing.T) {
|
|
||||||
// Create a branch.
|
|
||||||
b := &branch{
|
|
||||||
items: branchItems{
|
|
||||||
branchItem{pgid: 1, key: []byte("00000001")},
|
|
||||||
branchItem{pgid: 2, key: []byte("00000002")},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
// Split.
|
|
||||||
branches := b.split(20)
|
|
||||||
assert.Equal(t, len(branches), 1)
|
|
||||||
assert.Equal(t, len(branches[0].items), 2)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Ensure that a branch that has keys that all fit on a page just returns one branch.
|
|
||||||
func TestBranchSplitFitsInPage(t *testing.T) {
|
|
||||||
// Create a branch.
|
|
||||||
b := &branch{
|
|
||||||
items: branchItems{
|
|
||||||
branchItem{pgid: 1, key: []byte("00000001")},
|
|
||||||
branchItem{pgid: 2, key: []byte("00000002")},
|
|
||||||
branchItem{pgid: 3, key: []byte("00000003")},
|
|
||||||
branchItem{pgid: 4, key: []byte("00000004")},
|
|
||||||
branchItem{pgid: 5, key: []byte("00000005")},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
// Split.
|
|
||||||
branches := b.split(4096)
|
|
||||||
assert.Equal(t, len(branches), 1)
|
|
||||||
assert.Equal(t, len(branches[0].items), 5)
|
|
||||||
}
|
|
|
@ -15,13 +15,8 @@ func (b *Bucket) Name() string {
|
||||||
return b.name
|
return b.name
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get retrieves the value for a key in the bucket.
|
// cursor creates a new cursor for this bucket.
|
||||||
func (b *Bucket) Get(key []byte) []byte {
|
func (b *Bucket) cursor() *Cursor {
|
||||||
return b.Cursor().Get(key)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Cursor creates a new cursor for this bucket.
|
|
||||||
func (b *Bucket) Cursor() *Cursor {
|
|
||||||
return &Cursor{
|
return &Cursor{
|
||||||
transaction: b.transaction,
|
transaction: b.transaction,
|
||||||
root: b.root,
|
root: b.root,
|
||||||
|
|
67
cursor.go
67
cursor.go
|
@ -14,7 +14,7 @@ type Cursor struct {
|
||||||
// elem represents a node on a page that's on the cursor's stack.
|
// elem represents a node on a page that's on the cursor's stack.
|
||||||
type elem struct {
|
type elem struct {
|
||||||
page *page
|
page *page
|
||||||
index int
|
index uint16
|
||||||
}
|
}
|
||||||
|
|
||||||
// First moves the cursor to the first item in the bucket and returns its key and data.
|
// First moves the cursor to the first item in the bucket and returns its key and data.
|
||||||
|
@ -30,25 +30,29 @@ func (c *Cursor) Next() ([]byte, []byte) {
|
||||||
|
|
||||||
// Get positions the cursor at a specific key and returns its value.
|
// Get positions the cursor at a specific key and returns its value.
|
||||||
func (c *Cursor) Get(key []byte) []byte {
|
func (c *Cursor) Get(key []byte) []byte {
|
||||||
if c.Goto(key) {
|
|
||||||
return c.node().value()
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Goto positions the cursor at a specific key.
|
|
||||||
// Returns true if an exact match or false if positioned after the closest match.
|
|
||||||
func (c *Cursor) Goto(key []byte) bool {
|
|
||||||
// TODO(benbjohnson): Optimize for specific use cases.
|
|
||||||
|
|
||||||
// Start from root page and traverse to correct page.
|
// Start from root page and traverse to correct page.
|
||||||
c.stack = c.stack[:0]
|
c.stack = c.stack[:0]
|
||||||
c.search(key, c.transaction.page(c.root))
|
c.search(key, c.transaction.page(c.root))
|
||||||
|
p, index := c.top()
|
||||||
|
|
||||||
return false
|
// If the cursor is pointing to the end of page then return nil.
|
||||||
|
if index == p.count {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// If our target node isn't the same key as what's passed in then return nil.
|
||||||
|
// c.page().hexdump(512)
|
||||||
|
if !bytes.Equal(key, c.node().key()) {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return c.node().value()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *Cursor) search(key []byte, p *page) {
|
func (c *Cursor) search(key []byte, p *page) {
|
||||||
|
if (p.flags & (p_branch | p_leaf)) == 0 {
|
||||||
|
panic("invalid page type: " + p.typ())
|
||||||
|
}
|
||||||
e := elem{page: p}
|
e := elem{page: p}
|
||||||
c.stack = append(c.stack, e)
|
c.stack = append(c.stack, e)
|
||||||
|
|
||||||
|
@ -58,12 +62,26 @@ func (c *Cursor) search(key []byte, p *page) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Binary search for the correct branch node.
|
// Binary search for the correct range.
|
||||||
nodes := p.bnodes()
|
inodes := p.branchPageElements()
|
||||||
e.index = sort.Search(int(p.count)-1, func(i int) bool { return bytes.Compare(nodes[i+1].key(), key) != -1 })
|
|
||||||
|
var exact bool
|
||||||
|
index := sort.Search(int(p.count), func(i int) bool {
|
||||||
|
// TODO(benbjohnson): Optimize this range search. It's a bit hacky right now.
|
||||||
|
// sort.Search() finds the lowest index where f() != -1 but we need the highest index.
|
||||||
|
ret := bytes.Compare(inodes[i].key(), key)
|
||||||
|
if ret == 0 {
|
||||||
|
exact = true
|
||||||
|
}
|
||||||
|
return ret != -1
|
||||||
|
})
|
||||||
|
if !exact && index > 0 {
|
||||||
|
index--
|
||||||
|
}
|
||||||
|
e.index = uint16(index)
|
||||||
|
|
||||||
// Recursively search to the next page.
|
// Recursively search to the next page.
|
||||||
c.search(key, c.transaction.page(nodes[e.index].pgid))
|
c.search(key, c.transaction.page(inodes[e.index].pgid))
|
||||||
}
|
}
|
||||||
|
|
||||||
// nsearch searches a leaf node for the index of the node that matches key.
|
// nsearch searches a leaf node for the index of the node that matches key.
|
||||||
|
@ -71,16 +89,17 @@ func (c *Cursor) nsearch(key []byte, p *page) {
|
||||||
e := &c.stack[len(c.stack)-1]
|
e := &c.stack[len(c.stack)-1]
|
||||||
|
|
||||||
// Binary search for the correct leaf node index.
|
// Binary search for the correct leaf node index.
|
||||||
nodes := p.lnodes()
|
inodes := p.leafPageElements()
|
||||||
e.index = sort.Search(int(p.count), func(i int) bool {
|
index := sort.Search(int(p.count), func(i int) bool {
|
||||||
return bytes.Compare(nodes[i].key(), key) != -1
|
return bytes.Compare(inodes[i].key(), key) != -1
|
||||||
})
|
})
|
||||||
|
e.index = uint16(index)
|
||||||
}
|
}
|
||||||
|
|
||||||
// top returns the page and leaf node that the cursor is currently pointing at.
|
// top returns the page and the element index that the cursor is currently pointing at.
|
||||||
func (c *Cursor) top() (*page, *lnode) {
|
func (c *Cursor) top() (*page, uint16) {
|
||||||
elem := c.stack[len(c.stack)-1]
|
elem := c.stack[len(c.stack)-1]
|
||||||
return elem.page, elem.page.lnode(elem.index)
|
return elem.page, elem.index
|
||||||
}
|
}
|
||||||
|
|
||||||
// page returns the page that the cursor is currently pointing at.
|
// page returns the page that the cursor is currently pointing at.
|
||||||
|
@ -89,7 +108,7 @@ func (c *Cursor) page() *page {
|
||||||
}
|
}
|
||||||
|
|
||||||
// node returns the leaf node that the cursor is currently positioned on.
|
// node returns the leaf page element that the cursor is currently positioned on.
|
||||||
func (c *Cursor) node() *lnode {
|
func (c *Cursor) node() *leafPageElement {
|
||||||
elem := c.stack[len(c.stack)-1]
|
elem := c.stack[len(c.stack)-1]
|
||||||
return elem.page.lnode(elem.index)
|
return elem.page.leafPageElement(elem.index)
|
||||||
}
|
}
|
||||||
|
|
46
db.go
46
db.go
|
@ -1,6 +1,7 @@
|
||||||
package bolt
|
package bolt
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"io"
|
||||||
"os"
|
"os"
|
||||||
"sync"
|
"sync"
|
||||||
"syscall"
|
"syscall"
|
||||||
|
@ -114,7 +115,7 @@ func (db *DB) mmap() error {
|
||||||
}
|
}
|
||||||
|
|
||||||
// TEMP(benbjohnson): Set max size to 2MB.
|
// TEMP(benbjohnson): Set max size to 2GB.
|
||||||
size := 2 << 20
|
size := 2 << 30
|
||||||
|
|
||||||
// Memory-map the data file as a byte slice.
|
// Memory-map the data file as a byte slice.
|
||||||
if db.data, err = db.syscall.Mmap(int(db.file.Fd()), 0, size, syscall.PROT_READ, syscall.MAP_SHARED); err != nil {
|
if db.data, err = db.syscall.Mmap(int(db.file.Fd()), 0, size, syscall.PROT_READ, syscall.MAP_SHARED); err != nil {
|
||||||
|
@ -224,10 +225,7 @@ func (db *DB) RWTransaction() (*RWTransaction, error) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create a transaction associated with the database.
|
// Create a transaction associated with the database.
|
||||||
t := &RWTransaction{
|
t := &RWTransaction{nodes: make(map[pgid]*node)}
|
||||||
branches: make(map[pgid]*branch),
|
|
||||||
leafs: make(map[pgid]*leaf),
|
|
||||||
}
|
|
||||||
t.init(db)
|
t.init(db)
|
||||||
|
|
||||||
return t, nil
|
return t, nil
|
||||||
|
@ -319,6 +317,44 @@ func (db *DB) Delete(name string, key []byte) error {
|
||||||
return t.Commit()
|
return t.Commit()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Copy writes the entire database to a writer.
|
||||||
|
func (db *DB) Copy(w io.Writer) error {
|
||||||
|
if !db.opened {
|
||||||
|
return DatabaseNotOpenError
|
||||||
|
}
|
||||||
|
|
||||||
|
// Maintain a reader transaction so pages don't get reclaimed.
|
||||||
|
t, err := db.Transaction()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer t.Close()
|
||||||
|
|
||||||
|
// Open reader on the database.
|
||||||
|
f, err := os.Open(db.path)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
|
// Copy everything.
|
||||||
|
if _, err := io.Copy(w, f); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// CopyFile copies the entire database to file at the given path.
|
||||||
|
func (db *DB) CopyFile(path string) error {
|
||||||
|
f, err := os.Create(path)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
|
return db.Copy(f)
|
||||||
|
}
|
||||||
|
|
||||||
// page retrieves a page reference from the mmap based on the current page size.
|
// page retrieves a page reference from the mmap based on the current page size.
|
||||||
func (db *DB) page(id pgid) *page {
|
func (db *DB) page(id pgid) *page {
|
||||||
return (*page)(unsafe.Pointer(&db.data[id*pgid(db.pageSize)]))
|
return (*page)(unsafe.Pointer(&db.data[id*pgid(db.pageSize)]))
|
||||||
|
|
74
db_test.go
74
db_test.go
|
@ -1,11 +1,14 @@
|
||||||
package bolt
|
package bolt
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bytes"
|
||||||
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
"os"
|
"os"
|
||||||
"syscall"
|
"syscall"
|
||||||
"testing"
|
"testing"
|
||||||
|
"testing/quick"
|
||||||
"time"
|
"time"
|
||||||
"unsafe"
|
"unsafe"
|
||||||
|
|
||||||
|
@ -99,25 +102,6 @@ func TestDBMmapStatError(t *testing.T) {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
// Ensure that mmap errors get returned.
|
|
||||||
/*
|
|
||||||
func TestDBMmapError(t *testing.T) {
|
|
||||||
withMockDB(func(db *DB, mockos *mockos, mocksyscall *mocksyscall, path string) {
|
|
||||||
exp := errors.New("")
|
|
||||||
file, metafile := &mockfile{}, &mockfile{}
|
|
||||||
mockos.On("OpenFile", path, os.O_RDWR|os.O_CREATE, os.FileMode(0666)).Return(file, nil)
|
|
||||||
mockos.On("OpenFile", path, os.O_RDWR|os.O_SYNC, os.FileMode(0666)).Return(metafile, nil)
|
|
||||||
mockos.On("Getpagesize").Return(0x1000)
|
|
||||||
file.On("ReadAt", mock.Anything, int64(0)).Return(0, nil)
|
|
||||||
file.On("Stat").Return(&mockfileinfo{"", 0x2000, 0666, time.Now(), false, nil}, nil)
|
|
||||||
metafile.On("WriteAt", mock.Anything, int64(0)).Return(0, nil)
|
|
||||||
mocksyscall.On("Mmap", 0, int64(0), 0x2000, syscall.PROT_READ, syscall.MAP_SHARED).Return(([]byte)(nil), exp)
|
|
||||||
err := db.Open(path, 0666)
|
|
||||||
assert.Equal(t, err, exp)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
|
|
||||||
// Ensure that corrupt meta0 page errors get returned.
|
// Ensure that corrupt meta0 page errors get returned.
|
||||||
func TestDBCorruptMeta0(t *testing.T) {
|
func TestDBCorruptMeta0(t *testing.T) {
|
||||||
withMockDB(func(db *DB, mockos *mockos, mocksyscall *mocksyscall, path string) {
|
withMockDB(func(db *DB, mockos *mockos, mocksyscall *mocksyscall, path string) {
|
||||||
|
@ -150,10 +134,6 @@ func TestDBCorruptMeta0(t *testing.T) {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
//--------------------------------------
|
|
||||||
// Transaction()
|
|
||||||
//--------------------------------------
|
|
||||||
|
|
||||||
// Ensure that a database cannot open a transaction when it's not open.
|
// Ensure that a database cannot open a transaction when it's not open.
|
||||||
func TestDBTransactionDatabaseNotOpenError(t *testing.T) {
|
func TestDBTransactionDatabaseNotOpenError(t *testing.T) {
|
||||||
withDB(func(db *DB, path string) {
|
withDB(func(db *DB, path string) {
|
||||||
|
@ -163,6 +143,54 @@ func TestDBTransactionDatabaseNotOpenError(t *testing.T) {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Ensure that a bucket can write a key/value.
|
||||||
|
func TestDBPut(t *testing.T) {
|
||||||
|
withOpenDB(func(db *DB, path string) {
|
||||||
|
db.CreateBucket("widgets")
|
||||||
|
err := db.Put("widgets", []byte("foo"), []byte("bar"))
|
||||||
|
assert.NoError(t, err)
|
||||||
|
value, err := db.Get("widgets", []byte("foo"))
|
||||||
|
if assert.NoError(t, err) {
|
||||||
|
assert.Equal(t, value, []byte("bar"))
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// Ensure that a bucket can write random keys and values across multiple txns.
|
||||||
|
func TestDBPutRandom(t *testing.T) {
|
||||||
|
f := func(items testKeyValuePairs) bool {
|
||||||
|
withOpenDB(func(db *DB, path string) {
|
||||||
|
db.CreateBucket("widgets")
|
||||||
|
for _, item := range items {
|
||||||
|
if len(item.Key) == 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if err := db.Put("widgets", item.Key, item.Value); err != nil {
|
||||||
|
panic("put error: " + err.Error())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for _, item := range items {
|
||||||
|
if len(item.Key) == 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
value, err := db.Get("widgets", item.Key)
|
||||||
|
if err != nil {
|
||||||
|
panic("get error: " + err.Error())
|
||||||
|
}
|
||||||
|
if !bytes.Equal(value, []byte(item.Value)) {
|
||||||
|
// db.CopyFile("/tmp/bolt.random.db")
|
||||||
|
t.Fatalf("value mismatch:\n%x\n%x", item.Value, value)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fmt.Fprint(os.Stderr, ".")
|
||||||
|
})
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
if err := quick.Check(f, qc()); err != nil {
|
||||||
|
t.Error(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// withDB executes a function with a database reference.
|
// withDB executes a function with a database reference.
|
||||||
func withDB(fn func(*DB, string)) {
|
func withDB(fn func(*DB, string)) {
|
||||||
f, _ := ioutil.TempFile("", "bolt-")
|
f, _ := ioutil.TempFile("", "bolt-")
|
||||||
|
|
120
leaf.go
120
leaf.go
|
@ -1,120 +0,0 @@
|
||||||
package bolt
|
|
||||||
|
|
||||||
import (
|
|
||||||
"bytes"
|
|
||||||
"sort"
|
|
||||||
"unsafe"
|
|
||||||
)
|
|
||||||
|
|
||||||
// leaf represents an in-memory, deserialized leaf page.
|
|
||||||
type leaf struct {
|
|
||||||
pgid pgid
|
|
||||||
parent *branch
|
|
||||||
items leafItems
|
|
||||||
}
|
|
||||||
|
|
||||||
// size returns the size of the leaf after serialization.
|
|
||||||
func (l *leaf) size() int {
|
|
||||||
var size int = pageHeaderSize
|
|
||||||
for _, item := range l.items {
|
|
||||||
size += lnodeSize + len(item.key) + len(item.value)
|
|
||||||
}
|
|
||||||
return size
|
|
||||||
}
|
|
||||||
|
|
||||||
// put inserts or replaces a key on a leaf page.
|
|
||||||
func (l *leaf) put(key []byte, value []byte) {
|
|
||||||
// Find insertion index.
|
|
||||||
index := sort.Search(len(l.items), func(i int) bool { return bytes.Compare(l.items[i].key, key) != -1 })
|
|
||||||
|
|
||||||
// If there is no existing key then add a new item.
|
|
||||||
if index == len(l.items) {
|
|
||||||
l.items = append(l.items, leafItem{})
|
|
||||||
} else if len(l.items) == 0 || !bytes.Equal(l.items[index].key, key) {
|
|
||||||
l.items = append(l.items, leafItem{})
|
|
||||||
copy(l.items[index+1:], l.items[index:])
|
|
||||||
}
|
|
||||||
l.items[index].key = key
|
|
||||||
l.items[index].value = value
|
|
||||||
}
|
|
||||||
|
|
||||||
// read initializes the item data from an on-disk page.
|
|
||||||
func (l *leaf) read(p *page) {
|
|
||||||
l.pgid = p.id
|
|
||||||
l.items = make(leafItems, int(p.count))
|
|
||||||
lnodes := (*[maxNodesPerPage]lnode)(unsafe.Pointer(&p.ptr))
|
|
||||||
for i := 0; i < int(p.count); i++ {
|
|
||||||
lnode := &lnodes[i]
|
|
||||||
item := &l.items[i]
|
|
||||||
item.key = lnode.key()
|
|
||||||
item.value = lnode.value()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// write writes the items onto one or more leaf pages.
|
|
||||||
func (l *leaf) write(p *page) {
|
|
||||||
// Initialize page.
|
|
||||||
p.flags |= p_leaf
|
|
||||||
p.count = uint16(len(l.items))
|
|
||||||
|
|
||||||
// Loop over each item and write it to the page.
|
|
||||||
lnodes := (*[maxNodesPerPage]lnode)(unsafe.Pointer(&p.ptr))
|
|
||||||
b := (*[maxAllocSize]byte)(unsafe.Pointer(&p.ptr))[lnodeSize*len(l.items):]
|
|
||||||
for index, item := range l.items {
|
|
||||||
// Write node.
|
|
||||||
lnode := &lnodes[index]
|
|
||||||
lnode.pos = uint32(uintptr(unsafe.Pointer(&b[0])) - uintptr(unsafe.Pointer(lnode)))
|
|
||||||
lnode.ksize = uint32(len(item.key))
|
|
||||||
lnode.vsize = uint32(len(item.value))
|
|
||||||
|
|
||||||
// Write data to the end of the page.
|
|
||||||
copy(b[0:], item.key)
|
|
||||||
b = b[len(item.key):]
|
|
||||||
copy(b[0:], item.value)
|
|
||||||
b = b[len(item.value):]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// split divides up the nodes in the page into appropriately sized groups.
|
|
||||||
func (l *leaf) split(pageSize int) []*leaf {
|
|
||||||
// Ignore the split if the page doesn't have at least enough nodes for
|
|
||||||
// multiple pages or if the data can fit on a single page.
|
|
||||||
if len(l.items) <= (minKeysPerPage*2) || l.size() < pageSize {
|
|
||||||
return []*leaf{l}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Set fill threshold to 50%.
|
|
||||||
threshold := pageSize / 2
|
|
||||||
|
|
||||||
// Otherwise group into smaller pages and target a given fill size.
|
|
||||||
size := 0
|
|
||||||
current := &leaf{}
|
|
||||||
leafs := make([]*leaf, 0)
|
|
||||||
|
|
||||||
for index, item := range l.items {
|
|
||||||
nodeSize := lnodeSize + len(item.key) + len(item.value)
|
|
||||||
|
|
||||||
if len(current.items) >= minKeysPerPage && index < len(l.items)-minKeysPerPage && size+nodeSize > threshold {
|
|
||||||
size = pageHeaderSize
|
|
||||||
leafs = append(leafs, current)
|
|
||||||
current = &leaf{}
|
|
||||||
}
|
|
||||||
|
|
||||||
size += nodeSize
|
|
||||||
current.items = append(current.items, item)
|
|
||||||
}
|
|
||||||
leafs = append(leafs, current)
|
|
||||||
|
|
||||||
return leafs
|
|
||||||
}
|
|
||||||
|
|
||||||
type leafItems []leafItem
|
|
||||||
|
|
||||||
type leafItem struct {
|
|
||||||
key []byte
|
|
||||||
value []byte
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s leafItems) Len() int { return len(s) }
|
|
||||||
func (s leafItems) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
|
|
||||||
func (s leafItems) Less(i, j int) bool { return bytes.Compare(s[i].key, s[j].key) == -1 }
|
|
124
leaf_test.go
124
leaf_test.go
|
@ -1,124 +0,0 @@
|
||||||
package bolt
|
|
||||||
|
|
||||||
import (
|
|
||||||
"testing"
|
|
||||||
"unsafe"
|
|
||||||
|
|
||||||
"github.com/stretchr/testify/assert"
|
|
||||||
)
|
|
||||||
|
|
||||||
// Ensure that a leaf can insert a key/value.
|
|
||||||
func TestLeafPut(t *testing.T) {
|
|
||||||
l := &leaf{items: make(leafItems, 0)}
|
|
||||||
l.put([]byte("baz"), []byte("2"))
|
|
||||||
l.put([]byte("foo"), []byte("0"))
|
|
||||||
l.put([]byte("bar"), []byte("1"))
|
|
||||||
l.put([]byte("foo"), []byte("3"))
|
|
||||||
assert.Equal(t, len(l.items), 3)
|
|
||||||
assert.Equal(t, l.items[0], leafItem{[]byte("bar"), []byte("1")})
|
|
||||||
assert.Equal(t, l.items[1], leafItem{[]byte("baz"), []byte("2")})
|
|
||||||
assert.Equal(t, l.items[2], leafItem{[]byte("foo"), []byte("3")})
|
|
||||||
}
|
|
||||||
|
|
||||||
// Ensure that a leaf can deserialize from a page.
|
|
||||||
func TestLeafRead(t *testing.T) {
|
|
||||||
// Create a page.
|
|
||||||
var buf [4096]byte
|
|
||||||
page := (*page)(unsafe.Pointer(&buf[0]))
|
|
||||||
page.count = 2
|
|
||||||
|
|
||||||
// Insert 2 leaf items at the beginning. sizeof(lnode) == 16
|
|
||||||
nodes := (*[3]lnode)(unsafe.Pointer(&page.ptr))
|
|
||||||
nodes[0] = lnode{flags: 0, pos: 32, ksize: 3, vsize: 4} // pos = sizeof(lnode) * 2
|
|
||||||
nodes[1] = lnode{flags: 0, pos: 23, ksize: 10, vsize: 3} // pos = sizeof(lnode) + 3 + 4
|
|
||||||
|
|
||||||
// Write data for the nodes at the end.
|
|
||||||
data := (*[4096]byte)(unsafe.Pointer(&nodes[2]))
|
|
||||||
copy(data[:], []byte("barfooz"))
|
|
||||||
copy(data[7:], []byte("helloworldbye"))
|
|
||||||
|
|
||||||
// Deserialize page into a leaf.
|
|
||||||
l := &leaf{}
|
|
||||||
l.read(page)
|
|
||||||
|
|
||||||
// Check that there are two items with correct data.
|
|
||||||
assert.Equal(t, len(l.items), 2)
|
|
||||||
assert.Equal(t, l.items[0].key, []byte("bar"))
|
|
||||||
assert.Equal(t, l.items[0].value, []byte("fooz"))
|
|
||||||
assert.Equal(t, l.items[1].key, []byte("helloworld"))
|
|
||||||
assert.Equal(t, l.items[1].value, []byte("bye"))
|
|
||||||
}
|
|
||||||
|
|
||||||
// Ensure that a leaf can serialize itself.
|
|
||||||
func TestLeafWrite(t *testing.T) {
|
|
||||||
// Create a leaf.
|
|
||||||
l := &leaf{items: make(leafItems, 0)}
|
|
||||||
l.put([]byte("susy"), []byte("que"))
|
|
||||||
l.put([]byte("ricki"), []byte("lake"))
|
|
||||||
l.put([]byte("john"), []byte("johnson"))
|
|
||||||
|
|
||||||
// Write it to a page.
|
|
||||||
var buf [4096]byte
|
|
||||||
p := (*page)(unsafe.Pointer(&buf[0]))
|
|
||||||
l.write(p)
|
|
||||||
|
|
||||||
// Read the page back in.
|
|
||||||
l2 := &leaf{}
|
|
||||||
l2.read(p)
|
|
||||||
|
|
||||||
// Check that the two pages are the same.
|
|
||||||
assert.Equal(t, len(l2.items), 3)
|
|
||||||
assert.Equal(t, l2.items[0].key, []byte("john"))
|
|
||||||
assert.Equal(t, l2.items[0].value, []byte("johnson"))
|
|
||||||
assert.Equal(t, l2.items[1].key, []byte("ricki"))
|
|
||||||
assert.Equal(t, l2.items[1].value, []byte("lake"))
|
|
||||||
assert.Equal(t, l2.items[2].key, []byte("susy"))
|
|
||||||
assert.Equal(t, l2.items[2].value, []byte("que"))
|
|
||||||
}
|
|
||||||
|
|
||||||
// Ensure that a leaf can split into appropriate subgroups.
|
|
||||||
func TestLeafSplit(t *testing.T) {
|
|
||||||
// Create a leaf.
|
|
||||||
l := &leaf{items: make(leafItems, 0)}
|
|
||||||
l.put([]byte("00000001"), []byte("0123456701234567"))
|
|
||||||
l.put([]byte("00000002"), []byte("0123456701234567"))
|
|
||||||
l.put([]byte("00000003"), []byte("0123456701234567"))
|
|
||||||
l.put([]byte("00000004"), []byte("0123456701234567"))
|
|
||||||
l.put([]byte("00000005"), []byte("0123456701234567"))
|
|
||||||
|
|
||||||
// Split between 3 & 4.
|
|
||||||
leafs := l.split(100)
|
|
||||||
|
|
||||||
assert.Equal(t, len(leafs), 2)
|
|
||||||
assert.Equal(t, len(leafs[0].items), 2)
|
|
||||||
assert.Equal(t, len(leafs[1].items), 3)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Ensure that a leaf with the minimum number of items just returns a single leaf.
|
|
||||||
func TestLeafSplitWithMinKeys(t *testing.T) {
|
|
||||||
// Create a leaf.
|
|
||||||
l := &leaf{items: make(leafItems, 0)}
|
|
||||||
l.put([]byte("00000001"), []byte("0123456701234567"))
|
|
||||||
l.put([]byte("00000002"), []byte("0123456701234567"))
|
|
||||||
|
|
||||||
// Split.
|
|
||||||
leafs := l.split(20)
|
|
||||||
assert.Equal(t, len(leafs), 1)
|
|
||||||
assert.Equal(t, len(leafs[0].items), 2)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Ensure that a leaf that has keys that all fit on a page just returns one leaf.
|
|
||||||
func TestLeafSplitFitsInPage(t *testing.T) {
|
|
||||||
// Create a leaf.
|
|
||||||
l := &leaf{items: make(leafItems, 0)}
|
|
||||||
l.put([]byte("00000001"), []byte("0123456701234567"))
|
|
||||||
l.put([]byte("00000002"), []byte("0123456701234567"))
|
|
||||||
l.put([]byte("00000003"), []byte("0123456701234567"))
|
|
||||||
l.put([]byte("00000004"), []byte("0123456701234567"))
|
|
||||||
l.put([]byte("00000005"), []byte("0123456701234567"))
|
|
||||||
|
|
||||||
// Split.
|
|
||||||
leafs := l.split(4096)
|
|
||||||
assert.Equal(t, len(leafs), 1)
|
|
||||||
assert.Equal(t, len(leafs[0].items), 5)
|
|
||||||
}
|
|
27
lnode.go
27
lnode.go
|
@ -1,27 +0,0 @@
|
||||||
package bolt
|
|
||||||
|
|
||||||
import (
|
|
||||||
"unsafe"
|
|
||||||
)
|
|
||||||
|
|
||||||
const lnodeSize = int(unsafe.Sizeof(lnode{}))
|
|
||||||
|
|
||||||
// lnode represents a node on a leaf page.
|
|
||||||
type lnode struct {
|
|
||||||
flags uint32
|
|
||||||
pos uint32
|
|
||||||
ksize uint32
|
|
||||||
vsize uint32
|
|
||||||
}
|
|
||||||
|
|
||||||
// key returns a byte slice of the node key.
|
|
||||||
func (n *lnode) key() []byte {
|
|
||||||
buf := (*[maxAllocSize]byte)(unsafe.Pointer(n))
|
|
||||||
return buf[n.pos : n.pos+n.ksize]
|
|
||||||
}
|
|
||||||
|
|
||||||
// value returns a byte slice of the node value.
|
|
||||||
func (n *lnode) value() []byte {
|
|
||||||
buf := (*[maxAllocSize]byte)(unsafe.Pointer(n))
|
|
||||||
return buf[n.pos+n.ksize : n.pos+n.ksize+n.vsize]
|
|
||||||
}
|
|
5
meta.go
5
meta.go
|
@ -6,9 +6,10 @@ type meta struct {
|
||||||
magic uint32
|
magic uint32
|
||||||
version uint32
|
version uint32
|
||||||
pageSize uint32
|
pageSize uint32
|
||||||
pgid pgid
|
flags uint32
|
||||||
free pgid
|
|
||||||
sys pgid
|
sys pgid
|
||||||
|
free pgid
|
||||||
|
pgid pgid
|
||||||
txnid txnid
|
txnid txnid
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,178 @@
|
||||||
|
package bolt
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"sort"
|
||||||
|
"unsafe"
|
||||||
|
)
|
||||||
|
|
||||||
|
// node represents an in-memory, deserialized page.
|
||||||
|
type node struct {
|
||||||
|
isLeaf bool
|
||||||
|
key []byte
|
||||||
|
depth int
|
||||||
|
pgid pgid
|
||||||
|
parent *node
|
||||||
|
inodes inodes
|
||||||
|
}
|
||||||
|
|
||||||
|
// size returns the size of the node after serialization.
|
||||||
|
func (n *node) size() int {
|
||||||
|
var elementSize int = n.pageElementSize()
|
||||||
|
|
||||||
|
var size int = pageHeaderSize
|
||||||
|
for _, item := range n.inodes {
|
||||||
|
size += elementSize + len(item.key) + len(item.value)
|
||||||
|
}
|
||||||
|
return size
|
||||||
|
}
|
||||||
|
|
||||||
|
// pageElementSize returns the size of each page element based on the type of node.
|
||||||
|
func (n *node) pageElementSize() int {
|
||||||
|
if n.isLeaf {
|
||||||
|
return leafPageElementSize
|
||||||
|
}
|
||||||
|
return branchPageElementSize
|
||||||
|
}
|
||||||
|
|
||||||
|
// root returns the root node in the tree.
|
||||||
|
func (n *node) root() *node {
|
||||||
|
if n.parent == nil {
|
||||||
|
return n
|
||||||
|
}
|
||||||
|
return n.parent
|
||||||
|
}
|
||||||
|
|
||||||
|
// put inserts a key/value.
|
||||||
|
func (n *node) put(oldKey, newKey, value []byte, pgid pgid) {
|
||||||
|
// Find insertion index.
|
||||||
|
index := sort.Search(len(n.inodes), func(i int) bool { return bytes.Compare(n.inodes[i].key, oldKey) != -1 })
|
||||||
|
|
||||||
|
// Add capacity and shift nodes if we don't have an exact match and need to insert.
|
||||||
|
exact := (len(n.inodes) > 0 && index < len(n.inodes) && bytes.Equal(n.inodes[index].key, oldKey))
|
||||||
|
if !exact {
|
||||||
|
n.inodes = append(n.inodes, inode{})
|
||||||
|
copy(n.inodes[index+1:], n.inodes[index:])
|
||||||
|
}
|
||||||
|
|
||||||
|
inode := &n.inodes[index]
|
||||||
|
inode.key = newKey
|
||||||
|
inode.value = value
|
||||||
|
inode.pgid = pgid
|
||||||
|
}
|
||||||
|
|
||||||
|
// read initializes the node from a page.
|
||||||
|
func (n *node) read(p *page) {
|
||||||
|
n.pgid = p.id
|
||||||
|
n.isLeaf = ((p.flags & p_leaf) != 0)
|
||||||
|
n.inodes = make(inodes, int(p.count))
|
||||||
|
|
||||||
|
for i := 0; i < int(p.count); i++ {
|
||||||
|
inode := &n.inodes[i]
|
||||||
|
if n.isLeaf {
|
||||||
|
elem := p.leafPageElement(uint16(i))
|
||||||
|
inode.key = elem.key()
|
||||||
|
inode.value = elem.value()
|
||||||
|
} else {
|
||||||
|
elem := p.branchPageElement(uint16(i))
|
||||||
|
inode.pgid = elem.pgid
|
||||||
|
inode.key = elem.key()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Save first key so we can find the node in the parent when we spill.
|
||||||
|
if len(n.inodes) > 0 {
|
||||||
|
n.key = n.inodes[0].key
|
||||||
|
} else {
|
||||||
|
n.key = nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// write writes the items onto one or more pages.
|
||||||
|
func (n *node) write(p *page) {
|
||||||
|
// Initialize page.
|
||||||
|
if n.isLeaf {
|
||||||
|
p.flags |= p_leaf
|
||||||
|
} else {
|
||||||
|
p.flags |= p_branch
|
||||||
|
}
|
||||||
|
p.count = uint16(len(n.inodes))
|
||||||
|
|
||||||
|
// Loop over each item and write it to the page.
|
||||||
|
b := (*[maxAllocSize]byte)(unsafe.Pointer(&p.ptr))[n.pageElementSize()*len(n.inodes):]
|
||||||
|
for i, item := range n.inodes {
|
||||||
|
// Write the page element.
|
||||||
|
if n.isLeaf {
|
||||||
|
elem := p.leafPageElement(uint16(i))
|
||||||
|
elem.pos = uint32(uintptr(unsafe.Pointer(&b[0])) - uintptr(unsafe.Pointer(elem)))
|
||||||
|
elem.ksize = uint32(len(item.key))
|
||||||
|
elem.vsize = uint32(len(item.value))
|
||||||
|
} else {
|
||||||
|
elem := p.branchPageElement(uint16(i))
|
||||||
|
elem.pos = uint32(uintptr(unsafe.Pointer(&b[0])) - uintptr(unsafe.Pointer(elem)))
|
||||||
|
elem.ksize = uint32(len(item.key))
|
||||||
|
elem.pgid = item.pgid
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write data for the element to the end of the page.
|
||||||
|
copy(b[0:], item.key)
|
||||||
|
b = b[len(item.key):]
|
||||||
|
copy(b[0:], item.value)
|
||||||
|
b = b[len(item.value):]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// split divides up the node into appropriately sized nodes.
|
||||||
|
func (n *node) split(pageSize int) []*node {
|
||||||
|
// Ignore the split if the page doesn't have at least enough nodes for
|
||||||
|
// multiple pages or if the data can fit on a single page.
|
||||||
|
if len(n.inodes) <= (minKeysPerPage*2) || n.size() < pageSize {
|
||||||
|
return []*node{n}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set fill threshold to 50%.
|
||||||
|
threshold := pageSize / 2
|
||||||
|
|
||||||
|
// Group into smaller pages and target a given fill size.
|
||||||
|
size := 0
|
||||||
|
current := &node{isLeaf: n.isLeaf}
|
||||||
|
nodes := make([]*node, 0)
|
||||||
|
|
||||||
|
for i, inode := range n.inodes {
|
||||||
|
elemSize := n.pageElementSize() + len(inode.key) + len(inode.value)
|
||||||
|
|
||||||
|
if len(current.inodes) >= minKeysPerPage && i < len(n.inodes)-minKeysPerPage && size+elemSize > threshold {
|
||||||
|
size = pageHeaderSize
|
||||||
|
nodes = append(nodes, current)
|
||||||
|
current = &node{isLeaf: n.isLeaf}
|
||||||
|
}
|
||||||
|
|
||||||
|
size += elemSize
|
||||||
|
current.inodes = append(current.inodes, inode)
|
||||||
|
}
|
||||||
|
nodes = append(nodes, current)
|
||||||
|
|
||||||
|
return nodes
|
||||||
|
}
|
||||||
|
|
||||||
|
// nodesByDepth sorts a list of branches by deepest first.
|
||||||
|
type nodesByDepth []*node
|
||||||
|
|
||||||
|
func (s nodesByDepth) Len() int { return len(s) }
|
||||||
|
func (s nodesByDepth) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
|
||||||
|
func (s nodesByDepth) Less(i, j int) bool { return s[i].depth > s[j].depth }
|
||||||
|
|
||||||
|
// inode represents an internal node inside of a node.
|
||||||
|
// It can be used to point to elements in a page or point
|
||||||
|
// to an element which hasn't been added to a page yet.
|
||||||
|
type inode struct {
|
||||||
|
pgid pgid
|
||||||
|
key []byte
|
||||||
|
value []byte
|
||||||
|
}
|
||||||
|
|
||||||
|
type inodes []inode
|
||||||
|
|
||||||
|
func (s inodes) Len() int { return len(s) }
|
||||||
|
func (s inodes) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
|
||||||
|
func (s inodes) Less(i, j int) bool { return bytes.Compare(s[i].key, s[j].key) == -1 }
|
|
@ -0,0 +1,129 @@
|
||||||
|
package bolt
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
"unsafe"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Ensure that a node can insert a key/value.
|
||||||
|
func TestNodePut(t *testing.T) {
|
||||||
|
n := &node{inodes: make(inodes, 0)}
|
||||||
|
n.put([]byte("baz"), []byte("baz"), []byte("2"), 0)
|
||||||
|
n.put([]byte("foo"), []byte("foo"), []byte("0"), 0)
|
||||||
|
n.put([]byte("bar"), []byte("bar"), []byte("1"), 0)
|
||||||
|
n.put([]byte("foo"), []byte("foo"), []byte("3"), 0)
|
||||||
|
assert.Equal(t, len(n.inodes), 3)
|
||||||
|
assert.Equal(t, n.inodes[0].key, []byte("bar"))
|
||||||
|
assert.Equal(t, n.inodes[0].value, []byte("1"))
|
||||||
|
assert.Equal(t, n.inodes[1].key, []byte("baz"))
|
||||||
|
assert.Equal(t, n.inodes[1].value, []byte("2"))
|
||||||
|
assert.Equal(t, n.inodes[2].key, []byte("foo"))
|
||||||
|
assert.Equal(t, n.inodes[2].value, []byte("3"))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Ensure that a node can deserialize from a leaf page.
|
||||||
|
func TestNodeReadLeafPage(t *testing.T) {
|
||||||
|
// Create a page.
|
||||||
|
var buf [4096]byte
|
||||||
|
page := (*page)(unsafe.Pointer(&buf[0]))
|
||||||
|
page.flags = p_leaf
|
||||||
|
page.count = 2
|
||||||
|
|
||||||
|
// Insert 2 elements at the beginning. sizeof(leafPageElement) == 16
|
||||||
|
nodes := (*[3]leafPageElement)(unsafe.Pointer(&page.ptr))
|
||||||
|
nodes[0] = leafPageElement{flags: 0, pos: 32, ksize: 3, vsize: 4} // pos = sizeof(leafPageElement) * 2
|
||||||
|
nodes[1] = leafPageElement{flags: 0, pos: 23, ksize: 10, vsize: 3} // pos = sizeof(leafPageElement) + 3 + 4
|
||||||
|
|
||||||
|
// Write data for the nodes at the end.
|
||||||
|
data := (*[4096]byte)(unsafe.Pointer(&nodes[2]))
|
||||||
|
copy(data[:], []byte("barfooz"))
|
||||||
|
copy(data[7:], []byte("helloworldbye"))
|
||||||
|
|
||||||
|
// Deserialize page into a leaf.
|
||||||
|
n := &node{}
|
||||||
|
n.read(page)
|
||||||
|
|
||||||
|
// Check that there are two inodes with correct data.
|
||||||
|
assert.True(t, n.isLeaf)
|
||||||
|
assert.Equal(t, len(n.inodes), 2)
|
||||||
|
assert.Equal(t, n.inodes[0].key, []byte("bar"))
|
||||||
|
assert.Equal(t, n.inodes[0].value, []byte("fooz"))
|
||||||
|
assert.Equal(t, n.inodes[1].key, []byte("helloworld"))
|
||||||
|
assert.Equal(t, n.inodes[1].value, []byte("bye"))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Ensure that a node can serialize into a leaf page.
|
||||||
|
func TestNodeWriteLeafPage(t *testing.T) {
|
||||||
|
// Create a node.
|
||||||
|
n := &node{isLeaf: true, inodes: make(inodes, 0)}
|
||||||
|
n.put([]byte("susy"), []byte("susy"), []byte("que"), 0)
|
||||||
|
n.put([]byte("ricki"), []byte("ricki"), []byte("lake"), 0)
|
||||||
|
n.put([]byte("john"), []byte("john"), []byte("johnson"), 0)
|
||||||
|
|
||||||
|
// Write it to a page.
|
||||||
|
var buf [4096]byte
|
||||||
|
p := (*page)(unsafe.Pointer(&buf[0]))
|
||||||
|
n.write(p)
|
||||||
|
|
||||||
|
// Read the page back in.
|
||||||
|
n2 := &node{}
|
||||||
|
n2.read(p)
|
||||||
|
|
||||||
|
// Check that the two pages are the same.
|
||||||
|
assert.Equal(t, len(n2.inodes), 3)
|
||||||
|
assert.Equal(t, n2.inodes[0].key, []byte("john"))
|
||||||
|
assert.Equal(t, n2.inodes[0].value, []byte("johnson"))
|
||||||
|
assert.Equal(t, n2.inodes[1].key, []byte("ricki"))
|
||||||
|
assert.Equal(t, n2.inodes[1].value, []byte("lake"))
|
||||||
|
assert.Equal(t, n2.inodes[2].key, []byte("susy"))
|
||||||
|
assert.Equal(t, n2.inodes[2].value, []byte("que"))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Ensure that a node can split into appropriate subgroups.
|
||||||
|
func TestNodeSplit(t *testing.T) {
|
||||||
|
// Create a node.
|
||||||
|
n := &node{inodes: make(inodes, 0)}
|
||||||
|
n.put([]byte("00000001"), []byte("00000001"), []byte("0123456701234567"), 0)
|
||||||
|
n.put([]byte("00000002"), []byte("00000002"), []byte("0123456701234567"), 0)
|
||||||
|
n.put([]byte("00000003"), []byte("00000003"), []byte("0123456701234567"), 0)
|
||||||
|
n.put([]byte("00000004"), []byte("00000004"), []byte("0123456701234567"), 0)
|
||||||
|
n.put([]byte("00000005"), []byte("00000005"), []byte("0123456701234567"), 0)
|
||||||
|
|
||||||
|
// Split between 3 & 4.
|
||||||
|
nodes := n.split(100)
|
||||||
|
|
||||||
|
assert.Equal(t, len(nodes), 2)
|
||||||
|
assert.Equal(t, len(nodes[0].inodes), 2)
|
||||||
|
assert.Equal(t, len(nodes[1].inodes), 3)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Ensure that a page with the minimum number of inodes just returns a single node.
|
||||||
|
func TestNodeSplitWithMinKeys(t *testing.T) {
|
||||||
|
// Create a node.
|
||||||
|
n := &node{inodes: make(inodes, 0)}
|
||||||
|
n.put([]byte("00000001"), []byte("00000001"), []byte("0123456701234567"), 0)
|
||||||
|
n.put([]byte("00000002"), []byte("00000002"), []byte("0123456701234567"), 0)
|
||||||
|
|
||||||
|
// Split.
|
||||||
|
nodes := n.split(20)
|
||||||
|
assert.Equal(t, len(nodes), 1)
|
||||||
|
assert.Equal(t, len(nodes[0].inodes), 2)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Ensure that a node that has keys that all fit on a page just returns one leaf.
|
||||||
|
func TestNodeSplitFitsInPage(t *testing.T) {
|
||||||
|
// Create a node.
|
||||||
|
n := &node{inodes: make(inodes, 0)}
|
||||||
|
n.put([]byte("00000001"), []byte("00000001"), []byte("0123456701234567"), 0)
|
||||||
|
n.put([]byte("00000002"), []byte("00000002"), []byte("0123456701234567"), 0)
|
||||||
|
n.put([]byte("00000003"), []byte("00000003"), []byte("0123456701234567"), 0)
|
||||||
|
n.put([]byte("00000004"), []byte("00000004"), []byte("0123456701234567"), 0)
|
||||||
|
n.put([]byte("00000005"), []byte("00000005"), []byte("0123456701234567"), 0)
|
||||||
|
|
||||||
|
// Split.
|
||||||
|
nodes := n.split(4096)
|
||||||
|
assert.Equal(t, len(nodes), 1)
|
||||||
|
assert.Equal(t, len(nodes[0].inodes), 5)
|
||||||
|
}
|
85
page.go
85
page.go
|
@ -1,6 +1,8 @@
|
||||||
package bolt
|
package bolt
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
"unsafe"
|
"unsafe"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -10,6 +12,9 @@ const maxAllocSize = 0xFFFFFFF
|
||||||
const minKeysPerPage = 2
|
const minKeysPerPage = 2
|
||||||
const maxNodesPerPage = 65535
|
const maxNodesPerPage = 65535
|
||||||
|
|
||||||
|
const branchPageElementSize = int(unsafe.Sizeof(branchPageElement{}))
|
||||||
|
const leafPageElementSize = int(unsafe.Sizeof(leafPageElement{}))
|
||||||
|
|
||||||
const (
|
const (
|
||||||
p_branch = 0x01
|
p_branch = 0x01
|
||||||
p_leaf = 0x02
|
p_leaf = 0x02
|
||||||
|
@ -28,29 +33,46 @@ type page struct {
|
||||||
ptr uintptr
|
ptr uintptr
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// typ returns a human readable page type string used for debugging.
|
||||||
|
func (p *page) typ() string {
|
||||||
|
if (p.flags & p_branch) != 0 {
|
||||||
|
return "branch"
|
||||||
|
} else if (p.flags & p_leaf) != 0 {
|
||||||
|
return "leaf"
|
||||||
|
} else if (p.flags & p_meta) != 0 {
|
||||||
|
return "meta"
|
||||||
|
} else if (p.flags & p_sys) != 0 {
|
||||||
|
return "system"
|
||||||
|
} else if (p.flags & p_freelist) != 0 {
|
||||||
|
return "freelist"
|
||||||
|
}
|
||||||
|
return fmt.Sprintf("unknown<%02x>", p.flags)
|
||||||
|
}
|
||||||
|
|
||||||
// meta returns a pointer to the metadata section of the page.
|
// meta returns a pointer to the metadata section of the page.
|
||||||
func (p *page) meta() *meta {
|
func (p *page) meta() *meta {
|
||||||
return (*meta)(unsafe.Pointer(&p.ptr))
|
return (*meta)(unsafe.Pointer(&p.ptr))
|
||||||
}
|
}
|
||||||
|
|
||||||
// lnode retrieves the leaf node by index
|
// leafPageElement retrieves the leaf node by index
|
||||||
func (p *page) lnode(index int) *lnode {
|
func (p *page) leafPageElement(index uint16) *leafPageElement {
|
||||||
return &((*[maxNodesPerPage]lnode)(unsafe.Pointer(&p.ptr)))[index]
|
n := &((*[maxNodesPerPage]leafPageElement)(unsafe.Pointer(&p.ptr)))[index]
|
||||||
|
return n
|
||||||
}
|
}
|
||||||
|
|
||||||
// lnodes retrieves a list of leaf nodes.
|
// leafPageElements retrieves a list of leaf nodes.
|
||||||
func (p *page) lnodes() []lnode {
|
func (p *page) leafPageElements() []leafPageElement {
|
||||||
return ((*[maxNodesPerPage]lnode)(unsafe.Pointer(&p.ptr)))[:]
|
return ((*[maxNodesPerPage]leafPageElement)(unsafe.Pointer(&p.ptr)))[:]
|
||||||
}
|
}
|
||||||
|
|
||||||
// bnode retrieves the branch node by index
|
// branchPageElement retrieves the branch node by index
|
||||||
func (p *page) bnode(index int) *bnode {
|
func (p *page) branchPageElement(index uint16) *branchPageElement {
|
||||||
return &((*[maxNodesPerPage]bnode)(unsafe.Pointer(&p.ptr)))[index]
|
return &((*[maxNodesPerPage]branchPageElement)(unsafe.Pointer(&p.ptr)))[index]
|
||||||
}
|
}
|
||||||
|
|
||||||
// bnodes retrieves a list of branch nodes.
|
// branchPageElements retrieves a list of branch nodes.
|
||||||
func (p *page) bnodes() []bnode {
|
func (p *page) branchPageElements() []branchPageElement {
|
||||||
return ((*[maxNodesPerPage]bnode)(unsafe.Pointer(&p.ptr)))[:]
|
return ((*[maxNodesPerPage]branchPageElement)(unsafe.Pointer(&p.ptr)))[:]
|
||||||
}
|
}
|
||||||
|
|
||||||
// freelist retrieves a list of page ids from a freelist page.
|
// freelist retrieves a list of page ids from a freelist page.
|
||||||
|
@ -58,8 +80,47 @@ func (p *page) freelist() []pgid {
|
||||||
return ((*[maxNodesPerPage]pgid)(unsafe.Pointer(&p.ptr)))[0:p.count]
|
return ((*[maxNodesPerPage]pgid)(unsafe.Pointer(&p.ptr)))[0:p.count]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// dump writes n bytes of the page to STDERR as hex output.
|
||||||
|
func (p *page) hexdump(n int) {
|
||||||
|
buf := (*[maxAllocSize]byte)(unsafe.Pointer(p))[:n]
|
||||||
|
fmt.Fprintf(os.Stderr, "%x\n", buf)
|
||||||
|
}
|
||||||
|
|
||||||
type pages []*page
|
type pages []*page
|
||||||
|
|
||||||
func (s pages) Len() int { return len(s) }
|
func (s pages) Len() int { return len(s) }
|
||||||
func (s pages) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
|
func (s pages) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
|
||||||
func (s pages) Less(i, j int) bool { return s[i].id < s[j].id }
|
func (s pages) Less(i, j int) bool { return s[i].id < s[j].id }
|
||||||
|
|
||||||
|
// branchPageElement represents a node on a branch page.
|
||||||
|
type branchPageElement struct {
|
||||||
|
pos uint32
|
||||||
|
ksize uint32
|
||||||
|
pgid pgid
|
||||||
|
}
|
||||||
|
|
||||||
|
// key returns a byte slice of the node key.
|
||||||
|
func (n *branchPageElement) key() []byte {
|
||||||
|
buf := (*[maxAllocSize]byte)(unsafe.Pointer(n))
|
||||||
|
return buf[n.pos : n.pos+n.ksize]
|
||||||
|
}
|
||||||
|
|
||||||
|
// leafPageElement represents a node on a leaf page.
|
||||||
|
type leafPageElement struct {
|
||||||
|
flags uint32
|
||||||
|
pos uint32
|
||||||
|
ksize uint32
|
||||||
|
vsize uint32
|
||||||
|
}
|
||||||
|
|
||||||
|
// key returns a byte slice of the node key.
|
||||||
|
func (n *leafPageElement) key() []byte {
|
||||||
|
buf := (*[maxAllocSize]byte)(unsafe.Pointer(n))
|
||||||
|
return buf[n.pos : n.pos+n.ksize]
|
||||||
|
}
|
||||||
|
|
||||||
|
// value returns a byte slice of the node value.
|
||||||
|
func (n *leafPageElement) value() []byte {
|
||||||
|
buf := (*[maxAllocSize]byte)(unsafe.Pointer(n))
|
||||||
|
return buf[n.pos+n.ksize : n.pos+n.ksize+n.vsize]
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,65 @@
|
||||||
|
package bolt
|
||||||
|
|
||||||
|
import (
|
||||||
|
"flag"
|
||||||
|
"math/rand"
|
||||||
|
"reflect"
|
||||||
|
"testing/quick"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// testing/quick defaults to 100 iterations and a random seed.
|
||||||
|
// You can override these settings from the command line:
|
||||||
|
//
|
||||||
|
// -quickchecks The number of iterations to perform.
|
||||||
|
// -quick.seed The seed to use for randomizing.
|
||||||
|
// -quick.maxitems The maximum number of items to insert into a DB.
|
||||||
|
// -quick.maxksize The maximum size of a key.
|
||||||
|
// -quick.maxvsize The maximum size of a value.
|
||||||
|
//
|
||||||
|
|
||||||
|
var seed, testMaxItemCount, testMaxKeySize, testMaxValueSize int
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
flag.IntVar(&seed, "quick.seed", int(time.Now().UnixNano())%100000, "")
|
||||||
|
flag.IntVar(&testMaxItemCount, "quick.maxitems", 1024, "")
|
||||||
|
flag.IntVar(&testMaxKeySize, "quick.maxksize", 1024, "")
|
||||||
|
flag.IntVar(&testMaxValueSize, "quick.maxvsize", 1024, "")
|
||||||
|
warn("seed:", seed)
|
||||||
|
}
|
||||||
|
|
||||||
|
// qc creates a testing/quick configuration.
|
||||||
|
func qc() *quick.Config {
|
||||||
|
return &quick.Config{Rand: rand.New(rand.NewSource(int64(seed)))}
|
||||||
|
}
|
||||||
|
|
||||||
|
type testKeyValuePairs []testKeyValuePair
|
||||||
|
|
||||||
|
func (t testKeyValuePairs) Generate(rand *rand.Rand, size int) reflect.Value {
|
||||||
|
n := rand.Intn(testMaxItemCount-1) + 1
|
||||||
|
items := make(testKeyValuePairs, n)
|
||||||
|
for i := 0; i < n; i++ {
|
||||||
|
items[i].Generate(rand, size)
|
||||||
|
}
|
||||||
|
return reflect.ValueOf(items)
|
||||||
|
}
|
||||||
|
|
||||||
|
type testKeyValuePair struct {
|
||||||
|
Key []byte
|
||||||
|
Value []byte
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t testKeyValuePair) Generate(rand *rand.Rand, size int) reflect.Value {
|
||||||
|
t.Key = randByteSlice(rand, 1, testMaxKeySize)
|
||||||
|
t.Value = randByteSlice(rand, 0, testMaxValueSize)
|
||||||
|
return reflect.ValueOf(t)
|
||||||
|
}
|
||||||
|
|
||||||
|
// randByteSlice returns a slice of random bytes whose length is drawn from
// [minSize, maxSize). When the range is empty (maxSize <= minSize) the slice
// has exactly minSize bytes instead of panicking inside rand.Intn.
// Every byte value from 0x00 through 0xFF can occur.
func randByteSlice(rand *rand.Rand, minSize, maxSize int) []byte {
	n := minSize
	if maxSize > minSize {
		n = rand.Intn(maxSize-minSize) + minSize
	}

	b := make([]byte, n)
	for i := range b {
		// Intn(256) covers the full byte range; Intn(255) would never yield 0xFF.
		b[i] = byte(rand.Intn(256))
	}
	return b
}
|
157
rwtransaction.go
157
rwtransaction.go
|
@ -9,8 +9,7 @@ import (
|
||||||
// Only one read/write transaction can be active for a DB at a time.
|
// Only one read/write transaction can be active for a DB at a time.
|
||||||
type RWTransaction struct {
|
type RWTransaction struct {
|
||||||
Transaction
|
Transaction
|
||||||
branches map[pgid]*branch
|
nodes map[pgid]*node
|
||||||
leafs map[pgid]*leaf
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// init initializes the transaction.
|
// init initializes the transaction.
|
||||||
|
@ -21,7 +20,7 @@ func (t *RWTransaction) init(db *DB) {
|
||||||
// Copy the meta and increase the transaction id.
|
// Copy the meta and increase the transaction id.
|
||||||
t.meta = &meta{}
|
t.meta = &meta{}
|
||||||
db.meta().copy(t.meta)
|
db.meta().copy(t.meta)
|
||||||
t.meta.txnid += txnid(2)
|
t.meta.txnid += txnid(1)
|
||||||
}
|
}
|
||||||
|
|
||||||
// CreateBucket creates a new bucket.
|
// CreateBucket creates a new bucket.
|
||||||
|
@ -32,7 +31,7 @@ func (t *RWTransaction) CreateBucket(name string) error {
|
||||||
} else if len(name) == 0 {
|
} else if len(name) == 0 {
|
||||||
return &Error{"bucket name cannot be blank", nil}
|
return &Error{"bucket name cannot be blank", nil}
|
||||||
} else if len(name) > MaxBucketNameSize {
|
} else if len(name) > MaxBucketNameSize {
|
||||||
return &Error{"bucket name too large", nil}
|
return &Error{"bucket name too long", nil}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create a blank root leaf page.
|
// Create a blank root leaf page.
|
||||||
|
@ -72,9 +71,9 @@ func (t *RWTransaction) Put(name string, key []byte, value []byte) error {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Insert a new node.
|
// Insert a new node.
|
||||||
c := b.Cursor()
|
c := b.cursor()
|
||||||
c.Goto(key)
|
c.Get(key)
|
||||||
t.leaf(c).put(key, value)
|
t.node(c.stack).put(key, key, value, 0)
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
@ -121,8 +120,8 @@ func (t *RWTransaction) Rollback() {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *RWTransaction) close() {
|
func (t *RWTransaction) close() {
|
||||||
// Clear temporary pages.
|
// Clear nodes.
|
||||||
t.leafs = nil
|
t.nodes = nil
|
||||||
|
|
||||||
// TODO: Release writer lock.
|
// TODO: Release writer lock.
|
||||||
}
|
}
|
||||||
|
@ -146,55 +145,81 @@ func (t *RWTransaction) allocate(count int) *page {
|
||||||
return p
|
return p
|
||||||
}
|
}
|
||||||
|
|
||||||
// spill writes all the leafs and branches to dirty pages.
|
// spill writes all the nodes to dirty pages.
|
||||||
func (t *RWTransaction) spill() {
|
func (t *RWTransaction) spill() {
|
||||||
// Spill leafs first.
|
// Keep track of the current root nodes.
|
||||||
for _, l := range t.leafs {
|
// We will update this at the end once all nodes are created.
|
||||||
t.spillLeaf(l)
|
type root struct {
|
||||||
|
node *node
|
||||||
|
pgid pgid
|
||||||
|
}
|
||||||
|
var roots []root
|
||||||
|
|
||||||
|
// Sort nodes by highest depth first.
|
||||||
|
nodes := make(nodesByDepth, 0, len(t.nodes))
|
||||||
|
for _, n := range t.nodes {
|
||||||
|
nodes = append(nodes, n)
|
||||||
|
}
|
||||||
|
sort.Sort(nodes)
|
||||||
|
|
||||||
|
// Spill nodes by deepest first.
|
||||||
|
for i := 0; i < len(nodes); i++ {
|
||||||
|
n := nodes[i]
|
||||||
|
|
||||||
|
// Save existing root buckets for later.
|
||||||
|
if n.parent == nil && n.pgid != 0 {
|
||||||
|
roots = append(roots, root{n, n.pgid})
|
||||||
}
|
}
|
||||||
|
|
||||||
// Sort branches by highest depth first.
|
// Split nodes and write them.
|
||||||
branches := make(branches, 0, len(t.branches))
|
newNodes := n.split(t.db.pageSize)
|
||||||
for _, b := range t.branches {
|
|
||||||
branches = append(branches, b)
|
// If this is a root node that split then create a parent node.
|
||||||
|
if n.parent == nil && len(newNodes) > 1 {
|
||||||
|
n.parent = &node{
|
||||||
|
isLeaf: false,
|
||||||
|
key: newNodes[0].inodes[0].key,
|
||||||
|
depth: n.depth - 1,
|
||||||
|
inodes: make(inodes, 0),
|
||||||
}
|
}
|
||||||
sort.Sort(branches)
|
nodes = append(nodes, n.parent)
|
||||||
|
sort.Sort(nodes)
|
||||||
// Spill branches by deepest first.
|
|
||||||
for _, b := range branches {
|
|
||||||
t.spillBranch(b)
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
// spillLeaf writes a leaf to one or more dirty pages.
|
// Write nodes to dirty pages.
|
||||||
func (t *RWTransaction) spillLeaf(l *leaf) {
|
for i, newNode := range newNodes {
|
||||||
parent := l.parent
|
// Allocate contiguous space for the node.
|
||||||
|
p := t.allocate((newNode.size() / t.db.pageSize) + 1)
|
||||||
|
|
||||||
// Split leaf, if necessary.
|
// Write the node to the page.
|
||||||
leafs := l.split(t.db.pageSize)
|
newNode.write(p)
|
||||||
|
newNode.pgid = p.id
|
||||||
|
newNode.parent = n.parent
|
||||||
|
|
||||||
// TODO: If this is a root leaf and we split then add a parent branch.
|
// The first node should use the existing entry, other nodes are inserts.
|
||||||
|
var oldKey []byte
|
||||||
|
if i == 0 {
|
||||||
|
oldKey = n.key
|
||||||
|
} else {
|
||||||
|
oldKey = newNode.inodes[0].key
|
||||||
|
}
|
||||||
|
|
||||||
// Process each resulting leaf.
|
// Update the parent entry.
|
||||||
previd := leafs[0].pgid
|
if newNode.parent != nil {
|
||||||
for index, l := range leafs {
|
newNode.parent.put(oldKey, newNode.inodes[0].key, nil, newNode.pgid)
|
||||||
// Allocate contiguous space for the leaf.
|
}
|
||||||
p := t.allocate((l.size() / t.db.pageSize) + 1)
|
|
||||||
|
|
||||||
// Write the leaf to the page.
|
|
||||||
l.write(p)
|
|
||||||
|
|
||||||
// Insert or replace the node in the parent branch with the new pgid.
|
|
||||||
if parent != nil {
|
|
||||||
parent.put(previd, p.id, l.items[0].key, (index == 0))
|
|
||||||
previd = l.pgid
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
// spillBranch writes a branch to one or more dirty pages.
|
// Update roots with new roots.
|
||||||
func (t *RWTransaction) spillBranch(l *branch) {
|
for _, root := range roots {
|
||||||
warn("[pending] RWTransaction.spillBranch()") // TODO
|
for _, b := range t.sys.buckets {
|
||||||
|
if b.root == root.pgid {
|
||||||
|
b.root = root.node.root().pgid
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// write writes any dirty pages to disk.
|
// write writes any dirty pages to disk.
|
||||||
|
@ -231,28 +256,8 @@ func (t *RWTransaction) writeMeta() error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// leaf retrieves a leaf object based on the current position of a cursor.
|
// node retrieves a node based on a cursor stack.
|
||||||
func (t *RWTransaction) leaf(c *Cursor) *leaf {
|
func (t *RWTransaction) node(stack []elem) *node {
|
||||||
e := c.stack[len(c.stack)-1]
|
|
||||||
id := e.page.id
|
|
||||||
|
|
||||||
// Retrieve leaf if it has already been fetched.
|
|
||||||
if l := t.leafs[id]; l != nil {
|
|
||||||
return l
|
|
||||||
}
|
|
||||||
|
|
||||||
// Otherwise create a leaf and cache it.
|
|
||||||
l := &leaf{}
|
|
||||||
l.read(t.page(id))
|
|
||||||
l.parent = t.branch(c.stack[:len(c.stack)-1])
|
|
||||||
t.leafs[id] = l
|
|
||||||
|
|
||||||
return l
|
|
||||||
}
|
|
||||||
|
|
||||||
// branch retrieves a branch object based on a cursor stack.
|
|
||||||
// This should only be called from leaf().
|
|
||||||
func (t *RWTransaction) branch(stack []elem) *branch {
|
|
||||||
if len(stack) == 0 {
|
if len(stack) == 0 {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
@ -260,16 +265,16 @@ func (t *RWTransaction) branch(stack []elem) *branch {
|
||||||
// Retrieve branch if it has already been fetched.
|
// Retrieve branch if it has already been fetched.
|
||||||
e := &stack[len(stack)-1]
|
e := &stack[len(stack)-1]
|
||||||
id := e.page.id
|
id := e.page.id
|
||||||
if b := t.branches[id]; b != nil {
|
if n := t.nodes[id]; n != nil {
|
||||||
return b
|
return n
|
||||||
}
|
}
|
||||||
|
|
||||||
// Otherwise create a branch and cache it.
|
// Otherwise create a branch and cache it.
|
||||||
b := &branch{}
|
n := &node{}
|
||||||
b.read(t.page(id))
|
n.read(t.page(id))
|
||||||
b.depth = len(stack) - 1
|
n.depth = len(stack) - 1
|
||||||
b.parent = t.branch(stack[:len(stack)-1])
|
n.parent = t.node(stack[:len(stack)-1])
|
||||||
t.branches[id] = b
|
t.nodes[id] = n
|
||||||
|
|
||||||
return b
|
return n
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
package bolt
|
package bolt
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
|
@ -28,3 +29,35 @@ func TestTransactionCreateBucket(t *testing.T) {
|
||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Ensure that a bucket cannot be created twice.
|
||||||
|
func TestTransactionRecreateBucket(t *testing.T) {
|
||||||
|
withOpenDB(func(db *DB, path string) {
|
||||||
|
// Create a bucket.
|
||||||
|
err := db.CreateBucket("widgets")
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
// Create the same bucket again.
|
||||||
|
err = db.CreateBucket("widgets")
|
||||||
|
assert.Equal(t, err, &Error{"bucket already exists", nil})
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// Ensure that a bucket is created with a non-blank name.
|
||||||
|
func TestTransactionCreateBucketWithoutName(t *testing.T) {
|
||||||
|
withOpenDB(func(db *DB, path string) {
|
||||||
|
err := db.CreateBucket("")
|
||||||
|
assert.Equal(t, err, &Error{"bucket name cannot be blank", nil})
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// Ensure that a bucket name is not too long.
|
||||||
|
func TestTransactionCreateBucketWithLongName(t *testing.T) {
|
||||||
|
withOpenDB(func(db *DB, path string) {
|
||||||
|
err := db.CreateBucket(strings.Repeat("X", 255))
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
err = db.CreateBucket(strings.Repeat("X", 256))
|
||||||
|
assert.Equal(t, err, &Error{"bucket name too long", nil})
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
15
sys.go
15
sys.go
|
@ -25,6 +25,16 @@ func (s *sys) get(key string) *bucket {
|
||||||
return s.buckets[key]
|
return s.buckets[key]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// getByRoot retrieves a bucket by root page id.
|
||||||
|
func (s *sys) getByRoot(pgid pgid) *bucket {
|
||||||
|
for _, b := range s.buckets {
|
||||||
|
if b.root == pgid {
|
||||||
|
return b
|
||||||
|
}
|
||||||
|
}
|
||||||
|
panic("root not found")
|
||||||
|
}
|
||||||
|
|
||||||
// put sets a new value for a bucket.
|
// put sets a new value for a bucket.
|
||||||
func (s *sys) put(key string, b *bucket) {
|
func (s *sys) put(key string, b *bucket) {
|
||||||
s.buckets[key] = b
|
s.buckets[key] = b
|
||||||
|
@ -32,7 +42,9 @@ func (s *sys) put(key string, b *bucket) {
|
||||||
|
|
||||||
// del deletes a bucket by name.
|
// del deletes a bucket by name.
|
||||||
func (s *sys) del(key string) {
|
func (s *sys) del(key string) {
|
||||||
|
if b := s.buckets[key]; b != nil {
|
||||||
delete(s.buckets, key)
|
delete(s.buckets, key)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// read initializes the data from an on-disk page.
|
// read initializes the data from an on-disk page.
|
||||||
|
@ -61,7 +73,8 @@ func (s *sys) read(p *page) {
|
||||||
|
|
||||||
// Associate keys and buckets.
|
// Associate keys and buckets.
|
||||||
for index, key := range keys {
|
for index, key := range keys {
|
||||||
s.buckets[key] = buckets[index]
|
b := &bucket{buckets[index].root}
|
||||||
|
s.buckets[key] = b
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -62,7 +62,7 @@ func (t *Transaction) Cursor(name string) *Cursor {
|
||||||
if b == nil {
|
if b == nil {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
return b.Cursor()
|
return b.cursor()
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get retrieves the value for a key in a named bucket.
|
// Get retrieves the value for a key in a named bucket.
|
||||||
|
|
Loading…
Reference in New Issue