diff --git a/NOTES b/NOTES new file mode 100644 index 0000000..967d3aa --- /dev/null +++ b/NOTES @@ -0,0 +1,17 @@ + + + ===0=== + | d|g | + |1|2|3| + ======= + | | | + ------ | ------- + | | | +===1=== ===2=== ===3=== +|a|b|c| |d|e|f| |g|h|i| +|-|-|-| |-|-|-| |-|-|-| +|*|*|*| |*|*|*| |*|*|*| +|*|*|*| |*|*|*| |*|*|*| +|*|*|*| |*|*|*| |*|*|*| + + diff --git a/TODO b/TODO index efd1a81..9c8a1a0 100644 --- a/TODO +++ b/TODO @@ -2,7 +2,7 @@ TODO ==== X Open DB. X Initialize transaction. -- Cursor First, Goto(key), Next +- Cursor First, Get(key), Next - RWTransaction.insert() - rebalance - adjust cursors diff --git a/bnode.go b/bnode.go deleted file mode 100644 index 3ad4f58..0000000 --- a/bnode.go +++ /dev/null @@ -1,20 +0,0 @@ -package bolt - -import ( - "unsafe" -) - -const bnodeSize = int(unsafe.Sizeof(lnode{})) - -// bnode represents a node on a branch page. -type bnode struct { - pos uint32 - ksize uint32 - pgid pgid -} - -// key returns a byte slice of the node key. -func (n *bnode) key() []byte { - buf := (*[maxAllocSize]byte)(unsafe.Pointer(n)) - return buf[n.pos : n.pos+n.ksize] -} diff --git a/branch.go b/branch.go deleted file mode 100644 index c4f0b1f..0000000 --- a/branch.go +++ /dev/null @@ -1,129 +0,0 @@ -package bolt - -import ( - "bytes" - "unsafe" -) - -// branch represents a temporary in-memory branch page. -type branch struct { - pgid pgid - depth int - parent *branch - items branchItems -} - -// size returns the size of the branch after serialization. -func (b *branch) size() int { - var size int = pageHeaderSize - for _, item := range b.items { - size += bnodeSize + len(item.key) - } - return size -} - -// put adds a new node or replaces an existing node. 
-func (b *branch) put(id pgid, newid pgid, key []byte, replace bool) { - var index int - for ; index < len(b.items); index++ { - if b.items[index].pgid == id { - break - } - } - - if !replace { - index++ - b.items = append(b.items, branchItem{}) - if index < len(b.items) { - copy(b.items[index+1:], b.items[index:]) - } - } - - b.items[index].pgid = newid - b.items[index].key = key -} - -// read initializes the item data from an on-disk page. -func (b *branch) read(p *page) { - b.pgid = p.id - b.items = make(branchItems, int(p.count)) - bnodes := (*[maxNodesPerPage]bnode)(unsafe.Pointer(&p.ptr)) - for i := 0; i < int(p.count); i++ { - bnode := &bnodes[i] - item := &b.items[i] - item.pgid = bnode.pgid - item.key = bnode.key() - } -} - -// write writes the items onto a branch page. -func (b *branch) write(p *page) { - // Initialize page. - p.flags |= p_branch - p.count = uint16(len(b.items)) - - // Loop over each item and write it to the page. - bnodes := (*[maxNodesPerPage]bnode)(unsafe.Pointer(&p.ptr)) - buf := (*[maxAllocSize]byte)(unsafe.Pointer(&p.ptr))[lnodeSize*len(b.items):] - for index, item := range b.items { - // Write node. - bnode := &bnodes[index] - bnode.pgid = item.pgid - bnode.pos = uint32(uintptr(unsafe.Pointer(&buf[0])) - uintptr(unsafe.Pointer(bnode))) - bnode.ksize = uint32(len(item.key)) - - // Write key to the end of the page. - copy(buf[0:], item.key) - buf = buf[len(item.key):] - } -} - -// split divides up the noes in the branch into appropriately sized groups. -func (b *branch) split(pageSize int) []*branch { - // Ignore the split if the page doesn't have at least enough nodes for - // multiple pages or if the data can fit on a single page. - if len(b.items) <= (minKeysPerPage*2) || b.size() < pageSize { - return []*branch{b} - } - - // Set fill threshold to 50%. - threshold := pageSize / 2 - - // Otherwise group into smaller pages and target a given fill size. 
- size := 0 - current := &branch{} - branches := make([]*branch, 0) - - for index, item := range b.items { - nodeSize := bnodeSize + len(item.key) - - if len(current.items) >= minKeysPerPage && index < len(b.items)-minKeysPerPage && size+nodeSize > threshold { - size = pageHeaderSize - branches = append(branches, current) - current = &branch{} - } - - size += nodeSize - current.items = append(current.items, item) - } - branches = append(branches, current) - - return branches -} - -type branches []*branch - -func (s branches) Len() int { return len(s) } -func (s branches) Swap(i, j int) { s[i], s[j] = s[j], s[i] } -func (s branches) Less(i, j int) bool { return s[i].depth < s[j].depth } - -type branchItems []branchItem - -type branchItem struct { - pgid pgid - key []byte -} - -func (s branchItems) Len() int { return len(s) } -func (s branchItems) Swap(i, j int) { s[i], s[j] = s[j], s[i] } -func (s branchItems) Less(i, j int) bool { return bytes.Compare(s[i].key, s[j].key) == -1 } diff --git a/branch_test.go b/branch_test.go deleted file mode 100644 index 4a810c8..0000000 --- a/branch_test.go +++ /dev/null @@ -1,161 +0,0 @@ -package bolt - -import ( - "testing" - "unsafe" - - "github.com/stretchr/testify/assert" -) - -// Ensure that a branch can replace a key. -func TestBranchPutReplace(t *testing.T) { - b := &branch{ - items: branchItems{ - branchItem{pgid: 1, key: []byte("bar")}, - branchItem{pgid: 2, key: []byte("baz")}, - branchItem{pgid: 3, key: []byte("foo")}, - }, - } - b.put(1, 4, []byte("bar"), true) - b.put(2, 5, []byte("boo"), true) - assert.Equal(t, len(b.items), 3) - assert.Equal(t, b.items[0].pgid, pgid(4)) - assert.Equal(t, string(b.items[0].key), "bar") - assert.Equal(t, b.items[1].pgid, pgid(5)) - assert.Equal(t, string(b.items[1].key), "boo") - assert.Equal(t, b.items[2].pgid, pgid(3)) - assert.Equal(t, string(b.items[2].key), "foo") -} - -// Ensure that a branch can insert a key. 
-func TestBranchPutInsert(t *testing.T) { - b := &branch{ - items: branchItems{ - branchItem{pgid: 1, key: []byte("bar")}, - branchItem{pgid: 2, key: []byte("foo")}, - }, - } - b.put(1, 4, []byte("baz"), false) - b.put(2, 5, []byte("zzz"), false) - assert.Equal(t, len(b.items), 4) - assert.Equal(t, b.items[0].pgid, pgid(1)) - assert.Equal(t, string(b.items[0].key), "bar") - assert.Equal(t, b.items[1].pgid, pgid(4)) - assert.Equal(t, string(b.items[1].key), "baz") - assert.Equal(t, b.items[2].pgid, pgid(2)) - assert.Equal(t, string(b.items[2].key), "foo") - assert.Equal(t, b.items[3].pgid, pgid(5)) - assert.Equal(t, string(b.items[3].key), "zzz") -} - -// Ensure that a branch can deserialize from a page. -func TestBranchRead(t *testing.T) { - // Create a page. - var buf [4096]byte - page := (*page)(unsafe.Pointer(&buf[0])) - page.count = 2 - - // Insert 2 items at the beginning. sizeof(bnode) == 16 - nodes := (*[3]bnode)(unsafe.Pointer(&page.ptr)) - nodes[0] = bnode{pos: 32, ksize: 3, pgid: 100} // pos = sizeof(bnode) * 2 - nodes[1] = bnode{pos: 19, ksize: 10, pgid: 101} // pos = sizeof(bnode) + 3 - - // Write data for the nodes at the end. - data := (*[4096]byte)(unsafe.Pointer(&nodes[2])) - copy(data[:], []byte("bar")) - copy(data[3:], []byte("helloworld")) - - // Deserialize page into a branch. - b := &branch{} - b.read(page) - - // Check that there are two items with correct data. - assert.Equal(t, len(b.items), 2) - assert.Equal(t, b.items[0].key, []byte("bar")) - assert.Equal(t, b.items[1].key, []byte("helloworld")) -} - -// Ensure that a branch can serialize itself. -func TestBranchWrite(t *testing.T) { - b := &branch{ - items: branchItems{ - branchItem{pgid: 1, key: []byte("susy")}, - branchItem{pgid: 2, key: []byte("ricki")}, - branchItem{pgid: 3, key: []byte("john")}, - }, - } - - // Write it to a page. - var buf [4096]byte - p := (*page)(unsafe.Pointer(&buf[0])) - b.write(p) - - // Read the page back in. 
- b2 := &branch{} - b2.read(p) - - // Check that the two pages are the same. - assert.Equal(t, len(b2.items), 3) - assert.Equal(t, b2.items[0].pgid, pgid(1)) - assert.Equal(t, b2.items[0].key, []byte("susy")) - assert.Equal(t, b2.items[1].pgid, pgid(2)) - assert.Equal(t, b2.items[1].key, []byte("ricki")) - assert.Equal(t, b2.items[2].pgid, pgid(3)) - assert.Equal(t, b2.items[2].key, []byte("john")) -} - -// Ensure that a branch can split into appropriate subgroups. -func TestBranchSplit(t *testing.T) { - // Create a branch. - b := &branch{ - items: branchItems{ - branchItem{pgid: 1, key: []byte("00000001")}, - branchItem{pgid: 2, key: []byte("00000002")}, - branchItem{pgid: 3, key: []byte("00000003")}, - branchItem{pgid: 4, key: []byte("00000004")}, - branchItem{pgid: 5, key: []byte("00000005")}, - }, - } - - // Split between 3 & 4. - branches := b.split(100) - - assert.Equal(t, len(branches), 2) - assert.Equal(t, len(branches[0].items), 2) - assert.Equal(t, len(branches[1].items), 3) -} - -// Ensure that a branch with the minimum number of items just returns a single branch. -func TestBranchSplitWithMinKeys(t *testing.T) { - // Create a branch. - b := &branch{ - items: branchItems{ - branchItem{pgid: 1, key: []byte("00000001")}, - branchItem{pgid: 2, key: []byte("00000002")}, - }, - } - - // Split. - branches := b.split(20) - assert.Equal(t, len(branches), 1) - assert.Equal(t, len(branches[0].items), 2) -} - -// Ensure that a branch that has keys that all fit on a page just returns one branch. -func TestBranchSplitFitsInPage(t *testing.T) { - // Create a branch. - b := &branch{ - items: branchItems{ - branchItem{pgid: 1, key: []byte("00000001")}, - branchItem{pgid: 2, key: []byte("00000002")}, - branchItem{pgid: 3, key: []byte("00000003")}, - branchItem{pgid: 4, key: []byte("00000004")}, - branchItem{pgid: 5, key: []byte("00000005")}, - }, - } - - // Split. 
- branches := b.split(4096) - assert.Equal(t, len(branches), 1) - assert.Equal(t, len(branches[0].items), 5) -} diff --git a/bucket.go b/bucket.go index 6cfe1ca..b1c1cc0 100644 --- a/bucket.go +++ b/bucket.go @@ -15,13 +15,8 @@ func (b *Bucket) Name() string { return b.name } -// Get retrieves the value for a key in the bucket. -func (b *Bucket) Get(key []byte) []byte { - return b.Cursor().Get(key) -} - -// Cursor creates a new cursor for this bucket. -func (b *Bucket) Cursor() *Cursor { +// cursor creates a new cursor for this bucket. +func (b *Bucket) cursor() *Cursor { return &Cursor{ transaction: b.transaction, root: b.root, diff --git a/cursor.go b/cursor.go index 7b8c9bb..d56119b 100644 --- a/cursor.go +++ b/cursor.go @@ -14,7 +14,7 @@ type Cursor struct { // elem represents a node on a page that's on the cursor's stack. type elem struct { page *page - index int + index uint16 } // First moves the cursor to the first item in the bucket and returns its key and data. @@ -30,25 +30,29 @@ func (c *Cursor) Next() ([]byte, []byte) { // Get positions the cursor at a specific key and returns the its value. func (c *Cursor) Get(key []byte) []byte { - if c.Goto(key) { - return c.node().value() - } - return nil -} - -// Goto positions the cursor at a specific key. -// Returns true if an exact match or false if positioned after the closest match. -func (c *Cursor) Goto(key []byte) bool { - // TODO(benbjohnson): Optimize for specific use cases. - // Start from root page and traverse to correct page. c.stack = c.stack[:0] c.search(key, c.transaction.page(c.root)) + p, index := c.top() - return false + // If the cursor is pointing to the end of page then return nil. + if index == p.count { + return nil + } + + // If our target node isn't the same key as what's passed in then return nil. 
+ // c.page().hexdump(512) + if !bytes.Equal(key, c.node().key()) { + return nil + } + + return c.node().value() } func (c *Cursor) search(key []byte, p *page) { + if (p.flags & (p_branch | p_leaf)) == 0 { + panic("invalid page type: " + p.typ()) + } e := elem{page: p} c.stack = append(c.stack, e) @@ -58,12 +62,26 @@ func (c *Cursor) search(key []byte, p *page) { return } - // Binary search for the correct branch node. - nodes := p.bnodes() - e.index = sort.Search(int(p.count)-1, func(i int) bool { return bytes.Compare(nodes[i+1].key(), key) != -1 }) + // Binary search for the correct range. + inodes := p.branchPageElements() + + var exact bool + index := sort.Search(int(p.count), func(i int) bool { + // TODO(benbjohnson): Optimize this range search. It's a bit hacky right now. + // sort.Search() finds the lowest index where f() != -1 but we need the highest index. + ret := bytes.Compare(inodes[i].key(), key) + if ret == 0 { + exact = true + } + return ret != -1 + }) + if !exact && index > 0 { + index-- + } + e.index = uint16(index) // Recursively search to the next page. - c.search(key, c.transaction.page(nodes[e.index].pgid)) + c.search(key, c.transaction.page(inodes[e.index].pgid)) } // nsearch searches a leaf node for the index of the node that matches key. @@ -71,16 +89,17 @@ func (c *Cursor) nsearch(key []byte, p *page) { e := &c.stack[len(c.stack)-1] // Binary search for the correct leaf node index. - nodes := p.lnodes() - e.index = sort.Search(int(p.count), func(i int) bool { - return bytes.Compare(nodes[i].key(), key) != -1 + inodes := p.leafPageElements() + index := sort.Search(int(p.count), func(i int) bool { + return bytes.Compare(inodes[i].key(), key) != -1 }) + e.index = uint16(index) } // top returns the page and leaf node that the cursor is currently pointing at. 
-func (c *Cursor) top() (*page, *lnode) { +func (c *Cursor) top() (*page, uint16) { elem := c.stack[len(c.stack)-1] - return elem.page, elem.page.lnode(elem.index) + return elem.page, elem.index } // page returns the page that the cursor is currently pointing at. @@ -89,7 +108,7 @@ func (c *Cursor) page() *page { } // node returns the leaf node that the cursor is currently positioned on. -func (c *Cursor) node() *lnode { +func (c *Cursor) node() *leafPageElement { elem := c.stack[len(c.stack)-1] - return elem.page.lnode(elem.index) + return elem.page.leafPageElement(elem.index) } diff --git a/db.go b/db.go index 646c31b..41fbc64 100644 --- a/db.go +++ b/db.go @@ -1,6 +1,7 @@ package bolt import ( + "io" "os" "sync" "syscall" @@ -114,7 +115,7 @@ func (db *DB) mmap() error { } // TEMP(benbjohnson): Set max size to 1MB. - size := 2 << 20 + size := 2 << 30 // Memory-map the data file as a byte slice. if db.data, err = db.syscall.Mmap(int(db.file.Fd()), 0, size, syscall.PROT_READ, syscall.MAP_SHARED); err != nil { @@ -224,10 +225,7 @@ func (db *DB) RWTransaction() (*RWTransaction, error) { } // Create a transaction associated with the database. - t := &RWTransaction{ - branches: make(map[pgid]*branch), - leafs: make(map[pgid]*leaf), - } + t := &RWTransaction{nodes: make(map[pgid]*node)} t.init(db) return t, nil @@ -319,6 +317,44 @@ func (db *DB) Delete(name string, key []byte) error { return t.Commit() } +// Copy writes the entire database to a writer. +func (db *DB) Copy(w io.Writer) error { + if !db.opened { + return DatabaseNotOpenError + } + + // Maintain a reader transaction so pages don't get reclaimed. + t, err := db.Transaction() + if err != nil { + return err + } + defer t.Close() + + // Open reader on the database. + f, err := os.Open(db.path) + if err != nil { + return err + } + defer f.Close() + + // Copy everything. 
+ if _, err := io.Copy(w, f); err != nil { + return err + } + return nil +} + +// CopyFile copies the entire database to file at the given path. +func (db *DB) CopyFile(path string) error { + f, err := os.Create(path) + if err != nil { + return err + } + defer f.Close() + + return db.Copy(f) +} + // page retrieves a page reference from the mmap based on the current page size. func (db *DB) page(id pgid) *page { return (*page)(unsafe.Pointer(&db.data[id*pgid(db.pageSize)])) diff --git a/db_test.go b/db_test.go index 53988b7..50ec3c3 100644 --- a/db_test.go +++ b/db_test.go @@ -1,11 +1,14 @@ package bolt import ( + "bytes" + "fmt" "io" "io/ioutil" "os" "syscall" "testing" + "testing/quick" "time" "unsafe" @@ -99,25 +102,6 @@ func TestDBMmapStatError(t *testing.T) { }) } -// Ensure that mmap errors get returned. -/* -func TestDBMmapError(t *testing.T) { - withMockDB(func(db *DB, mockos *mockos, mocksyscall *mocksyscall, path string) { - exp := errors.New("") - file, metafile := &mockfile{}, &mockfile{} - mockos.On("OpenFile", path, os.O_RDWR|os.O_CREATE, os.FileMode(0666)).Return(file, nil) - mockos.On("OpenFile", path, os.O_RDWR|os.O_SYNC, os.FileMode(0666)).Return(metafile, nil) - mockos.On("Getpagesize").Return(0x1000) - file.On("ReadAt", mock.Anything, int64(0)).Return(0, nil) - file.On("Stat").Return(&mockfileinfo{"", 0x2000, 0666, time.Now(), false, nil}, nil) - metafile.On("WriteAt", mock.Anything, int64(0)).Return(0, nil) - mocksyscall.On("Mmap", 0, int64(0), 0x2000, syscall.PROT_READ, syscall.MAP_SHARED).Return(([]byte)(nil), exp) - err := db.Open(path, 0666) - assert.Equal(t, err, exp) - }) -} -*/ - // Ensure that corrupt meta0 page errors get returned. 
func TestDBCorruptMeta0(t *testing.T) { withMockDB(func(db *DB, mockos *mockos, mocksyscall *mocksyscall, path string) { @@ -150,10 +134,6 @@ func TestDBCorruptMeta0(t *testing.T) { }) } -//-------------------------------------- -// Transaction() -//-------------------------------------- - // Ensure that a database cannot open a transaction when it's not open. func TestDBTransactionDatabaseNotOpenError(t *testing.T) { withDB(func(db *DB, path string) { @@ -163,6 +143,54 @@ func TestDBTransactionDatabaseNotOpenError(t *testing.T) { }) } +// Ensure that a bucket can write a key/value. +func TestDBPut(t *testing.T) { + withOpenDB(func(db *DB, path string) { + db.CreateBucket("widgets") + err := db.Put("widgets", []byte("foo"), []byte("bar")) + assert.NoError(t, err) + value, err := db.Get("widgets", []byte("foo")) + if assert.NoError(t, err) { + assert.Equal(t, value, []byte("bar")) + } + }) +} + +// Ensure that a bucket can write random keys and values across multiple txns. +func TestDBPutRandom(t *testing.T) { + f := func(items testKeyValuePairs) bool { + withOpenDB(func(db *DB, path string) { + db.CreateBucket("widgets") + for _, item := range items { + if len(item.Key) == 0 { + continue + } + if err := db.Put("widgets", item.Key, item.Value); err != nil { + panic("put error: " + err.Error()) + } + } + for _, item := range items { + if len(item.Key) == 0 { + continue + } + value, err := db.Get("widgets", item.Key) + if err != nil { + panic("get error: " + err.Error()) + } + if !bytes.Equal(value, []byte(item.Value)) { + // db.CopyFile("/tmp/bolt.random.db") + t.Fatalf("value mismatch:\n%x\n%x", item.Value, value) + } + } + fmt.Fprint(os.Stderr, ".") + }) + return true + } + if err := quick.Check(f, qc()); err != nil { + t.Error(err) + } +} + // withDB executes a function with a database reference. 
func withDB(fn func(*DB, string)) { f, _ := ioutil.TempFile("", "bolt-") diff --git a/leaf.go b/leaf.go deleted file mode 100644 index c86e041..0000000 --- a/leaf.go +++ /dev/null @@ -1,120 +0,0 @@ -package bolt - -import ( - "bytes" - "sort" - "unsafe" -) - -// leaf represents an in-memory, deserialized leaf page. -type leaf struct { - pgid pgid - parent *branch - items leafItems -} - -// size returns the size of the leaf after serialization. -func (l *leaf) size() int { - var size int = pageHeaderSize - for _, item := range l.items { - size += lnodeSize + len(item.key) + len(item.value) - } - return size -} - -// put inserts or replaces a key on a leaf page. -func (l *leaf) put(key []byte, value []byte) { - // Find insertion index. - index := sort.Search(len(l.items), func(i int) bool { return bytes.Compare(l.items[i].key, key) != -1 }) - - // If there is no existing key then add a new item. - if index == len(l.items) { - l.items = append(l.items, leafItem{}) - } else if len(l.items) == 0 || !bytes.Equal(l.items[index].key, key) { - l.items = append(l.items, leafItem{}) - copy(l.items[index+1:], l.items[index:]) - } - l.items[index].key = key - l.items[index].value = value -} - -// read initializes the item data from an on-disk page. -func (l *leaf) read(p *page) { - l.pgid = p.id - l.items = make(leafItems, int(p.count)) - lnodes := (*[maxNodesPerPage]lnode)(unsafe.Pointer(&p.ptr)) - for i := 0; i < int(p.count); i++ { - lnode := &lnodes[i] - item := &l.items[i] - item.key = lnode.key() - item.value = lnode.value() - } -} - -// write writes the items onto one or more leaf pages. -func (l *leaf) write(p *page) { - // Initialize page. - p.flags |= p_leaf - p.count = uint16(len(l.items)) - - // Loop over each item and write it to the page. - lnodes := (*[maxNodesPerPage]lnode)(unsafe.Pointer(&p.ptr)) - b := (*[maxAllocSize]byte)(unsafe.Pointer(&p.ptr))[lnodeSize*len(l.items):] - for index, item := range l.items { - // Write node. 
- lnode := &lnodes[index] - lnode.pos = uint32(uintptr(unsafe.Pointer(&b[0])) - uintptr(unsafe.Pointer(lnode))) - lnode.ksize = uint32(len(item.key)) - lnode.vsize = uint32(len(item.value)) - - // Write data to the end of the page. - copy(b[0:], item.key) - b = b[len(item.key):] - copy(b[0:], item.value) - b = b[len(item.value):] - } -} - -// split divides up the noes in the page into appropriately sized groups. -func (l *leaf) split(pageSize int) []*leaf { - // Ignore the split if the page doesn't have at least enough nodes for - // multiple pages or if the data can fit on a single page. - if len(l.items) <= (minKeysPerPage*2) || l.size() < pageSize { - return []*leaf{l} - } - - // Set fill threshold to 50%. - threshold := pageSize / 2 - - // Otherwise group into smaller pages and target a given fill size. - size := 0 - current := &leaf{} - leafs := make([]*leaf, 0) - - for index, item := range l.items { - nodeSize := lnodeSize + len(item.key) + len(item.value) - - if len(current.items) >= minKeysPerPage && index < len(l.items)-minKeysPerPage && size+nodeSize > threshold { - size = pageHeaderSize - leafs = append(leafs, current) - current = &leaf{} - } - - size += nodeSize - current.items = append(current.items, item) - } - leafs = append(leafs, current) - - return leafs -} - -type leafItems []leafItem - -type leafItem struct { - key []byte - value []byte -} - -func (s leafItems) Len() int { return len(s) } -func (s leafItems) Swap(i, j int) { s[i], s[j] = s[j], s[i] } -func (s leafItems) Less(i, j int) bool { return bytes.Compare(s[i].key, s[j].key) == -1 } diff --git a/leaf_test.go b/leaf_test.go deleted file mode 100644 index ce98e37..0000000 --- a/leaf_test.go +++ /dev/null @@ -1,124 +0,0 @@ -package bolt - -import ( - "testing" - "unsafe" - - "github.com/stretchr/testify/assert" -) - -// Ensure that a leaf can insert a key/value. 
-func TestLeafPut(t *testing.T) { - l := &leaf{items: make(leafItems, 0)} - l.put([]byte("baz"), []byte("2")) - l.put([]byte("foo"), []byte("0")) - l.put([]byte("bar"), []byte("1")) - l.put([]byte("foo"), []byte("3")) - assert.Equal(t, len(l.items), 3) - assert.Equal(t, l.items[0], leafItem{[]byte("bar"), []byte("1")}) - assert.Equal(t, l.items[1], leafItem{[]byte("baz"), []byte("2")}) - assert.Equal(t, l.items[2], leafItem{[]byte("foo"), []byte("3")}) -} - -// Ensure that a leaf can deserialize from a page. -func TestLeafRead(t *testing.T) { - // Create a page. - var buf [4096]byte - page := (*page)(unsafe.Pointer(&buf[0])) - page.count = 2 - - // Insert 2 leaf items at the beginning. sizeof(lnode) == 16 - nodes := (*[3]lnode)(unsafe.Pointer(&page.ptr)) - nodes[0] = lnode{flags: 0, pos: 32, ksize: 3, vsize: 4} // pos = sizeof(lnode) * 2 - nodes[1] = lnode{flags: 0, pos: 23, ksize: 10, vsize: 3} // pos = sizeof(lnode) + 3 + 4 - - // Write data for the nodes at the end. - data := (*[4096]byte)(unsafe.Pointer(&nodes[2])) - copy(data[:], []byte("barfooz")) - copy(data[7:], []byte("helloworldbye")) - - // Deserialize page into a leaf. - l := &leaf{} - l.read(page) - - // Check that there are two items with correct data. - assert.Equal(t, len(l.items), 2) - assert.Equal(t, l.items[0].key, []byte("bar")) - assert.Equal(t, l.items[0].value, []byte("fooz")) - assert.Equal(t, l.items[1].key, []byte("helloworld")) - assert.Equal(t, l.items[1].value, []byte("bye")) -} - -// Ensure that a leaf can serialize itself. -func TestLeafWrite(t *testing.T) { - // Create a leaf. - l := &leaf{items: make(leafItems, 0)} - l.put([]byte("susy"), []byte("que")) - l.put([]byte("ricki"), []byte("lake")) - l.put([]byte("john"), []byte("johnson")) - - // Write it to a page. - var buf [4096]byte - p := (*page)(unsafe.Pointer(&buf[0])) - l.write(p) - - // Read the page back in. - l2 := &leaf{} - l2.read(p) - - // Check that the two pages are the same. 
- assert.Equal(t, len(l2.items), 3) - assert.Equal(t, l2.items[0].key, []byte("john")) - assert.Equal(t, l2.items[0].value, []byte("johnson")) - assert.Equal(t, l2.items[1].key, []byte("ricki")) - assert.Equal(t, l2.items[1].value, []byte("lake")) - assert.Equal(t, l2.items[2].key, []byte("susy")) - assert.Equal(t, l2.items[2].value, []byte("que")) -} - -// Ensure that a leaf can split into appropriate subgroups. -func TestLeafSplit(t *testing.T) { - // Create a leaf. - l := &leaf{items: make(leafItems, 0)} - l.put([]byte("00000001"), []byte("0123456701234567")) - l.put([]byte("00000002"), []byte("0123456701234567")) - l.put([]byte("00000003"), []byte("0123456701234567")) - l.put([]byte("00000004"), []byte("0123456701234567")) - l.put([]byte("00000005"), []byte("0123456701234567")) - - // Split between 3 & 4. - leafs := l.split(100) - - assert.Equal(t, len(leafs), 2) - assert.Equal(t, len(leafs[0].items), 2) - assert.Equal(t, len(leafs[1].items), 3) -} - -// Ensure that a leaf with the minimum number of items just returns a single leaf. -func TestLeafSplitWithMinKeys(t *testing.T) { - // Create a leaf. - l := &leaf{items: make(leafItems, 0)} - l.put([]byte("00000001"), []byte("0123456701234567")) - l.put([]byte("00000002"), []byte("0123456701234567")) - - // Split. - leafs := l.split(20) - assert.Equal(t, len(leafs), 1) - assert.Equal(t, len(leafs[0].items), 2) -} - -// Ensure that a leaf that has keys that all fit on a page just returns one leaf. -func TestLeafSplitFitsInPage(t *testing.T) { - // Create a leaf. - l := &leaf{items: make(leafItems, 0)} - l.put([]byte("00000001"), []byte("0123456701234567")) - l.put([]byte("00000002"), []byte("0123456701234567")) - l.put([]byte("00000003"), []byte("0123456701234567")) - l.put([]byte("00000004"), []byte("0123456701234567")) - l.put([]byte("00000005"), []byte("0123456701234567")) - - // Split. 
- leafs := l.split(4096) - assert.Equal(t, len(leafs), 1) - assert.Equal(t, len(leafs[0].items), 5) -} diff --git a/lnode.go b/lnode.go deleted file mode 100644 index dd6f494..0000000 --- a/lnode.go +++ /dev/null @@ -1,27 +0,0 @@ -package bolt - -import ( - "unsafe" -) - -const lnodeSize = int(unsafe.Sizeof(lnode{})) - -// lnode represents a node on a leaf page. -type lnode struct { - flags uint32 - pos uint32 - ksize uint32 - vsize uint32 -} - -// key returns a byte slice of the node key. -func (n *lnode) key() []byte { - buf := (*[maxAllocSize]byte)(unsafe.Pointer(n)) - return buf[n.pos : n.pos+n.ksize] -} - -// value returns a byte slice of the node value. -func (n *lnode) value() []byte { - buf := (*[maxAllocSize]byte)(unsafe.Pointer(n)) - return buf[n.pos+n.ksize : n.pos+n.ksize+n.vsize] -} diff --git a/meta.go b/meta.go index 1ef9ffc..871d092 100644 --- a/meta.go +++ b/meta.go @@ -6,9 +6,10 @@ type meta struct { magic uint32 version uint32 pageSize uint32 - pgid pgid - free pgid + flags uint32 sys pgid + free pgid + pgid pgid txnid txnid } diff --git a/node.go b/node.go new file mode 100644 index 0000000..8d03681 --- /dev/null +++ b/node.go @@ -0,0 +1,178 @@ +package bolt + +import ( + "bytes" + "sort" + "unsafe" +) + +// node represents an in-memory, deserialized page. +type node struct { + isLeaf bool + key []byte + depth int + pgid pgid + parent *node + inodes inodes +} + +// size returns the size of the node after serialization. +func (n *node) size() int { + var elementSize int = n.pageElementSize() + + var size int = pageHeaderSize + for _, item := range n.inodes { + size += elementSize + len(item.key) + len(item.value) + } + return size +} + +// pageElementSize returns the size of each page element based on the type of node. +func (n *node) pageElementSize() int { + if n.isLeaf { + return leafPageElementSize + } + return branchPageElementSize +} + +// root returns the root node in the tree. 
+func (n *node) root() *node { + if n.parent == nil { + return n + } + return n.parent +} + +// put inserts a key/value. +func (n *node) put(oldKey, newKey, value []byte, pgid pgid) { + // Find insertion index. + index := sort.Search(len(n.inodes), func(i int) bool { return bytes.Compare(n.inodes[i].key, oldKey) != -1 }) + + // Add capacity and shift nodes if we don't have an exact match and need to insert. + exact := (len(n.inodes) > 0 && index < len(n.inodes) && bytes.Equal(n.inodes[index].key, oldKey)) + if !exact { + n.inodes = append(n.inodes, inode{}) + copy(n.inodes[index+1:], n.inodes[index:]) + } + + inode := &n.inodes[index] + inode.key = newKey + inode.value = value + inode.pgid = pgid +} + +// read initializes the node from a page. +func (n *node) read(p *page) { + n.pgid = p.id + n.isLeaf = ((p.flags & p_leaf) != 0) + n.inodes = make(inodes, int(p.count)) + + for i := 0; i < int(p.count); i++ { + inode := &n.inodes[i] + if n.isLeaf { + elem := p.leafPageElement(uint16(i)) + inode.key = elem.key() + inode.value = elem.value() + } else { + elem := p.branchPageElement(uint16(i)) + inode.pgid = elem.pgid + inode.key = elem.key() + } + } + + // Save first key so we can find the node in the parent when we spill. + if len(n.inodes) > 0 { + n.key = n.inodes[0].key + } else { + n.key = nil + } +} + +// write writes the items onto one or more pages. +func (n *node) write(p *page) { + // Initialize page. + if n.isLeaf { + p.flags |= p_leaf + } else { + p.flags |= p_branch + } + p.count = uint16(len(n.inodes)) + + // Loop over each item and write it to the page. + b := (*[maxAllocSize]byte)(unsafe.Pointer(&p.ptr))[n.pageElementSize()*len(n.inodes):] + for i, item := range n.inodes { + // Write the page element. 
+ if n.isLeaf { + elem := p.leafPageElement(uint16(i)) + elem.pos = uint32(uintptr(unsafe.Pointer(&b[0])) - uintptr(unsafe.Pointer(elem))) + elem.ksize = uint32(len(item.key)) + elem.vsize = uint32(len(item.value)) + } else { + elem := p.branchPageElement(uint16(i)) + elem.pos = uint32(uintptr(unsafe.Pointer(&b[0])) - uintptr(unsafe.Pointer(elem))) + elem.ksize = uint32(len(item.key)) + elem.pgid = item.pgid + } + + // Write data for the element to the end of the page. + copy(b[0:], item.key) + b = b[len(item.key):] + copy(b[0:], item.value) + b = b[len(item.value):] + } +} + +// split divides up the node into appropriately sized nodes. +func (n *node) split(pageSize int) []*node { + // Ignore the split if the page doesn't have at least enough nodes for + // multiple pages or if the data can fit on a single page. + if len(n.inodes) <= (minKeysPerPage*2) || n.size() < pageSize { + return []*node{n} + } + + // Set fill threshold to 50%. + threshold := pageSize / 2 + + // Group into smaller pages and target a given fill size. + size := 0 + current := &node{isLeaf: n.isLeaf} + nodes := make([]*node, 0) + + for i, inode := range n.inodes { + elemSize := n.pageElementSize() + len(inode.key) + len(inode.value) + + if len(current.inodes) >= minKeysPerPage && i < len(n.inodes)-minKeysPerPage && size+elemSize > threshold { + size = pageHeaderSize + nodes = append(nodes, current) + current = &node{isLeaf: n.isLeaf} + } + + size += elemSize + current.inodes = append(current.inodes, inode) + } + nodes = append(nodes, current) + + return nodes +} + +// nodesByDepth sorts a list of branches by deepest first. +type nodesByDepth []*node + +func (s nodesByDepth) Len() int { return len(s) } +func (s nodesByDepth) Swap(i, j int) { s[i], s[j] = s[j], s[i] } +func (s nodesByDepth) Less(i, j int) bool { return s[i].depth > s[j].depth } + +// inode represents an internal node inside of a node. 
+// It can be used to point to elements in a page or point +// to an element which hasn't been added to a page yet. +type inode struct { + pgid pgid + key []byte + value []byte +} + +type inodes []inode + +func (s inodes) Len() int { return len(s) } +func (s inodes) Swap(i, j int) { s[i], s[j] = s[j], s[i] } +func (s inodes) Less(i, j int) bool { return bytes.Compare(s[i].key, s[j].key) == -1 } diff --git a/node_test.go b/node_test.go new file mode 100644 index 0000000..15d6498 --- /dev/null +++ b/node_test.go @@ -0,0 +1,129 @@ +package bolt + +import ( + "testing" + "unsafe" + + "github.com/stretchr/testify/assert" +) + +// Ensure that a node can insert a key/value. +func TestNodePut(t *testing.T) { + n := &node{inodes: make(inodes, 0)} + n.put([]byte("baz"), []byte("baz"), []byte("2"), 0) + n.put([]byte("foo"), []byte("foo"), []byte("0"), 0) + n.put([]byte("bar"), []byte("bar"), []byte("1"), 0) + n.put([]byte("foo"), []byte("foo"), []byte("3"), 0) + assert.Equal(t, len(n.inodes), 3) + assert.Equal(t, n.inodes[0].key, []byte("bar")) + assert.Equal(t, n.inodes[0].value, []byte("1")) + assert.Equal(t, n.inodes[1].key, []byte("baz")) + assert.Equal(t, n.inodes[1].value, []byte("2")) + assert.Equal(t, n.inodes[2].key, []byte("foo")) + assert.Equal(t, n.inodes[2].value, []byte("3")) +} + +// Ensure that a node can deserialize from a leaf page. +func TestNodeReadLeafPage(t *testing.T) { + // Create a page. + var buf [4096]byte + page := (*page)(unsafe.Pointer(&buf[0])) + page.flags = p_leaf + page.count = 2 + + // Insert 2 elements at the beginning. sizeof(leafPageElement) == 16 + nodes := (*[3]leafPageElement)(unsafe.Pointer(&page.ptr)) + nodes[0] = leafPageElement{flags: 0, pos: 32, ksize: 3, vsize: 4} // pos = sizeof(leafPageElement) * 2 + nodes[1] = leafPageElement{flags: 0, pos: 23, ksize: 10, vsize: 3} // pos = sizeof(leafPageElement) + 3 + 4 + + // Write data for the nodes at the end. 
+ data := (*[4096]byte)(unsafe.Pointer(&nodes[2])) + copy(data[:], []byte("barfooz")) + copy(data[7:], []byte("helloworldbye")) + + // Deserialize page into a leaf. + n := &node{} + n.read(page) + + // Check that there are two inodes with correct data. + assert.True(t, n.isLeaf) + assert.Equal(t, len(n.inodes), 2) + assert.Equal(t, n.inodes[0].key, []byte("bar")) + assert.Equal(t, n.inodes[0].value, []byte("fooz")) + assert.Equal(t, n.inodes[1].key, []byte("helloworld")) + assert.Equal(t, n.inodes[1].value, []byte("bye")) +} + +// Ensure that a node can serialize into a leaf page. +func TestNodeWriteLeafPage(t *testing.T) { + // Create a node. + n := &node{isLeaf: true, inodes: make(inodes, 0)} + n.put([]byte("susy"), []byte("susy"), []byte("que"), 0) + n.put([]byte("ricki"), []byte("ricki"), []byte("lake"), 0) + n.put([]byte("john"), []byte("john"), []byte("johnson"), 0) + + // Write it to a page. + var buf [4096]byte + p := (*page)(unsafe.Pointer(&buf[0])) + n.write(p) + + // Read the page back in. + n2 := &node{} + n2.read(p) + + // Check that the two pages are the same. + assert.Equal(t, len(n2.inodes), 3) + assert.Equal(t, n2.inodes[0].key, []byte("john")) + assert.Equal(t, n2.inodes[0].value, []byte("johnson")) + assert.Equal(t, n2.inodes[1].key, []byte("ricki")) + assert.Equal(t, n2.inodes[1].value, []byte("lake")) + assert.Equal(t, n2.inodes[2].key, []byte("susy")) + assert.Equal(t, n2.inodes[2].value, []byte("que")) +} + +// Ensure that a node can split into appropriate subgroups. +func TestNodeSplit(t *testing.T) { + // Create a node. 
+ n := &node{inodes: make(inodes, 0)} + n.put([]byte("00000001"), []byte("00000001"), []byte("0123456701234567"), 0) + n.put([]byte("00000002"), []byte("00000002"), []byte("0123456701234567"), 0) + n.put([]byte("00000003"), []byte("00000003"), []byte("0123456701234567"), 0) + n.put([]byte("00000004"), []byte("00000004"), []byte("0123456701234567"), 0) + n.put([]byte("00000005"), []byte("00000005"), []byte("0123456701234567"), 0) + + // Split between 3 & 4. + nodes := n.split(100) + + assert.Equal(t, len(nodes), 2) + assert.Equal(t, len(nodes[0].inodes), 2) + assert.Equal(t, len(nodes[1].inodes), 3) +} + +// Ensure that a page with the minimum number of inodes just returns a single node. +func TestNodeSplitWithMinKeys(t *testing.T) { + // Create a node. + n := &node{inodes: make(inodes, 0)} + n.put([]byte("00000001"), []byte("00000001"), []byte("0123456701234567"), 0) + n.put([]byte("00000002"), []byte("00000002"), []byte("0123456701234567"), 0) + + // Split. + nodes := n.split(20) + assert.Equal(t, len(nodes), 1) + assert.Equal(t, len(nodes[0].inodes), 2) +} + +// Ensure that a node that has keys that all fit on a page just returns one leaf. +func TestNodeSplitFitsInPage(t *testing.T) { + // Create a node. + n := &node{inodes: make(inodes, 0)} + n.put([]byte("00000001"), []byte("00000001"), []byte("0123456701234567"), 0) + n.put([]byte("00000002"), []byte("00000002"), []byte("0123456701234567"), 0) + n.put([]byte("00000003"), []byte("00000003"), []byte("0123456701234567"), 0) + n.put([]byte("00000004"), []byte("00000004"), []byte("0123456701234567"), 0) + n.put([]byte("00000005"), []byte("00000005"), []byte("0123456701234567"), 0) + + // Split. 
+ nodes := n.split(4096) + assert.Equal(t, len(nodes), 1) + assert.Equal(t, len(nodes[0].inodes), 5) +} diff --git a/page.go b/page.go index c18c6b4..d05dce6 100644 --- a/page.go +++ b/page.go @@ -1,6 +1,8 @@ package bolt import ( + "fmt" + "os" "unsafe" ) @@ -10,6 +12,9 @@ const maxAllocSize = 0xFFFFFFF const minKeysPerPage = 2 const maxNodesPerPage = 65535 +const branchPageElementSize = int(unsafe.Sizeof(branchPageElement{})) +const leafPageElementSize = int(unsafe.Sizeof(leafPageElement{})) + const ( p_branch = 0x01 p_leaf = 0x02 @@ -28,29 +33,46 @@ type page struct { ptr uintptr } +// typ returns a human readable page type string used for debugging. +func (p *page) typ() string { + if (p.flags & p_branch) != 0 { + return "branch" + } else if (p.flags & p_leaf) != 0 { + return "leaf" + } else if (p.flags & p_meta) != 0 { + return "meta" + } else if (p.flags & p_sys) != 0 { + return "system" + } else if (p.flags & p_freelist) != 0 { + return "freelist" + } + return fmt.Sprintf("unknown<%02x>", p.flags) +} + // meta returns a pointer to the metadata section of the page. func (p *page) meta() *meta { return (*meta)(unsafe.Pointer(&p.ptr)) } -// lnode retrieves the leaf node by index -func (p *page) lnode(index int) *lnode { - return &((*[maxNodesPerPage]lnode)(unsafe.Pointer(&p.ptr)))[index] +// leafPageElement retrieves the leaf node by index +func (p *page) leafPageElement(index uint16) *leafPageElement { + n := &((*[maxNodesPerPage]leafPageElement)(unsafe.Pointer(&p.ptr)))[index] + return n } -// lnodes retrieves a list of leaf nodes. -func (p *page) lnodes() []lnode { - return ((*[maxNodesPerPage]lnode)(unsafe.Pointer(&p.ptr)))[:] +// leafPageElements retrieves a list of leaf nodes. 
+func (p *page) leafPageElements() []leafPageElement { + return ((*[maxNodesPerPage]leafPageElement)(unsafe.Pointer(&p.ptr)))[:] } -// bnode retrieves the branch node by index -func (p *page) bnode(index int) *bnode { - return &((*[maxNodesPerPage]bnode)(unsafe.Pointer(&p.ptr)))[index] +// branchPageElement retrieves the branch node by index +func (p *page) branchPageElement(index uint16) *branchPageElement { + return &((*[maxNodesPerPage]branchPageElement)(unsafe.Pointer(&p.ptr)))[index] } -// bnodes retrieves a list of branch nodes. -func (p *page) bnodes() []bnode { - return ((*[maxNodesPerPage]bnode)(unsafe.Pointer(&p.ptr)))[:] +// branchPageElements retrieves a list of branch nodes. +func (p *page) branchPageElements() []branchPageElement { + return ((*[maxNodesPerPage]branchPageElement)(unsafe.Pointer(&p.ptr)))[:] } // freelist retrieves a list of page ids from a freelist page. @@ -58,8 +80,47 @@ func (p *page) freelist() []pgid { return ((*[maxNodesPerPage]pgid)(unsafe.Pointer(&p.ptr)))[0:p.count] } +// hexdump writes n bytes of the page to STDERR as hex output. +func (p *page) hexdump(n int) { + buf := (*[maxAllocSize]byte)(unsafe.Pointer(p))[:n] + fmt.Fprintf(os.Stderr, "%x\n", buf) +} + type pages []*page func (s pages) Len() int { return len(s) } func (s pages) Swap(i, j int) { s[i], s[j] = s[j], s[i] } func (s pages) Less(i, j int) bool { return s[i].id < s[j].id } + +// branchPageElement represents a node on a branch page. +type branchPageElement struct { + pos uint32 + ksize uint32 + pgid pgid +} + +// key returns a byte slice of the node key. +func (n *branchPageElement) key() []byte { + buf := (*[maxAllocSize]byte)(unsafe.Pointer(n)) + return buf[n.pos : n.pos+n.ksize] +} + +// leafPageElement represents a node on a leaf page. +type leafPageElement struct { + flags uint32 + pos uint32 + ksize uint32 + vsize uint32 +} + +// key returns a byte slice of the node key. 
+func (n *leafPageElement) key() []byte { + buf := (*[maxAllocSize]byte)(unsafe.Pointer(n)) + return buf[n.pos : n.pos+n.ksize] +} + +// value returns a byte slice of the node value. +func (n *leafPageElement) value() []byte { + buf := (*[maxAllocSize]byte)(unsafe.Pointer(n)) + return buf[n.pos+n.ksize : n.pos+n.ksize+n.vsize] +} diff --git a/quick_test.go b/quick_test.go new file mode 100644 index 0000000..d85249d --- /dev/null +++ b/quick_test.go @@ -0,0 +1,65 @@ +package bolt + +import ( + "flag" + "math/rand" + "reflect" + "testing/quick" + "time" +) + +// testing/quick defaults to 100 iterations and a random seed. +// You can override these settings from the command line: +// +// -quickchecks The number of iterations to perform. +// -quick.seed The seed to use for randomizing. +// -quick.maxitems The maximum number of items to insert into a DB. +// -quick.maxksize The maximum size of a key. +// -quick.maxvsize The maximum size of a value. +// + +var seed, testMaxItemCount, testMaxKeySize, testMaxValueSize int + +func init() { + flag.IntVar(&seed, "quick.seed", int(time.Now().UnixNano())%100000, "") + flag.IntVar(&testMaxItemCount, "quick.maxitems", 1024, "") + flag.IntVar(&testMaxKeySize, "quick.maxksize", 1024, "") + flag.IntVar(&testMaxValueSize, "quick.maxvsize", 1024, "") + warn("seed:", seed) +} + +// qc creates a testing/quick configuration. 
+func qc() *quick.Config { + return &quick.Config{Rand: rand.New(rand.NewSource(int64(seed)))} +} + +type testKeyValuePairs []testKeyValuePair + +func (t testKeyValuePairs) Generate(rand *rand.Rand, size int) reflect.Value { + n := rand.Intn(testMaxItemCount-1) + 1 + items := make(testKeyValuePairs, n) + for i := 0; i < n; i++ { + items[i].Generate(rand, size) + } + return reflect.ValueOf(items) +} + +type testKeyValuePair struct { + Key []byte + Value []byte +} + +func (t testKeyValuePair) Generate(rand *rand.Rand, size int) reflect.Value { + t.Key = randByteSlice(rand, 1, testMaxKeySize) + t.Value = randByteSlice(rand, 0, testMaxValueSize) + return reflect.ValueOf(t) +} + +func randByteSlice(rand *rand.Rand, minSize, maxSize int) []byte { + n := rand.Intn(maxSize - minSize) + minSize + b := make([]byte, n) + for i := 0; i < n; i++ { + b[i] = byte(rand.Intn(255)) + } + return b +} diff --git a/rwtransaction.go b/rwtransaction.go index 145e0cf..2485c62 100644 --- a/rwtransaction.go +++ b/rwtransaction.go @@ -9,8 +9,7 @@ import ( // Only one read/write transaction can be active for a DB at a time. type RWTransaction struct { Transaction - branches map[pgid]*branch - leafs map[pgid]*leaf + nodes map[pgid]*node } // init initializes the transaction. @@ -21,7 +20,7 @@ func (t *RWTransaction) init(db *DB) { // Copy the meta and increase the transaction id. t.meta = &meta{} db.meta().copy(t.meta) - t.meta.txnid += txnid(2) + t.meta.txnid += txnid(1) } // CreateBucket creates a new bucket. @@ -32,7 +31,7 @@ func (t *RWTransaction) CreateBucket(name string) error { } else if len(name) == 0 { return &Error{"bucket name cannot be blank", nil} } else if len(name) > MaxBucketNameSize { - return &Error{"bucket name too large", nil} + return &Error{"bucket name too long", nil} } // Create a blank root leaf page. @@ -72,9 +71,9 @@ func (t *RWTransaction) Put(name string, key []byte, value []byte) error { } // Insert a new node. 
- c := b.Cursor() - c.Goto(key) - t.leaf(c).put(key, value) + c := b.cursor() + c.Get(key) + t.node(c.stack).put(key, key, value, 0) return nil } @@ -121,8 +120,8 @@ func (t *RWTransaction) Rollback() { } func (t *RWTransaction) close() { - // Clear temporary pages. - t.leafs = nil + // Clear nodes. + t.nodes = nil // TODO: Release writer lock. } @@ -146,55 +145,81 @@ func (t *RWTransaction) allocate(count int) *page { return p } -// spill writes all the leafs and branches to dirty pages. +// spill writes all the nodes to dirty pages. func (t *RWTransaction) spill() { - // Spill leafs first. - for _, l := range t.leafs { - t.spillLeaf(l) + // Keep track of the current root nodes. + // We will update this at the end once all nodes are created. + type root struct { + node *node + pgid pgid } + var roots []root - // Sort branches by highest depth first. - branches := make(branches, 0, len(t.branches)) - for _, b := range t.branches { - branches = append(branches, b) + // Sort nodes by highest depth first. + nodes := make(nodesByDepth, 0, len(t.nodes)) + for _, n := range t.nodes { + nodes = append(nodes, n) } - sort.Sort(branches) + sort.Sort(nodes) - // Spill branches by deepest first. - for _, b := range branches { - t.spillBranch(b) - } -} + // Spill nodes by deepest first. + for i := 0; i < len(nodes); i++ { + n := nodes[i] -// spillLeaf writes a leaf to one or more dirty pages. -func (t *RWTransaction) spillLeaf(l *leaf) { - parent := l.parent + // Save existing root buckets for later. + if n.parent == nil && n.pgid != 0 { + roots = append(roots, root{n, n.pgid}) + } - // Split leaf, if necessary. - leafs := l.split(t.db.pageSize) + // Split nodes and write them. + newNodes := n.split(t.db.pageSize) - // TODO: If this is a root leaf and we split then add a parent branch. + // If this is a root node that split then create a parent node. 
+ if n.parent == nil && len(newNodes) > 1 { + n.parent = &node{ + isLeaf: false, + key: newNodes[0].inodes[0].key, + depth: n.depth - 1, + inodes: make(inodes, 0), + } + nodes = append(nodes, n.parent) + sort.Sort(nodes) + } - // Process each resulting leaf. - previd := leafs[0].pgid - for index, l := range leafs { - // Allocate contiguous space for the leaf. - p := t.allocate((l.size() / t.db.pageSize) + 1) + // Write nodes to dirty pages. + for i, newNode := range newNodes { + // Allocate contiguous space for the node. + p := t.allocate((newNode.size() / t.db.pageSize) + 1) - // Write the leaf to the page. - l.write(p) + // Write the node to the page. + newNode.write(p) + newNode.pgid = p.id + newNode.parent = n.parent - // Insert or replace the node in the parent branch with the new pgid. - if parent != nil { - parent.put(previd, p.id, l.items[0].key, (index == 0)) - previd = l.pgid + // The first node should use the existing entry, other nodes are inserts. + var oldKey []byte + if i == 0 { + oldKey = n.key + } else { + oldKey = newNode.inodes[0].key + } + + // Update the parent entry. + if newNode.parent != nil { + newNode.parent.put(oldKey, newNode.inodes[0].key, nil, newNode.pgid) + } } } -} -// spillBranch writes a branch to one or more dirty pages. -func (t *RWTransaction) spillBranch(l *branch) { - warn("[pending] RWTransaction.spillBranch()") // TODO + // Update roots with new roots. + for _, root := range roots { + for _, b := range t.sys.buckets { + if b.root == root.pgid { + b.root = root.node.root().pgid + break + } + } + } } // write writes any dirty pages to disk. @@ -231,28 +256,8 @@ func (t *RWTransaction) writeMeta() error { return nil } -// leaf retrieves a leaf object based on the current position of a cursor. -func (t *RWTransaction) leaf(c *Cursor) *leaf { - e := c.stack[len(c.stack)-1] - id := e.page.id - - // Retrieve leaf if it has already been fetched. 
- if l := t.leafs[id]; l != nil { - return l - } - - // Otherwise create a leaf and cache it. - l := &leaf{} - l.read(t.page(id)) - l.parent = t.branch(c.stack[:len(c.stack)-1]) - t.leafs[id] = l - - return l -} - -// branch retrieves a branch object based a cursor stack. -// This should only be called from leaf(). -func (t *RWTransaction) branch(stack []elem) *branch { +// node retrieves a node based on a cursor stack. +func (t *RWTransaction) node(stack []elem) *node { if len(stack) == 0 { return nil } @@ -260,16 +265,16 @@ // Retrieve branch if it has already been fetched. e := &stack[len(stack)-1] id := e.page.id - if b := t.branches[id]; b != nil { - return b + if n := t.nodes[id]; n != nil { + return n } // Otherwise create a branch and cache it. - b := &branch{} - b.read(t.page(id)) - b.depth = len(stack) - 1 - b.parent = t.branch(stack[:len(stack)-1]) - t.branches[id] = b + n := &node{} + n.read(t.page(id)) + n.depth = len(stack) - 1 + n.parent = t.node(stack[:len(stack)-1]) + t.nodes[id] = n - return b + return n } diff --git a/rwtransaction_test.go b/rwtransaction_test.go index b6e971e..4d256cc 100644 --- a/rwtransaction_test.go +++ b/rwtransaction_test.go @@ -1,6 +1,7 @@ package bolt import ( + "strings" "testing" "github.com/stretchr/testify/assert" @@ -28,3 +29,35 @@ func TestTransactionCreateBucket(t *testing.T) { assert.NoError(t, err) }) } + +// Ensure that a bucket cannot be created twice. +func TestTransactionRecreateBucket(t *testing.T) { + withOpenDB(func(db *DB, path string) { + // Create a bucket. + err := db.CreateBucket("widgets") + assert.NoError(t, err) + + // Create the same bucket again. + err = db.CreateBucket("widgets") + assert.Equal(t, err, &Error{"bucket already exists", nil}) + }) +} + +// Ensure that a bucket is created with a non-blank name. 
+func TestTransactionCreateBucketWithoutName(t *testing.T) { + withOpenDB(func(db *DB, path string) { + err := db.CreateBucket("") + assert.Equal(t, err, &Error{"bucket name cannot be blank", nil}) + }) +} + +// Ensure that a bucket name is not too long. +func TestTransactionCreateBucketWithLongName(t *testing.T) { + withOpenDB(func(db *DB, path string) { + err := db.CreateBucket(strings.Repeat("X", 255)) + assert.NoError(t, err) + + err = db.CreateBucket(strings.Repeat("X", 256)) + assert.Equal(t, err, &Error{"bucket name too long", nil}) + }) +} diff --git a/sys.go b/sys.go index 657c973..cf15413 100644 --- a/sys.go +++ b/sys.go @@ -25,6 +25,16 @@ func (s *sys) get(key string) *bucket { return s.buckets[key] } +// getByRoot retrieves a bucket by root page id. +func (s *sys) getByRoot(pgid pgid) *bucket { + for _, b := range s.buckets { + if b.root == pgid { + return b + } + } + panic("root not found") +} + // put sets a new value for a bucket. func (s *sys) put(key string, b *bucket) { s.buckets[key] = b @@ -32,7 +42,9 @@ func (s *sys) put(key string, b *bucket) { // del deletes a bucket by name. func (s *sys) del(key string) { - delete(s.buckets, key) + if b := s.buckets[key]; b != nil { + delete(s.buckets, key) + } } // read initializes the data from an on-disk page. @@ -61,7 +73,8 @@ func (s *sys) read(p *page) { // Associate keys and buckets. for index, key := range keys { - s.buckets[key] = buckets[index] + b := &bucket{buckets[index].root} + s.buckets[key] = b } } diff --git a/transaction.go b/transaction.go index e479dfb..29337c6 100644 --- a/transaction.go +++ b/transaction.go @@ -62,7 +62,7 @@ func (t *Transaction) Cursor(name string) *Cursor { if b == nil { return nil } - return b.Cursor() + return b.cursor() } // Get retrieves the value for a key in a named bucket. 
diff --git a/warn.go b/warn.go index 7e9a2a7..eadf664 100644 --- a/warn.go +++ b/warn.go @@ -10,5 +10,5 @@ func warn(v ...interface{}) { } func warnf(msg string, v ...interface{}) { - fmt.Fprintf(os.Stderr, msg, v...) + fmt.Fprintf(os.Stderr, msg+"\n", v...) }