mirror of https://github.com/etcd-io/bbolt.git
Fix incorrect unsafe usage
After checkptr fixes by 2fc6815c
, it was discovered that new issues
were hit in production systems, in particular when a single process
opened and updated multiple separate databases. This indicates that
some bug relating to bad unsafe usage was introduced during this
commit.
This commit combines several attempts at fixing this new issue. For
example, slices are once again created by slicing an array of "max
allocation" elements, but this time with the cap set to the intended
length. This operation is espressly permitted according to the Go
wiki, so it should be preferred to type converting a
reflect.SliceHeader.
pull/220/head
parent
a8af23b57f
commit
f9d3ff6648
46
freelist.go
46
freelist.go
|
@ -2,7 +2,6 @@ package bbolt
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"reflect"
|
|
||||||
"sort"
|
"sort"
|
||||||
"unsafe"
|
"unsafe"
|
||||||
)
|
)
|
||||||
|
@ -94,24 +93,8 @@ func (f *freelist) pending_count() int {
|
||||||
return count
|
return count
|
||||||
}
|
}
|
||||||
|
|
||||||
// copyallunsafe copies a list of all free ids and all pending ids in one sorted list.
|
// copyall copies a list of all free ids and all pending ids in one sorted list.
|
||||||
// f.count returns the minimum length required for dst.
|
// f.count returns the minimum length required for dst.
|
||||||
func (f *freelist) copyallunsafe(dstptr unsafe.Pointer) { // dstptr is []pgid data pointer
|
|
||||||
m := make(pgids, 0, f.pending_count())
|
|
||||||
for _, txp := range f.pending {
|
|
||||||
m = append(m, txp.ids...)
|
|
||||||
}
|
|
||||||
sort.Sort(m)
|
|
||||||
fpgids := f.getFreePageIDs()
|
|
||||||
sz := len(fpgids) + len(m)
|
|
||||||
dst := *(*[]pgid)(unsafe.Pointer(&reflect.SliceHeader{
|
|
||||||
Data: uintptr(dstptr),
|
|
||||||
Len: sz,
|
|
||||||
Cap: sz,
|
|
||||||
}))
|
|
||||||
mergepgids(dst, fpgids, m)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (f *freelist) copyall(dst []pgid) {
|
func (f *freelist) copyall(dst []pgid) {
|
||||||
m := make(pgids, 0, f.pending_count())
|
m := make(pgids, 0, f.pending_count())
|
||||||
for _, txp := range f.pending {
|
for _, txp := range f.pending {
|
||||||
|
@ -287,18 +270,15 @@ func (f *freelist) read(p *page) {
|
||||||
var idx, count uintptr = 0, uintptr(p.count)
|
var idx, count uintptr = 0, uintptr(p.count)
|
||||||
if count == 0xFFFF {
|
if count == 0xFFFF {
|
||||||
idx = 1
|
idx = 1
|
||||||
count = uintptr(*(*pgid)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + unsafe.Sizeof(*p))))
|
count = uintptr(*(*pgid)(unsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p))))
|
||||||
}
|
}
|
||||||
|
|
||||||
// Copy the list of page ids from the freelist.
|
// Copy the list of page ids from the freelist.
|
||||||
if count == 0 {
|
if count == 0 {
|
||||||
f.ids = nil
|
f.ids = nil
|
||||||
} else {
|
} else {
|
||||||
ids := *(*[]pgid)(unsafe.Pointer(&reflect.SliceHeader{
|
ids := (*[maxAllocSize]pgid)(unsafeAdd(
|
||||||
Data: uintptr(unsafe.Pointer(p)) + unsafe.Sizeof(*p) + idx*unsafe.Sizeof(pgid(0)),
|
unsafe.Pointer(p), unsafe.Sizeof(*p)))[idx : idx+count : idx+count]
|
||||||
Len: int(count),
|
|
||||||
Cap: int(count),
|
|
||||||
}))
|
|
||||||
|
|
||||||
// copy the ids, so we don't modify on the freelist page directly
|
// copy the ids, so we don't modify on the freelist page directly
|
||||||
idsCopy := make([]pgid, count)
|
idsCopy := make([]pgid, count)
|
||||||
|
@ -331,16 +311,18 @@ func (f *freelist) write(p *page) error {
|
||||||
|
|
||||||
// The page.count can only hold up to 64k elements so if we overflow that
|
// The page.count can only hold up to 64k elements so if we overflow that
|
||||||
// number then we handle it by putting the size in the first element.
|
// number then we handle it by putting the size in the first element.
|
||||||
lenids := f.count()
|
l := f.count()
|
||||||
if lenids == 0 {
|
if l == 0 {
|
||||||
p.count = uint16(lenids)
|
p.count = uint16(l)
|
||||||
} else if lenids < 0xFFFF {
|
} else if l < 0xFFFF {
|
||||||
p.count = uint16(lenids)
|
p.count = uint16(l)
|
||||||
f.copyallunsafe(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + unsafe.Sizeof(*p)))
|
ids := (*[maxAllocSize]pgid)(unsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p)))[:l:l]
|
||||||
|
f.copyall(ids)
|
||||||
} else {
|
} else {
|
||||||
p.count = 0xFFFF
|
p.count = 0xFFFF
|
||||||
*(*pgid)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + unsafe.Sizeof(*p))) = pgid(lenids)
|
ids := (*[maxAllocSize]pgid)(unsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p)))[: l+1 : l+1]
|
||||||
f.copyallunsafe(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + unsafe.Sizeof(*p) + unsafe.Sizeof(pgid(0))))
|
ids[0] = pgid(l)
|
||||||
|
f.copyall(ids[1:])
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
|
|
25
node.go
25
node.go
|
@ -3,7 +3,6 @@ package bbolt
|
||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"fmt"
|
"fmt"
|
||||||
"reflect"
|
|
||||||
"sort"
|
"sort"
|
||||||
"unsafe"
|
"unsafe"
|
||||||
)
|
)
|
||||||
|
@ -208,36 +207,32 @@ func (n *node) write(p *page) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Loop over each item and write it to the page.
|
// Loop over each item and write it to the page.
|
||||||
bp := uintptr(unsafe.Pointer(p)) + unsafe.Sizeof(*p) + n.pageElementSize()*uintptr(len(n.inodes))
|
// off tracks the offset into p of the start of the next data.
|
||||||
|
off := unsafe.Sizeof(*p) + n.pageElementSize()*uintptr(len(n.inodes))
|
||||||
for i, item := range n.inodes {
|
for i, item := range n.inodes {
|
||||||
_assert(len(item.key) > 0, "write: zero-length inode key")
|
_assert(len(item.key) > 0, "write: zero-length inode key")
|
||||||
|
|
||||||
|
// Create a slice to write into of needed size and advance
|
||||||
|
// byte pointer for next iteration.
|
||||||
|
sz := len(item.key) + len(item.value)
|
||||||
|
b := unsafeByteSlice(unsafe.Pointer(p), off, 0, sz)
|
||||||
|
off += uintptr(sz)
|
||||||
|
|
||||||
// Write the page element.
|
// Write the page element.
|
||||||
if n.isLeaf {
|
if n.isLeaf {
|
||||||
elem := p.leafPageElement(uint16(i))
|
elem := p.leafPageElement(uint16(i))
|
||||||
elem.pos = uint32(bp - uintptr(unsafe.Pointer(elem)))
|
elem.pos = uint32(uintptr(unsafe.Pointer(&b[0])) - uintptr(unsafe.Pointer(elem)))
|
||||||
elem.flags = item.flags
|
elem.flags = item.flags
|
||||||
elem.ksize = uint32(len(item.key))
|
elem.ksize = uint32(len(item.key))
|
||||||
elem.vsize = uint32(len(item.value))
|
elem.vsize = uint32(len(item.value))
|
||||||
} else {
|
} else {
|
||||||
elem := p.branchPageElement(uint16(i))
|
elem := p.branchPageElement(uint16(i))
|
||||||
elem.pos = uint32(bp - uintptr(unsafe.Pointer(elem)))
|
elem.pos = uint32(uintptr(unsafe.Pointer(&b[0])) - uintptr(unsafe.Pointer(elem)))
|
||||||
elem.ksize = uint32(len(item.key))
|
elem.ksize = uint32(len(item.key))
|
||||||
elem.pgid = item.pgid
|
elem.pgid = item.pgid
|
||||||
_assert(elem.pgid != p.id, "write: circular dependency occurred")
|
_assert(elem.pgid != p.id, "write: circular dependency occurred")
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create a slice to write into of needed size and advance
|
|
||||||
// byte pointer for next iteration.
|
|
||||||
klen, vlen := len(item.key), len(item.value)
|
|
||||||
sz := klen + vlen
|
|
||||||
b := *(*[]byte)(unsafe.Pointer(&reflect.SliceHeader{
|
|
||||||
Data: bp,
|
|
||||||
Len: sz,
|
|
||||||
Cap: sz,
|
|
||||||
}))
|
|
||||||
bp += uintptr(sz)
|
|
||||||
|
|
||||||
// Write data for the element to the end of the page.
|
// Write data for the element to the end of the page.
|
||||||
l := copy(b, item.key)
|
l := copy(b, item.key)
|
||||||
copy(b[l:], item.value)
|
copy(b[l:], item.value)
|
||||||
|
|
|
@ -44,9 +44,9 @@ func TestNode_read_LeafPage(t *testing.T) {
|
||||||
nodes[1] = leafPageElement{flags: 0, pos: 23, ksize: 10, vsize: 3} // pos = sizeof(leafPageElement) + 3 + 4
|
nodes[1] = leafPageElement{flags: 0, pos: 23, ksize: 10, vsize: 3} // pos = sizeof(leafPageElement) + 3 + 4
|
||||||
|
|
||||||
// Write data for the nodes at the end.
|
// Write data for the nodes at the end.
|
||||||
data := (*[4096]byte)(unsafe.Pointer(&nodes[2]))
|
const s = "barfoozhelloworldbye"
|
||||||
copy(data[:], "barfooz")
|
data := unsafeByteSlice(unsafe.Pointer(&nodes[2]), 0, 0, len(s))
|
||||||
copy(data[7:], "helloworldbye")
|
copy(data, s)
|
||||||
|
|
||||||
// Deserialize page into a leaf.
|
// Deserialize page into a leaf.
|
||||||
n := &node{}
|
n := &node{}
|
||||||
|
|
53
page.go
53
page.go
|
@ -3,7 +3,6 @@ package bbolt
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
"reflect"
|
|
||||||
"sort"
|
"sort"
|
||||||
"unsafe"
|
"unsafe"
|
||||||
)
|
)
|
||||||
|
@ -51,13 +50,13 @@ func (p *page) typ() string {
|
||||||
|
|
||||||
// meta returns a pointer to the metadata section of the page.
|
// meta returns a pointer to the metadata section of the page.
|
||||||
func (p *page) meta() *meta {
|
func (p *page) meta() *meta {
|
||||||
return (*meta)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + unsafe.Sizeof(*p)))
|
return (*meta)(unsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p)))
|
||||||
}
|
}
|
||||||
|
|
||||||
// leafPageElement retrieves the leaf node by index
|
// leafPageElement retrieves the leaf node by index
|
||||||
func (p *page) leafPageElement(index uint16) *leafPageElement {
|
func (p *page) leafPageElement(index uint16) *leafPageElement {
|
||||||
off := uintptr(index) * leafPageElementSize
|
return (*leafPageElement)(unsafeIndex(unsafe.Pointer(p), unsafe.Sizeof(*p),
|
||||||
return (*leafPageElement)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + unsafe.Sizeof(*p) + off))
|
leafPageElementSize, int(index)))
|
||||||
}
|
}
|
||||||
|
|
||||||
// leafPageElements retrieves a list of leaf nodes.
|
// leafPageElements retrieves a list of leaf nodes.
|
||||||
|
@ -65,17 +64,14 @@ func (p *page) leafPageElements() []leafPageElement {
|
||||||
if p.count == 0 {
|
if p.count == 0 {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
return *(*[]leafPageElement)(unsafe.Pointer(&reflect.SliceHeader{
|
return (*[maxAllocSize]leafPageElement)(unsafeIndex(unsafe.Pointer(p), unsafe.Sizeof(*p),
|
||||||
Data: uintptr(unsafe.Pointer(p)) + unsafe.Sizeof(*p),
|
unsafe.Sizeof(leafPageElement{}), 0))[:p.count:p.count]
|
||||||
Len: int(p.count),
|
|
||||||
Cap: int(p.count),
|
|
||||||
}))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// branchPageElement retrieves the branch node by index
|
// branchPageElement retrieves the branch node by index
|
||||||
func (p *page) branchPageElement(index uint16) *branchPageElement {
|
func (p *page) branchPageElement(index uint16) *branchPageElement {
|
||||||
off := uintptr(index) * unsafe.Sizeof(branchPageElement{})
|
return (*branchPageElement)(unsafeIndex(unsafe.Pointer(p), unsafe.Sizeof(*p),
|
||||||
return (*branchPageElement)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + unsafe.Sizeof(*p) + off))
|
unsafe.Sizeof(branchPageElement{}), int(index)))
|
||||||
}
|
}
|
||||||
|
|
||||||
// branchPageElements retrieves a list of branch nodes.
|
// branchPageElements retrieves a list of branch nodes.
|
||||||
|
@ -83,20 +79,13 @@ func (p *page) branchPageElements() []branchPageElement {
|
||||||
if p.count == 0 {
|
if p.count == 0 {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
return *(*[]branchPageElement)(unsafe.Pointer(&reflect.SliceHeader{
|
return (*[maxAllocSize]branchPageElement)(unsafeIndex(unsafe.Pointer(p), unsafe.Sizeof(*p),
|
||||||
Data: uintptr(unsafe.Pointer(p)) + unsafe.Sizeof(*p),
|
unsafe.Sizeof(branchPageElement{}), 0))[:p.count:p.count]
|
||||||
Len: int(p.count),
|
|
||||||
Cap: int(p.count),
|
|
||||||
}))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// dump writes n bytes of the page to STDERR as hex output.
|
// dump writes n bytes of the page to STDERR as hex output.
|
||||||
func (p *page) hexdump(n int) {
|
func (p *page) hexdump(n int) {
|
||||||
buf := *(*[]byte)(unsafe.Pointer(&reflect.SliceHeader{
|
buf := unsafeByteSlice(unsafe.Pointer(p), 0, 0, n)
|
||||||
Data: uintptr(unsafe.Pointer(p)),
|
|
||||||
Len: n,
|
|
||||||
Cap: n,
|
|
||||||
}))
|
|
||||||
fmt.Fprintf(os.Stderr, "%x\n", buf)
|
fmt.Fprintf(os.Stderr, "%x\n", buf)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -115,11 +104,7 @@ type branchPageElement struct {
|
||||||
|
|
||||||
// key returns a byte slice of the node key.
|
// key returns a byte slice of the node key.
|
||||||
func (n *branchPageElement) key() []byte {
|
func (n *branchPageElement) key() []byte {
|
||||||
return *(*[]byte)(unsafe.Pointer(&reflect.SliceHeader{
|
return unsafeByteSlice(unsafe.Pointer(n), 0, int(n.pos), int(n.pos)+int(n.ksize))
|
||||||
Data: uintptr(unsafe.Pointer(n)) + uintptr(n.pos),
|
|
||||||
Len: int(n.ksize),
|
|
||||||
Cap: int(n.ksize),
|
|
||||||
}))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// leafPageElement represents a node on a leaf page.
|
// leafPageElement represents a node on a leaf page.
|
||||||
|
@ -132,20 +117,16 @@ type leafPageElement struct {
|
||||||
|
|
||||||
// key returns a byte slice of the node key.
|
// key returns a byte slice of the node key.
|
||||||
func (n *leafPageElement) key() []byte {
|
func (n *leafPageElement) key() []byte {
|
||||||
return *(*[]byte)(unsafe.Pointer(&reflect.SliceHeader{
|
i := int(n.pos)
|
||||||
Data: uintptr(unsafe.Pointer(n)) + uintptr(n.pos),
|
j := i + int(n.ksize)
|
||||||
Len: int(n.ksize),
|
return unsafeByteSlice(unsafe.Pointer(n), 0, i, j)
|
||||||
Cap: int(n.ksize),
|
|
||||||
}))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// value returns a byte slice of the node value.
|
// value returns a byte slice of the node value.
|
||||||
func (n *leafPageElement) value() []byte {
|
func (n *leafPageElement) value() []byte {
|
||||||
return *(*[]byte)(unsafe.Pointer(&reflect.SliceHeader{
|
i := int(n.pos) + int(n.ksize)
|
||||||
Data: uintptr(unsafe.Pointer(n)) + uintptr(n.pos) + uintptr(n.ksize),
|
j := i + int(n.vsize)
|
||||||
Len: int(n.vsize),
|
return unsafeByteSlice(unsafe.Pointer(n), 0, i, j)
|
||||||
Cap: int(n.vsize),
|
|
||||||
}))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// PageInfo represents human readable information about a page.
|
// PageInfo represents human readable information about a page.
|
||||||
|
|
27
tx.go
27
tx.go
|
@ -4,7 +4,6 @@ import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"os"
|
"os"
|
||||||
"reflect"
|
|
||||||
"sort"
|
"sort"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
@ -524,24 +523,18 @@ func (tx *Tx) write() error {
|
||||||
|
|
||||||
// Write pages to disk in order.
|
// Write pages to disk in order.
|
||||||
for _, p := range pages {
|
for _, p := range pages {
|
||||||
size := (int(p.overflow) + 1) * tx.db.pageSize
|
rem := (uint64(p.overflow) + 1) * uint64(tx.db.pageSize)
|
||||||
offset := int64(p.id) * int64(tx.db.pageSize)
|
offset := int64(p.id) * int64(tx.db.pageSize)
|
||||||
|
var written uintptr
|
||||||
|
|
||||||
// Write out page in "max allocation" sized chunks.
|
// Write out page in "max allocation" sized chunks.
|
||||||
ptr := uintptr(unsafe.Pointer(p))
|
|
||||||
for {
|
for {
|
||||||
// Limit our write to our max allocation size.
|
sz := rem
|
||||||
sz := size
|
|
||||||
if sz > maxAllocSize-1 {
|
if sz > maxAllocSize-1 {
|
||||||
sz = maxAllocSize - 1
|
sz = maxAllocSize - 1
|
||||||
}
|
}
|
||||||
|
buf := unsafeByteSlice(unsafe.Pointer(p), written, 0, int(sz))
|
||||||
|
|
||||||
// Write chunk to disk.
|
|
||||||
buf := *(*[]byte)(unsafe.Pointer(&reflect.SliceHeader{
|
|
||||||
Data: ptr,
|
|
||||||
Len: sz,
|
|
||||||
Cap: sz,
|
|
||||||
}))
|
|
||||||
if _, err := tx.db.ops.writeAt(buf, offset); err != nil {
|
if _, err := tx.db.ops.writeAt(buf, offset); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
@ -550,14 +543,14 @@ func (tx *Tx) write() error {
|
||||||
tx.stats.Write++
|
tx.stats.Write++
|
||||||
|
|
||||||
// Exit inner for loop if we've written all the chunks.
|
// Exit inner for loop if we've written all the chunks.
|
||||||
size -= sz
|
rem -= sz
|
||||||
if size == 0 {
|
if rem == 0 {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
|
||||||
// Otherwise move offset forward and move pointer to next chunk.
|
// Otherwise move offset forward and move pointer to next chunk.
|
||||||
offset += int64(sz)
|
offset += int64(sz)
|
||||||
ptr += uintptr(sz)
|
written += uintptr(sz)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -576,11 +569,7 @@ func (tx *Tx) write() error {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
buf := *(*[]byte)(unsafe.Pointer(&reflect.SliceHeader{
|
buf := unsafeByteSlice(unsafe.Pointer(p), 0, 0, tx.db.pageSize)
|
||||||
Data: uintptr(unsafe.Pointer(p)),
|
|
||||||
Len: tx.db.pageSize,
|
|
||||||
Cap: tx.db.pageSize,
|
|
||||||
}))
|
|
||||||
|
|
||||||
// See https://go.googlesource.com/go/+/f03c9202c43e0abb130669852082117ca50aa9b1
|
// See https://go.googlesource.com/go/+/f03c9202c43e0abb130669852082117ca50aa9b1
|
||||||
for i := range buf {
|
for i := range buf {
|
||||||
|
|
|
@ -0,0 +1,15 @@
|
||||||
|
package bbolt
|
||||||
|
|
||||||
|
import "unsafe"
|
||||||
|
|
||||||
|
func unsafeAdd(base unsafe.Pointer, offset uintptr) unsafe.Pointer {
|
||||||
|
return unsafe.Pointer(uintptr(base) + offset)
|
||||||
|
}
|
||||||
|
|
||||||
|
func unsafeIndex(base unsafe.Pointer, offset uintptr, elemsz uintptr, n int) unsafe.Pointer {
|
||||||
|
return unsafe.Pointer(uintptr(base) + offset + uintptr(n)*elemsz)
|
||||||
|
}
|
||||||
|
|
||||||
|
func unsafeByteSlice(base unsafe.Pointer, offset uintptr, i, j int) []byte {
|
||||||
|
return (*[maxAllocSize]byte)(unsafeAdd(base, offset))[i:j:j]
|
||||||
|
}
|
Loading…
Reference in New Issue