mirror of https://github.com/etcd-io/bbolt.git
Fix incorrect unsafe usage
After checkptr fixes by 2fc6815c
, it was discovered that new issues
were hit in production systems, in particular when a single process
opened and updated multiple separate databases. This indicates that
some bug relating to bad unsafe usage was introduced during this
commit.
This commit combines several attempts at fixing this new issue. For
example, slices are once again created by slicing an array of "max
allocation" elements, but this time with the cap set to the intended
length. This operation is espressly permitted according to the Go
wiki, so it should be preferred to type converting a
reflect.SliceHeader.
pull/220/head
parent
a8af23b57f
commit
f9d3ff6648
46
freelist.go
46
freelist.go
|
@ -2,7 +2,6 @@ package bbolt
|
|||
|
||||
import (
|
||||
"fmt"
|
||||
"reflect"
|
||||
"sort"
|
||||
"unsafe"
|
||||
)
|
||||
|
@ -94,24 +93,8 @@ func (f *freelist) pending_count() int {
|
|||
return count
|
||||
}
|
||||
|
||||
// copyallunsafe copies a list of all free ids and all pending ids in one sorted list.
|
||||
// copyall copies a list of all free ids and all pending ids in one sorted list.
|
||||
// f.count returns the minimum length required for dst.
|
||||
func (f *freelist) copyallunsafe(dstptr unsafe.Pointer) { // dstptr is []pgid data pointer
|
||||
m := make(pgids, 0, f.pending_count())
|
||||
for _, txp := range f.pending {
|
||||
m = append(m, txp.ids...)
|
||||
}
|
||||
sort.Sort(m)
|
||||
fpgids := f.getFreePageIDs()
|
||||
sz := len(fpgids) + len(m)
|
||||
dst := *(*[]pgid)(unsafe.Pointer(&reflect.SliceHeader{
|
||||
Data: uintptr(dstptr),
|
||||
Len: sz,
|
||||
Cap: sz,
|
||||
}))
|
||||
mergepgids(dst, fpgids, m)
|
||||
}
|
||||
|
||||
func (f *freelist) copyall(dst []pgid) {
|
||||
m := make(pgids, 0, f.pending_count())
|
||||
for _, txp := range f.pending {
|
||||
|
@ -287,18 +270,15 @@ func (f *freelist) read(p *page) {
|
|||
var idx, count uintptr = 0, uintptr(p.count)
|
||||
if count == 0xFFFF {
|
||||
idx = 1
|
||||
count = uintptr(*(*pgid)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + unsafe.Sizeof(*p))))
|
||||
count = uintptr(*(*pgid)(unsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p))))
|
||||
}
|
||||
|
||||
// Copy the list of page ids from the freelist.
|
||||
if count == 0 {
|
||||
f.ids = nil
|
||||
} else {
|
||||
ids := *(*[]pgid)(unsafe.Pointer(&reflect.SliceHeader{
|
||||
Data: uintptr(unsafe.Pointer(p)) + unsafe.Sizeof(*p) + idx*unsafe.Sizeof(pgid(0)),
|
||||
Len: int(count),
|
||||
Cap: int(count),
|
||||
}))
|
||||
ids := (*[maxAllocSize]pgid)(unsafeAdd(
|
||||
unsafe.Pointer(p), unsafe.Sizeof(*p)))[idx : idx+count : idx+count]
|
||||
|
||||
// copy the ids, so we don't modify on the freelist page directly
|
||||
idsCopy := make([]pgid, count)
|
||||
|
@ -331,16 +311,18 @@ func (f *freelist) write(p *page) error {
|
|||
|
||||
// The page.count can only hold up to 64k elements so if we overflow that
|
||||
// number then we handle it by putting the size in the first element.
|
||||
lenids := f.count()
|
||||
if lenids == 0 {
|
||||
p.count = uint16(lenids)
|
||||
} else if lenids < 0xFFFF {
|
||||
p.count = uint16(lenids)
|
||||
f.copyallunsafe(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + unsafe.Sizeof(*p)))
|
||||
l := f.count()
|
||||
if l == 0 {
|
||||
p.count = uint16(l)
|
||||
} else if l < 0xFFFF {
|
||||
p.count = uint16(l)
|
||||
ids := (*[maxAllocSize]pgid)(unsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p)))[:l:l]
|
||||
f.copyall(ids)
|
||||
} else {
|
||||
p.count = 0xFFFF
|
||||
*(*pgid)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + unsafe.Sizeof(*p))) = pgid(lenids)
|
||||
f.copyallunsafe(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + unsafe.Sizeof(*p) + unsafe.Sizeof(pgid(0))))
|
||||
ids := (*[maxAllocSize]pgid)(unsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p)))[: l+1 : l+1]
|
||||
ids[0] = pgid(l)
|
||||
f.copyall(ids[1:])
|
||||
}
|
||||
|
||||
return nil
|
||||
|
|
25
node.go
25
node.go
|
@ -3,7 +3,6 @@ package bbolt
|
|||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"reflect"
|
||||
"sort"
|
||||
"unsafe"
|
||||
)
|
||||
|
@ -208,36 +207,32 @@ func (n *node) write(p *page) {
|
|||
}
|
||||
|
||||
// Loop over each item and write it to the page.
|
||||
bp := uintptr(unsafe.Pointer(p)) + unsafe.Sizeof(*p) + n.pageElementSize()*uintptr(len(n.inodes))
|
||||
// off tracks the offset into p of the start of the next data.
|
||||
off := unsafe.Sizeof(*p) + n.pageElementSize()*uintptr(len(n.inodes))
|
||||
for i, item := range n.inodes {
|
||||
_assert(len(item.key) > 0, "write: zero-length inode key")
|
||||
|
||||
// Create a slice to write into of needed size and advance
|
||||
// byte pointer for next iteration.
|
||||
sz := len(item.key) + len(item.value)
|
||||
b := unsafeByteSlice(unsafe.Pointer(p), off, 0, sz)
|
||||
off += uintptr(sz)
|
||||
|
||||
// Write the page element.
|
||||
if n.isLeaf {
|
||||
elem := p.leafPageElement(uint16(i))
|
||||
elem.pos = uint32(bp - uintptr(unsafe.Pointer(elem)))
|
||||
elem.pos = uint32(uintptr(unsafe.Pointer(&b[0])) - uintptr(unsafe.Pointer(elem)))
|
||||
elem.flags = item.flags
|
||||
elem.ksize = uint32(len(item.key))
|
||||
elem.vsize = uint32(len(item.value))
|
||||
} else {
|
||||
elem := p.branchPageElement(uint16(i))
|
||||
elem.pos = uint32(bp - uintptr(unsafe.Pointer(elem)))
|
||||
elem.pos = uint32(uintptr(unsafe.Pointer(&b[0])) - uintptr(unsafe.Pointer(elem)))
|
||||
elem.ksize = uint32(len(item.key))
|
||||
elem.pgid = item.pgid
|
||||
_assert(elem.pgid != p.id, "write: circular dependency occurred")
|
||||
}
|
||||
|
||||
// Create a slice to write into of needed size and advance
|
||||
// byte pointer for next iteration.
|
||||
klen, vlen := len(item.key), len(item.value)
|
||||
sz := klen + vlen
|
||||
b := *(*[]byte)(unsafe.Pointer(&reflect.SliceHeader{
|
||||
Data: bp,
|
||||
Len: sz,
|
||||
Cap: sz,
|
||||
}))
|
||||
bp += uintptr(sz)
|
||||
|
||||
// Write data for the element to the end of the page.
|
||||
l := copy(b, item.key)
|
||||
copy(b[l:], item.value)
|
||||
|
|
|
@ -44,9 +44,9 @@ func TestNode_read_LeafPage(t *testing.T) {
|
|||
nodes[1] = leafPageElement{flags: 0, pos: 23, ksize: 10, vsize: 3} // pos = sizeof(leafPageElement) + 3 + 4
|
||||
|
||||
// Write data for the nodes at the end.
|
||||
data := (*[4096]byte)(unsafe.Pointer(&nodes[2]))
|
||||
copy(data[:], "barfooz")
|
||||
copy(data[7:], "helloworldbye")
|
||||
const s = "barfoozhelloworldbye"
|
||||
data := unsafeByteSlice(unsafe.Pointer(&nodes[2]), 0, 0, len(s))
|
||||
copy(data, s)
|
||||
|
||||
// Deserialize page into a leaf.
|
||||
n := &node{}
|
||||
|
|
53
page.go
53
page.go
|
@ -3,7 +3,6 @@ package bbolt
|
|||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"reflect"
|
||||
"sort"
|
||||
"unsafe"
|
||||
)
|
||||
|
@ -51,13 +50,13 @@ func (p *page) typ() string {
|
|||
|
||||
// meta returns a pointer to the metadata section of the page.
|
||||
func (p *page) meta() *meta {
|
||||
return (*meta)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + unsafe.Sizeof(*p)))
|
||||
return (*meta)(unsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p)))
|
||||
}
|
||||
|
||||
// leafPageElement retrieves the leaf node by index
|
||||
func (p *page) leafPageElement(index uint16) *leafPageElement {
|
||||
off := uintptr(index) * leafPageElementSize
|
||||
return (*leafPageElement)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + unsafe.Sizeof(*p) + off))
|
||||
return (*leafPageElement)(unsafeIndex(unsafe.Pointer(p), unsafe.Sizeof(*p),
|
||||
leafPageElementSize, int(index)))
|
||||
}
|
||||
|
||||
// leafPageElements retrieves a list of leaf nodes.
|
||||
|
@ -65,17 +64,14 @@ func (p *page) leafPageElements() []leafPageElement {
|
|||
if p.count == 0 {
|
||||
return nil
|
||||
}
|
||||
return *(*[]leafPageElement)(unsafe.Pointer(&reflect.SliceHeader{
|
||||
Data: uintptr(unsafe.Pointer(p)) + unsafe.Sizeof(*p),
|
||||
Len: int(p.count),
|
||||
Cap: int(p.count),
|
||||
}))
|
||||
return (*[maxAllocSize]leafPageElement)(unsafeIndex(unsafe.Pointer(p), unsafe.Sizeof(*p),
|
||||
unsafe.Sizeof(leafPageElement{}), 0))[:p.count:p.count]
|
||||
}
|
||||
|
||||
// branchPageElement retrieves the branch node by index
|
||||
func (p *page) branchPageElement(index uint16) *branchPageElement {
|
||||
off := uintptr(index) * unsafe.Sizeof(branchPageElement{})
|
||||
return (*branchPageElement)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + unsafe.Sizeof(*p) + off))
|
||||
return (*branchPageElement)(unsafeIndex(unsafe.Pointer(p), unsafe.Sizeof(*p),
|
||||
unsafe.Sizeof(branchPageElement{}), int(index)))
|
||||
}
|
||||
|
||||
// branchPageElements retrieves a list of branch nodes.
|
||||
|
@ -83,20 +79,13 @@ func (p *page) branchPageElements() []branchPageElement {
|
|||
if p.count == 0 {
|
||||
return nil
|
||||
}
|
||||
return *(*[]branchPageElement)(unsafe.Pointer(&reflect.SliceHeader{
|
||||
Data: uintptr(unsafe.Pointer(p)) + unsafe.Sizeof(*p),
|
||||
Len: int(p.count),
|
||||
Cap: int(p.count),
|
||||
}))
|
||||
return (*[maxAllocSize]branchPageElement)(unsafeIndex(unsafe.Pointer(p), unsafe.Sizeof(*p),
|
||||
unsafe.Sizeof(branchPageElement{}), 0))[:p.count:p.count]
|
||||
}
|
||||
|
||||
// dump writes n bytes of the page to STDERR as hex output.
|
||||
func (p *page) hexdump(n int) {
|
||||
buf := *(*[]byte)(unsafe.Pointer(&reflect.SliceHeader{
|
||||
Data: uintptr(unsafe.Pointer(p)),
|
||||
Len: n,
|
||||
Cap: n,
|
||||
}))
|
||||
buf := unsafeByteSlice(unsafe.Pointer(p), 0, 0, n)
|
||||
fmt.Fprintf(os.Stderr, "%x\n", buf)
|
||||
}
|
||||
|
||||
|
@ -115,11 +104,7 @@ type branchPageElement struct {
|
|||
|
||||
// key returns a byte slice of the node key.
|
||||
func (n *branchPageElement) key() []byte {
|
||||
return *(*[]byte)(unsafe.Pointer(&reflect.SliceHeader{
|
||||
Data: uintptr(unsafe.Pointer(n)) + uintptr(n.pos),
|
||||
Len: int(n.ksize),
|
||||
Cap: int(n.ksize),
|
||||
}))
|
||||
return unsafeByteSlice(unsafe.Pointer(n), 0, int(n.pos), int(n.pos)+int(n.ksize))
|
||||
}
|
||||
|
||||
// leafPageElement represents a node on a leaf page.
|
||||
|
@ -132,20 +117,16 @@ type leafPageElement struct {
|
|||
|
||||
// key returns a byte slice of the node key.
|
||||
func (n *leafPageElement) key() []byte {
|
||||
return *(*[]byte)(unsafe.Pointer(&reflect.SliceHeader{
|
||||
Data: uintptr(unsafe.Pointer(n)) + uintptr(n.pos),
|
||||
Len: int(n.ksize),
|
||||
Cap: int(n.ksize),
|
||||
}))
|
||||
i := int(n.pos)
|
||||
j := i + int(n.ksize)
|
||||
return unsafeByteSlice(unsafe.Pointer(n), 0, i, j)
|
||||
}
|
||||
|
||||
// value returns a byte slice of the node value.
|
||||
func (n *leafPageElement) value() []byte {
|
||||
return *(*[]byte)(unsafe.Pointer(&reflect.SliceHeader{
|
||||
Data: uintptr(unsafe.Pointer(n)) + uintptr(n.pos) + uintptr(n.ksize),
|
||||
Len: int(n.vsize),
|
||||
Cap: int(n.vsize),
|
||||
}))
|
||||
i := int(n.pos) + int(n.ksize)
|
||||
j := i + int(n.vsize)
|
||||
return unsafeByteSlice(unsafe.Pointer(n), 0, i, j)
|
||||
}
|
||||
|
||||
// PageInfo represents human readable information about a page.
|
||||
|
|
27
tx.go
27
tx.go
|
@ -4,7 +4,6 @@ import (
|
|||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"reflect"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
|
@ -524,24 +523,18 @@ func (tx *Tx) write() error {
|
|||
|
||||
// Write pages to disk in order.
|
||||
for _, p := range pages {
|
||||
size := (int(p.overflow) + 1) * tx.db.pageSize
|
||||
rem := (uint64(p.overflow) + 1) * uint64(tx.db.pageSize)
|
||||
offset := int64(p.id) * int64(tx.db.pageSize)
|
||||
var written uintptr
|
||||
|
||||
// Write out page in "max allocation" sized chunks.
|
||||
ptr := uintptr(unsafe.Pointer(p))
|
||||
for {
|
||||
// Limit our write to our max allocation size.
|
||||
sz := size
|
||||
sz := rem
|
||||
if sz > maxAllocSize-1 {
|
||||
sz = maxAllocSize - 1
|
||||
}
|
||||
buf := unsafeByteSlice(unsafe.Pointer(p), written, 0, int(sz))
|
||||
|
||||
// Write chunk to disk.
|
||||
buf := *(*[]byte)(unsafe.Pointer(&reflect.SliceHeader{
|
||||
Data: ptr,
|
||||
Len: sz,
|
||||
Cap: sz,
|
||||
}))
|
||||
if _, err := tx.db.ops.writeAt(buf, offset); err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -550,14 +543,14 @@ func (tx *Tx) write() error {
|
|||
tx.stats.Write++
|
||||
|
||||
// Exit inner for loop if we've written all the chunks.
|
||||
size -= sz
|
||||
if size == 0 {
|
||||
rem -= sz
|
||||
if rem == 0 {
|
||||
break
|
||||
}
|
||||
|
||||
// Otherwise move offset forward and move pointer to next chunk.
|
||||
offset += int64(sz)
|
||||
ptr += uintptr(sz)
|
||||
written += uintptr(sz)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -576,11 +569,7 @@ func (tx *Tx) write() error {
|
|||
continue
|
||||
}
|
||||
|
||||
buf := *(*[]byte)(unsafe.Pointer(&reflect.SliceHeader{
|
||||
Data: uintptr(unsafe.Pointer(p)),
|
||||
Len: tx.db.pageSize,
|
||||
Cap: tx.db.pageSize,
|
||||
}))
|
||||
buf := unsafeByteSlice(unsafe.Pointer(p), 0, 0, tx.db.pageSize)
|
||||
|
||||
// See https://go.googlesource.com/go/+/f03c9202c43e0abb130669852082117ca50aa9b1
|
||||
for i := range buf {
|
||||
|
|
|
@ -0,0 +1,15 @@
|
|||
package bbolt
|
||||
|
||||
import "unsafe"
|
||||
|
||||
func unsafeAdd(base unsafe.Pointer, offset uintptr) unsafe.Pointer {
|
||||
return unsafe.Pointer(uintptr(base) + offset)
|
||||
}
|
||||
|
||||
func unsafeIndex(base unsafe.Pointer, offset uintptr, elemsz uintptr, n int) unsafe.Pointer {
|
||||
return unsafe.Pointer(uintptr(base) + offset + uintptr(n)*elemsz)
|
||||
}
|
||||
|
||||
func unsafeByteSlice(base unsafe.Pointer, offset uintptr, i, j int) []byte {
|
||||
return (*[maxAllocSize]byte)(unsafeAdd(base, offset))[i:j:j]
|
||||
}
|
Loading…
Reference in New Issue