use segregated hashmap to boost the freelist allocate and release performance (#141)

pull/143/head
Xingyu Chen 2019-01-26 02:30:05 +08:00 committed by Xiang Li
parent 26245f2a4c
commit 8693da9f4d
7 changed files with 450 additions and 48 deletions

View File

@ -5,7 +5,9 @@ GOLDFLAGS="-X main.branch $(BRANCH) -X main.commit $(COMMIT)"
default: build
race:
@go test -v -race -test.run="TestSimulate_(100op|1000op)"
@TEST_FREELIST_TYPE=hashmap go test -v -race -test.run="TestSimulate_(100op|1000op)"
@echo "array freelist test"
@TEST_FREELIST_TYPE=array go test -v -race -test.run="TestSimulate_(100op|1000op)"
fmt:
!(gofmt -l -s -d $(shell find . -name \*.go) | grep '[a-z]')
@ -23,8 +25,14 @@ errcheck:
@errcheck -ignorepkg=bytes -ignore=os:Remove go.etcd.io/bbolt
test:
go test -timeout 20m -v -coverprofile cover.out -covermode atomic
TEST_FREELIST_TYPE=hashmap go test -timeout 20m -v -coverprofile cover.out -covermode atomic
# Note: gets "program not an importable package" in out of path builds
go test -v ./cmd/bbolt
TEST_FREELIST_TYPE=hashmap go test -v ./cmd/bbolt
@echo "array freelist test"
@TEST_FREELIST_TYPE=array go test -timeout 20m -v -coverprofile cover.out -covermode atomic
# Note: gets "program not an importable package" in out of path builds
@TEST_FREELIST_TYPE=array go test -v ./cmd/bbolt
.PHONY: race fmt errcheck test gosimple unused

View File

@ -5,7 +5,7 @@ import (
)
func TestTx_allocatePageStats(t *testing.T) {
f := newFreelist()
f := newTestFreelist()
ids := []pgid{2, 3}
f.readIDs(ids)

34
db.go
View File

@ -43,6 +43,16 @@ var defaultPageSize = os.Getpagesize()
// The time elapsed between consecutive file locking attempts.
const flockRetryTimeout = 50 * time.Millisecond
// FreelistType names the data structure that backs the freelist.
type FreelistType string

const (
	// FreelistArrayType selects the array-backed freelist.
	FreelistArrayType FreelistType = "array"
	// FreelistMapType selects the hashmap-backed freelist.
	FreelistMapType FreelistType = "hashmap"
)
// DB represents a collection of buckets persisted to a file on disk.
// All data access is performed through transactions which can be obtained through the DB.
// All the functions on DB will return a ErrDatabaseNotOpen if accessed before Open() is called.
@ -70,6 +80,13 @@ type DB struct {
// re-sync during recovery.
NoFreelistSync bool
// FreelistType sets the backend freelist type. There are two options. Array, which is simple but suffers
// dramatic performance degradation if the database is large and fragmentation in the freelist is common.
// The alternative is hashmap, which is faster in almost all circumstances
// but doesn't guarantee that it offers the smallest page id available. In the normal case it is safe.
// The default type is array.
FreelistType FreelistType
// When true, skips the truncate call when growing the database.
// Setting this to true is only safe on non-ext3/ext4 systems.
// Skipping truncation avoids preallocation of hard drive space and
@ -169,6 +186,7 @@ func Open(path string, mode os.FileMode, options *Options) (*DB, error) {
db.NoGrowSync = options.NoGrowSync
db.MmapFlags = options.MmapFlags
db.NoFreelistSync = options.NoFreelistSync
db.FreelistType = options.FreelistType
// Set default values for later DB operations.
db.MaxBatchSize = DefaultMaxBatchSize
@ -283,7 +301,7 @@ func Open(path string, mode os.FileMode, options *Options) (*DB, error) {
// concurrent accesses being made to the freelist.
func (db *DB) loadFreelist() {
db.freelistLoad.Do(func() {
db.freelist = newFreelist()
db.freelist = newFreelist(db.FreelistType)
if !db.hasSyncedFreelist() {
// Reconstruct free list by scanning the DB.
db.freelist.readIDs(db.freepages())
@ -291,7 +309,7 @@ func (db *DB) loadFreelist() {
// Read free list from freelist page.
db.freelist.read(db.page(db.meta().freelist))
}
db.stats.FreePageN = len(db.freelist.getFreePageIDs())
db.stats.FreePageN = db.freelist.free_count()
})
}
@ -1005,6 +1023,13 @@ type Options struct {
// under normal operation, but requires a full database re-sync during recovery.
NoFreelistSync bool
// FreelistType sets the backend freelist type. There are two options. Array, which is simple but suffers
// dramatic performance degradation if the database is large and fragmentation in the freelist is common.
// The alternative is hashmap, which is faster in almost all circumstances
// but doesn't guarantee that it offers the smallest page id available. In the normal case it is safe.
// The default type is array.
FreelistType FreelistType
// Open database in read-only mode. Uses flock(..., LOCK_SH |LOCK_NB) to
// grab a shared lock (UNIX).
ReadOnly bool
@ -1034,8 +1059,9 @@ type Options struct {
// DefaultOptions represent the options used if nil options are passed into Open().
// No timeout is used which will cause Bolt to wait indefinitely for a lock.
var DefaultOptions = &Options{
Timeout: 0,
NoGrowSync: false,
Timeout: 0,
NoGrowSync: false,
FreelistType: FreelistArrayType,
}
// Stats represents statistics about the database.

View File

@ -1605,6 +1605,16 @@ func MustOpenDB() *DB {
// MustOpenWithOption returns a new, open DB at a temporary location with the given options.
func MustOpenWithOption(o *bolt.Options) *DB {
f := tempfile()
if o == nil {
o = bolt.DefaultOptions
}
freelistType := bolt.FreelistArrayType
if env := os.Getenv(bolt.TestFreelistType); env == string(bolt.FreelistMapType) {
freelistType = bolt.FreelistMapType
}
o.FreelistType = freelistType
db, err := bolt.Open(f, 0666, o)
if err != nil {
panic(err)

View File

@ -14,22 +14,54 @@ type txPending struct {
lastReleaseBegin txid // beginning txid of last matching releaseRange
}
// pidSet is a set of page ids, implemented as a map with empty-struct values.
// The freelist uses it to hold the starting pgids of free spans that all have
// the same span size.
type pidSet map[pgid]struct{}
// freelist represents a list of all pages that are available for allocation.
// It also tracks pages that have been freed but are still in use by open transactions.
type freelist struct {
ids []pgid // all free and available free page ids.
allocs map[pgid]txid // mapping of txid that allocated a pgid.
pending map[txid]*txPending // mapping of soon-to-be free page ids by tx.
cache map[pgid]bool // fast lookup of all free and pending page ids.
freelistType FreelistType // freelist type
ids []pgid // all free and available free page ids.
allocs map[pgid]txid // mapping of txid that allocated a pgid.
pending map[txid]*txPending // mapping of soon-to-be free page ids by tx.
cache map[pgid]bool // fast lookup of all free and pending page ids.
freemaps map[uint64]pidSet // key is the size of continuous pages(span), value is a set which contains the starting pgids of same size
forwardMap map[pgid]uint64 // key is start pgid, value is its span size
backwardMap map[pgid]uint64 // key is end pgid, value is its span size
allocate func(txid txid, n int) pgid // the freelist allocate func
free_count func() int // the function which gives you free page number
mergeSpans func(ids pgids) // the mergeSpan func
getFreePageIDs func() []pgid // get free pgids func
readIDs func(pgids []pgid) // readIDs func reads list of pages and init the freelist
}
// newFreelist returns an empty, initialized freelist.
func newFreelist() *freelist {
return &freelist{
allocs: make(map[pgid]txid),
pending: make(map[txid]*txPending),
cache: make(map[pgid]bool),
func newFreelist(freelistType FreelistType) *freelist {
f := &freelist{
freelistType: freelistType,
allocs: make(map[pgid]txid),
pending: make(map[txid]*txPending),
cache: make(map[pgid]bool),
freemaps: make(map[uint64]pidSet),
forwardMap: make(map[pgid]uint64),
backwardMap: make(map[pgid]uint64),
}
if freelistType == FreelistMapType {
f.allocate = f.hashmapAllocate
f.free_count = f.hashmapFreeCount
f.mergeSpans = f.hashmapMergeSpans
f.getFreePageIDs = f.hashmapGetFreePageIDs
f.readIDs = f.hashmapReadIDs
} else {
f.allocate = f.arrayAllocate
f.free_count = f.arrayFreeCount
f.mergeSpans = f.arrayMergeSpans
f.getFreePageIDs = f.arrayGetFreePageIDs
f.readIDs = f.arrayReadIDs
}
return f
}
// size returns the size of the page after serialization.
@ -47,8 +79,8 @@ func (f *freelist) count() int {
return f.free_count() + f.pending_count()
}
// free_count returns count of free pages
func (f *freelist) free_count() int {
// arrayFreeCount returns count of free pages(array version)
func (f *freelist) arrayFreeCount() int {
return len(f.ids)
}
@ -72,9 +104,9 @@ func (f *freelist) copyall(dst []pgid) {
mergepgids(dst, f.getFreePageIDs(), m)
}
// allocate returns the starting page id of a contiguous list of pages of a given size.
// arrayAllocate returns the starting page id of a contiguous list of pages of a given size.
// If a contiguous block cannot be found then 0 is returned.
func (f *freelist) allocate(txid txid, n int) pgid {
func (f *freelist) arrayAllocate(txid txid, n int) pgid {
if len(f.ids) == 0 {
return 0
}
@ -160,8 +192,7 @@ func (f *freelist) release(txid txid) {
delete(f.pending, tid)
}
}
sort.Sort(m)
f.ids = pgids(f.ids).merge(m)
f.mergeSpans(m)
}
// releaseRange moves pending pages allocated within an extent [begin,end] to the free list.
@ -194,8 +225,7 @@ func (f *freelist) releaseRange(begin, end txid) {
delete(f.pending, tid)
}
}
sort.Sort(m)
f.ids = pgids(f.ids).merge(m)
f.mergeSpans(m)
}
// rollback removes the pages from a given pending tx.
@ -222,8 +252,7 @@ func (f *freelist) rollback(txid txid) {
}
// Remove pages from pending list and mark as free if allocated by txid.
delete(f.pending, txid)
sort.Sort(m)
f.ids = pgids(f.ids).merge(m)
f.mergeSpans(m)
}
// freed returns whether a given page is in the free list.
@ -249,24 +278,24 @@ func (f *freelist) read(p *page) {
f.ids = nil
} else {
ids := ((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[idx : idx+count]
f.ids = make([]pgid, len(ids))
copy(f.ids, ids)
// copy the ids, so we don't modify on the freelist page directly
idsCopy := make([]pgid, count)
copy(idsCopy, ids)
// Make sure they're sorted.
sort.Sort(pgids(f.ids))
}
sort.Sort(pgids(idsCopy))
// Rebuild the page cache.
f.reindex()
f.readIDs(idsCopy)
}
}
// readIDs initializes the freelist from a given list of ids.
func (f *freelist) readIDs(ids []pgid) {
// arrayReadIDs initializes the freelist from a given list of ids.
func (f *freelist) arrayReadIDs(ids []pgid) {
f.ids = ids
f.reindex()
}
func (f *freelist) getFreePageIDs() []pgid {
func (f *freelist) arrayGetFreePageIDs() []pgid {
return f.ids
}
@ -322,8 +351,9 @@ func (f *freelist) reload(p *page) {
// reindex rebuilds the free cache based on available and pending free lists.
func (f *freelist) reindex() {
f.cache = make(map[pgid]bool, len(f.getFreePageIDs()))
for _, id := range f.getFreePageIDs() {
ids := f.getFreePageIDs()
f.cache = make(map[pgid]bool, len(ids))
for _, id := range ids {
f.cache[id] = true
}
for _, txp := range f.pending {
@ -332,3 +362,9 @@ func (f *freelist) reindex() {
}
}
}
// arrayMergeSpans folds the given page ids into the array-backed free list.
// ids is sorted in place first; it is then merged with the current f.ids and
// the merged result replaces f.ids.
func (f *freelist) arrayMergeSpans(ids pgids) {
	sort.Sort(ids)
	current := pgids(f.ids)
	f.ids = current.merge(ids)
}

178
freelist_hmap.go Normal file
View File

@ -0,0 +1,178 @@
package bbolt
import "sort"
// hashmapFreeCount reports the number of free pages tracked by the hashmap
// backend. Each forwardMap entry describes one free span (start pgid -> span
// size), so the total is the sum of all span sizes.
func (f *freelist) hashmapFreeCount() int {
	var total int
	for _, spanSize := range f.forwardMap {
		total += int(spanSize)
	}
	return total
}
// hashmapAllocate is the hashmap-backed counterpart of arrayAllocate. It
// reserves n contiguous pages on behalf of txid and returns the id of the
// first page, or 0 when n is 0 or no free span is large enough.
//
// A span of exactly n pages is preferred. Otherwise a larger span is split:
// its first n pages are handed out and the remaining tail is registered again
// as a smaller free span. Spans are picked by ranging over maps, so the
// returned page id is not guaranteed to be the smallest one available.
//
// The starting page is recorded in f.allocs and every allocated page is
// removed from f.cache.
func (f *freelist) hashmapAllocate(txid txid, n int) pgid {
	if n == 0 {
		return 0
	}
	want := uint64(n)

	// Fast path: any span whose size matches the request exactly.
	if exact, found := f.freemaps[want]; found {
		for start := range exact {
			f.delSpan(start, want)
			f.allocs[start] = txid

			for off := pgid(0); off < pgid(n); off++ {
				delete(f.cache, start+off)
			}
			return start
		}
	}

	// Slow path: carve the request out of the first big-enough span found.
	for spanSize, starts := range f.freemaps {
		if spanSize >= want {
			for start := range starts {
				// Drop the whole span, then give back whatever is left over.
				f.delSpan(start, spanSize)
				f.allocs[start] = txid

				leftover := spanSize - want
				f.addSpan(start+pgid(n), leftover)

				for off := pgid(0); off < pgid(n); off++ {
					delete(f.cache, start+off)
				}
				return start
			}
		}
	}

	return 0
}
// hashmapReadIDs initializes the hashmap-backed freelist from the given page
// ids. It first builds the span maps via init (which requires pgids to be
// sorted and panics otherwise) and then rebuilds the page cache via reindex.
func (f *freelist) hashmapReadIDs(pgids []pgid) {
f.init(pgids)
// Rebuild the page cache.
f.reindex()
}
// hashmapGetFreePageIDs expands every free span in forwardMap into its
// individual page ids and returns them sorted in ascending order. It returns
// nil when there are no free pages.
func (f *freelist) hashmapGetFreePageIDs() []pgid {
	total := f.free_count()
	if total == 0 {
		return nil
	}

	ids := make([]pgid, 0, total)
	for first, length := range f.forwardMap {
		for off, n := 0, int(length); off < n; off++ {
			ids = append(ids, first+pgid(off))
		}
	}

	// Map iteration order is unspecified, so the result must be sorted.
	sort.Sort(pgids(ids))
	return ids
}
// hashmapMergeSpans returns each page id in ids to the free spans, one page
// at a time, coalescing it with adjacent free spans where possible.
func (f *freelist) hashmapMergeSpans(ids pgids) {
	for i := range ids {
		f.mergeWithExistingSpan(ids[i])
	}
}
// mergeWithExistingSpan adds the single page pid to the free spans. If a free
// span ends at pid-1 and/or another starts at pid+1, those spans are removed
// and replaced by one combined span that also covers pid; otherwise pid
// becomes a span of size 1.
func (f *freelist) mergeWithExistingSpan(pid pgid) {
	// Look up both neighbours before mutating any map.
	prevSize, hasPrev := f.backwardMap[pid-1]
	nextSize, hasNext := f.forwardMap[pid+1]

	mergedStart, mergedSize := pid, uint64(1)

	if hasPrev {
		// The preceding span ends at pid-1, so it starts prevSize pages before pid.
		mergedStart = pid - pgid(prevSize)
		f.delSpan(mergedStart, prevSize)
		mergedSize += prevSize
	}

	if hasNext {
		// The following span starts right after pid.
		f.delSpan(pid+1, nextSize)
		mergedSize += nextSize
	}

	f.addSpan(mergedStart, mergedSize)
}
// addSpan registers the free span [start, start+size-1] in all three indexes:
// forwardMap (keyed by start page), backwardMap (keyed by end page) and the
// per-size set in freemaps, which is created on first use.
func (f *freelist) addSpan(start pgid, size uint64) {
	end := start - 1 + pgid(size)
	f.backwardMap[end] = size
	f.forwardMap[start] = size

	starts, ok := f.freemaps[size]
	if !ok {
		starts = make(pidSet)
		f.freemaps[size] = starts
	}
	starts[start] = struct{}{}
}
// delSpan removes the free span [start, start+size-1] from forwardMap,
// backwardMap and the per-size set in freemaps. A per-size set that becomes
// empty is dropped from freemaps entirely.
func (f *freelist) delSpan(start pgid, size uint64) {
	end := start + pgid(size-1)
	delete(f.forwardMap, start)
	delete(f.backwardMap, end)

	starts := f.freemaps[size]
	delete(starts, start)
	if len(starts) == 0 {
		delete(f.freemaps, size)
	}
}
// init rebuilds the hashmap-backed span indexes (freemaps, forwardMap and
// backwardMap) from pgids, grouping runs of consecutive page ids into spans.
//
// pgids must be sorted in ascending order; init panics with
// "pgids not sorted" otherwise. Previously indexed spans are always
// discarded, including when pgids is empty, so re-initializing a freelist
// with an empty list cannot leave stale free spans behind.
func (f *freelist) init(pgids []pgid) {
	if !sort.SliceIsSorted(pgids, func(i, j int) bool { return pgids[i] < pgids[j] }) {
		panic("pgids not sorted")
	}

	// Reset the indexes before the empty-input return: returning first would
	// keep whatever spans an earlier initialization had recorded.
	f.freemaps = make(map[uint64]pidSet)
	f.forwardMap = make(map[pgid]uint64)
	f.backwardMap = make(map[pgid]uint64)

	if len(pgids) == 0 {
		return
	}

	size := uint64(1)
	start := pgids[0]

	for i := 1; i < len(pgids); i++ {
		// continuous page
		if pgids[i] == pgids[i-1]+1 {
			size++
		} else {
			f.addSpan(start, size)

			size = 1
			start = pgids[i]
		}
	}

	// Flush the final span. size is always at least 1 here, so only the start
	// needs checking. NOTE(review): a trailing span starting at page 0 is
	// skipped, presumably because page 0 is never a free page; confirm.
	if start != 0 {
		f.addSpan(start, size)
	}
}

View File

@ -2,15 +2,19 @@ package bbolt
import (
"math/rand"
"os"
"reflect"
"sort"
"testing"
"unsafe"
)
// TestFreelistType is the name of the environment variable that the tests read
// to choose the freelist backend. The value "hashmap" selects the hashmap
// backend; any other value (or an unset variable) selects the array backend.
const TestFreelistType = "TEST_FREELIST_TYPE"
// Ensure that a page is added to a transaction's freelist.
func TestFreelist_free(t *testing.T) {
f := newFreelist()
f := newTestFreelist()
f.free(100, &page{id: 12})
if !reflect.DeepEqual([]pgid{12}, f.pending[100].ids) {
t.Fatalf("exp=%v; got=%v", []pgid{12}, f.pending[100])
@ -19,7 +23,7 @@ func TestFreelist_free(t *testing.T) {
// Ensure that a page and its overflow is added to a transaction's freelist.
func TestFreelist_free_overflow(t *testing.T) {
f := newFreelist()
f := newTestFreelist()
f.free(100, &page{id: 12, overflow: 3})
if exp := []pgid{12, 13, 14, 15}; !reflect.DeepEqual(exp, f.pending[100].ids) {
t.Fatalf("exp=%v; got=%v", exp, f.pending[100])
@ -28,7 +32,7 @@ func TestFreelist_free_overflow(t *testing.T) {
// Ensure that a transaction's free pages can be released.
func TestFreelist_release(t *testing.T) {
f := newFreelist()
f := newTestFreelist()
f.free(100, &page{id: 12, overflow: 1})
f.free(100, &page{id: 9})
f.free(102, &page{id: 39})
@ -147,7 +151,7 @@ func TestFreelist_releaseRange(t *testing.T) {
}
for _, c := range releaseRangeTests {
f := newFreelist()
f := newTestFreelist()
var ids []pgid
for _, p := range c.pagesIn {
for i := uint64(0); i < uint64(p.n); i++ {
@ -173,9 +177,41 @@ func TestFreelist_releaseRange(t *testing.T) {
}
}
// TestFreelistHashmap_allocate checks that allocating from a hashmap-backed
// freelist reduces the free page count by exactly the number of pages
// requested, and that a zero-page request changes nothing. It is skipped
// unless the hashmap backend is selected.
func TestFreelistHashmap_allocate(t *testing.T) {
	f := newTestFreelist()
	if f.freelistType != FreelistMapType {
		t.Skip()
	}

	// Nine free pages in total.
	ids := []pgid{3, 4, 5, 6, 7, 9, 12, 13, 18}
	f.readIDs(ids)

	// Each step allocates n pages and states the free count expected after
	// it. Keeping the expectation in one place stops the failure message from
	// drifting away from the asserted value.
	steps := []struct {
		n        int
		wantFree int
	}{
		{n: 3, wantFree: 6},
		{n: 2, wantFree: 4},
		{n: 1, wantFree: 3},
		{n: 0, wantFree: 3},
	}
	for _, s := range steps {
		f.allocate(1, s.n)
		if x := f.free_count(); x != s.wantFree {
			t.Fatalf("allocate(1, %d): exp=%d; got=%v", s.n, s.wantFree, x)
		}
	}
}
// Ensure that a freelist can find contiguous blocks of pages.
func TestFreelist_allocate(t *testing.T) {
f := newFreelist()
func TestFreelistArray_allocate(t *testing.T) {
f := newTestFreelist()
if f.freelistType != FreelistArrayType {
t.Skip()
}
ids := []pgid{3, 4, 5, 6, 7, 9, 12, 13, 18}
f.readIDs(ids)
if id := int(f.allocate(1, 3)); id != 3 {
@ -231,7 +267,7 @@ func TestFreelist_read(t *testing.T) {
ids[1] = 50
// Deserialize page into a freelist.
f := newFreelist()
f := newTestFreelist()
f.read(page)
// Ensure that there are two page ids in the freelist.
@ -244,7 +280,9 @@ func TestFreelist_read(t *testing.T) {
func TestFreelist_write(t *testing.T) {
// Create a freelist and write it to a page.
var buf [4096]byte
f := &freelist{ids: []pgid{12, 39}, pending: make(map[txid]*txPending)}
f := newTestFreelist()
f.readIDs([]pgid{12, 39})
f.pending[100] = &txPending{ids: []pgid{28, 11}}
f.pending[101] = &txPending{ids: []pgid{3}}
p := (*page)(unsafe.Pointer(&buf[0]))
@ -253,7 +291,7 @@ func TestFreelist_write(t *testing.T) {
}
// Read the page back out.
f2 := newFreelist()
f2 := newTestFreelist()
f2.read(p)
// Ensure that the freelist is correct.
@ -274,7 +312,9 @@ func benchmark_FreelistRelease(b *testing.B, size int) {
b.ResetTimer()
for i := 0; i < b.N; i++ {
txp := &txPending{ids: pending}
f := &freelist{ids: ids, pending: map[txid]*txPending{1: txp}}
f := newTestFreelist()
f.pending = map[txid]*txPending{1: txp}
f.readIDs(ids)
f.release(1)
}
}
@ -288,3 +328,107 @@ func randomPgids(n int) []pgid {
sort.Sort(pgids)
return pgids
}
func Test_freelist_ReadIDs_and_getFreePageIDs(t *testing.T) {
f := newTestFreelist()
exp := []pgid{3, 4, 5, 6, 7, 9, 12, 13, 18}
f.readIDs(exp)
if got := f.getFreePageIDs(); !reflect.DeepEqual(exp, got) {
t.Fatalf("exp=%v; got=%v", exp, got)
}
f2 := newTestFreelist()
var exp2 []pgid
f2.readIDs(exp2)
if got2 := f2.getFreePageIDs(); !reflect.DeepEqual(got2, exp2) {
t.Fatalf("exp2=%#v; got2=%#v", exp2, got2)
}
}
func Test_freelist_mergeWithExist(t *testing.T) {
bm1 := pidSet{1: struct{}{}}
bm2 := pidSet{5: struct{}{}}
tests := []struct {
name string
ids []pgid
pgid pgid
want []pgid
wantForwardmap map[pgid]uint64
wantBackwardmap map[pgid]uint64
wantfreemap map[uint64]pidSet
}{
{
name: "test1",
ids: []pgid{1, 2, 4, 5, 6},
pgid: 3,
want: []pgid{1, 2, 3, 4, 5, 6},
wantForwardmap: map[pgid]uint64{1: 6},
wantBackwardmap: map[pgid]uint64{6: 6},
wantfreemap: map[uint64]pidSet{6: bm1},
},
{
name: "test2",
ids: []pgid{1, 2, 5, 6},
pgid: 3,
want: []pgid{1, 2, 3, 5, 6},
wantForwardmap: map[pgid]uint64{1: 3, 5: 2},
wantBackwardmap: map[pgid]uint64{6: 2, 3: 3},
wantfreemap: map[uint64]pidSet{3: bm1, 2: bm2},
},
{
name: "test3",
ids: []pgid{1, 2},
pgid: 3,
want: []pgid{1, 2, 3},
wantForwardmap: map[pgid]uint64{1: 3},
wantBackwardmap: map[pgid]uint64{3: 3},
wantfreemap: map[uint64]pidSet{3: bm1},
},
{
name: "test4",
ids: []pgid{2, 3},
pgid: 1,
want: []pgid{1, 2, 3},
wantForwardmap: map[pgid]uint64{1: 3},
wantBackwardmap: map[pgid]uint64{3: 3},
wantfreemap: map[uint64]pidSet{3: bm1},
},
}
for _, tt := range tests {
f := newTestFreelist()
if f.freelistType == FreelistArrayType {
t.Skip()
}
f.readIDs(tt.ids)
f.mergeWithExistingSpan(tt.pgid)
if got := f.getFreePageIDs(); !reflect.DeepEqual(tt.want, got) {
t.Fatalf("name %s; exp=%v; got=%v", tt.name, tt.want, got)
}
if got := f.forwardMap; !reflect.DeepEqual(tt.wantForwardmap, got) {
t.Fatalf("name %s; exp=%v; got=%v", tt.name, tt.wantForwardmap, got)
}
if got := f.backwardMap; !reflect.DeepEqual(tt.wantBackwardmap, got) {
t.Fatalf("name %s; exp=%v; got=%v", tt.name, tt.wantBackwardmap, got)
}
if got := f.freemaps; !reflect.DeepEqual(tt.wantfreemap, got) {
t.Fatalf("name %s; exp=%v; got=%v", tt.name, tt.wantfreemap, got)
}
}
}
// newTestFreelist builds a freelist whose backend is chosen by the
// TestFreelistType environment variable: the hashmap backend when the variable
// equals "hashmap", the array backend in every other case.
func newTestFreelist() *freelist {
	if os.Getenv(TestFreelistType) == string(FreelistMapType) {
		return newFreelist(FreelistMapType)
	}
	return newFreelist(FreelistArrayType)
}