mirror of https://github.com/etcd-io/bbolt.git
Merge pull request #3 from heyitsanthony/range-gc
Garbage collect pages allocated after minimum txidpull/6/head
commit
ad39960eb4
36
db.go
36
db.go
|
@ -8,6 +8,7 @@ import (
|
|||
"os"
|
||||
"runtime"
|
||||
"runtime/debug"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
@ -537,21 +538,36 @@ func (db *DB) beginRWTx() (*Tx, error) {
|
|||
t := &Tx{writable: true}
|
||||
t.init(db)
|
||||
db.rwtx = t
|
||||
db.freePages()
|
||||
return t, nil
|
||||
}
|
||||
|
||||
// Free any pages associated with closed read-only transactions.
|
||||
var minid txid = 0xFFFFFFFFFFFFFFFF
|
||||
for _, t := range db.txs {
|
||||
if t.meta.txid < minid {
|
||||
minid = t.meta.txid
|
||||
}
|
||||
// freePages releases any pages associated with closed read-only transactions.
|
||||
func (db *DB) freePages() {
|
||||
// Free all pending pages prior to earliest open transaction.
|
||||
sort.Sort(txsById(db.txs))
|
||||
minid := txid(0xFFFFFFFFFFFFFFFF)
|
||||
if len(db.txs) > 0 {
|
||||
minid = db.txs[0].meta.txid
|
||||
}
|
||||
if minid > 0 {
|
||||
db.freelist.release(minid - 1)
|
||||
}
|
||||
|
||||
return t, nil
|
||||
// Release unused txid extents.
|
||||
for _, t := range db.txs {
|
||||
db.freelist.releaseRange(minid, t.meta.txid-1)
|
||||
minid = t.meta.txid + 1
|
||||
}
|
||||
db.freelist.releaseRange(minid, txid(0xFFFFFFFFFFFFFFFF))
|
||||
// Any page both allocated and freed in an extent is safe to release.
|
||||
}
|
||||
|
||||
type txsById []*Tx
|
||||
|
||||
func (t txsById) Len() int { return len(t) }
|
||||
func (t txsById) Swap(i, j int) { t[i], t[j] = t[j], t[i] }
|
||||
func (t txsById) Less(i, j int) bool { return t[i].meta.txid < t[j].meta.txid }
|
||||
|
||||
// removeTx removes a transaction from the database.
|
||||
func (db *DB) removeTx(tx *Tx) {
|
||||
// Release the read lock on the mmap.
|
||||
|
@ -837,7 +853,7 @@ func (db *DB) meta() *meta {
|
|||
}
|
||||
|
||||
// allocate returns a contiguous block of memory starting at a given page.
|
||||
func (db *DB) allocate(count int) (*page, error) {
|
||||
func (db *DB) allocate(txid txid, count int) (*page, error) {
|
||||
// Allocate a temporary buffer for the page.
|
||||
var buf []byte
|
||||
if count == 1 {
|
||||
|
@ -849,7 +865,7 @@ func (db *DB) allocate(count int) (*page, error) {
|
|||
p.overflow = uint32(count - 1)
|
||||
|
||||
// Use pages from the freelist if they are available.
|
||||
if p.id = db.freelist.allocate(count); p.id != 0 {
|
||||
if p.id = db.freelist.allocate(txid, count); p.id != 0 {
|
||||
return p, nil
|
||||
}
|
||||
|
||||
|
|
121
freelist.go
121
freelist.go
|
@ -6,18 +6,29 @@ import (
|
|||
"unsafe"
|
||||
)
|
||||
|
||||
|
||||
// txPending holds a list of pgids and corresponding allocation txns
|
||||
// that are pending to be freed.
|
||||
type txPending struct {
|
||||
ids []pgid
|
||||
alloctx []txid // txids allocating the ids
|
||||
lastReleaseBegin txid // beginning txid of last matching releaseRange
|
||||
}
|
||||
|
||||
// freelist represents a list of all pages that are available for allocation.
|
||||
// It also tracks pages that have been freed but are still in use by open transactions.
|
||||
type freelist struct {
|
||||
ids []pgid // all free and available free page ids.
|
||||
pending map[txid][]pgid // mapping of soon-to-be free page ids by tx.
|
||||
cache map[pgid]bool // fast lookup of all free and pending page ids.
|
||||
ids []pgid // all free and available free page ids.
|
||||
allocs map[pgid]txid // mapping of txid that allocated a pgid.
|
||||
pending map[txid]*txPending // mapping of soon-to-be free page ids by tx.
|
||||
cache map[pgid]bool // fast lookup of all free and pending page ids.
|
||||
}
|
||||
|
||||
// newFreelist returns an empty, initialized freelist.
|
||||
func newFreelist() *freelist {
|
||||
return &freelist{
|
||||
pending: make(map[txid][]pgid),
|
||||
allocs: make(map[pgid]txid),
|
||||
pending: make(map[txid]*txPending),
|
||||
cache: make(map[pgid]bool),
|
||||
}
|
||||
}
|
||||
|
@ -45,8 +56,8 @@ func (f *freelist) free_count() int {
|
|||
// pending_count returns count of pending pages
|
||||
func (f *freelist) pending_count() int {
|
||||
var count int
|
||||
for _, list := range f.pending {
|
||||
count += len(list)
|
||||
for _, txp := range f.pending {
|
||||
count += len(txp.ids)
|
||||
}
|
||||
return count
|
||||
}
|
||||
|
@ -55,8 +66,8 @@ func (f *freelist) pending_count() int {
|
|||
// f.count returns the minimum length required for dst.
|
||||
func (f *freelist) copyall(dst []pgid) {
|
||||
m := make(pgids, 0, f.pending_count())
|
||||
for _, list := range f.pending {
|
||||
m = append(m, list...)
|
||||
for _, txp := range f.pending {
|
||||
m = append(m, txp.ids...)
|
||||
}
|
||||
sort.Sort(m)
|
||||
mergepgids(dst, f.ids, m)
|
||||
|
@ -64,7 +75,7 @@ func (f *freelist) copyall(dst []pgid) {
|
|||
|
||||
// allocate returns the starting page id of a contiguous list of pages of a given size.
|
||||
// If a contiguous block cannot be found then 0 is returned.
|
||||
func (f *freelist) allocate(n int) pgid {
|
||||
func (f *freelist) allocate(txid txid, n int) pgid {
|
||||
if len(f.ids) == 0 {
|
||||
return 0
|
||||
}
|
||||
|
@ -97,7 +108,7 @@ func (f *freelist) allocate(n int) pgid {
|
|||
for i := pgid(0); i < pgid(n); i++ {
|
||||
delete(f.cache, initial+i)
|
||||
}
|
||||
|
||||
f.allocs[initial] = txid
|
||||
return initial
|
||||
}
|
||||
|
||||
|
@ -114,28 +125,73 @@ func (f *freelist) free(txid txid, p *page) {
|
|||
}
|
||||
|
||||
// Free page and all its overflow pages.
|
||||
var ids = f.pending[txid]
|
||||
txp := f.pending[txid]
|
||||
if txp == nil {
|
||||
txp = &txPending{}
|
||||
f.pending[txid] = txp
|
||||
}
|
||||
allocTxid, ok := f.allocs[p.id]
|
||||
if ok {
|
||||
delete(f.allocs, p.id)
|
||||
} else if (p.flags & (freelistPageFlag | metaPageFlag)) != 0 {
|
||||
// Safe to claim txid as allocating since these types are private to txid.
|
||||
allocTxid = txid
|
||||
}
|
||||
|
||||
for id := p.id; id <= p.id+pgid(p.overflow); id++ {
|
||||
// Verify that page is not already free.
|
||||
if f.cache[id] {
|
||||
panic(fmt.Sprintf("page %d already freed", id))
|
||||
}
|
||||
|
||||
// Add to the freelist and cache.
|
||||
ids = append(ids, id)
|
||||
txp.ids = append(txp.ids, id)
|
||||
txp.alloctx = append(txp.alloctx, allocTxid)
|
||||
f.cache[id] = true
|
||||
}
|
||||
f.pending[txid] = ids
|
||||
}
|
||||
|
||||
// release moves all page ids for a transaction id (or older) to the freelist.
|
||||
func (f *freelist) release(txid txid) {
|
||||
m := make(pgids, 0)
|
||||
for tid, ids := range f.pending {
|
||||
for tid, txp := range f.pending {
|
||||
if tid <= txid {
|
||||
// Move transaction's pending pages to the available freelist.
|
||||
// Don't remove from the cache since the page is still free.
|
||||
m = append(m, ids...)
|
||||
m = append(m, txp.ids...)
|
||||
delete(f.pending, tid)
|
||||
}
|
||||
}
|
||||
sort.Sort(m)
|
||||
f.ids = pgids(f.ids).merge(m)
|
||||
}
|
||||
|
||||
// releaseRange moves pending pages allocated within an extent [begin,end] to the free list.
|
||||
func (f *freelist) releaseRange(begin, end txid) {
|
||||
if begin > end {
|
||||
return
|
||||
}
|
||||
var m pgids
|
||||
for tid, txp := range f.pending {
|
||||
if tid < begin || tid > end {
|
||||
continue
|
||||
}
|
||||
// Don't recompute freed pages if ranges haven't updated.
|
||||
if txp.lastReleaseBegin == begin {
|
||||
continue
|
||||
}
|
||||
for i := 0; i < len(txp.ids); i++ {
|
||||
if atx := txp.alloctx[i]; atx < begin || atx > end {
|
||||
continue
|
||||
}
|
||||
m = append(m, txp.ids[i])
|
||||
txp.ids[i] = txp.ids[len(txp.ids)-1]
|
||||
txp.ids = txp.ids[:len(txp.ids)-1]
|
||||
txp.alloctx[i] = txp.alloctx[len(txp.alloctx)-1]
|
||||
txp.alloctx = txp.alloctx[:len(txp.alloctx)-1]
|
||||
i--
|
||||
}
|
||||
txp.lastReleaseBegin = begin
|
||||
if len(txp.ids) == 0 {
|
||||
delete(f.pending, tid)
|
||||
}
|
||||
}
|
||||
|
@ -146,12 +202,29 @@ func (f *freelist) release(txid txid) {
|
|||
// rollback removes the pages from a given pending tx.
|
||||
func (f *freelist) rollback(txid txid) {
|
||||
// Remove page ids from cache.
|
||||
for _, id := range f.pending[txid] {
|
||||
delete(f.cache, id)
|
||||
txp := f.pending[txid]
|
||||
if txp == nil {
|
||||
return
|
||||
}
|
||||
|
||||
// Remove pages from pending list.
|
||||
var m pgids
|
||||
for i, pgid := range txp.ids {
|
||||
delete(f.cache, pgid)
|
||||
tx := txp.alloctx[i]
|
||||
if tx == 0 {
|
||||
continue
|
||||
}
|
||||
if tx != txid {
|
||||
// Pending free aborted; restore page back to alloc list.
|
||||
f.allocs[pgid] = tx
|
||||
} else {
|
||||
// Freed page was allocated by this txn; OK to throw away.
|
||||
m = append(m, pgid)
|
||||
}
|
||||
}
|
||||
// Remove pages from pending list and mark as free if allocated by txid.
|
||||
delete(f.pending, txid)
|
||||
sort.Sort(m)
|
||||
f.ids = pgids(f.ids).merge(m)
|
||||
}
|
||||
|
||||
// freed returns whether a given page is in the free list.
|
||||
|
@ -223,8 +296,8 @@ func (f *freelist) reload(p *page) {
|
|||
|
||||
// Build a cache of only pending pages.
|
||||
pcache := make(map[pgid]bool)
|
||||
for _, pendingIDs := range f.pending {
|
||||
for _, pendingID := range pendingIDs {
|
||||
for _, txp := range f.pending {
|
||||
for _, pendingID := range txp.ids {
|
||||
pcache[pendingID] = true
|
||||
}
|
||||
}
|
||||
|
@ -250,8 +323,8 @@ func (f *freelist) reindex() {
|
|||
for _, id := range f.ids {
|
||||
f.cache[id] = true
|
||||
}
|
||||
for _, pendingIDs := range f.pending {
|
||||
for _, pendingID := range pendingIDs {
|
||||
for _, txp := range f.pending {
|
||||
for _, pendingID := range txp.ids {
|
||||
f.cache[pendingID] = true
|
||||
}
|
||||
}
|
||||
|
|
|
@ -12,7 +12,7 @@ import (
|
|||
func TestFreelist_free(t *testing.T) {
|
||||
f := newFreelist()
|
||||
f.free(100, &page{id: 12})
|
||||
if !reflect.DeepEqual([]pgid{12}, f.pending[100]) {
|
||||
if !reflect.DeepEqual([]pgid{12}, f.pending[100].ids) {
|
||||
t.Fatalf("exp=%v; got=%v", []pgid{12}, f.pending[100])
|
||||
}
|
||||
}
|
||||
|
@ -21,7 +21,7 @@ func TestFreelist_free(t *testing.T) {
|
|||
func TestFreelist_free_overflow(t *testing.T) {
|
||||
f := newFreelist()
|
||||
f.free(100, &page{id: 12, overflow: 3})
|
||||
if exp := []pgid{12, 13, 14, 15}; !reflect.DeepEqual(exp, f.pending[100]) {
|
||||
if exp := []pgid{12, 13, 14, 15}; !reflect.DeepEqual(exp, f.pending[100].ids) {
|
||||
t.Fatalf("exp=%v; got=%v", exp, f.pending[100])
|
||||
}
|
||||
}
|
||||
|
@ -46,39 +46,40 @@ func TestFreelist_release(t *testing.T) {
|
|||
|
||||
// Ensure that a freelist can find contiguous blocks of pages.
|
||||
func TestFreelist_allocate(t *testing.T) {
|
||||
f := &freelist{ids: []pgid{3, 4, 5, 6, 7, 9, 12, 13, 18}}
|
||||
if id := int(f.allocate(3)); id != 3 {
|
||||
f := newFreelist()
|
||||
f.ids = []pgid{3, 4, 5, 6, 7, 9, 12, 13, 18}
|
||||
if id := int(f.allocate(1, 3)); id != 3 {
|
||||
t.Fatalf("exp=3; got=%v", id)
|
||||
}
|
||||
if id := int(f.allocate(1)); id != 6 {
|
||||
if id := int(f.allocate(1, 1)); id != 6 {
|
||||
t.Fatalf("exp=6; got=%v", id)
|
||||
}
|
||||
if id := int(f.allocate(3)); id != 0 {
|
||||
if id := int(f.allocate(1, 3)); id != 0 {
|
||||
t.Fatalf("exp=0; got=%v", id)
|
||||
}
|
||||
if id := int(f.allocate(2)); id != 12 {
|
||||
if id := int(f.allocate(1, 2)); id != 12 {
|
||||
t.Fatalf("exp=12; got=%v", id)
|
||||
}
|
||||
if id := int(f.allocate(1)); id != 7 {
|
||||
if id := int(f.allocate(1, 1)); id != 7 {
|
||||
t.Fatalf("exp=7; got=%v", id)
|
||||
}
|
||||
if id := int(f.allocate(0)); id != 0 {
|
||||
if id := int(f.allocate(1, 0)); id != 0 {
|
||||
t.Fatalf("exp=0; got=%v", id)
|
||||
}
|
||||
if id := int(f.allocate(0)); id != 0 {
|
||||
if id := int(f.allocate(1, 0)); id != 0 {
|
||||
t.Fatalf("exp=0; got=%v", id)
|
||||
}
|
||||
if exp := []pgid{9, 18}; !reflect.DeepEqual(exp, f.ids) {
|
||||
t.Fatalf("exp=%v; got=%v", exp, f.ids)
|
||||
}
|
||||
|
||||
if id := int(f.allocate(1)); id != 9 {
|
||||
if id := int(f.allocate(1, 1)); id != 9 {
|
||||
t.Fatalf("exp=9; got=%v", id)
|
||||
}
|
||||
if id := int(f.allocate(1)); id != 18 {
|
||||
if id := int(f.allocate(1, 1)); id != 18 {
|
||||
t.Fatalf("exp=18; got=%v", id)
|
||||
}
|
||||
if id := int(f.allocate(1)); id != 0 {
|
||||
if id := int(f.allocate(1, 1)); id != 0 {
|
||||
t.Fatalf("exp=0; got=%v", id)
|
||||
}
|
||||
if exp := []pgid{}; !reflect.DeepEqual(exp, f.ids) {
|
||||
|
@ -113,9 +114,9 @@ func TestFreelist_read(t *testing.T) {
|
|||
func TestFreelist_write(t *testing.T) {
|
||||
// Create a freelist and write it to a page.
|
||||
var buf [4096]byte
|
||||
f := &freelist{ids: []pgid{12, 39}, pending: make(map[txid][]pgid)}
|
||||
f.pending[100] = []pgid{28, 11}
|
||||
f.pending[101] = []pgid{3}
|
||||
f := &freelist{ids: []pgid{12, 39}, pending: make(map[txid]*txPending)}
|
||||
f.pending[100] = &txPending{ids: []pgid{28, 11}}
|
||||
f.pending[101] = &txPending{ids: []pgid{3}}
|
||||
p := (*page)(unsafe.Pointer(&buf[0]))
|
||||
if err := f.write(p); err != nil {
|
||||
t.Fatal(err)
|
||||
|
@ -142,7 +143,8 @@ func benchmark_FreelistRelease(b *testing.B, size int) {
|
|||
pending := randomPgids(len(ids) / 400)
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
f := &freelist{ids: ids, pending: map[txid][]pgid{1: pending}}
|
||||
txp := &txPending{ids: pending}
|
||||
f := &freelist{ids: ids, pending: map[txid]*txPending{1: txp}}
|
||||
f.release(1)
|
||||
}
|
||||
}
|
||||
|
|
2
tx.go
2
tx.go
|
@ -465,7 +465,7 @@ func (tx *Tx) checkBucket(b *Bucket, reachable map[pgid]*page, freed map[pgid]bo
|
|||
|
||||
// allocate returns a contiguous block of memory starting at a given page.
|
||||
func (tx *Tx) allocate(count int) (*page, error) {
|
||||
p, err := tx.db.allocate(count)
|
||||
p, err := tx.db.allocate(tx.meta.txid, count)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue