From adbb1a19c1b93a95f17a4eb3c5524e5be8b0d10f Mon Sep 17 00:00:00 2001 From: Tommi Virtanen Date: Sun, 24 Aug 2014 15:42:55 -0700 Subject: [PATCH] Add transaction batching DB.Batch makes it easy to make lots of small transactions with significantly better performance. Batch combines multiple concurrent Update calls into a single disk transaction, managing errors smartly. --- README.md | 42 ++++++++++ batch.go | 135 +++++++++++++++++++++++++++++++ batch_benchmark_test.go | 170 ++++++++++++++++++++++++++++++++++++++++ batch_example_test.go | 148 ++++++++++++++++++++++++++++++++++ batch_test.go | 167 +++++++++++++++++++++++++++++++++++++++ db.go | 29 +++++++ db_test.go | 28 +++++++ 7 files changed, 719 insertions(+) create mode 100644 batch.go create mode 100644 batch_benchmark_test.go create mode 100644 batch_example_test.go create mode 100644 batch_test.go diff --git a/README.md b/README.md index cb65702..bfaccfc 100644 --- a/README.md +++ b/README.md @@ -125,6 +125,48 @@ no mutating operations are allowed within a read-only transaction. You can only retrieve buckets, retrieve values, and copy the database within a read-only transaction. + +#### Batch read-write transactions + +Each `DB.Update()` waits for disk to commit the writes. This overhead +can be minimized by combining multiple updates with the `DB.Batch()` +function: + +```go +err := db.Batch(func(tx *bolt.Tx) error { + ... + return nil +}) +``` + +Concurrent Batch calls are opportunistically combined into larger +transactions. Batch is only useful when there are multiple goroutines +calling it. + +The trade-off is that `Batch` can call the given +function multiple times, if parts of the transaction fail. The +function must be idempotent and side effects must take effect only +after a successful return from `DB.Batch()`. + +For example: don't display messages from inside the function, instead +set variables in the enclosing scope: + +```go +var id uint64 +err := db.Batch(func(tx *bolt.Tx) error { + // Find last key in bucket, decode as bigendian uint64, increment + // by one, encode back to []byte, and add new key. + ... + id = newValue + return nil +}) +if err != nil { + return ... +} +fmt.Println("Allocated ID %d", id) +``` + + #### Managing transactions manually The `DB.View()` and `DB.Update()` functions are wrappers around the `DB.Begin()` diff --git a/batch.go b/batch.go new file mode 100644 index 0000000..bef1f4a --- /dev/null +++ b/batch.go @@ -0,0 +1,135 @@ +package bolt + +import ( + "errors" + "fmt" + "sync" + "time" +) + +// Batch calls fn as part of a batch. It behaves similar to Update, +// except: +// +// 1. concurrent Batch calls can be combined into a single Bolt +// transaction. +// +// 2. the function passed to Batch may be called multiple times, +// regardless of whether it returns error or not. +// +// This means that Batch function side effects must be idempotent and +// take permanent effect only after a successful return is seen in +// caller. +// +// Batch is only useful when there are multiple goroutines calling it. +func (db *DB) Batch(fn func(*Tx) error) error { + errCh := make(chan error, 1) + + db.batchMu.Lock() + if (db.batch == nil) || (db.batch != nil && len(db.batch.calls) >= db.MaxBatchSize) { + // There is no existing batch, or the existing batch is full; start a new one. + db.batch = &batch{ + db: db, + } + db.batch.timer = time.AfterFunc(db.MaxBatchDelay, db.batch.trigger) + } + db.batch.calls = append(db.batch.calls, call{fn: fn, err: errCh}) + if len(db.batch.calls) >= db.MaxBatchSize { + // wake up batch, it's ready to run + go db.batch.trigger() + } + db.batchMu.Unlock() + + err := <-errCh + if err == trySolo { + err = db.Update(fn) + } + return err +} + +type call struct { + fn func(*Tx) error + err chan<- error +} + +type batch struct { + db *DB + timer *time.Timer + start sync.Once + calls []call +} + +// trigger runs the batch if it hasn't already been run. +func (b *batch) trigger() { + b.start.Do(b.run) +} + +// run performs the transactions in the batch and communicates results +// back to DB.Batch. +func (b *batch) run() { + b.db.batchMu.Lock() + b.timer.Stop() + // Make sure no new work is added to this batch, but don't break + // other batches. + if b.db.batch == b { + b.db.batch = nil + } + b.db.batchMu.Unlock() + +retry: + for len(b.calls) > 0 { + var failIdx = -1 + err := b.db.Update(func(tx *Tx) error { + for i, c := range b.calls { + if err := safelyCall(c.fn, tx); err != nil { + failIdx = i + return err + } + } + return nil + }) + + if failIdx >= 0 { + // take the failing transaction out of the batch. it's + // safe to shorten b.calls here because db.batch no longer + // points to us, and we hold the mutex anyway. + c := b.calls[failIdx] + b.calls[failIdx], b.calls = b.calls[len(b.calls)-1], b.calls[:len(b.calls)-1] + // tell the submitter re-run it solo, continue with the rest of the batch + c.err <- trySolo + continue retry + } + + // pass success, or bolt internal errors, to all callers + for _, c := range b.calls { + if c.err != nil { + c.err <- err + } + } + break retry + } +} + +// trySolo is a special sentinel error value used for signaling that a +// transaction function should be re-run. It should never be seen by +// callers. +var trySolo = errors.New("batch function returned an error and should be re-run solo") + +type panicked struct { + reason interface{} +} + +func (p panicked) Error() string { + if err, ok := p.reason.(error); ok { + return err.Error() + } + return fmt.Sprintf("panic: %v", p.reason) +} + +func safelyCall(fn func(*Tx) error, tx *Tx) (err error) { + defer func() { + if p := recover(); p != nil { + err = panicked{p} + } + }() + return fn(tx) +} diff --git a/batch_benchmark_test.go b/batch_benchmark_test.go new file mode 100644 index 0000000..b745a37 --- /dev/null +++ b/batch_benchmark_test.go @@ -0,0 +1,170 @@ +package bolt_test + +import ( + "bytes" + "encoding/binary" + "errors" + "hash/fnv" + "sync" + "testing" + + "github.com/boltdb/bolt" +) + +func validateBatchBench(b *testing.B, db *TestDB) { + var rollback = errors.New("sentinel error to cause rollback") + validate := func(tx *bolt.Tx) error { + bucket := tx.Bucket([]byte("bench")) + h := fnv.New32a() + buf := make([]byte, 4) + for id := uint32(0); id < 1000; id++ { + binary.LittleEndian.PutUint32(buf, id) + h.Reset() + h.Write(buf[:]) + k := h.Sum(nil) + v := bucket.Get(k) + if v == nil { + b.Errorf("not found id=%d key=%x", id, k) + continue + } + if g, e := v, []byte("filler"); !bytes.Equal(g, e) { + b.Errorf("bad value for id=%d key=%x: %s != %q", id, k, g, e) + } + if err := bucket.Delete(k); err != nil { + return err + } + } + // should be empty now + c := bucket.Cursor() + for k, v := c.First(); k != nil; k, v = c.Next() { + b.Errorf("unexpected key: %x = %q", k, v) + } + return rollback + } + if err := db.Update(validate); err != nil && err != rollback { + b.Error(err) + } +} + +func BenchmarkDBBatchAutomatic(b *testing.B) { + db := NewTestDB() + defer db.Close() + db.MustCreateBucket([]byte("bench")) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + start := make(chan struct{}) + var wg sync.WaitGroup + + for round := 0; round < 1000; round++ { + wg.Add(1) + + go func(id uint32) { + defer wg.Done() + <-start + + h := fnv.New32a() + buf := make([]byte, 4) + binary.LittleEndian.PutUint32(buf, id) + h.Write(buf[:]) + k := h.Sum(nil) + insert := func(tx *bolt.Tx) error { + b := tx.Bucket([]byte("bench")) + return b.Put(k, []byte("filler")) + } + if err := db.Batch(insert); err != nil { + b.Error(err) + return + } + }(uint32(round)) + } + close(start) + wg.Wait() + } + + b.StopTimer() + validateBatchBench(b, db) +} + +func BenchmarkDBBatchSingle(b *testing.B) { + db := NewTestDB() + defer db.Close() + db.MustCreateBucket([]byte("bench")) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + start := make(chan struct{}) + var wg sync.WaitGroup + + for round := 0; round < 1000; round++ { + wg.Add(1) + go func(id uint32) { + defer wg.Done() + <-start + + h := fnv.New32a() + buf := make([]byte, 4) + binary.LittleEndian.PutUint32(buf, id) + h.Write(buf[:]) + k := h.Sum(nil) + insert := func(tx *bolt.Tx) error { + b := tx.Bucket([]byte("bench")) + return b.Put(k, []byte("filler")) + } + if err := db.Update(insert); err != nil { + b.Error(err) + return + } + }(uint32(round)) + } + close(start) + wg.Wait() + } + + b.StopTimer() + validateBatchBench(b, db) +} + +func BenchmarkDBBatchManual10x100(b *testing.B) { + db := NewTestDB() + defer db.Close() + db.MustCreateBucket([]byte("bench")) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + start := make(chan struct{}) + var wg sync.WaitGroup + + for major := 0; major < 10; major++ { + wg.Add(1) + go func(id uint32) { + defer wg.Done() + <-start + + insert100 := func(tx *bolt.Tx) error { + h := fnv.New32a() + buf := make([]byte, 4) + for minor := uint32(0); minor < 100; minor++ { + binary.LittleEndian.PutUint32(buf, uint32(id*100+minor)) + h.Reset() + h.Write(buf[:]) + k := h.Sum(nil) + b := tx.Bucket([]byte("bench")) + if err := b.Put(k, []byte("filler")); err != nil { + return err + } + } + return nil + } + if err := db.Update(insert100); err != nil { + b.Fatal(err) + } + }(uint32(major)) + } + close(start) + wg.Wait() + } + + b.StopTimer() + validateBatchBench(b, db) +} diff --git a/batch_example_test.go b/batch_example_test.go new file mode 100644 index 0000000..74eff8a --- /dev/null +++ b/batch_example_test.go @@ -0,0 +1,148 @@ +package bolt_test + +import ( + "encoding/binary" + "fmt" + "io/ioutil" + "log" + "math/rand" + "net/http" + "net/http/httptest" + "os" + + "github.com/boltdb/bolt" +) + +// Set this to see how the counts are actually updated. +const verbose = false + +// Counter updates a counter in Bolt for every URL path requested. +type counter struct { + db *bolt.DB +} + +func (c counter) ServeHTTP(rw http.ResponseWriter, req *http.Request) { + // Communicates the new count from a successful database + // transaction. + var result uint64 + + increment := func(tx *bolt.Tx) error { + b, err := tx.CreateBucketIfNotExists([]byte("hits")) + if err != nil { + return err + } + key := []byte(req.URL.String()) + // Decode handles key not found for us. + count := decode(b.Get(key)) + 1 + b.Put(key, encode(count)) + // All good, communicate new count. + result = count + return nil + } + if err := c.db.Batch(increment); err != nil { + http.Error(rw, err.Error(), 500) + return + } + + if verbose { + log.Printf("server: %s: %d", req.URL.String(), result) + } + + rw.Header().Set("Content-Type", "application/octet-stream") + fmt.Fprintf(rw, "%d\n", result) +} + +func client(id int, base string, paths []string) error { + // Process paths in random order. + rng := rand.New(rand.NewSource(int64(id))) + permutation := rng.Perm(len(paths)) + + for i := range paths { + path := paths[permutation[i]] + resp, err := http.Get(base + path) + if err != nil { + return err + } + defer resp.Body.Close() + buf, err := ioutil.ReadAll(resp.Body) + if err != nil { + return err + } + if verbose { + log.Printf("client: %s: %s", path, buf) + } + } + return nil +} + +func ExampleDB_Batch() { + // Open the database. + db, _ := bolt.Open(tempfile(), 0666, nil) + defer os.Remove(db.Path()) + defer db.Close() + + // Start our web server + count := counter{db} + srv := httptest.NewServer(count) + defer srv.Close() + + // Decrease the batch size to make things more interesting. + db.MaxBatchSize = 3 + + // Get every path multiple times concurrently. + const clients = 10 + paths := []string{ + "/foo", + "/bar", + "/baz", + "/quux", + "/thud", + "/xyzzy", + } + errors := make(chan error, clients) + for i := 0; i < clients; i++ { + go func(id int) { + errors <- client(id, srv.URL, paths) + }(i) + } + // Check all responses to make sure there's no error. + for i := 0; i < clients; i++ { + if err := <-errors; err != nil { + fmt.Printf("client error: %v", err) + return + } + } + + // Check the final result + db.View(func(tx *bolt.Tx) error { + b := tx.Bucket([]byte("hits")) + c := b.Cursor() + for k, v := c.First(); k != nil; k, v = c.Next() { + fmt.Printf("hits to %s: %d\n", k, decode(v)) + } + return nil + }) + + // Output: + // hits to /bar: 10 + // hits to /baz: 10 + // hits to /foo: 10 + // hits to /quux: 10 + // hits to /thud: 10 + // hits to /xyzzy: 10 +} + +// encode marshals a counter. +func encode(n uint64) []byte { + buf := make([]byte, 8) + binary.BigEndian.PutUint64(buf, n) + return buf +} + +// decode unmarshals a counter. Nil buffers are decoded as 0. +func decode(buf []byte) uint64 { + if buf == nil { + return 0 + } + return binary.BigEndian.Uint64(buf) +} diff --git a/batch_test.go b/batch_test.go new file mode 100644 index 0000000..0b5075f --- /dev/null +++ b/batch_test.go @@ -0,0 +1,167 @@ +package bolt_test + +import ( + "testing" + "time" + + "github.com/boltdb/bolt" +) + +// Ensure two functions can perform updates in a single batch. +func TestDB_Batch(t *testing.T) { + db := NewTestDB() + defer db.Close() + db.MustCreateBucket([]byte("widgets")) + + // Iterate over multiple updates in separate goroutines. + n := 2 + ch := make(chan error) + for i := 0; i < n; i++ { + go func(i int) { + ch <- db.Batch(func(tx *bolt.Tx) error { + return tx.Bucket([]byte("widgets")).Put(u64tob(uint64(i)), []byte{}) + }) + }(i) + } + + // Check all responses to make sure there's no error. + for i := 0; i < n; i++ { + if err := <-ch; err != nil { + t.Fatal(err) + } + } + + // Ensure data is correct. + db.MustView(func(tx *bolt.Tx) error { + b := tx.Bucket([]byte("widgets")) + for i := 0; i < n; i++ { + if v := b.Get(u64tob(uint64(i))); v == nil { + t.Errorf("key not found: %d", i) + } + } + return nil + }) +} + +func TestDB_Batch_Panic(t *testing.T) { + db := NewTestDB() + defer db.Close() + + var sentinel int + var bork = &sentinel + var problem interface{} + var err error + + // Execute a function inside a batch that panics. + func() { + defer func() { + if p := recover(); p != nil { + problem = p + } + }() + err = db.Batch(func(tx *bolt.Tx) error { + panic(bork) + }) + }() + + // Verify there is no error. + if g, e := err, error(nil); g != e { + t.Fatalf("wrong error: %v != %v", g, e) + } + // Verify the panic was captured. + if g, e := problem, bork; g != e { + t.Fatalf("wrong error: %v != %v", g, e) + } +} + +func TestDB_BatchFull(t *testing.T) { + db := NewTestDB() + defer db.Close() + db.MustCreateBucket([]byte("widgets")) + + const size = 3 + // buffered so we never leak goroutines + ch := make(chan error, size) + put := func(i int) { + ch <- db.Batch(func(tx *bolt.Tx) error { + return tx.Bucket([]byte("widgets")).Put(u64tob(uint64(i)), []byte{}) + }) + } + + db.MaxBatchSize = size + // high enough to never trigger here + db.MaxBatchDelay = 1 * time.Hour + + go put(1) + go put(2) + + // Give the batch a chance to exhibit bugs. + time.Sleep(10 * time.Millisecond) + + // not triggered yet + select { + case <-ch: + t.Fatalf("batch triggered too early") + default: + } + + go put(3) + + // Check all responses to make sure there's no error. + for i := 0; i < size; i++ { + if err := <-ch; err != nil { + t.Fatal(err) + } + } + + // Ensure data is correct. + db.MustView(func(tx *bolt.Tx) error { + b := tx.Bucket([]byte("widgets")) + for i := 1; i <= size; i++ { + if v := b.Get(u64tob(uint64(i))); v == nil { + t.Errorf("key not found: %d", i) + } + } + return nil + }) +} + +func TestDB_BatchTime(t *testing.T) { + db := NewTestDB() + defer db.Close() + db.MustCreateBucket([]byte("widgets")) + + const size = 1 + // buffered so we never leak goroutines + ch := make(chan error, size) + put := func(i int) { + ch <- db.Batch(func(tx *bolt.Tx) error { + return tx.Bucket([]byte("widgets")).Put(u64tob(uint64(i)), []byte{}) + }) + } + + db.MaxBatchSize = 1000 + db.MaxBatchDelay = 0 + + go put(1) + + // Batch must trigger by time alone. + + // Check all responses to make sure there's no error. + for i := 0; i < size; i++ { + if err := <-ch; err != nil { + t.Fatal(err) + } + } + + // Ensure data is correct. + db.MustView(func(tx *bolt.Tx) error { + b := tx.Bucket([]byte("widgets")) + for i := 1; i <= size; i++ { + if v := b.Get(u64tob(uint64(i))); v == nil { + t.Errorf("key not found: %d", i) + } + } + return nil + }) +} diff --git a/db.go b/db.go index 4775850..d4c85fb 100644 --- a/db.go +++ b/db.go @@ -27,6 +27,12 @@ const magic uint32 = 0xED0CDAED // must be synchronzied using the msync(2) syscall. const IgnoreNoSync = runtime.GOOS == "openbsd" +// Default values if not set in a DB instance. +const ( + DefaultMaxBatchSize int = 1000 + DefaultMaxBatchDelay = 10 * time.Millisecond +) + // DB represents a collection of buckets persisted to a file on disk. // All data access is performed through transactions which can be obtained through the DB. // All the functions on DB will return a ErrDatabaseNotOpen if accessed before Open() is called. @@ -49,6 +55,22 @@ type DB struct { // THIS IS UNSAFE. PLEASE USE WITH CAUTION. NoSync bool + // MaxBatchSize is the maximum size of a batch. Default value is + // copied from DefaultMaxBatchSize in Open. + // + // If <=0, disables batching. + // + // Do not change concurrently with calls to Batch. + MaxBatchSize int + + // MaxBatchDelay is the maximum delay before a batch starts. + // Default value is copied from DefaultMaxBatchDelay in Open. + // + // If <=0, effectively disables batching. + // + // Do not change concurrently with calls to Batch. + MaxBatchDelay time.Duration + path string file *os.File dataref []byte @@ -63,6 +85,9 @@ type DB struct { freelist *freelist stats Stats + batchMu sync.Mutex + batch *batch + rwlock sync.Mutex // Allows only one writer at a time. metalock sync.Mutex // Protects meta page access. mmaplock sync.RWMutex // Protects mmap access during remapping. @@ -99,6 +124,10 @@ func Open(path string, mode os.FileMode, options *Options) (*DB, error) { options = DefaultOptions } + // Set default values for later DB operations. + db.MaxBatchSize = DefaultMaxBatchSize + db.MaxBatchDelay = DefaultMaxBatchDelay + // Open data file and separate sync handler for metadata writes. db.path = path diff --git a/db_test.go b/db_test.go index ecff7d8..ad17e87 100644 --- a/db_test.go +++ b/db_test.go @@ -618,6 +618,34 @@ func NewTestDB() *TestDB { return &TestDB{db} } +// MustView executes a read-only function. Panic on error. +func (db *TestDB) MustView(fn func(tx *bolt.Tx) error) { + if err := db.DB.View(func(tx *bolt.Tx) error { + return fn(tx) + }); err != nil { + panic(err.Error()) + } +} + +// MustUpdate executes a read-write function. Panic on error. +func (db *TestDB) MustUpdate(fn func(tx *bolt.Tx) error) { + if err := db.DB.View(func(tx *bolt.Tx) error { + return fn(tx) + }); err != nil { + panic(err.Error()) + } +} + +// MustCreateBucket creates a new bucket. Panic on error. +func (db *TestDB) MustCreateBucket(name []byte) { + if err := db.Update(func(tx *bolt.Tx) error { + _, err := tx.CreateBucket([]byte(name)) + return err + }); err != nil { + panic(err.Error()) + } +} + // Close closes the database and deletes the underlying file. func (db *TestDB) Close() { // Log statistics.