From 88d2b54695213e3788d66f0a9afde3111a432a19 Mon Sep 17 00:00:00 2001
From: Matthew Sainsbury
Date: Fri, 25 Apr 2025 10:46:51 -0700
Subject: [PATCH] add support for data file size limit (#929)

* add support for data file size limit

closes #928

Signed-off-by: Matthew Sainsbury

* respond to PR feedback

Signed-off-by: Matthew Sainsbury

---------

Signed-off-by: Matthew Sainsbury
---
 bolt_windows.go               |  13 +++
 db.go                         |  25 ++++-
 db_test.go                    | 174 ++++++++++++++++++++++++++++++++++
 errors/errors.go              |   3 +
 internal/btesting/btesting.go |  13 ++-
 5 files changed, 223 insertions(+), 5 deletions(-)

diff --git a/bolt_windows.go b/bolt_windows.go
index ec21ecb..bba0f88 100644
--- a/bolt_windows.go
+++ b/bolt_windows.go
@@ -67,6 +67,19 @@ func mmap(db *DB, sz int) error {
 	var sizelo, sizehi uint32
 
 	if !db.readOnly {
+		if db.MaxSize > 0 && sz > db.MaxSize {
+			// The max size only limits future writes; however, we don't block opening
+			// and mapping the database if it already exceeds the limit.
+			fileSize, err := db.fileSize()
+			if err != nil {
+				return fmt.Errorf("could not check existing db file size: %w", err)
+			}
+
+			if sz > fileSize {
+				return errors.ErrMaxSizeReached
+			}
+		}
+
 		// Truncate the database to the size of the mmap.
 		if err := db.file.Truncate(int64(sz)); err != nil {
 			return fmt.Errorf("truncate: %s", err)
diff --git a/db.go b/db.go
index 4171983..9e379ac 100644
--- a/db.go
+++ b/db.go
@@ -110,6 +110,12 @@ type DB struct {
 	// of truncate() and fsync() when growing the data file.
 	AllocSize int
 
+	// MaxSize is the maximum size (in bytes) allowed for the data file.
+	// If a caller's attempt to add data results in the need to grow
+	// the data file, an error will be returned and the data file will not grow.
+	// <=0 means no limit.
+	MaxSize int
+
 	// Mlock locks database file in memory when set to true.
 	// It prevents major page faults, however used memory can't be reclaimed.
 	//
@@ -191,6 +197,7 @@ func Open(path string, mode os.FileMode, options *Options) (db *DB, err error) {
 	db.PreLoadFreelist = options.PreLoadFreelist
 	db.FreelistType = options.FreelistType
 	db.Mlock = options.Mlock
+	db.MaxSize = options.MaxSize
 
 	// Set default values for later DB operations.
 	db.MaxBatchSize = common.DefaultMaxBatchSize
@@ -1166,7 +1173,11 @@ func (db *DB) allocate(txid common.Txid, count int) (*common.Page, error) {
 	var minsz = int((p.Id()+common.Pgid(count))+1) * db.pageSize
 	if minsz >= db.datasz {
 		if err := db.mmap(minsz); err != nil {
-			return nil, fmt.Errorf("mmap allocate error: %s", err)
+			// Pass the size-limit sentinel through unwrapped; other failures get context.
+			if err == berrors.ErrMaxSizeReached {
+				return nil, err
+			}
+			return nil, fmt.Errorf("mmap allocate error: %s", err)
 		}
 	}
 
@@ -1198,6 +1209,11 @@ func (db *DB) grow(sz int) error {
 		sz += db.AllocSize
 	}
 
+	if !db.readOnly && db.MaxSize > 0 && sz > db.MaxSize {
+		lg.Errorf("[GOOS: %s, GOARCH: %s] maximum db size reached, size: %d, db.MaxSize: %d", runtime.GOOS, runtime.GOARCH, sz, db.MaxSize)
+		return berrors.ErrMaxSizeReached
+	}
+
 	// Truncate and fsync to ensure file size metadata is flushed.
 	// https://github.com/boltdb/bolt/issues/284
 	if !db.NoGrowSync && !db.readOnly {
@@ -1320,6 +1336,9 @@ type Options struct {
 	// PageSize overrides the default OS page size.
 	PageSize int
 
+	// MaxSize sets the maximum size of the data file. <=0 means no maximum.
+	MaxSize int
+
 	// NoSync sets the initial value of DB.NoSync. Normally this can just be
 	// set directly on the DB itself when returned from Open(), but this option
 	// is useful in APIs which expose Options but not the underlying DB.
@@ -1343,8 +1362,8 @@ func (o *Options) String() string {
 		return "{}"
 	}
 
-	return fmt.Sprintf("{Timeout: %s, NoGrowSync: %t, NoFreelistSync: %t, PreLoadFreelist: %t, FreelistType: %s, ReadOnly: %t, MmapFlags: %x, InitialMmapSize: %d, PageSize: %d, NoSync: %t, OpenFile: %p, Mlock: %t, Logger: %p}",
-		o.Timeout, o.NoGrowSync, o.NoFreelistSync, o.PreLoadFreelist, o.FreelistType, o.ReadOnly, o.MmapFlags, o.InitialMmapSize, o.PageSize, o.NoSync, o.OpenFile, o.Mlock, o.Logger)
+	return fmt.Sprintf("{Timeout: %s, NoGrowSync: %t, NoFreelistSync: %t, PreLoadFreelist: %t, FreelistType: %s, ReadOnly: %t, MmapFlags: %x, InitialMmapSize: %d, PageSize: %d, MaxSize: %d, NoSync: %t, OpenFile: %p, Mlock: %t, Logger: %p}",
+		o.Timeout, o.NoGrowSync, o.NoFreelistSync, o.PreLoadFreelist, o.FreelistType, o.ReadOnly, o.MmapFlags, o.InitialMmapSize, o.PageSize, o.MaxSize, o.NoSync, o.OpenFile, o.Mlock, o.Logger)
 
 }
 
diff --git a/db_test.go b/db_test.go
index 757b896..53d877e 100644
--- a/db_test.go
+++ b/db_test.go
@@ -11,6 +11,7 @@ import (
 	"os"
 	"path/filepath"
 	"reflect"
+	"runtime"
 	"strings"
 	"sync"
 	"testing"
@@ -1373,6 +1374,179 @@ func TestDBUnmap(t *testing.T) {
 	db.DB = nil
 }
 
+// Convenience function for inserting a bunch of keys with 1000 byte values
+func fillDBWithKeys(db *btesting.DB, numKeys int) error {
+	return db.Fill([]byte("data"), 1, numKeys,
+		func(tx int, k int) []byte { return []byte(fmt.Sprintf("%04d", k)) },
+		func(tx int, k int) []byte { return make([]byte, 1000) },
+	)
+}
+
+// Creates a new database, forces a specific allocation size jump, and fills it with the number of keys specified
+func createFilledDB(t testing.TB, o *bolt.Options, allocSize int, numKeys int) *btesting.DB {
+	// Open a data file.
+	db := btesting.MustCreateDBWithOption(t, o)
+	db.AllocSize = allocSize
+
+	// Insert a reasonable amount of data below the max size.
+	err := db.Fill([]byte("data"), 1, numKeys,
+		func(tx int, k int) []byte { return []byte(fmt.Sprintf("%04d", k)) },
+		func(tx int, k int) []byte { return make([]byte, 1000) },
+	)
+	if err != nil {
+		t.Fatal(err)
+	}
+	return db
+}
+
+// Ensure that a database cannot exceed its maximum size
+// https://github.com/etcd-io/bbolt/issues/928
+func TestDB_MaxSizeNotExceeded(t *testing.T) {
+	testCases := []struct {
+		name    string
+		options bolt.Options
+	}{
+		{
+			name: "Standard case",
+			options: bolt.Options{
+				MaxSize:  5 * 1024 * 1024, // 5 MiB
+				PageSize: 4096,
+			},
+		},
+		{
+			name: "NoGrowSync",
+			options: bolt.Options{
+				MaxSize:    5 * 1024 * 1024, // 5 MiB
+				PageSize:   4096,
+				NoGrowSync: true,
+			},
+		},
+	}
+
+	for _, testCase := range testCases {
+		t.Run(testCase.name, func(t *testing.T) {
+			db := createFilledDB(t,
+				&testCase.options,
+				4*1024*1024, // adjust allocation jumps to 4 MiB
+				2000,
+			)
+
+			path := db.Path()
+
+			// The data file should be 4 MiB now (expanded once from zero).
+			// It should have space for roughly 16 more entries before trying to grow
+			// Keep inserting until grow is required
+			err := fillDBWithKeys(db, 100)
+			assert.ErrorIs(t, err, berrors.ErrMaxSizeReached)
+
+			newSz := fileSize(path)
+			require.Greater(t, newSz, int64(0), "unexpected new file size: %d", newSz)
+			assert.LessOrEqual(t, newSz, int64(db.MaxSize), "The size of the data file should not exceed db.MaxSize")
+
+			err = db.Close()
+			require.NoError(t, err, "Closing the database should succeed")
+		})
+	}
+}
+
+// Ensure that opening a database that is beyond the maximum size succeeds
+// The maximum size should only apply to growing the data file
+// https://github.com/etcd-io/bbolt/issues/928
+func TestDB_MaxSizeExceededCanOpen(t *testing.T) {
+	// Open a data file.
+	db := createFilledDB(t, nil, 4*1024*1024, 2000) // adjust allocation jumps to 4 MiB, fill with 2000 1KB entries
+	path := db.Path()
+
+	// Insert a reasonable amount of data below the max size.
+	err := fillDBWithKeys(db, 2000)
+	require.NoError(t, err, "fillDBWithKeys should succeed")
+
+	err = db.Close()
+	require.NoError(t, err, "Close should succeed")
+
+	// The data file should be 4 MiB now (expanded once from zero).
+	minimumSizeForTest := int64(1024 * 1024)
+	newSz := fileSize(path)
+	require.GreaterOrEqual(t, newSz, minimumSizeForTest, "unexpected new file size: %d. Expected at least %d", newSz, minimumSizeForTest)
+
+	// Now try to re-open the database with an extremely small max size
+	t.Logf("Reopening bbolt DB at: %s", path)
+	db, err = btesting.OpenDBWithOption(t, path, &bolt.Options{
+		MaxSize: 1,
+	})
+	require.NoError(t, err, "Should be able to open database bigger than MaxSize")
+
+	err = db.Close()
+	require.NoError(t, err, "Closing the re-opened database should succeed")
+}
+
+// Ensure that opening a database that is beyond the maximum size succeeds,
+// even when InitialMmapSize is above the limit (mmaps should not affect file size)
+// This test exists for platforms where Truncate should not be called during mmap
+// https://github.com/etcd-io/bbolt/issues/928
+func TestDB_MaxSizeExceededCanOpenWithHighMmap(t *testing.T) {
+	if runtime.GOOS == "windows" {
+		// In Windows, the file must be expanded to the mmap initial size,
+		// so this test doesn't run in Windows.
+		t.SkipNow()
+	}
+
+	// Open a data file.
+	db := createFilledDB(t, nil, 4*1024*1024, 2000) // adjust allocation jumps to 4 MiB, fill with 2000 1KB entries
+	path := db.Path()
+
+	err := db.Close()
+	require.NoError(t, err, "Close should succeed")
+
+	// The data file should be 4 MiB now (expanded once from zero).
+	minimumSizeForTest := int64(1024 * 1024)
+	newSz := fileSize(path)
+	require.GreaterOrEqual(t, newSz, minimumSizeForTest, "unexpected new file size: %d. Expected at least %d", newSz, minimumSizeForTest)
+
+	// Now try to re-open the database with an extremely small max size
+	t.Logf("Reopening bbolt DB at: %s", path)
+	db, err = btesting.OpenDBWithOption(t, path, &bolt.Options{
+		MaxSize:         1,
+		InitialMmapSize: int(minimumSizeForTest) * 2,
+	})
+	require.NoError(t, err, "Should be able to open database bigger than MaxSize when InitialMmapSize set high")
+
+	err = db.Close()
+	require.NoError(t, err, "Closing the re-opened database should succeed")
+}
+
+// Ensure that when InitialMmapSize is above the limit, opening a database
+// that is beyond the maximum size fails in Windows.
+// In Windows, the file must be expanded to the mmap initial size.
+// https://github.com/etcd-io/bbolt/issues/928
+func TestDB_MaxSizeExceededDoesNotGrow(t *testing.T) {
+	if runtime.GOOS != "windows" {
+		// This test is only relevant on Windows
+		t.SkipNow()
+	}
+
+	// Open a data file.
+	db := createFilledDB(t, nil, 4*1024*1024, 2000) // adjust allocation jumps to 4 MiB, fill with 2000 1KB entries
+	path := db.Path()
+
+	err := db.Close()
+	require.NoError(t, err, "Close should succeed")
+
+	// The data file should be 4 MiB now (expanded once from zero).
+	minimumSizeForTest := int64(1024 * 1024)
+	newSz := fileSize(path)
+	require.GreaterOrEqual(t, newSz, minimumSizeForTest, "unexpected new file size: %d. Expected at least %d", newSz, minimumSizeForTest)
+
+	// Now try to re-open the database with an extremely small max size and
+	// an initial mmap size to be greater than the actual file size, forcing an illegal grow on open
+	t.Logf("Reopening bbolt DB at: %s", path)
+	_, err = btesting.OpenDBWithOption(t, path, &bolt.Options{
+		MaxSize:         1,
+		InitialMmapSize: int(newSz) * 2,
+	})
+	assert.Error(t, err, "Opening the DB with InitialMmapSize > MaxSize should cause an error on Windows")
+}
+
 func ExampleDB_Update() {
 	// Open the database.
 	db, err := bolt.Open(tempfile(), 0600, nil)
diff --git a/errors/errors.go b/errors/errors.go
index c115289..dbebd63 100644
--- a/errors/errors.go
+++ b/errors/errors.go
@@ -69,6 +69,9 @@ var (
 	// ErrValueTooLarge is returned when inserting a value that is larger than MaxValueSize.
 	ErrValueTooLarge = errors.New("value too large")
 
+	// ErrMaxSizeReached is returned when the configured maximum size of the data file is reached.
+	ErrMaxSizeReached = errors.New("database reached maximum size")
+
 	// ErrIncompatibleValue is returned when trying to create or delete a bucket
 	// on an existing non-bucket key or when trying to create or delete a
 	// non-bucket key on an existing bucket key.
diff --git a/internal/btesting/btesting.go b/internal/btesting/btesting.go
index c83369f..3b3d236 100644
--- a/internal/btesting/btesting.go
+++ b/internal/btesting/btesting.go
@@ -44,6 +44,13 @@ func MustCreateDBWithOption(t testing.TB, o *bolt.Options) *DB {
 }
 
 func MustOpenDBWithOption(t testing.TB, f string, o *bolt.Options) *DB {
+	db, err := OpenDBWithOption(t, f, o)
+	require.NoError(t, err)
+	require.NotNil(t, db)
+	return db
+}
+
+func OpenDBWithOption(t testing.TB, f string, o *bolt.Options) (*DB, error) {
 	t.Logf("Opening bbolt DB at: %s", f)
 	if o == nil {
 		o = bolt.DefaultOptions
@@ -57,7 +64,9 @@ func MustOpenDBWithOption(t testing.TB, f string, o *bolt.Options) *DB {
 	o.FreelistType = freelistType
 
 	db, err := bolt.Open(f, 0600, o)
-	require.NoError(t, err)
+	if err != nil {
+		return nil, err
+	}
 	resDB := &DB{
 		DB: db,
 		f:  f,
@@ -66,7 +75,7 @@ func MustOpenDBWithOption(t testing.TB, f string, o *bolt.Options) *DB {
 	}
 	resDB.strictModeEnabledDefault()
 	t.Cleanup(resDB.PostTestCleanup)
-	return resDB
+	return resDB, nil
 }
 
 func (db *DB) PostTestCleanup() {