add support for data file size limit (#929)

* add support for data file size limit
closes #928

Signed-off-by: Matthew Sainsbury <matthew@sainsbury.io>

* respond to PR feedback

Signed-off-by: Matthew Sainsbury <matthew@sainsbury.io>

---------

Signed-off-by: Matthew Sainsbury <matthew@sainsbury.io>
This commit is contained in:
Matthew Sainsbury 2025-04-25 10:46:51 -07:00 committed by GitHub
parent 6e830d93bc
commit 88d2b54695
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 223 additions and 5 deletions

View File

@ -67,6 +67,19 @@ func mmap(db *DB, sz int) error {
var sizelo, sizehi uint32
if !db.readOnly {
if db.MaxSize > 0 && sz > db.MaxSize {
// The max size only limits future growth of the data file, so we don't block
// opening and mapping a database that already exceeds the limit.
fileSize, err := db.fileSize()
if err != nil {
return fmt.Errorf("could not check existing db file size: %s", err)
}
if sz > fileSize {
return errors.ErrMaxSizeReached
}
}
// Truncate the database to the size of the mmap.
if err := db.file.Truncate(int64(sz)); err != nil {
return fmt.Errorf("truncate: %s", err)

25
db.go
View File

@ -110,6 +110,12 @@ type DB struct {
// of truncate() and fsync() when growing the data file.
AllocSize int
// MaxSize is the maximum size (in bytes) allowed for the data file.
// If a caller's attempt to add data would require growing the data file
// beyond MaxSize, ErrMaxSizeReached is returned and the data file will not grow.
// <=0 means no limit.
MaxSize int
// Mlock locks database file in memory when set to true.
// It prevents major page faults, however used memory can't be reclaimed.
//
@ -191,6 +197,7 @@ func Open(path string, mode os.FileMode, options *Options) (db *DB, err error) {
db.PreLoadFreelist = options.PreLoadFreelist
db.FreelistType = options.FreelistType
db.Mlock = options.Mlock
db.MaxSize = options.MaxSize
// Set default values for later DB operations.
db.MaxBatchSize = common.DefaultMaxBatchSize
@ -1166,7 +1173,11 @@ func (db *DB) allocate(txid common.Txid, count int) (*common.Page, error) {
var minsz = int((p.Id()+common.Pgid(count))+1) * db.pageSize
if minsz >= db.datasz {
if err := db.mmap(minsz); err != nil {
return nil, fmt.Errorf("mmap allocate error: %s", err)
if err == berrors.ErrMaxSizeReached {
return nil, err
} else {
return nil, fmt.Errorf("mmap allocate error: %s", err)
}
}
}
@ -1198,6 +1209,11 @@ func (db *DB) grow(sz int) error {
sz += db.AllocSize
}
if !db.readOnly && db.MaxSize > 0 && sz > db.MaxSize {
lg.Errorf("[GOOS: %s, GOARCH: %s] maximum db size reached, size: %d, db.MaxSize: %d", runtime.GOOS, runtime.GOARCH, sz, db.MaxSize)
return berrors.ErrMaxSizeReached
}
// Truncate and fsync to ensure file size metadata is flushed.
// https://github.com/boltdb/bolt/issues/284
if !db.NoGrowSync && !db.readOnly {
@ -1320,6 +1336,9 @@ type Options struct {
// PageSize overrides the default OS page size.
PageSize int
// MaxSize sets the maximum size of the data file. <=0 means no maximum.
MaxSize int
// NoSync sets the initial value of DB.NoSync. Normally this can just be
// set directly on the DB itself when returned from Open(), but this option
// is useful in APIs which expose Options but not the underlying DB.
@ -1343,8 +1362,8 @@ func (o *Options) String() string {
return "{}"
}
return fmt.Sprintf("{Timeout: %s, NoGrowSync: %t, NoFreelistSync: %t, PreLoadFreelist: %t, FreelistType: %s, ReadOnly: %t, MmapFlags: %x, InitialMmapSize: %d, PageSize: %d, NoSync: %t, OpenFile: %p, Mlock: %t, Logger: %p}",
o.Timeout, o.NoGrowSync, o.NoFreelistSync, o.PreLoadFreelist, o.FreelistType, o.ReadOnly, o.MmapFlags, o.InitialMmapSize, o.PageSize, o.NoSync, o.OpenFile, o.Mlock, o.Logger)
return fmt.Sprintf("{Timeout: %s, NoGrowSync: %t, NoFreelistSync: %t, PreLoadFreelist: %t, FreelistType: %s, ReadOnly: %t, MmapFlags: %x, InitialMmapSize: %d, PageSize: %d, MaxSize: %d, NoSync: %t, OpenFile: %p, Mlock: %t, Logger: %p}",
o.Timeout, o.NoGrowSync, o.NoFreelistSync, o.PreLoadFreelist, o.FreelistType, o.ReadOnly, o.MmapFlags, o.InitialMmapSize, o.PageSize, o.MaxSize, o.NoSync, o.OpenFile, o.Mlock, o.Logger)
}

View File

@ -11,6 +11,7 @@ import (
"os"
"path/filepath"
"reflect"
"runtime"
"strings"
"sync"
"testing"
@ -1373,6 +1374,179 @@ func TestDBUnmap(t *testing.T) {
db.DB = nil
}
// fillDBWithKeys writes numKeys entries into the "data" bucket through
// db.Fill and returns whatever error db.Fill reports.
//
// Each key is the zero-padded, four-digit decimal form of the key index and
// each value is a freshly allocated 1000-byte slice. The literal 1 is passed
// as Fill's second argument (presumably the number of transactions — confirm
// against btesting.DB.Fill).
func fillDBWithKeys(db *btesting.DB, numKeys int) error {
	keyFor := func(_ int, k int) []byte {
		return []byte(fmt.Sprintf("%04d", k))
	}
	valueFor := func(_ int, _ int) []byte {
		return make([]byte, 1000)
	}
	return db.Fill([]byte("data"), 1, numKeys, keyFor, valueFor)
}
// createFilledDB creates a new database with the given options, sets its
// AllocSize to allocSize so that file growth happens in steps of that size,
// and fills it with numKeys entries using fillDBWithKeys.
//
// Any error while filling aborts the calling test via t.Fatal. The returned
// database is still open; the caller decides when to close it.
func createFilledDB(t testing.TB, o *bolt.Options, allocSize int, numKeys int) *btesting.DB {
	// Attribute failures to the calling test rather than to this helper.
	t.Helper()

	// Open a data file.
	db := btesting.MustCreateDBWithOption(t, o)
	db.AllocSize = allocSize

	// Insert the requested amount of data; reuse the shared helper so the key
	// and value layout stays identical to later fillDBWithKeys calls.
	if err := fillDBWithKeys(db, numKeys); err != nil {
		t.Fatal(err)
	}
	return db
}
// TestDB_MaxSizeNotExceeded ensures that a database cannot grow beyond its
// configured maximum size: once further writes would require growing the
// data file past MaxSize, they must fail with ErrMaxSizeReached and the file
// on disk must stay within the limit.
// https://github.com/etcd-io/bbolt/issues/928
func TestDB_MaxSizeNotExceeded(t *testing.T) {
	const (
		maxSize   = 5 * 1024 * 1024 // 5 MiB
		allocSize = 4 * 1024 * 1024 // allocation jumps of 4 MiB
	)

	testCases := []struct {
		name    string
		options bolt.Options
	}{
		{
			name: "Standard case",
			options: bolt.Options{
				MaxSize:  maxSize,
				PageSize: 4096,
			},
		},
		{
			name: "NoGrowSync",
			options: bolt.Options{
				MaxSize:    maxSize,
				PageSize:   4096,
				NoGrowSync: true,
			},
		},
	}

	for _, testCase := range testCases {
		t.Run(testCase.name, func(t *testing.T) {
			db := createFilledDB(t, &testCase.options, allocSize, 2000)
			path := db.Path()

			// The initial fill is expected to leave the data file at a single
			// allocation step (4 MiB). Growing by another step would exceed
			// MaxSize, so inserting more data is expected to hit the limit.
			err := fillDBWithKeys(db, 100)
			assert.ErrorIs(t, err, berrors.ErrMaxSizeReached)

			newSz := fileSize(path)
			require.Greater(t, newSz, int64(0), "unexpected new file size: %d", newSz)
			assert.LessOrEqual(t, newSz, int64(db.MaxSize), "The size of the data file should not exceed db.MaxSize")

			// This test never re-opens the database, so the failure message
			// refers to the database as opened above.
			err = db.Close()
			require.NoError(t, err, "Closing the database should succeed")
		})
	}
}
// TestDB_MaxSizeExceededCanOpen ensures that opening a database whose data
// file is already larger than MaxSize succeeds. The maximum size should only
// apply to growing the data file.
// https://github.com/etcd-io/bbolt/issues/928
func TestDB_MaxSizeExceededCanOpen(t *testing.T) {
	// Create a data file with nil options (this test sets no MaxSize here):
	// allocation jumps of 4 MiB, filled with 2000 entries of 1000-byte values.
	db := createFilledDB(t, nil, 4*1024*1024, 2000)
	path := db.Path()

	// Write the entries once more; this is expected to succeed because no
	// limit has been configured yet.
	err := fillDBWithKeys(db, 2000)
	require.NoError(t, err, "fillDbWithKeys should succeed")

	err = db.Close()
	require.NoError(t, err, "Close should succeed")

	// The file only needs to be clearly larger than the 1-byte MaxSize used
	// below; 1 MiB is the lower bound this test relies on.
	minimumSizeForTest := int64(1024 * 1024)
	newSz := fileSize(path)
	require.GreaterOrEqual(t, newSz, minimumSizeForTest, "unexpected new file size: %d. Expected at least %d", newSz, minimumSizeForTest)

	// Now try to re-open the database with an extremely small max size.
	t.Logf("Reopening bbolt DB at: %s", path)
	db, err = btesting.OpenDBWithOption(t, path, &bolt.Options{
		MaxSize: 1,
	})
	// require (not assert): OpenDBWithOption returns a nil DB on error, and
	// the Close call below would dereference it.
	require.NoError(t, err, "Should be able to open database bigger than MaxSize")

	err = db.Close()
	require.NoError(t, err, "Closing the re-opened database should succeed")
}
// TestDB_MaxSizeExceededCanOpenWithHighMmap ensures that opening a database
// that is beyond the maximum size succeeds even when InitialMmapSize is above
// the limit (mmaps should not affect file size).
// This test exists for platforms where Truncate should not be called during mmap.
// https://github.com/etcd-io/bbolt/issues/928
func TestDB_MaxSizeExceededCanOpenWithHighMmap(t *testing.T) {
	if runtime.GOOS == "windows" {
		// In Windows, the file must be expanded to the mmap initial size,
		// so this test doesn't run in Windows.
		t.SkipNow()
	}

	// Create a data file with nil options: allocation jumps of 4 MiB, filled
	// with 2000 entries of 1000-byte values.
	db := createFilledDB(t, nil, 4*1024*1024, 2000)
	path := db.Path()

	err := db.Close()
	require.NoError(t, err, "Close should succeed")

	// The file only needs to be clearly larger than the 1-byte MaxSize used
	// below; 1 MiB is the lower bound this test relies on.
	minimumSizeForTest := int64(1024 * 1024)
	newSz := fileSize(path)
	require.GreaterOrEqual(t, newSz, minimumSizeForTest, "unexpected new file size: %d. Expected at least %d", newSz, minimumSizeForTest)

	// Now try to re-open the database with an extremely small max size and an
	// initial mmap size (2 MiB) that is far above that limit.
	t.Logf("Reopening bbolt DB at: %s", path)
	db, err = btesting.OpenDBWithOption(t, path, &bolt.Options{
		MaxSize:         1,
		InitialMmapSize: int(minimumSizeForTest) * 2,
	})
	// require (not assert): OpenDBWithOption returns a nil DB on error, and
	// the Close call below would dereference it.
	require.NoError(t, err, "Should be able to open database bigger than MaxSize when InitialMmapSize set high")

	err = db.Close()
	require.NoError(t, err, "Closing the re-opened database should succeed")
}
// TestDB_MaxSizeExceededDoesNotGrow ensures that, on Windows, opening a
// database fails when InitialMmapSize is above both MaxSize and the current
// file size. In Windows, the file must be expanded to the mmap initial size,
// which would grow it past the limit.
// https://github.com/etcd-io/bbolt/issues/928
func TestDB_MaxSizeExceededDoesNotGrow(t *testing.T) {
	if runtime.GOOS != "windows" {
		// This test is only relevant on Windows
		t.SkipNow()
	}

	// Create a data file with nil options: allocation jumps of 4 MiB, filled
	// with 2000 entries of 1000-byte values.
	db := createFilledDB(t, nil, 4*1024*1024, 2000)
	path := db.Path()

	err := db.Close()
	require.NoError(t, err, "Close should succeed")

	// newSz drives InitialMmapSize below, so stop here if the precondition
	// does not hold (require, matching the sibling MaxSize tests).
	minimumSizeForTest := int64(1024 * 1024)
	newSz := fileSize(path)
	require.GreaterOrEqual(t, newSz, minimumSizeForTest, "unexpected new file size: %d. Expected at least %d", newSz, minimumSizeForTest)

	// Now try to re-open the database with an extremely small max size and
	// an initial mmap size greater than the actual file size, forcing an
	// illegal grow on open.
	t.Logf("Reopening bbolt DB at: %s", path)
	_, err = btesting.OpenDBWithOption(t, path, &bolt.Options{
		MaxSize:         1,
		InitialMmapSize: int(newSz) * 2,
	})
	assert.Error(t, err, "Opening the DB with InitialMmapSize > MaxSize should cause an error on Windows")
}
func ExampleDB_Update() {
// Open the database.
db, err := bolt.Open(tempfile(), 0600, nil)

View File

@ -69,6 +69,9 @@ var (
// ErrValueTooLarge is returned when inserting a value that is larger than MaxValueSize.
ErrValueTooLarge = errors.New("value too large")
// ErrMaxSizeReached is returned when the configured maximum size of the data file is reached.
ErrMaxSizeReached = errors.New("database reached maximum size")
// ErrIncompatibleValue is returned when trying to create or delete a bucket
// on an existing non-bucket key or when trying to create or delete a
// non-bucket key on an existing bucket key.

View File

@ -44,6 +44,13 @@ func MustCreateDBWithOption(t testing.TB, o *bolt.Options) *DB {
}
// MustOpenDBWithOption opens the database file f with options o via
// OpenDBWithOption and fails the test immediately if opening returns an
// error or a nil database. On success the opened database is returned.
func MustOpenDBWithOption(t testing.TB, f string, o *bolt.Options) *DB {
	// Attribute failures to the calling test rather than to this helper.
	t.Helper()

	db, err := OpenDBWithOption(t, f, o)
	require.NoError(t, err)
	require.NotNil(t, db)
	return db
}
func OpenDBWithOption(t testing.TB, f string, o *bolt.Options) (*DB, error) {
t.Logf("Opening bbolt DB at: %s", f)
if o == nil {
o = bolt.DefaultOptions
@ -57,7 +64,9 @@ func MustOpenDBWithOption(t testing.TB, f string, o *bolt.Options) *DB {
o.FreelistType = freelistType
db, err := bolt.Open(f, 0600, o)
require.NoError(t, err)
if err != nil {
return nil, err
}
resDB := &DB{
DB: db,
f: f,
@ -66,7 +75,7 @@ func MustOpenDBWithOption(t testing.TB, f string, o *bolt.Options) *DB {
}
resDB.strictModeEnabledDefault()
t.Cleanup(resDB.PostTestCleanup)
return resDB
return resDB, nil
}
func (db *DB) PostTestCleanup() {