From 048d3f19b28ee04dd6b2ca3c8f86e5fa65dc7355 Mon Sep 17 00:00:00 2001 From: Ben Johnson Date: Tue, 15 Jul 2014 07:37:46 -0600 Subject: [PATCH] Add DB.NoSync option for bulk loading. This commit adds the DB.NoSync flag to skip fsync() calls on each commit. This should only be used for bulk loading as it can corrupt your database in the event of a system failure. Initial tests show it can provide a 2x speed up for sequential inserts. --- cmd/bolt/bench.go | 2 ++ cmd/bolt/main.go | 2 ++ db.go | 9 +++++++++ tx.go | 12 ++++++++---- 4 files changed, 21 insertions(+), 4 deletions(-) diff --git a/cmd/bolt/bench.go b/cmd/bolt/bench.go index 7eb503d..a3aa9b8 100644 --- a/cmd/bolt/bench.go +++ b/cmd/bolt/bench.go @@ -46,6 +46,7 @@ func Bench(options *BenchOptions) { fatal(err) return } + db.NoSync = options.NoSync db.FillPercent = options.FillPercent defer db.Close() @@ -363,6 +364,7 @@ type BenchOptions struct { BlockProfile string StatsInterval time.Duration FillPercent float64 + NoSync bool Clean bool } diff --git a/cmd/bolt/main.go b/cmd/bolt/main.go index a79302d..95bd813 100644 --- a/cmd/bolt/main.go +++ b/cmd/bolt/main.go @@ -118,6 +118,7 @@ func NewApp() *cli.App { &cli.StringFlag{Name: "blockprofile", Usage: "Block profile output path"}, &cli.StringFlag{Name: "stats-interval", Value: "0s", Usage: "Continuous stats interval"}, &cli.Float64Flag{Name: "fill-percent", Value: bolt.DefaultFillPercent, Usage: "Fill percentage"}, + &cli.BoolFlag{Name: "no-sync", Usage: "Skip fsync on every commit"}, &cli.BoolFlag{Name: "work", Usage: "Print the temp db and do not delete on exit"}, }, Action: func(c *cli.Context) { @@ -139,6 +140,7 @@ func NewApp() *cli.App { BlockProfile: c.String("blockprofile"), StatsInterval: statsInterval, FillPercent: c.Float64("fill-percent"), + NoSync: c.Bool("no-sync"), Clean: !c.Bool("work"), }) }, diff --git a/db.go b/db.go index 4df79e2..2a2e2ed 100644 --- a/db.go +++ b/db.go @@ -47,6 +47,15 @@ type DB struct { // amount if you know that your write workloads are mostly append-only. FillPercent float64 + // Setting the NoSync flag will cause the database to skip fsync() + // calls after each commit. This can be useful when bulk loading data + // into a database and you can restart the bulk load in the event of + // a system failure or database corruption. Do not set this flag for + // normal use. + // + // THIS IS UNSAFE. PLEASE USE WITH CAUTION. + NoSync bool + path string file *os.File dataref []byte diff --git a/tx.go b/tx.go index e225362..bc2842f 100644 --- a/tx.go +++ b/tx.go @@ -425,8 +425,10 @@ func (tx *Tx) write() error { // Update statistics. tx.stats.Write++ } - if err := fdatasync(tx.db.file); err != nil { - return err + if !tx.db.NoSync { + if err := fdatasync(tx.db.file); err != nil { + return err + } } // Clear out page cache. @@ -446,8 +448,10 @@ func (tx *Tx) writeMeta() error { if _, err := tx.db.ops.writeAt(buf, int64(p.id)*int64(tx.db.pageSize)); err != nil { return err } - if err := fdatasync(tx.db.file); err != nil { - return err + if !tx.db.NoSync { + if err := fdatasync(tx.db.file); err != nil { + return err + } } // Update statistics.