bbolt/tests/robustness/powerfailure_test.go

327 lines
8.0 KiB
Go

//go:build linux
package robustness
import (
"bytes"
"crypto/rand"
"fmt"
"io"
"math"
"math/big"
"net/http"
"net/url"
"os"
"os/exec"
"path"
"path/filepath"
"strings"
"testing"
"time"
"go.etcd.io/bbolt/tests/dmflakey"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"golang.org/x/sys/unix"
)
var panicFailpoints = []string{
"beforeSyncDataPages",
"beforeSyncMetaPage",
"lackOfDiskSpace",
"mapError",
"resizeFileError",
"unmapError",
}
// TestRestartFromPowerFailureExt4 is to test data after unexpected power failure on ext4.
func TestRestartFromPowerFailureExt4(t *testing.T) {
for _, tc := range []struct {
name string
du time.Duration
fsMountOpt string
useFailpoint bool
}{
{
name: "fp_ext4_commit5s",
du: 5 * time.Second,
fsMountOpt: "commit=5",
useFailpoint: true,
},
{
name: "fp_ext4_commit1s",
du: 10 * time.Second,
fsMountOpt: "commit=1",
useFailpoint: true,
},
{
name: "fp_ext4_commit1000s",
du: 10 * time.Second,
fsMountOpt: "commit=1000",
useFailpoint: true,
},
{
name: "kill_ext4_commit5s",
du: 5 * time.Second,
fsMountOpt: "commit=5",
},
{
name: "kill_ext4_commit1s",
du: 10 * time.Second,
fsMountOpt: "commit=1",
},
{
name: "kill_ext4_commit1000s",
du: 10 * time.Second,
fsMountOpt: "commit=1000",
},
} {
t.Run(tc.name, func(t *testing.T) {
doPowerFailure(t, tc.du, dmflakey.FSTypeEXT4, "", tc.fsMountOpt, tc.useFailpoint)
})
}
}
func TestRestartFromPowerFailureXFS(t *testing.T) {
for _, tc := range []struct {
name string
mkfsOpt string
fsMountOpt string
useFailpoint bool
}{
{
name: "xfs_no_opts",
mkfsOpt: "",
fsMountOpt: "",
useFailpoint: true,
},
{
name: "lazy-log",
mkfsOpt: "-l lazy-count=1",
fsMountOpt: "",
useFailpoint: true,
},
{
name: "odd-allocsize",
mkfsOpt: "",
fsMountOpt: "allocsize=" + fmt.Sprintf("%d", 4096*5),
useFailpoint: true,
},
{
name: "nolargeio",
mkfsOpt: "",
fsMountOpt: "nolargeio",
useFailpoint: true,
},
{
name: "odd-alignment",
mkfsOpt: "-d sunit=1024,swidth=1024",
fsMountOpt: "noalign",
useFailpoint: true,
},
{
name: "openshift-sno-options",
mkfsOpt: "-m bigtime=1,finobt=1,rmapbt=0,reflink=1 -i sparse=1 -l lazy-count=1",
// openshift also supplies seclabel,relatime,prjquota on RHEL, but that's not supported on our CI
// prjquota is only unsupported on our ARM runners.
// You can find more information in either the man page with `man xfs` or `man mkfs.xfs`.
// Also refer to https://man7.org/linux/man-pages/man8/mkfs.xfs.8.html.
fsMountOpt: "rw,attr2,inode64,logbufs=8,logbsize=32k",
useFailpoint: true,
},
} {
t.Run(tc.name, func(t *testing.T) {
t.Logf("mkfs opts: %s", tc.mkfsOpt)
t.Logf("mount opts: %s", tc.fsMountOpt)
doPowerFailure(t, 5*time.Second, dmflakey.FSTypeXFS, tc.mkfsOpt, tc.fsMountOpt, tc.useFailpoint)
})
}
}
func doPowerFailure(t *testing.T, du time.Duration, fsType dmflakey.FSType, mkfsOpt string, fsMountOpt string, useFailpoint bool) {
flakey := initFlakeyDevice(t, strings.Replace(t.Name(), "/", "_", -1), fsType, mkfsOpt, fsMountOpt)
root := flakey.RootFS()
dbPath := filepath.Join(root, "boltdb")
args := []string{"bbolt", "bench",
"-work", // keep the database
"-path", dbPath,
"-count=1000000000",
"-batch-size=5", // separate total count into multiple truncation
"-value-size=512",
}
logPath := filepath.Join(t.TempDir(), fmt.Sprintf("%s.log", t.Name()))
require.NoError(t, os.MkdirAll(path.Dir(logPath), 0600))
logFd, err := os.Create(logPath)
require.NoError(t, err)
defer logFd.Close()
fpURL := "127.0.0.1:12345"
cmd := exec.Command(args[0], args[1:]...)
cmd.Stdout = logFd
cmd.Stderr = logFd
cmd.Env = append(cmd.Env, "GOFAIL_HTTP="+fpURL)
t.Logf("start %s", strings.Join(args, " "))
require.NoError(t, cmd.Start(), "args: %v", args)
errCh := make(chan error, 1)
go func() {
errCh <- cmd.Wait()
}()
defer func() {
if t.Failed() {
logData, err := os.ReadFile(logPath)
assert.NoError(t, err)
t.Logf("dump log:\n: %s", string(logData))
}
}()
time.Sleep(du)
t.Logf("simulate power failure")
if useFailpoint {
fpURL = "http://" + fpURL
targetFp := panicFailpoints[randomInt(t, math.MaxInt32)%len(panicFailpoints)]
t.Logf("random pick failpoint: %s", targetFp)
activeFailpoint(t, fpURL, targetFp, "panic")
} else {
t.Log("kill bbolt")
assert.NoError(t, cmd.Process.Kill())
}
select {
case <-time.After(10 * time.Second):
t.Log("bbolt is supposed to be already stopped, but actually not yet; forcibly kill it")
assert.NoError(t, cmd.Process.Kill())
case err := <-errCh:
require.Error(t, err)
}
require.NoError(t, flakey.PowerFailure(""))
st, err := os.Stat(dbPath)
require.NoError(t, err)
t.Logf("db size: %d", st.Size())
t.Logf("verify data")
output, err := exec.Command("bbolt", "check", dbPath).CombinedOutput()
require.NoError(t, err, "bbolt check output: %s", string(output))
}
// activeFailpoint actives the failpoint by http.
func activeFailpoint(t *testing.T, targetUrl string, fpName, fpVal string) {
u, err := url.JoinPath(targetUrl, fpName)
require.NoError(t, err, "parse url %s", targetUrl)
req, err := http.NewRequest("PUT", u, bytes.NewBuffer([]byte(fpVal)))
require.NoError(t, err)
resp, err := http.DefaultClient.Do(req)
require.NoError(t, err)
defer resp.Body.Close()
data, err := io.ReadAll(resp.Body)
require.NoError(t, err)
require.Equal(t, 204, resp.StatusCode, "response body: %s", string(data))
}
// FlakeyDevice extends dmflakey.Flakey interface.
type FlakeyDevice interface {
// RootFS returns root filesystem.
RootFS() string
// PowerFailure simulates power failure with drop all the writes.
PowerFailure(mntOpt string) error
dmflakey.Flakey
}
// initFlakeyDevice returns FlakeyDevice instance with a given filesystem.
func initFlakeyDevice(t *testing.T, name string, fsType dmflakey.FSType, mkfsOpt string, mntOpt string) FlakeyDevice {
imgDir := t.TempDir()
flakey, err := dmflakey.InitFlakey(name, imgDir, fsType, mkfsOpt)
require.NoError(t, err, "init flakey %s", name)
t.Cleanup(func() {
assert.NoError(t, flakey.Teardown())
})
rootDir := t.TempDir()
err = unix.Mount(flakey.DevicePath(), rootDir, string(fsType), 0, mntOpt)
require.NoError(t, err, "init rootfs on %s", rootDir)
t.Cleanup(func() { assert.NoError(t, unmountAll(rootDir)) })
return &flakeyT{
Flakey: flakey,
rootDir: rootDir,
mntOpt: mntOpt,
}
}
type flakeyT struct {
dmflakey.Flakey
rootDir string
mntOpt string
}
// RootFS returns root filesystem.
func (f *flakeyT) RootFS() string {
return f.rootDir
}
// PowerFailure simulates power failure with drop all the writes.
func (f *flakeyT) PowerFailure(mntOpt string) error {
if err := f.DropWrites(); err != nil {
return fmt.Errorf("failed to drop_writes: %w", err)
}
if err := unmountAll(f.rootDir); err != nil {
return fmt.Errorf("failed to unmount rootfs %s: %w", f.rootDir, err)
}
if mntOpt == "" {
mntOpt = f.mntOpt
}
if err := f.AllowWrites(); err != nil {
return fmt.Errorf("failed to allow_writes: %w", err)
}
if err := unix.Mount(f.DevicePath(), f.rootDir, string(f.Filesystem()), 0, mntOpt); err != nil {
return fmt.Errorf("failed to mount rootfs %s (%s): %w", f.rootDir, mntOpt, err)
}
return nil
}
func unmountAll(target string) error {
for i := 0; i < 50; i++ {
if err := unix.Unmount(target, 0); err != nil {
switch err {
case unix.EBUSY:
time.Sleep(500 * time.Millisecond)
continue
case unix.EINVAL:
return nil
default:
return fmt.Errorf("failed to umount %s: %w", target, err)
}
}
continue
}
return fmt.Errorf("failed to umount %s: %w", target, unix.EBUSY)
}
func randomInt(t *testing.T, max int) int {
n, err := rand.Int(rand.Reader, big.NewInt(int64(max)))
assert.NoError(t, err)
return int(n.Int64())
}