mirror of https://github.com/etcd-io/bbolt.git
327 lines
8.0 KiB
Go
327 lines
8.0 KiB
Go
//go:build linux
|
|
|
|
package robustness
|
|
|
|
import (
|
|
"bytes"
|
|
"crypto/rand"
|
|
"fmt"
|
|
"io"
|
|
"math"
|
|
"math/big"
|
|
"net/http"
|
|
"net/url"
|
|
"os"
|
|
"os/exec"
|
|
"path"
|
|
"path/filepath"
|
|
"strings"
|
|
"testing"
|
|
"time"
|
|
|
|
"go.etcd.io/bbolt/tests/dmflakey"
|
|
|
|
"github.com/stretchr/testify/assert"
|
|
"github.com/stretchr/testify/require"
|
|
"golang.org/x/sys/unix"
|
|
)
|
|
|
|
// panicFailpoints holds the names of the failpoints from which
// doPowerFailure picks one at random and activates with the "panic" action
// when a test case asks for a failpoint-driven crash.
var panicFailpoints = []string{
	"beforeSyncDataPages",
	"beforeSyncMetaPage",
	"lackOfDiskSpace",
	"mapError",
	"resizeFileError",
	"unmapError",
}
|
|
|
|
// TestRestartFromPowerFailureExt4 is to test data after unexpected power failure on ext4.
|
|
func TestRestartFromPowerFailureExt4(t *testing.T) {
|
|
for _, tc := range []struct {
|
|
name string
|
|
du time.Duration
|
|
fsMountOpt string
|
|
useFailpoint bool
|
|
}{
|
|
{
|
|
name: "fp_ext4_commit5s",
|
|
du: 5 * time.Second,
|
|
fsMountOpt: "commit=5",
|
|
useFailpoint: true,
|
|
},
|
|
{
|
|
name: "fp_ext4_commit1s",
|
|
du: 10 * time.Second,
|
|
fsMountOpt: "commit=1",
|
|
useFailpoint: true,
|
|
},
|
|
{
|
|
name: "fp_ext4_commit1000s",
|
|
du: 10 * time.Second,
|
|
fsMountOpt: "commit=1000",
|
|
useFailpoint: true,
|
|
},
|
|
{
|
|
name: "kill_ext4_commit5s",
|
|
du: 5 * time.Second,
|
|
fsMountOpt: "commit=5",
|
|
},
|
|
{
|
|
name: "kill_ext4_commit1s",
|
|
du: 10 * time.Second,
|
|
fsMountOpt: "commit=1",
|
|
},
|
|
{
|
|
name: "kill_ext4_commit1000s",
|
|
du: 10 * time.Second,
|
|
fsMountOpt: "commit=1000",
|
|
},
|
|
} {
|
|
t.Run(tc.name, func(t *testing.T) {
|
|
doPowerFailure(t, tc.du, dmflakey.FSTypeEXT4, "", tc.fsMountOpt, tc.useFailpoint)
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestRestartFromPowerFailureXFS(t *testing.T) {
|
|
for _, tc := range []struct {
|
|
name string
|
|
mkfsOpt string
|
|
fsMountOpt string
|
|
useFailpoint bool
|
|
}{
|
|
{
|
|
name: "xfs_no_opts",
|
|
mkfsOpt: "",
|
|
fsMountOpt: "",
|
|
useFailpoint: true,
|
|
},
|
|
{
|
|
name: "lazy-log",
|
|
mkfsOpt: "-l lazy-count=1",
|
|
fsMountOpt: "",
|
|
useFailpoint: true,
|
|
},
|
|
{
|
|
name: "odd-allocsize",
|
|
mkfsOpt: "",
|
|
fsMountOpt: "allocsize=" + fmt.Sprintf("%d", 4096*5),
|
|
useFailpoint: true,
|
|
},
|
|
{
|
|
name: "nolargeio",
|
|
mkfsOpt: "",
|
|
fsMountOpt: "nolargeio",
|
|
useFailpoint: true,
|
|
},
|
|
{
|
|
name: "odd-alignment",
|
|
mkfsOpt: "-d sunit=1024,swidth=1024",
|
|
fsMountOpt: "noalign",
|
|
useFailpoint: true,
|
|
},
|
|
{
|
|
name: "openshift-sno-options",
|
|
mkfsOpt: "-m bigtime=1,finobt=1,rmapbt=0,reflink=1 -i sparse=1 -l lazy-count=1",
|
|
// openshift also supplies seclabel,relatime,prjquota on RHEL, but that's not supported on our CI
|
|
// prjquota is only unsupported on our ARM runners.
|
|
// You can find more information in either the man page with `man xfs` or `man mkfs.xfs`.
|
|
// Also refer to https://man7.org/linux/man-pages/man8/mkfs.xfs.8.html.
|
|
fsMountOpt: "rw,attr2,inode64,logbufs=8,logbsize=32k",
|
|
useFailpoint: true,
|
|
},
|
|
} {
|
|
t.Run(tc.name, func(t *testing.T) {
|
|
t.Logf("mkfs opts: %s", tc.mkfsOpt)
|
|
t.Logf("mount opts: %s", tc.fsMountOpt)
|
|
doPowerFailure(t, 5*time.Second, dmflakey.FSTypeXFS, tc.mkfsOpt, tc.fsMountOpt, tc.useFailpoint)
|
|
})
|
|
}
|
|
}
|
|
|
|
func doPowerFailure(t *testing.T, du time.Duration, fsType dmflakey.FSType, mkfsOpt string, fsMountOpt string, useFailpoint bool) {
|
|
flakey := initFlakeyDevice(t, strings.Replace(t.Name(), "/", "_", -1), fsType, mkfsOpt, fsMountOpt)
|
|
root := flakey.RootFS()
|
|
|
|
dbPath := filepath.Join(root, "boltdb")
|
|
|
|
args := []string{"bbolt", "bench",
|
|
"-work", // keep the database
|
|
"-path", dbPath,
|
|
"-count=1000000000",
|
|
"-batch-size=5", // separate total count into multiple truncation
|
|
"-value-size=512",
|
|
}
|
|
|
|
logPath := filepath.Join(t.TempDir(), fmt.Sprintf("%s.log", t.Name()))
|
|
require.NoError(t, os.MkdirAll(path.Dir(logPath), 0600))
|
|
|
|
logFd, err := os.Create(logPath)
|
|
require.NoError(t, err)
|
|
defer logFd.Close()
|
|
|
|
fpURL := "127.0.0.1:12345"
|
|
|
|
cmd := exec.Command(args[0], args[1:]...)
|
|
cmd.Stdout = logFd
|
|
cmd.Stderr = logFd
|
|
cmd.Env = append(cmd.Env, "GOFAIL_HTTP="+fpURL)
|
|
t.Logf("start %s", strings.Join(args, " "))
|
|
require.NoError(t, cmd.Start(), "args: %v", args)
|
|
|
|
errCh := make(chan error, 1)
|
|
go func() {
|
|
errCh <- cmd.Wait()
|
|
}()
|
|
|
|
defer func() {
|
|
if t.Failed() {
|
|
logData, err := os.ReadFile(logPath)
|
|
assert.NoError(t, err)
|
|
t.Logf("dump log:\n: %s", string(logData))
|
|
}
|
|
}()
|
|
|
|
time.Sleep(du)
|
|
t.Logf("simulate power failure")
|
|
|
|
if useFailpoint {
|
|
fpURL = "http://" + fpURL
|
|
targetFp := panicFailpoints[randomInt(t, math.MaxInt32)%len(panicFailpoints)]
|
|
t.Logf("random pick failpoint: %s", targetFp)
|
|
activeFailpoint(t, fpURL, targetFp, "panic")
|
|
} else {
|
|
t.Log("kill bbolt")
|
|
assert.NoError(t, cmd.Process.Kill())
|
|
}
|
|
|
|
select {
|
|
case <-time.After(10 * time.Second):
|
|
t.Log("bbolt is supposed to be already stopped, but actually not yet; forcibly kill it")
|
|
assert.NoError(t, cmd.Process.Kill())
|
|
case err := <-errCh:
|
|
require.Error(t, err)
|
|
}
|
|
require.NoError(t, flakey.PowerFailure(""))
|
|
|
|
st, err := os.Stat(dbPath)
|
|
require.NoError(t, err)
|
|
t.Logf("db size: %d", st.Size())
|
|
|
|
t.Logf("verify data")
|
|
output, err := exec.Command("bbolt", "check", dbPath).CombinedOutput()
|
|
require.NoError(t, err, "bbolt check output: %s", string(output))
|
|
}
|
|
|
|
// activeFailpoint actives the failpoint by http.
|
|
func activeFailpoint(t *testing.T, targetUrl string, fpName, fpVal string) {
|
|
u, err := url.JoinPath(targetUrl, fpName)
|
|
require.NoError(t, err, "parse url %s", targetUrl)
|
|
|
|
req, err := http.NewRequest("PUT", u, bytes.NewBuffer([]byte(fpVal)))
|
|
require.NoError(t, err)
|
|
|
|
resp, err := http.DefaultClient.Do(req)
|
|
require.NoError(t, err)
|
|
defer resp.Body.Close()
|
|
|
|
data, err := io.ReadAll(resp.Body)
|
|
require.NoError(t, err)
|
|
require.Equal(t, 204, resp.StatusCode, "response body: %s", string(data))
|
|
}
|
|
|
|
// FlakeyDevice extends dmflakey.Flakey interface.
|
|
type FlakeyDevice interface {
|
|
// RootFS returns root filesystem.
|
|
RootFS() string
|
|
|
|
// PowerFailure simulates power failure with drop all the writes.
|
|
PowerFailure(mntOpt string) error
|
|
|
|
dmflakey.Flakey
|
|
}
|
|
|
|
// initFlakeyDevice returns FlakeyDevice instance with a given filesystem.
|
|
func initFlakeyDevice(t *testing.T, name string, fsType dmflakey.FSType, mkfsOpt string, mntOpt string) FlakeyDevice {
|
|
imgDir := t.TempDir()
|
|
|
|
flakey, err := dmflakey.InitFlakey(name, imgDir, fsType, mkfsOpt)
|
|
require.NoError(t, err, "init flakey %s", name)
|
|
t.Cleanup(func() {
|
|
assert.NoError(t, flakey.Teardown())
|
|
})
|
|
|
|
rootDir := t.TempDir()
|
|
err = unix.Mount(flakey.DevicePath(), rootDir, string(fsType), 0, mntOpt)
|
|
require.NoError(t, err, "init rootfs on %s", rootDir)
|
|
|
|
t.Cleanup(func() { assert.NoError(t, unmountAll(rootDir)) })
|
|
|
|
return &flakeyT{
|
|
Flakey: flakey,
|
|
|
|
rootDir: rootDir,
|
|
mntOpt: mntOpt,
|
|
}
|
|
}
|
|
|
|
type flakeyT struct {
|
|
dmflakey.Flakey
|
|
|
|
rootDir string
|
|
mntOpt string
|
|
}
|
|
|
|
// RootFS returns root filesystem.
|
|
func (f *flakeyT) RootFS() string {
|
|
return f.rootDir
|
|
}
|
|
|
|
// PowerFailure simulates power failure with drop all the writes.
|
|
func (f *flakeyT) PowerFailure(mntOpt string) error {
|
|
if err := f.DropWrites(); err != nil {
|
|
return fmt.Errorf("failed to drop_writes: %w", err)
|
|
}
|
|
|
|
if err := unmountAll(f.rootDir); err != nil {
|
|
return fmt.Errorf("failed to unmount rootfs %s: %w", f.rootDir, err)
|
|
}
|
|
|
|
if mntOpt == "" {
|
|
mntOpt = f.mntOpt
|
|
}
|
|
|
|
if err := f.AllowWrites(); err != nil {
|
|
return fmt.Errorf("failed to allow_writes: %w", err)
|
|
}
|
|
|
|
if err := unix.Mount(f.DevicePath(), f.rootDir, string(f.Filesystem()), 0, mntOpt); err != nil {
|
|
return fmt.Errorf("failed to mount rootfs %s (%s): %w", f.rootDir, mntOpt, err)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func unmountAll(target string) error {
|
|
for i := 0; i < 50; i++ {
|
|
if err := unix.Unmount(target, 0); err != nil {
|
|
switch err {
|
|
case unix.EBUSY:
|
|
time.Sleep(500 * time.Millisecond)
|
|
continue
|
|
case unix.EINVAL:
|
|
return nil
|
|
default:
|
|
return fmt.Errorf("failed to umount %s: %w", target, err)
|
|
}
|
|
}
|
|
continue
|
|
}
|
|
return fmt.Errorf("failed to umount %s: %w", target, unix.EBUSY)
|
|
}
|
|
|
|
func randomInt(t *testing.T, max int) int {
|
|
n, err := rand.Int(rand.Reader, big.NewInt(int64(max)))
|
|
assert.NoError(t, err)
|
|
return int(n.Int64())
|
|
}
|