tests/*: introduce go-dmflakey

This commit adds go-dmflakey, a Go binding for the device-mapper flakey
target. It is used to simulate power failure on common filesystems.

Signed-off-by: Wei Fu <fuweid89@gmail.com>
pull/628/head
Wei Fu 2023-12-05 21:29:07 +08:00
parent 04846951a3
commit b05bec2fe3
6 changed files with 722 additions and 0 deletions

View File

@ -15,5 +15,6 @@ jobs:
with:
go-version: ${{ steps.goversion.outputs.goversion }}
- run: |
sudo make root-test
make gofail-enable
make test-failpoint

View File

@ -81,3 +81,6 @@ test-failpoint:
@echo "[failpoint] array freelist test"
BBOLT_VERIFY=all TEST_FREELIST_TYPE=array go test -v ${TESTFLAGS} -timeout 30m ./tests/failpoint
.PHONY: root-test # run tests that require root
root-test:
go test -v ${TESTFLAGS} ./tests/dmflakey -test.root

322
tests/dmflakey/dmflakey.go Normal file
View File

@ -0,0 +1,322 @@
//go:build linux
package dmflakey
import (
"errors"
"fmt"
"os"
"os/exec"
"path/filepath"
"strings"
"time"
"golang.org/x/sys/unix"
)
// featCfg collects the tunables applied when a flakey feature is (re)loaded.
type featCfg struct {
	// syncFS requests a filesystem synchronization before the failure is
	// injected.
	syncFS bool

	// interval is how long the selected feature stays in effect.
	//
	// With AllowWrites the device is available for `interval` seconds; with
	// every other feature the device behaves unreliably for `interval`
	// seconds.
	interval time.Duration
}
// Defaults used when the caller does not override them.
const (
	// defaultImgSize is the size, in bytes, of a freshly created filesystem
	// image (10 GiB).
	defaultImgSize int64 = 10 << 30

	// defaultInterval is how long a feature stays in effect by default.
	defaultInterval = 120 * time.Second
)
// defaultFeatCfg is the starting configuration every feature call copies
// before applying its options: no filesystem sync, default interval.
var defaultFeatCfg = featCfg{
	syncFS:   false,
	interval: defaultInterval,
}
// FeatOpt is a functional option that adjusts the configuration of a failure
// feature before it is loaded into the device.
type FeatOpt func(cfg *featCfg)
// WithIntervalFeatOpt returns an option that sets how long the feature stays
// in effect.
func WithIntervalFeatOpt(interval time.Duration) FeatOpt {
	setInterval := func(c *featCfg) { c.interval = interval }
	return setInterval
}
// WithSyncFSFeatOpt returns an option that selects whether the filesystem is
// synchronized before the failure is injected.
func WithSyncFSFeatOpt(syncFS bool) FeatOpt {
	setSyncFS := func(c *featCfg) { c.syncFS = syncFS }
	return setSyncFS
}
// Flakey injects I/O failures into a block device.
type Flakey interface {
	// DevicePath reports the path of the flakey device.
	DevicePath() string

	// Filesystem reports the type of the filesystem on the device.
	Filesystem() FSType

	// AllowWrites lets write I/O through to the device.
	AllowWrites(opts ...FeatOpt) error

	// DropWrites silently discards every write I/O.
	DropWrites(opts ...FeatOpt) error

	// ErrorWrites discards every write I/O and reports an error for it.
	ErrorWrites(opts ...FeatOpt) error

	// Teardown releases the flakey device.
	Teardown() error
}
// FSType names a filesystem; its value is used as the suffix of the mkfs tool.
type FSType string

// Filesystems that can be created on the flakey device.
const (
	FSTypeEXT4 = FSType("ext4")
	FSTypeXFS  = FSType("xfs")
)
// InitFlakey creates a filesystem image, attaches it to a loop device and
// layers a flakey device-mapper target on top of it.
//
// The device-mapper device is /dev/mapper/$flakeyDevice and the image lives at
// $dataStorePath/$flakeyDevice.img. By default the device is available for
// 2 minutes and the image is 10 GiB.
//
// When any step fails, everything created by the earlier steps is undone
// before the error is returned.
func InitFlakey(flakeyDevice, dataStorePath string, fsType FSType) (_ Flakey, retErr error) {
	imgPath := filepath.Join(dataStorePath, fmt.Sprintf("%s.img", flakeyDevice))
	if err := createEmptyFSImage(imgPath, fsType); err != nil {
		return nil, err
	}

	// undo holds the rollback steps; they only run on failure, newest first.
	var undo []func()
	defer func() {
		if retErr == nil {
			return
		}
		for i := len(undo) - 1; i >= 0; i-- {
			undo[i]()
		}
	}()
	undo = append(undo, func() { os.RemoveAll(imgPath) })

	loopDevice, err := attachToLoopDevice(imgPath)
	if err != nil {
		return nil, err
	}
	undo = append(undo, func() { _ = detachLoopDevice(loopDevice) })

	// imgSize is expressed in 512-byte sectors, as used by the flakey table.
	imgSize, err := getBlkSize(loopDevice)
	if err != nil {
		return nil, err
	}

	err = newFlakeyDevice(flakeyDevice, loopDevice, defaultInterval)
	if err != nil {
		return nil, err
	}

	dev := &flakey{
		fsType:       fsType,
		imgPath:      imgPath,
		imgSize:      imgSize,
		loopDevice:   loopDevice,
		flakeyDevice: flakeyDevice,
	}
	return dev, nil
}
// flakey is the Flakey implementation returned by InitFlakey.
type flakey struct {
	// fsType is the filesystem created on the image.
	fsType FSType
	// imgPath is the backing image file, removed again by Teardown.
	imgPath string
	// imgSize is the loop device size in 512-byte sectors; it is used as the
	// length of the flakey table.
	imgSize int64
	// loopDevice is the loop device path and flakeyDevice the device-mapper
	// name (not the full /dev/mapper path).
	loopDevice, flakeyDevice string
}
// DevicePath reports the path of the flakey device under /dev/mapper.
func (f *flakey) DevicePath() string {
	const mapperPathFormat = "/dev/mapper/%s"
	return fmt.Sprintf(mapperPathFormat, f.flakeyDevice)
}
// Filesystem reports the type of the filesystem created on the device.
func (f *flakey) Filesystem() FSType {
	fsType := f.fsType
	return fsType
}
// AllowWrites reloads the device with a table that lets write I/O through.
func (f *flakey) AllowWrites(opts ...FeatOpt) error {
	cfg := defaultFeatCfg
	for i := range opts {
		opts[i](&cfg)
	}

	// Table layout:
	//
	//   0 imgSize flakey <dev path> <offset> <up interval> <down interval> [<num_features> [<feature arguments>]]
	//
	// Mandatory parameters:
	//
	//   <dev path>:      full pathname of the underlying block device, or a
	//                    "major:minor" device number.
	//   <offset>:        starting sector within the device.
	//   <up interval>:   number of seconds the device is available.
	//   <down interval>: number of seconds the device returns errors.
	//
	// Without feature parameters, all I/O returns errors during the periods
	// of unreliability.
	//
	// AllowWrites uses `interval` as the up interval and 0 as the down
	// interval, so the device handles data correctly for `interval` seconds.
	//
	// REF: https://docs.kernel.org/admin-guide/device-mapper/dm-flakey.html.
	upSeconds := int(cfg.interval.Seconds())
	table := fmt.Sprintf("0 %d flakey %s 0 %d 0", f.imgSize, f.loopDevice, upSeconds)

	return reloadFlakeyDevice(f.flakeyDevice, cfg.syncFS, table)
}
// DropWrites reloads the device with a table that silently discards all write
// I/O.
func (f *flakey) DropWrites(opts ...FeatOpt) error {
	cfg := defaultFeatCfg
	for i := range opts {
		opts[i](&cfg)
	}

	// Table layout:
	//
	//   0 imgSize flakey <dev path> <offset> <up interval> <down interval> [<num_features> [<feature arguments>]]
	//
	// Mandatory parameters:
	//
	//   <dev path>:      full pathname of the underlying block device, or a
	//                    "major:minor" device number.
	//   <offset>:        starting sector within the device.
	//   <up interval>:   number of seconds the device is available.
	//   <down interval>: number of seconds the device returns errors.
	//
	// Optional parameters:
	//
	//   <num_features>: length of <feature arguments>.
	//
	// DropWrites passes one feature argument, drop_writes, with an up
	// interval of 0 and `interval` as the down interval. Writes are dropped
	// for `interval` seconds while reads are still served correctly.
	//
	// Example: when an application calls fsync, the dirty pages would ideally
	// be flushed to disk. While DropWrites is active the device ignores the
	// data and still reports success, which simulates data loss after a power
	// failure.
	//
	// REF: https://docs.kernel.org/admin-guide/device-mapper/dm-flakey.html.
	downSeconds := int(cfg.interval.Seconds())
	table := fmt.Sprintf("0 %d flakey %s 0 0 %d 1 drop_writes", f.imgSize, f.loopDevice, downSeconds)

	return reloadFlakeyDevice(f.flakeyDevice, cfg.syncFS, table)
}
// ErrorWrites reloads the device with a table that discards all write I/O and
// reports an error for it.
func (f *flakey) ErrorWrites(opts ...FeatOpt) error {
	cfg := defaultFeatCfg
	for i := range opts {
		opts[i](&cfg)
	}

	// Table layout:
	//
	//   0 imgSize flakey <dev path> <offset> <up interval> <down interval> [<num_features> [<feature arguments>]]
	//
	// Mandatory parameters:
	//
	//   <dev path>:      full pathname of the underlying block device, or a
	//                    "major:minor" device number.
	//   <offset>:        starting sector within the device.
	//   <up interval>:   number of seconds the device is available.
	//   <down interval>: number of seconds the device returns errors.
	//
	// Optional parameters:
	//
	//   <num_features>: length of <feature arguments>.
	//
	// ErrorWrites passes one feature argument, error_writes, with an up
	// interval of 0 and `interval` as the down interval. Writes are dropped
	// and failed back to the caller for `interval` seconds while reads are
	// still served correctly.
	//
	// REF: https://docs.kernel.org/admin-guide/device-mapper/dm-flakey.html.
	downSeconds := int(cfg.interval.Seconds())
	table := fmt.Sprintf("0 %d flakey %s 0 0 %d 1 error_writes", f.imgSize, f.loopDevice, downSeconds)

	return reloadFlakeyDevice(f.flakeyDevice, cfg.syncFS, table)
}
// Teardown removes the flakey device, detaches the loop device and deletes
// the image file.
//
// A flakey device that no longer exists ("No such device or address") and a
// loop device that reports ENXIO are both tolerated, so the remaining steps
// still run.
func (f *flakey) Teardown() error {
	err := deleteFlakeyDevice(f.flakeyDevice)
	if err != nil && !strings.Contains(err.Error(), "No such device or address") {
		return err
	}

	err = detachLoopDevice(f.loopDevice)
	if err != nil && !errors.Is(err, unix.ENXIO) {
		return err
	}

	return os.RemoveAll(f.imgPath)
}
// createEmptyFSImage creates a sparse image file of the default size (10 GiB)
// at imgPath and formats it with the mkfs tool for fsType.
//
// It fails if fsType is unsupported, if mkfs.<fsType> is not found in PATH, or
// if imgPath already exists. An existing file is never touched. If a later
// step (truncate, close, mkfs) fails, the image created by this call is
// removed so that no partial file is left behind.
func createEmptyFSImage(imgPath string, fsType FSType) (retErr error) {
	if err := validateFSType(fsType); err != nil {
		return err
	}

	mkfs, err := exec.LookPath(fmt.Sprintf("mkfs.%s", fsType))
	if err != nil {
		return fmt.Errorf("failed to ensure mkfs.%s: %w", fsType, err)
	}

	// O_EXCL makes "must not exist yet" and "create" a single atomic step,
	// instead of a separate stat followed by a create.
	f, err := os.OpenFile(imgPath, os.O_RDWR|os.O_CREATE|os.O_EXCL, 0666)
	if err != nil {
		if errors.Is(err, os.ErrExist) {
			return fmt.Errorf("failed to create image because %s already exists", imgPath)
		}
		return fmt.Errorf("failed to create image %s: %w", imgPath, err)
	}

	// From here on the file belongs to this call: clean it up on any failure.
	defer func() {
		if retErr != nil {
			// Best effort; the original failure is the error worth reporting.
			_ = os.Remove(imgPath)
		}
	}()

	truncErr := f.Truncate(defaultImgSize)
	closeErr := f.Close()
	if truncErr != nil {
		return fmt.Errorf("failed to truncate image %s with %v bytes: %w",
			imgPath, defaultImgSize, truncErr)
	}
	if closeErr != nil {
		return fmt.Errorf("failed to close image %s: %w", imgPath, closeErr)
	}

	output, err := exec.Command(mkfs, imgPath).CombinedOutput()
	if err != nil {
		return fmt.Errorf("failed to mkfs.%s on %s (out: %s): %w",
			fsType, imgPath, string(output), err)
	}
	return nil
}
// validateFSType returns an error unless fsType is one of the supported
// filesystems.
func validateFSType(fsType FSType) error {
	if fsType == FSTypeEXT4 || fsType == FSTypeXFS {
		return nil
	}
	return fmt.Errorf("unsupported filesystem %s", fsType)
}

View File

@ -0,0 +1,200 @@
//go:build linux
package dmflakey
import (
"errors"
"flag"
"fmt"
"os"
"os/exec"
"path/filepath"
"testing"
"time"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"golang.org/x/sys/unix"
)
// enableRoot is set by the -test.root flag and opts in to the tests that
// need root privileges.
var enableRoot bool

// init registers the -test.root flag on the default command-line flag set.
func init() {
	flag.CommandLine.BoolVar(&enableRoot, "test.root", false, "enable tests that require root")
}
// TestMain parses the flags, enforces the root requirement and then runs the
// tests, exiting with their result code.
func TestMain(m *testing.M) {
	flag.Parse()
	requiresRoot()

	code := m.Run()
	os.Exit(code)
}
// requiresRoot terminates the process unless the root tests can run: it exits
// with status 0 when they were not requested via -test.root, and with status 1
// when they were requested but the process is not running as uid 0.
func requiresRoot() {
	switch {
	case !enableRoot:
		fmt.Fprintln(os.Stderr, "Skip tests that require root")
		os.Exit(0)
	case os.Getuid() != 0:
		fmt.Fprintln(os.Stderr, "This test must be run as root.")
		os.Exit(1)
	}
}
// TestBasic runs the full life cycle once: create the device, mount it, write
// a synced file, unmount and tear down. The deferred unmount and Teardown run
// a second time after the explicit calls at the end of the test.
func TestBasic(t *testing.T) {
	dataDir := t.TempDir()

	dev, err := InitFlakey("go-dmflakey", dataDir, FSTypeEXT4)
	require.NoError(t, err, "init flakey")
	defer func() { assert.NoError(t, dev.Teardown()) }()

	mountPoint := filepath.Join(dataDir, "root")
	require.NoError(t, os.MkdirAll(mountPoint, 0600))
	require.NoError(t, mount(mountPoint, dev.DevicePath(), ""))
	defer func() { assert.NoError(t, unmount(mountPoint)) }()

	err = writeFile(filepath.Join(mountPoint, "test"), []byte("hello, world"), 0600, true)
	assert.NoError(t, err)

	assert.NoError(t, unmount(mountPoint))
	assert.NoError(t, dev.Teardown())
}
// TestDropWrites checks that data synced before DropWrites survives a
// simulated power failure while data written without a sync is lost.
func TestDropWrites(t *testing.T) {
	dev, mountPoint := initFlakey(t, FSTypeEXT4)

	// commit=1000 delays the commit triggered by the writeback thread.
	require.NoError(t, mount(mountPoint, dev.DevicePath(), "commit=1000"))

	dir := filepath.Join(mountPoint, "testdir")
	require.NoError(t, os.MkdirAll(dir, 0600))

	// testdir/f1 is written and then synced to disk.
	syncedFile := filepath.Join(dir, "f1")
	assert.NoError(t, writeFile(syncedFile, []byte("hello, world from f1"), 0600, false))
	require.NoError(t, syncfs(syncedFile))

	// testdir/f2 is written but never synced.
	unsyncedFile := filepath.Join(dir, "f2")
	assert.NoError(t, writeFile(unsyncedFile, []byte("hello, world from f2"), 0600, false))

	// Simulate the power failure: drop writes, then unmount.
	assert.NoError(t, dev.DropWrites())
	assert.NoError(t, unmount(mountPoint))

	// Bring the device back and inspect what survived.
	assert.NoError(t, dev.AllowWrites())
	require.NoError(t, mount(mountPoint, dev.DevicePath(), ""))

	got, err := os.ReadFile(syncedFile)
	assert.NoError(t, err)
	assert.Equal(t, "hello, world from f1", string(got))

	_, err = os.ReadFile(unsyncedFile)
	assert.True(t, errors.Is(err, os.ErrNotExist))
}
// TestErrorWrites checks that a synced write fails with an I/O error while
// ErrorWrites is active, and that writes succeed and persist once AllowWrites
// is restored.
func TestErrorWrites(t *testing.T) {
	dev, mountPoint := initFlakey(t, FSTypeEXT4)

	// commit=1000 delays the commit triggered by the writeback thread.
	require.NoError(t, mount(mountPoint, dev.DevicePath(), "commit=1000"))

	target := filepath.Join(mountPoint, "f1")

	// Inject I/O failure on write.
	assert.NoError(t, dev.ErrorWrites())
	failedWrite := writeFile(target, []byte("hello, world during failpoint"), 0600, true)
	assert.ErrorContains(t, failedWrite, "input/output error")

	// Resume normal operation.
	assert.NoError(t, dev.AllowWrites())
	assert.NoError(t, writeFile(target, []byte("hello, world"), 0600, true))

	// Remount to make sure the data is read back from the device.
	assert.NoError(t, unmount(mountPoint))
	require.NoError(t, mount(mountPoint, dev.DevicePath(), ""))

	got, err := os.ReadFile(target)
	assert.NoError(t, err)
	assert.Equal(t, "hello, world", string(got))
}
// initFlakey creates a flakey device with a filesystem of type fsType inside a
// per-test temporary directory. It returns the device together with the mount
// point directory, which is created but not mounted.
//
// A cleanup is registered that unmounts the mount point and tears the device
// down when the test finishes.
func initFlakey(t *testing.T, fsType FSType) (_ Flakey, root string) {
	t.Helper()

	tmpDir := t.TempDir()

	target := filepath.Join(tmpDir, "root")
	require.NoError(t, os.MkdirAll(target, 0600))

	// Use the filesystem the caller asked for rather than a fixed one.
	dev, err := InitFlakey("go-dmflakey", tmpDir, fsType)
	require.NoError(t, err, "init flakey")

	t.Cleanup(func() {
		assert.NoError(t, unmount(target))
		assert.NoError(t, dev.Teardown())
	})
	return dev, target
}
// writeFile creates or truncates name with permission perm and writes data
// into it. When sync is true the file is also fsynced before returning.
func writeFile(name string, data []byte, perm os.FileMode, sync bool) error {
	fh, openErr := os.OpenFile(name, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, perm)
	if openErr != nil {
		return openErr
	}
	defer fh.Close()

	_, writeErr := fh.Write(data)
	switch {
	case writeErr != nil:
		return writeErr
	case !sync:
		return nil
	}
	return fh.Sync()
}
// syncfs opens file and issues the syncfs system call on its descriptor.
func syncfs(file string) error {
	fh, err := os.Open(file)
	if err != nil {
		return fmt.Errorf("failed to open %s: %w", file, err)
	}
	defer fh.Close()

	if _, _, errno := unix.Syscall(unix.SYS_SYNCFS, fh.Fd(), 0, 0); errno != 0 {
		return errno
	}
	return nil
}
// mount runs `mount -o <opt> <devPath> <target>` and includes the command
// output in the returned error when the command fails.
func mount(target string, devPath string, opt string) error {
	args := []string{"-o", opt, devPath, target}

	cmd := exec.Command("mount", args...)
	if out, err := cmd.CombinedOutput(); err != nil {
		return fmt.Errorf("failed to mount (args: %v) (out: %s): %w",
			args, string(out), err)
	}
	return nil
}
// unmount unmounts target, retrying up to 50 times with a 500ms pause while
// the kernel reports EBUSY.
//
// EINVAL is treated as success; any other error is returned wrapped. If the
// target is still busy after the last attempt, EBUSY is returned.
func unmount(target string) error {
	for attempt := 0; attempt < 50; attempt++ {
		err := unix.Unmount(target, 0)
		if err == nil || err == unix.EINVAL {
			return nil
		}
		if err != unix.EBUSY {
			return fmt.Errorf("failed to umount %s: %w", target, err)
		}
		time.Sleep(500 * time.Millisecond)
	}
	return unix.EBUSY
}

105
tests/dmflakey/dmsetup.go Normal file
View File

@ -0,0 +1,105 @@
//go:build linux
package dmflakey
import (
"fmt"
"os"
"os/exec"
"time"
"unsafe"
"golang.org/x/sys/unix"
)
// newFlakeyDevice creates the device-mapper device flakeyDevice on top of
// loopDevice with `dmsetup create`.
//
// REF: https://docs.kernel.org/admin-guide/device-mapper/dm-flakey.html
func newFlakeyDevice(flakeyDevice, loopDevice string, interval time.Duration) error {
	sectors, err := getBlkSize(loopDevice)
	if err != nil {
		return fmt.Errorf("failed to get the size of the loop device %s: %w", loopDevice, err)
	}

	// The initial table has `interval` seconds of up time and no down time.
	upSeconds := int(interval.Seconds())
	table := fmt.Sprintf("0 %d flakey %s 0 %d 0", sectors, loopDevice, upSeconds)

	cmd := exec.Command("dmsetup", "create", flakeyDevice, "--table", table)
	if out, err := cmd.CombinedOutput(); err != nil {
		return fmt.Errorf("failed to create flakey device %s with table %s (out: %s): %w",
			flakeyDevice, table, string(out), err)
	}
	return nil
}
// reloadFlakeyDevice swaps the table of flakeyDevice: it suspends the device,
// loads table and resumes the device again.
//
// The device is suspended with --nolockfs unless syncFS is set. Once the
// suspend has succeeded, the resume is always attempted; its error is only
// reported when the load itself did not fail.
func reloadFlakeyDevice(flakeyDevice string, syncFS bool, table string) (retErr error) {
	suspendArgs := []string{"suspend"}
	if !syncFS {
		suspendArgs = append(suspendArgs, "--nolockfs")
	}
	suspendArgs = append(suspendArgs, flakeyDevice)

	if out, err := exec.Command("dmsetup", suspendArgs...).CombinedOutput(); err != nil {
		return fmt.Errorf("failed to suspend flakey device %s (out: %s): %w",
			flakeyDevice, string(out), err)
	}

	defer func() {
		out, resumeErr := exec.Command("dmsetup", "resume", flakeyDevice).CombinedOutput()
		if resumeErr != nil && retErr == nil {
			retErr = fmt.Errorf("failed to resume flakey device %s (out: %s): %w",
				flakeyDevice, string(out), resumeErr)
		}
	}()

	if out, err := exec.Command("dmsetup", "load", flakeyDevice, "--table", table).CombinedOutput(); err != nil {
		return fmt.Errorf("failed to reload flakey device %s with table (%s) (out: %s): %w",
			flakeyDevice, table, string(out), err)
	}
	return nil
}
// deleteFlakeyDevice removes the flakey device with `dmsetup remove`. The
// command output is included in the returned error.
func deleteFlakeyDevice(flakeyDevice string) error {
	cmd := exec.Command("dmsetup", "remove", flakeyDevice)
	if out, err := cmd.CombinedOutput(); err != nil {
		return fmt.Errorf("failed to remove flakey device %s (out: %s): %w",
			flakeyDevice, string(out), err)
	}
	return nil
}
// getBlkSize64 returns the size of device in bytes, obtained with the
// BLKGETSIZE64 ioctl.
//
// REF: https://man7.org/linux/man-pages/man8/blockdev.8.html
func getBlkSize64(device string) (int64, error) {
	dev, err := os.Open(device)
	if err != nil {
		return 0, fmt.Errorf("failed to open device %s: %w", device, err)
	}
	defer dev.Close()

	var sizeBytes int64
	_, _, errno := unix.Syscall(unix.SYS_IOCTL, dev.Fd(), unix.BLKGETSIZE64,
		uintptr(unsafe.Pointer(&sizeBytes)))
	if errno != 0 {
		return 0, fmt.Errorf("failed to get block size: %w", errno)
	}
	return sizeBytes, nil
}
// getBlkSize returns the size of device in 512-byte sectors, i.e. the
// BLKGETSIZE64 result divided by 512.
//
// REF: https://man7.org/linux/man-pages/man8/blockdev.8.html
func getBlkSize(device string) (int64, error) {
	const sectorSize = 512

	sizeBytes, err := getBlkSize64(device)
	return sizeBytes / sectorSize, err
}

View File

@ -0,0 +1,91 @@
//go:build linux
package dmflakey
import (
"errors"
"fmt"
"os"
"time"
"golang.org/x/sys/unix"
)
// loopControlDevice is the control node used to look up a free loop device.
const loopControlDevice = "/dev/loop-control"

// loopDevicePattern is the format of a loop device path for a given index.
const loopDevicePattern = "/dev/loop%d"

// maxRetryToAttach bounds the attempts to attach a backing file to a loop
// device.
const maxRetryToAttach = 50
// attachToLoopDevice associates free loop device with backing file.
//
// There might have race condition. It needs to retry when it runs into EBUSY.
//
// REF: https://man7.org/linux/man-pages/man4/loop.4.html
func attachToLoopDevice(backingFile string) (string, error) {
backingFd, err := os.OpenFile(backingFile, os.O_RDWR, 0)
if err != nil {
return "", fmt.Errorf("failed to open loop device's backing file %s: %w",
backingFile, err)
}
defer backingFd.Close()
for i := 0; i < maxRetryToAttach; i++ {
loop, err := getFreeLoopDevice()
if err != nil {
return "", fmt.Errorf("failed to get free loop device: %w", err)
}
err = func() error {
loopFd, err := os.OpenFile(loop, os.O_RDWR, 0)
if err != nil {
return err
}
defer loopFd.Close()
return unix.IoctlSetInt(int(loopFd.Fd()),
unix.LOOP_SET_FD, int(backingFd.Fd()))
}()
if err != nil {
if errors.Is(err, unix.EBUSY) {
time.Sleep(500 * time.Millisecond)
continue
}
return "", err
}
return loop, nil
}
return "", fmt.Errorf("failed to associate free loop device with backing file %s after retry %v",
backingFile, maxRetryToAttach)
}
// detachLoopDevice disassociates loopDevice from its backing file using the
// LOOP_CLR_FD ioctl.
//
// REF: https://man7.org/linux/man-pages/man4/loop.4.html
func detachLoopDevice(loopDevice string) error {
	loopFile, err := os.Open(loopDevice)
	if err != nil {
		return fmt.Errorf("failed to open loop %s: %w", loopDevice, err)
	}
	defer loopFile.Close()

	fd := int(loopFile.Fd())
	return unix.IoctlSetInt(fd, unix.LOOP_CLR_FD, 0)
}
// getFreeLoopDevice asks the loop control device for the index of a free loop
// device (LOOP_CTL_GET_FREE) and returns the matching /dev/loopN path.
//
// REF: https://man7.org/linux/man-pages/man4/loop.4.html
func getFreeLoopDevice() (string, error) {
	ctl, err := os.OpenFile(loopControlDevice, os.O_RDWR, 0)
	if err != nil {
		return "", fmt.Errorf("failed to open %s: %w", loopControlDevice, err)
	}
	defer ctl.Close()

	index, err := unix.IoctlRetInt(int(ctl.Fd()), unix.LOOP_CTL_GET_FREE)
	if err != nil {
		return "", fmt.Errorf("failed to get free loop device number: %w", err)
	}
	return fmt.Sprintf(loopDevicePattern, index), nil
}