Merge pull request #658 from fuweid/introduce-nightly-run

test: introduce nightly job for robustness test
pull/663/head
Benjamin Wang 2024-01-03 06:39:36 +00:00 committed by GitHub
commit 1d7fd9af3d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 146 additions and 19 deletions

View File

@ -0,0 +1,17 @@
---
name: Robustness Nightly
permissions: read-all
on:
schedule:
- cron: '25 9 * * *' # runs every day at 09:25 UTC
# workflow_dispatch enables manual testing of this job by maintainers
workflow_dispatch:
jobs:
main:
# GHA allows at most 6h of execution time per job; we aim to finish in roughly 3h (testTimeout below caps the test run at 200m)
uses: ./.github/workflows/robustness_template.yaml
with:
count: 100
testTimeout: 200m
runs-on: "['ubuntu-latest-8-cores']"

View File

@ -0,0 +1,38 @@
---
# Reusable workflow, triggered via workflow_call, that runs the robustness tests.
name: Reusable Robustness Workflow
on:
workflow_call:
inputs:
# Forwarded to the test run as --count (see the test-robustness step).
count:
required: true
type: number
# Forwarded to the test run as --timeout (see the test-robustness step).
testTimeout:
required: false
type: string
default: '30m'
# JSON-encoded list of runner labels; decoded with fromJson for the job's runs-on.
runs-on:
required: false
type: string
default: "['ubuntu-latest']"
permissions: read-all
jobs:
test:
# Limit for the whole job, in minutes.
timeout-minutes: 210
runs-on: ${{ fromJson(inputs.runs-on) }}
steps:
- uses: actions/checkout@v4
# Read the Go version from .go-version and expose it as a step output.
- id: goversion
run: echo "goversion=$(cat .go-version)" >> "$GITHUB_OUTPUT"
- uses: actions/setup-go@v5
with:
go-version: ${{ steps.goversion.outputs.goversion }}
# Enable failpoints, install bbolt, then run the test-robustness make target via sudo.
- name: test-robustness
run: |
set -euo pipefail
make gofail-enable
# build bbolt with failpoint
go install ./cmd/bbolt
sudo -E PATH=$PATH make ROBUSTNESS_TESTFLAGS="--count ${{ inputs.count }} --timeout ${{ inputs.testTimeout }} -failfast" test-robustness

View File

@ -3,16 +3,8 @@ on: [push, pull_request]
permissions: read-all
jobs:
test:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- id: goversion
run: echo "goversion=$(cat .go-version)" >> "$GITHUB_OUTPUT"
- uses: actions/setup-go@v5
with:
go-version: ${{ steps.goversion.outputs.goversion }}
- run: |
make gofail-enable
# build bbolt with failpoint
go install ./cmd/bbolt
sudo -E PATH=$PATH make test-robustness
uses: ./.github/workflows/robustness_template.yaml
with:
count: 10
testTimeout: 30m
runs-on: "['ubuntu-latest-8-cores']"

View File

@ -84,4 +84,4 @@ test-failpoint:
.PHONY: test-robustness # Running robustness tests requires root permission
test-robustness:
go test -v ${TESTFLAGS} ./tests/dmflakey -test.root
go test -v ${TESTFLAGS} ./tests/robustness -test.root
go test -v ${TESTFLAGS} ${ROBUSTNESS_TESTFLAGS} ./tests/robustness -test.root

View File

@ -7,6 +7,7 @@ import (
"fmt"
"os"
"os/exec"
"path"
"path/filepath"
"strings"
"time"
@ -289,6 +290,10 @@ func createEmptyFSImage(imgPath string, fsType FSType) error {
return fmt.Errorf("failed to create image because %s already exists", imgPath)
}
if err := os.MkdirAll(path.Dir(imgPath), 0600); err != nil {
return fmt.Errorf("failed to ensure parent directory %s: %w", path.Dir(imgPath), err)
}
f, err := os.Create(imgPath)
if err != nil {
return fmt.Errorf("failed to create image %s: %w", imgPath, err)

View File

@ -4,8 +4,11 @@ package robustness
import (
"bytes"
"crypto/rand"
"fmt"
"io"
"math"
"math/big"
"net/http"
"net/url"
"os"
@ -23,9 +26,65 @@ import (
"golang.org/x/sys/unix"
)
// panicFailpoints lists the failpoint names that doPowerFailure picks from at
// random when useFailpoint is set; the chosen failpoint is activated with the
// "panic" value.
var panicFailpoints = []string{
"beforeSyncDataPages",
"beforeSyncMetaPage",
"lackOfDiskSpace",
"mapError",
"resizeFileError",
"unmapError",
}
// TestRestartFromPowerFailure tests the data after an unexpected power failure.
//
// Each table entry runs doPowerFailure as its own subtest. An entry sets:
//   - du: how long doPowerFailure sleeps before triggering the failure,
//   - fsMountOpt: the mount option passed on to initFlakeyDevice,
//   - useFailpoint: when true a randomly chosen panic failpoint is activated,
//     otherwise the bbolt process is killed.
func TestRestartFromPowerFailure(t *testing.T) {
// NOTE(review): flakey is not used anywhere in the visible body of this
// function; this looks like a line left over from the pre-change version
// (doPowerFailure creates its own device) — confirm and drop if so.
flakey := initFlakeyDevice(t, t.Name(), dmflakey.FSTypeEXT4, "")
for _, tc := range []struct {
name string
du time.Duration
fsMountOpt string
useFailpoint bool
}{
// "fp_" cases trigger the failure through a failpoint.
{
name: "fp_ext4_commit5s",
du: 5 * time.Second,
fsMountOpt: "commit=5",
useFailpoint: true,
},
{
name: "fp_ext4_commit1s",
du: 10 * time.Second,
fsMountOpt: "commit=1",
useFailpoint: true,
},
{
name: "fp_ext4_commit1000s",
du: 10 * time.Second,
fsMountOpt: "commit=1000",
useFailpoint: true,
},
// "kill_" cases leave useFailpoint at its zero value (false), so the
// process is killed instead.
{
name: "kill_ext4_commit5s",
du: 5 * time.Second,
fsMountOpt: "commit=5",
},
{
name: "kill_ext4_commit1s",
du: 10 * time.Second,
fsMountOpt: "commit=1",
},
{
name: "kill_ext4_commit1000s",
du: 10 * time.Second,
fsMountOpt: "commit=1000",
},
} {
t.Run(tc.name, func(t *testing.T) {
doPowerFailure(t, tc.du, tc.fsMountOpt, tc.useFailpoint)
})
}
}
func doPowerFailure(t *testing.T, du time.Duration, fsMountOpt string, useFailpoint bool) {
flakey := initFlakeyDevice(t, strings.Replace(t.Name(), "/", "_", -1), dmflakey.FSTypeEXT4, fsMountOpt)
root := flakey.RootFS()
dbPath := filepath.Join(root, "boltdb")
@ -38,6 +97,8 @@ func TestRestartFromPowerFailure(t *testing.T) {
}
logPath := filepath.Join(t.TempDir(), fmt.Sprintf("%s.log", t.Name()))
require.NoError(t, os.MkdirAll(path.Dir(logPath), 0600))
logFd, err := os.Create(logPath)
require.NoError(t, err)
defer logFd.Close()
@ -64,10 +125,18 @@ func TestRestartFromPowerFailure(t *testing.T) {
}
}()
time.Sleep(time.Duration(time.Now().UnixNano()%5+1) * time.Second)
time.Sleep(du)
t.Logf("simulate power failure")
activeFailpoint(t, fpURL, "beforeSyncMetaPage", "panic")
if useFailpoint {
fpURL = "http://" + fpURL
targetFp := panicFailpoints[randomInt(t, math.MaxInt32)%len(panicFailpoints)]
t.Logf("random pick failpoint: %s", targetFp)
activeFailpoint(t, fpURL, targetFp, "panic")
} else {
t.Log("kill bbolt")
assert.NoError(t, cmd.Process.Kill())
}
select {
case <-time.After(10 * time.Second):
@ -89,10 +158,10 @@ func TestRestartFromPowerFailure(t *testing.T) {
// activeFailpoint activates the failpoint fpName by sending fpVal in an HTTP PUT request.
func activeFailpoint(t *testing.T, targetUrl string, fpName, fpVal string) {
u, err := url.Parse("http://" + path.Join(targetUrl, fpName))
u, err := url.JoinPath(targetUrl, fpName)
require.NoError(t, err, "parse url %s", targetUrl)
req, err := http.NewRequest("PUT", u.String(), bytes.NewBuffer([]byte(fpVal)))
req, err := http.NewRequest("PUT", u, bytes.NewBuffer([]byte(fpVal)))
require.NoError(t, err)
resp, err := http.DefaultClient.Do(req)
@ -192,3 +261,9 @@ func unmountAll(target string) error {
}
return fmt.Errorf("failed to umount %s: %w", target, unix.EBUSY)
}
func randomInt(t *testing.T, max int) int {
n, err := rand.Int(rand.Reader, big.NewInt(int64(max)))
assert.NoError(t, err)
return int(n.Int64())
}