Fail mirroring more gracefully (#34002)

* reuse recoverable error checks across mirror_pull
* add new cases for 'cannot lock ref/not our ref' (race condition in
fetch) and 'Unable to create/lock"
* move lfs sync right after commit graph write, and before other
maintenance which may fail
* try a prune for 'broken reference' as well as 'not our ref'
* always sync LFS right after commit graph write, and before other
maintenance which may fail

This handles a few cases where our very large and very active
repositories could serve mirrored git refs, but be missing lfs files:

## Case 1 (multiple variants): Race condition in git fetch
There was already a check for 'unable to resolve reference' on a failed
git fetch, after which a git prune and then subsequent fetch are
performed. This is to work around a race condition where the git remote
tells Gitea about a ref for some HEAD of a branch, then fails a few
seconds later because the remote branch was deleted, or the ref was
updated (force push).

There are two more variants to the error message you can get, but for
the same kind of race condition. These *may* be related to the git
binary version Gitea has access to (in my case, it was 2.48.1).

## Case 2: githttp.go can serve updated git refs before it's synced lfs
oids

There is probably a more aggressive refactor we could do here to have
the cat-file loop use FETCH_HEAD instead of relying on the commit graphs
to be committed locally (and thus serveable to clients of Gitea), but a
simple reduction in the occurrences of this for me was to move the lfs
sync block immediately after the commit-graph write and before any other
time-consuming (or potentially erroring/exiting) blocks.

---------

Co-authored-by: wxiaoguang <wxiaoguang@gmail.com>
pull/33858/head^2
Royce Remer 2025-03-26 11:04:46 -07:00 committed by GitHub
parent c7b85f7070
commit e0ad72e223
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 57 additions and 11 deletions

View File

@ -235,6 +235,24 @@ func pruneBrokenReferences(ctx context.Context,
return pruneErr
}
// checkRecoverableSyncError takes an error message from a git fetch command and returns false if it should be a fatal/blocking error
func checkRecoverableSyncError(stderrMessage string) bool {
switch {
case strings.Contains(stderrMessage, "unable to resolve reference") && strings.Contains(stderrMessage, "reference broken"):
return true
case strings.Contains(stderrMessage, "remote error") && strings.Contains(stderrMessage, "not our ref"):
return true
case strings.Contains(stderrMessage, "cannot lock ref") && strings.Contains(stderrMessage, "but expected"):
return true
case strings.Contains(stderrMessage, "cannot lock ref") && strings.Contains(stderrMessage, "unable to resolve reference"):
return true
case strings.Contains(stderrMessage, "Unable to create") && strings.Contains(stderrMessage, ".lock"):
return true
default:
return false
}
}
// runSync returns true if sync finished without error.
func runSync(ctx context.Context, m *repo_model.Mirror) ([]*mirrorSyncResult, bool) {
repoPath := m.Repo.RepoPath()
@ -275,7 +293,7 @@ func runSync(ctx context.Context, m *repo_model.Mirror) ([]*mirrorSyncResult, bo
stdoutMessage := util.SanitizeCredentialURLs(stdout)
// Now check if the error is a resolve reference due to broken reference
if strings.Contains(stderr, "unable to resolve reference") && strings.Contains(stderr, "reference broken") {
if checkRecoverableSyncError(stderr) {
log.Warn("SyncMirrors [repo: %-v]: failed to update mirror repository due to broken references:\nStdout: %s\nStderr: %s\nErr: %v\nAttempting Prune", m.Repo, stdoutMessage, stderrMessage, err)
err = nil
@ -324,6 +342,15 @@ func runSync(ctx context.Context, m *repo_model.Mirror) ([]*mirrorSyncResult, bo
return nil, false
}
if m.LFS && setting.LFS.StartServer {
log.Trace("SyncMirrors [repo: %-v]: syncing LFS objects...", m.Repo)
endpoint := lfs.DetermineEndpoint(remoteURL.String(), m.LFSEndpoint)
lfsClient := lfs.NewClient(endpoint, nil)
if err = repo_module.StoreMissingLfsObjectsInRepository(ctx, m.Repo, gitRepo, lfsClient); err != nil {
log.Error("SyncMirrors [repo: %-v]: failed to synchronize LFS objects for repository: %v", m.Repo, err)
}
}
log.Trace("SyncMirrors [repo: %-v]: syncing branches...", m.Repo)
if _, err = repo_module.SyncRepoBranchesWithRepo(ctx, m.Repo, gitRepo, 0); err != nil {
log.Error("SyncMirrors [repo: %-v]: failed to synchronize branches: %v", m.Repo, err)
@ -333,15 +360,6 @@ func runSync(ctx context.Context, m *repo_model.Mirror) ([]*mirrorSyncResult, bo
if err = repo_module.SyncReleasesWithTags(ctx, m.Repo, gitRepo); err != nil {
log.Error("SyncMirrors [repo: %-v]: failed to synchronize tags to releases: %v", m.Repo, err)
}
if m.LFS && setting.LFS.StartServer {
log.Trace("SyncMirrors [repo: %-v]: syncing LFS objects...", m.Repo)
endpoint := lfs.DetermineEndpoint(remoteURL.String(), m.LFSEndpoint)
lfsClient := lfs.NewClient(endpoint, nil)
if err = repo_module.StoreMissingLfsObjectsInRepository(ctx, m.Repo, gitRepo, lfsClient); err != nil {
log.Error("SyncMirrors [repo: %-v]: failed to synchronize LFS objects for repository: %v", m.Repo, err)
}
}
gitRepo.Close()
log.Trace("SyncMirrors [repo: %-v]: updating size of repository", m.Repo)
@ -368,7 +386,7 @@ func runSync(ctx context.Context, m *repo_model.Mirror) ([]*mirrorSyncResult, bo
stdoutMessage := util.SanitizeCredentialURLs(stdout)
// Now check if the error is a resolve reference due to broken reference
if strings.Contains(stderrMessage, "unable to resolve reference") && strings.Contains(stderrMessage, "reference broken") {
if checkRecoverableSyncError(stderrMessage) {
log.Warn("SyncMirrors [repo: %-v Wiki]: failed to update mirror wiki repository due to broken references:\nStdout: %s\nStderr: %s\nErr: %v\nAttempting Prune", m.Repo, stdoutMessage, stderrMessage, err)
err = nil

View File

@ -64,3 +64,31 @@ func Test_parseRemoteUpdateOutput(t *testing.T) {
assert.EqualValues(t, "1c97ebc746", results[9].oldCommitID)
assert.EqualValues(t, "976d27d52f", results[9].newCommitID)
}
func Test_checkRecoverableSyncError(t *testing.T) {
cases := []struct {
recoverable bool
message string
}{
// A race condition in http git-fetch where certain refs were listed on the remote and are no longer there, would exit status 128
{true, "fatal: remote error: upload-pack: not our ref 988881adc9fc3655077dc2d4d757d480b5ea0e11"},
// A race condition where a local gc/prune removes a named ref during a git-fetch would exit status 1
{true, "cannot lock ref 'refs/pull/123456/merge': unable to resolve reference 'refs/pull/134153/merge'"},
// A race condition in http git-fetch where named refs were listed on the remote and are no longer there
{true, "error: cannot lock ref 'refs/remotes/origin/foo': unable to resolve reference 'refs/remotes/origin/foo': reference broken"},
// A race condition in http git-fetch where named refs were force-pushed during the update, would exit status 128
{true, "error: cannot lock ref 'refs/pull/123456/merge': is at 988881adc9fc3655077dc2d4d757d480b5ea0e11 but expected 7f894307ffc9553edbd0b671cab829786866f7b2"},
// A race condition with other local git operations, such as git-maintenance, would exit status 128 (well, "Unable" the "U" is uppercase)
{true, "fatal: Unable to create '/data/gitea-repositories/foo-org/bar-repo.git/./objects/info/commit-graphs/commit-graph-chain.lock': File exists."},
// Missing or unauthorized credentials, would exit status 128
{false, "fatal: Authentication failed for 'https://example.com/foo-does-not-exist/bar.git/'"},
// A non-existent remote repository, would exit status 128
{false, "fatal: Could not read from remote repository."},
// A non-functioning proxy, would exit status 128
{false, "fatal: unable to access 'https://example.com/foo-does-not-exist/bar.git/': Failed to connect to configured-https-proxy port 1080 after 0 ms: Couldn't connect to server"},
}
for _, c := range cases {
assert.Equal(t, c.recoverable, checkRecoverableSyncError(c.message), "test case: %s", c.message)
}
}