From e3bf017f780ffb103a607fc4540866f3f7d16add Mon Sep 17 00:00:00 2001 From: Marko Gacesa Date: Wed, 7 Feb 2024 09:40:04 +0000 Subject: [PATCH] Rewrite of git merge (#1023) --- app/api/controller/pullreq/merge.go | 49 ++- git/adapter/branch.go | 2 +- git/adapter/commit.go | 8 +- git/adapter/last_commit_cache.go | 2 +- git/merge.go | 307 +++++++--------- git/merge/check.go | 102 ++++++ git/merge/merge.go | 209 +++++++++++ git/sharedrepo/sharedrepo.go | 527 ++++++++++++++++++++++++++++ 8 files changed, 1016 insertions(+), 190 deletions(-) create mode 100644 git/merge/check.go create mode 100644 git/merge/merge.go create mode 100644 git/sharedrepo/sharedrepo.go diff --git a/app/api/controller/pullreq/merge.go b/app/api/controller/pullreq/merge.go index 6a31acc66..e78248e80 100644 --- a/app/api/controller/pullreq/merge.go +++ b/app/api/controller/pullreq/merge.go @@ -223,6 +223,7 @@ func (c *Controller) Merge( BaseBranch: pr.TargetBranch, HeadRepoUID: sourceRepo.GitUID, HeadBranch: pr.SourceBranch, + RefType: gitenum.RefTypeUndefined, // update no refs -> no commit will be created HeadExpectedSHA: in.SourceSHA, }) if err != nil { @@ -233,7 +234,7 @@ func (c *Controller) Merge( if pr.SourceSHA != mergeOutput.HeadSHA { return errors.New("source SHA has changed") } - if mergeOutput.MergeSHA == "" || len(mergeOutput.ConflictFiles) > 0 { + if len(mergeOutput.ConflictFiles) > 0 { pr.MergeCheckStatus = enum.MergeCheckStatusConflict pr.MergeBaseSHA = mergeOutput.MergeBaseSHA pr.MergeTargetSHA = &mergeOutput.BaseSHA @@ -275,17 +276,41 @@ func (c *Controller) Merge( return nil, &types.MergeViolations{RuleViolations: violations}, nil } - // TODO: for forking merge title might be different? 
- var mergeTitle string - author := *session.Principal.ToPrincipalInfo() - if in.Method == enum.MergeMethodSquash { - // squash commit should show as authored by PR author - author = pr.Author - mergeTitle = fmt.Sprintf("%s (#%d)", pr.Title, pr.Number) - } else { - mergeTitle = fmt.Sprintf("Merge branch '%s' of %s (#%d)", pr.SourceBranch, sourceRepo.Path, pr.Number) + // commit details: author, committer and message + + var author *git.Identity + + switch in.Method { + case enum.MergeMethodMerge: + author = identityFromPrincipalInfo(*session.Principal.ToPrincipalInfo()) + case enum.MergeMethodSquash: + author = identityFromPrincipalInfo(pr.Author) + case enum.MergeMethodRebase: + author = nil // Not important for the rebase merge: the author info in the commits will be preserved. } + var committer *git.Identity + + switch in.Method { + case enum.MergeMethodMerge, enum.MergeMethodSquash: + committer = identityFromPrincipalInfo(*bootstrap.NewSystemServiceSession().Principal.ToPrincipalInfo()) + case enum.MergeMethodRebase: + committer = identityFromPrincipalInfo(*session.Principal.ToPrincipalInfo()) + } + + var mergeTitle string + + switch in.Method { + case enum.MergeMethodMerge: + mergeTitle = fmt.Sprintf("Merge branch '%s' of %s (#%d)", pr.SourceBranch, sourceRepo.Path, pr.Number) + case enum.MergeMethodSquash: + mergeTitle = fmt.Sprintf("%s (#%d)", pr.Title, pr.Number) + case enum.MergeMethodRebase: + mergeTitle = "" // Not used. 
+ } + + // create merge commit(s) + log.Ctx(ctx).Debug().Msgf("all pre-check passed, merge PR") now := time.Now() @@ -296,9 +321,9 @@ func (c *Controller) Merge( HeadBranch: pr.SourceBranch, Title: mergeTitle, Message: "", - Committer: identityFromPrincipalInfo(*bootstrap.NewSystemServiceSession().Principal.ToPrincipalInfo()), + Committer: committer, CommitterDate: &now, - Author: identityFromPrincipalInfo(author), + Author: author, AuthorDate: &now, RefType: gitenum.RefTypeBranch, RefName: pr.TargetBranch, diff --git a/git/adapter/branch.go b/git/adapter/branch.go index 6f24d9758..5b775ea6f 100644 --- a/git/adapter/branch.go +++ b/git/adapter/branch.go @@ -38,7 +38,7 @@ func (a Adapter) GetBranch( } ref := GetReferenceFromBranchName(branchName) - commit, err := getCommit(ctx, repoPath, ref, "") + commit, err := GetCommit(ctx, repoPath, ref, "") if err != nil { return nil, fmt.Errorf("failed to find the commit for the branch: %w", err) } diff --git a/git/adapter/commit.go b/git/adapter/commit.go index aacfbd856..94e6b19ea 100644 --- a/git/adapter/commit.go +++ b/git/adapter/commit.go @@ -43,7 +43,7 @@ func (a Adapter) GetLatestCommit( } treePath = cleanTreePath(treePath) - return getCommit(ctx, repoPath, rev, treePath) + return GetCommit(ctx, repoPath, rev, treePath) } func getGiteaCommits( @@ -400,7 +400,7 @@ func (a Adapter) GetCommit( return nil, ErrRepositoryPathEmpty } - return getCommit(ctx, repoPath, rev, "") + return GetCommit(ctx, repoPath, rev, "") } func (a Adapter) GetFullCommitID( @@ -564,7 +564,9 @@ func parseLinesToSlice(output []byte) []string { return slice } -func getCommit( +// GetCommit returns info about a commit. +// TODO: Move this function outside of the adapter package. 
+func GetCommit( ctx context.Context, repoPath string, rev string, diff --git a/git/adapter/last_commit_cache.go b/git/adapter/last_commit_cache.go index 5c37d13b6..790781f26 100644 --- a/git/adapter/last_commit_cache.go +++ b/git/adapter/last_commit_cache.go @@ -121,5 +121,5 @@ func (c commitEntryGetter) Find( path = "." } - return getCommit(ctx, repoPath, commitSHA, path) + return GetCommit(ctx, repoPath, commitSHA, path) } diff --git a/git/merge.go b/git/merge.go index 649fa76f4..5a869de6c 100644 --- a/git/merge.go +++ b/git/merge.go @@ -17,15 +17,12 @@ package git import ( "context" "fmt" - "io" - "os" - "path/filepath" "strings" "time" "github.com/harness/gitness/errors" "github.com/harness/gitness/git/enum" - "github.com/harness/gitness/git/tempdir" + "github.com/harness/gitness/git/merge" "github.com/harness/gitness/git/types" "github.com/rs/zerolog/log" @@ -120,242 +117,206 @@ type MergeOutput struct { // //nolint:gocognit,gocyclo,cyclop func (s *Service) Merge(ctx context.Context, params *MergeParams) (MergeOutput, error) { - if err := params.Validate(); err != nil { - return MergeOutput{}, fmt.Errorf("Merge: params not valid: %w", err) + err := params.Validate() + if err != nil { + return MergeOutput{}, fmt.Errorf("params not valid: %w", err) } - log := log.Ctx(ctx).With().Str("repo_uid", params.RepoUID).Logger() - repoPath := getFullPathForRepo(s.reposRoot, params.RepoUID) - baseBranch := "base" - trackingBranch := "tracking" + // prepare the merge method function - pr := &types.PullRequest{ - BaseRepoPath: repoPath, - BaseBranch: params.BaseBranch, - HeadBranch: params.HeadBranch, + mergeMethod, ok := params.Method.Sanitize() + if !ok && params.Method != "" { + return MergeOutput{}, errors.InvalidArgument("Unsupported merge method: %s", params.Method) } - log.Debug().Msg("create temporary repository") + var mergeFunc merge.Func - // Clone base repo. 
- tmpRepo, err := s.adapter.CreateTemporaryRepoForPR(ctx, s.tmpDir, pr, baseBranch, trackingBranch) - if err != nil { - return MergeOutput{}, fmt.Errorf("Merge: failed to initialize temporary repo: %w", err) + switch mergeMethod { + case enum.MergeMethodMerge: + mergeFunc = merge.Merge + case enum.MergeMethodSquash: + mergeFunc = merge.Squash + case enum.MergeMethodRebase: + mergeFunc = merge.Rebase + default: + // should not happen, the call to Sanitize above should handle this case. + panic("unsupported merge method") } - defer func() { - rmErr := tempdir.RemoveTemporaryPath(tmpRepo.Path) - if rmErr != nil { - log.Warn().Msgf("Removing temporary location %s for merge operation was not successful", tmpRepo.Path) + + // set up the target reference + + var refPath string + var refOldValue string + + if params.RefType != enum.RefTypeUndefined { + refPath, err = GetRefPath(params.RefName, params.RefType) + if err != nil { + return MergeOutput{}, fmt.Errorf( + "failed to generate full reference for type '%s' and name '%s' for merge operation: %w", + params.RefType, params.RefName, err) } - }() - log.Debug().Msg("get merge base") + refOldValue, err = s.adapter.GetFullCommitID(ctx, repoPath, refPath) + if errors.IsNotFound(err) { + refOldValue = types.NilSHA + } else if err != nil { + return MergeOutput{}, fmt.Errorf("failed to resolve %q: %w", refPath, err) + } + } - mergeBaseCommitSHA, _, err := s.adapter.GetMergeBase(ctx, tmpRepo.Path, "origin", baseBranch, trackingBranch) + // logger + + log := log.Ctx(ctx).With(). + Str("repo_uid", params.RepoUID). + Str("head", params.HeadBranch). + Str("base", params.BaseBranch). + Str("method", string(mergeMethod)). + Str("ref", refPath). 
+ Logger() + + // find the commit SHAs + + baseCommitSHA, err := s.adapter.GetFullCommitID(ctx, repoPath, params.BaseBranch) + if err != nil { + return MergeOutput{}, fmt.Errorf("failed to get merge base branch commit SHA: %w", err) + } + + headCommitSHA, err := s.adapter.GetFullCommitID(ctx, repoPath, params.HeadBranch) + if err != nil { + return MergeOutput{}, fmt.Errorf("failed to get merge base branch commit SHA: %w", err) + } + + if params.HeadExpectedSHA != "" && params.HeadExpectedSHA != headCommitSHA { + return MergeOutput{}, errors.PreconditionFailed( + "head branch '%s' is on SHA '%s' which doesn't match expected SHA '%s'.", + params.HeadBranch, + headCommitSHA, + params.HeadExpectedSHA) + } + + mergeBaseCommitSHA, _, err := s.adapter.GetMergeBase(ctx, repoPath, "origin", baseCommitSHA, headCommitSHA) if err != nil { return MergeOutput{}, fmt.Errorf("failed to get merge base: %w", err) } - if tmpRepo.HeadSHA == mergeBaseCommitSHA { - return MergeOutput{}, errors.InvalidArgument("no changes between head branch %s and base branch %s", - params.HeadBranch, params.BaseBranch) + if headCommitSHA == mergeBaseCommitSHA { + return MergeOutput{}, errors.InvalidArgument("head branch doesn't contain any new commits.") } - if params.HeadExpectedSHA != "" && params.HeadExpectedSHA != tmpRepo.HeadSHA { - return MergeOutput{}, errors.PreconditionFailed( - "head branch '%s' is on SHA '%s' which doesn't match expected SHA '%s'.", - params.HeadBranch, - tmpRepo.HeadSHA, - params.HeadExpectedSHA) - } + // find short stat and number of commits - log.Debug().Msg("get diff tree") - - var outbuf, errbuf strings.Builder - // Enable sparse-checkout - sparseCheckoutList, err := s.adapter.GetDiffTree(ctx, tmpRepo.Path, baseBranch, trackingBranch) + shortStat, err := s.adapter.DiffShortStat(ctx, repoPath, baseCommitSHA, headCommitSHA, true) if err != nil { - return MergeOutput{}, fmt.Errorf("execution of GetDiffTree failed: %w", err) - } - - log.Debug().Msg("prepare 
sparse-checkout") - - infoPath := filepath.Join(tmpRepo.Path, ".git", "info") - if err = os.MkdirAll(infoPath, fileMode700); err != nil { - return MergeOutput{}, fmt.Errorf("unable to create .git/info in tmpRepo.Path: %w", err) - } - - sparseCheckoutListPath := filepath.Join(infoPath, "sparse-checkout") - if err = os.WriteFile(sparseCheckoutListPath, []byte(sparseCheckoutList), 0o600); err != nil { - return MergeOutput{}, - fmt.Errorf("unable to write .git/info/sparse-checkout file in tmpRepo.Path: %w", err) - } - - log.Debug().Msg("get diff stats") - - shortStat, err := s.adapter.DiffShortStat(ctx, tmpRepo.Path, tmpRepo.BaseSHA, tmpRepo.HeadSHA, true) - if err != nil { - return MergeOutput{}, fmt.Errorf("execution of DiffShortStat failed: %w", err) + return MergeOutput{}, errors.Internal(err, + "failed to find short stat between %s and %s", baseCommitSHA, headCommitSHA) } changedFileCount := shortStat.Files - log.Debug().Msg("get commit divergene") - - divergences, err := s.adapter.GetCommitDivergences(ctx, tmpRepo.Path, - []types.CommitDivergenceRequest{{From: tmpRepo.HeadSHA, To: tmpRepo.BaseSHA}}, 0) + commitCount, err := merge.CommitCount(ctx, repoPath, baseCommitSHA, headCommitSHA) if err != nil { - return MergeOutput{}, fmt.Errorf("execution of GetCommitDivergences failed: %w", err) - } - commitCount := int(divergences[0].Ahead) - - log.Debug().Msg("update git configuration") - - // Switch off LFS process (set required, clean and smudge here also) - if err = s.adapter.Config(ctx, tmpRepo.Path, "filter.lfs.process", ""); err != nil { - return MergeOutput{}, err + return MergeOutput{}, fmt.Errorf("failed to find commit count for merge check: %w", err) } - if err = s.adapter.Config(ctx, tmpRepo.Path, "filter.lfs.required", "false"); err != nil { - return MergeOutput{}, err + // handle simple merge check + + if params.RefType == enum.RefTypeUndefined { + _, _, conflicts, err := merge.FindConflicts(ctx, repoPath, baseCommitSHA, headCommitSHA) + if err != nil { + 
return MergeOutput{}, errors.Internal(err, + "Merge check failed to find conflicts between commits %s and %s", + baseCommitSHA, headCommitSHA) + } + + log.Debug().Msg("merged check completed") + + return MergeOutput{ + BaseSHA: baseCommitSHA, + HeadSHA: headCommitSHA, + MergeBaseSHA: mergeBaseCommitSHA, + MergeSHA: "", + CommitCount: commitCount, + ChangedFileCount: changedFileCount, + ConflictFiles: conflicts, + }, nil } - if err = s.adapter.Config(ctx, tmpRepo.Path, "filter.lfs.clean", ""); err != nil { - return MergeOutput{}, err - } + // author and committer - if err = s.adapter.Config(ctx, tmpRepo.Path, "filter.lfs.smudge", ""); err != nil { - return MergeOutput{}, err - } + now := time.Now().UTC() - if err = s.adapter.Config(ctx, tmpRepo.Path, "core.sparseCheckout", "true"); err != nil { - return MergeOutput{}, err - } + committer := types.Signature{Identity: types.Identity(params.Actor), When: now} - log.Debug().Msg("read tree") - - // Read base branch index - if err = s.adapter.ReadTree(ctx, tmpRepo.Path, "HEAD", io.Discard); err != nil { - return MergeOutput{}, fmt.Errorf("failed to read tree: %w", err) - } - outbuf.Reset() - errbuf.Reset() - - committer := params.Actor if params.Committer != nil { - committer = *params.Committer + committer.Identity = types.Identity(*params.Committer) } - committerDate := time.Now().UTC() if params.CommitterDate != nil { - committerDate = *params.CommitterDate + committer.When = *params.CommitterDate } author := committer + if params.Author != nil { - author = *params.Author + author.Identity = types.Identity(*params.Author) } - authorDate := committerDate if params.AuthorDate != nil { - authorDate = *params.AuthorDate + author.When = *params.AuthorDate } - // Because this may call hooks we should pass in the environment - // TODO: merge specific envars should be set by the adapter impl. 
- env := append(CreateEnvironmentForPush(ctx, params.WriteParams), - "GIT_AUTHOR_NAME="+author.Name, - "GIT_AUTHOR_EMAIL="+author.Email, - "GIT_AUTHOR_DATE="+authorDate.Format(time.RFC3339), - "GIT_COMMITTER_NAME="+committer.Name, - "GIT_COMMITTER_EMAIL="+committer.Email, - "GIT_COMMITTER_DATE="+committerDate.Format(time.RFC3339), - ) + // merge message mergeMsg := strings.TrimSpace(params.Title) if len(params.Message) > 0 { mergeMsg += "\n\n" + strings.TrimSpace(params.Message) } - if params.Method == "" { - params.Method = enum.MergeMethodMerge - } + // merge - log.Debug().Msg("perform merge") - - result, err := s.adapter.Merge( + mergeCommitSHA, conflicts, err := mergeFunc( ctx, - pr, - params.Method, - baseBranch, - trackingBranch, - tmpRepo.Path, + repoPath, s.tmpDir, + &author, &committer, mergeMsg, - &types.Identity{ - Name: author.Name, - Email: author.Email, - }, - env...) + mergeBaseCommitSHA, baseCommitSHA, headCommitSHA) if err != nil { - return MergeOutput{}, fmt.Errorf("merge failed: %w", err) + return MergeOutput{}, errors.Internal(err, "failed to merge %q to %q in %q using the %q merge method.", + params.HeadBranch, params.BaseBranch, params.RepoUID, mergeMethod) } - - if len(result.ConflictFiles) > 0 { + if len(conflicts) != 0 { return MergeOutput{ - BaseSHA: tmpRepo.BaseSHA, - HeadSHA: tmpRepo.HeadSHA, + BaseSHA: baseCommitSHA, + HeadSHA: headCommitSHA, MergeBaseSHA: mergeBaseCommitSHA, MergeSHA: "", CommitCount: commitCount, ChangedFileCount: changedFileCount, - ConflictFiles: result.ConflictFiles, + ConflictFiles: conflicts, }, nil } - log.Debug().Msg("get commit id") + // git reference update - mergeCommitSHA, err := s.adapter.GetFullCommitID(ctx, tmpRepo.Path, baseBranch) + log.Trace().Msg("merge completed - updating git reference") + + err = s.adapter.UpdateRef( + ctx, + params.EnvVars, + repoPath, + refPath, + refOldValue, + mergeCommitSHA, + ) if err != nil { - return MergeOutput{}, fmt.Errorf("failed to get full commit id for the new 
merge: %w", err) + return MergeOutput{}, + errors.Internal(err, "failed to update branch %q after merging commits", params.HeadBranch) } - if params.RefType == enum.RefTypeUndefined { - log.Debug().Msg("done (merge-check only)") - - return MergeOutput{ - BaseSHA: tmpRepo.BaseSHA, - HeadSHA: tmpRepo.HeadSHA, - MergeBaseSHA: mergeBaseCommitSHA, - MergeSHA: mergeCommitSHA, - CommitCount: commitCount, - ChangedFileCount: changedFileCount, - ConflictFiles: nil, - }, nil - } - - refPath, err := GetRefPath(params.RefName, params.RefType) - if err != nil { - return MergeOutput{}, fmt.Errorf( - "failed to generate full reference for type '%s' and name '%s' for merge operation: %w", - params.RefType, params.RefName, err) - } - pushRef := baseBranch + ":" + refPath - - log.Debug().Msg("push to original repo") - - if err = s.adapter.Push(ctx, tmpRepo.Path, types.PushOptions{ - Remote: "origin", - Branch: pushRef, - Force: params.Force, - Env: env, - }); err != nil { - return MergeOutput{}, fmt.Errorf("failed to push merge commit to ref '%s': %w", refPath, err) - } - - log.Debug().Msg("done") + log.Trace().Msg("merge completed - git reference updated") return MergeOutput{ - BaseSHA: tmpRepo.BaseSHA, - HeadSHA: tmpRepo.HeadSHA, + BaseSHA: baseCommitSHA, + HeadSHA: headCommitSHA, MergeBaseSHA: mergeBaseCommitSHA, MergeSHA: mergeCommitSHA, CommitCount: commitCount, diff --git a/git/merge/check.go b/git/merge/check.go new file mode 100644 index 000000000..3d05df799 --- /dev/null +++ b/git/merge/check.go @@ -0,0 +1,102 @@ +// Copyright 2023 Harness, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package merge + +import ( + "bytes" + "context" + "strconv" + "strings" + + "github.com/harness/gitness/errors" + "github.com/harness/gitness/git/command" + + "github.com/rs/zerolog/log" +) + +// FindConflicts checks if two git revisions are mergeable and returns list of conflict files if they are not. +func FindConflicts( + ctx context.Context, + repoPath, + base, head string, +) (mergeable bool, treeSHA string, conflicts []string, err error) { + cmd := command.New("merge-tree", + command.WithFlag("--write-tree"), + command.WithFlag("--name-only"), + command.WithFlag("--no-messages"), + command.WithFlag("--stdin")) + + stdin := base + " " + head + stdout := bytes.NewBuffer(nil) + + err = cmd.Run(ctx, + command.WithDir(repoPath), + command.WithStdin(strings.NewReader(stdin)), + command.WithStdout(stdout)) + + if err != nil { + return false, "", nil, errors.Internal(err, "Failed to find conflicts between %s and %s", base, head) + } + + output := strings.TrimSpace(stdout.String()) + output = strings.TrimSuffix(output, "\000") + + lines := strings.Split(output, "\000") + if len(lines) < 2 { + log.Ctx(ctx).Error().Str("output", output).Msg("Unexpected merge-tree output") + return false, "", nil, errors.Internal(nil, + "Failed to find conflicts between %s and %s: Unexpected git output", base, head) + } + + status, err := strconv.Atoi(lines[0]) + if err != nil { + log.Ctx(ctx).Err(err).Str("output", output).Msg("Unexpected merge status") + return false, "", nil, errors.Internal(nil, + "Failed to find conflicts between %s and %s: Unexpected merge 
status", base, head) + } + + if status < 0 { + return false, "", nil, errors.Internal(nil, + "Failed to find conflicts between %s and %s: Operation blocked. Status=%d", base, head, status) + } + + if status == 1 { + return true, lines[1], nil, nil // all good, merge possible, no conflicts found + } + + return false, lines[1], lines[2:], nil // conflict found, list of conflicted files returned +} + +// CommitCount returns number of commits between the two git revisions. +func CommitCount( + ctx context.Context, + repoPath string, + start, end string, +) (int, error) { + cmd := command.New("rev-list", command.WithFlag("--count"), command.WithArg(start+".."+end)) + + stdout := bytes.NewBuffer(nil) + + if err := cmd.Run(ctx, command.WithDir(repoPath), command.WithStdout(stdout)); err != nil { + return 0, errors.Internal(err, "failed to rev-list in shared repo") + } + + commitCount, err := strconv.Atoi(strings.TrimSpace(stdout.String())) + if err != nil { + return 0, errors.Internal(err, "failed to parse commit count from rev-list output in shared repo") + } + + return commitCount, nil +} diff --git a/git/merge/merge.go b/git/merge/merge.go new file mode 100644 index 000000000..a94978436 --- /dev/null +++ b/git/merge/merge.go @@ -0,0 +1,209 @@ +// Copyright 2023 Harness, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package merge + +import ( + "context" + "fmt" + + "github.com/harness/gitness/git/adapter" + "github.com/harness/gitness/git/sharedrepo" + "github.com/harness/gitness/git/types" +) + +// Func represents a merge method function. The concrete merge implementation functions must have this signature. +type Func func( + ctx context.Context, + repoPath, tmpDir string, + author, committer *types.Signature, + message string, + mergeBaseSHA, targetSHA, sourceSHA string, +) (mergeSHA string, conflicts []string, err error) + +// Merge merges the two commits (targetSHA and sourceSHA) using the Merge method. +func Merge( + ctx context.Context, + repoPath, tmpDir string, + author, committer *types.Signature, + message string, + mergeBaseSHA, targetSHA, sourceSHA string, +) (mergeSHA string, conflicts []string, err error) { + return mergeInternal(ctx, + repoPath, tmpDir, + author, committer, + message, + mergeBaseSHA, targetSHA, sourceSHA, + false) +} + +// Squash merges the two commits (targetSHA and sourceSHA) using the Squash method. +func Squash( + ctx context.Context, + repoPath, tmpDir string, + author, committer *types.Signature, + message string, + mergeBaseSHA, targetSHA, sourceSHA string, +) (mergeSHA string, conflicts []string, err error) { + return mergeInternal(ctx, + repoPath, tmpDir, + author, committer, + message, + mergeBaseSHA, targetSHA, sourceSHA, + true) +} + +// mergeInternal is the internal implementation of merge used for the Merge and Squash methods. 
+func mergeInternal( + ctx context.Context, + repoPath, tmpDir string, + author, committer *types.Signature, + message string, + mergeBaseSHA, targetSHA, sourceSHA string, + squash bool, +) (mergeSHA string, conflicts []string, err error) { + err = runInSharedRepo(ctx, tmpDir, repoPath, func(s *sharedrepo.SharedRepo) error { + var err error + + var treeSHA string + + treeSHA, conflicts, err = s.MergeTree(ctx, mergeBaseSHA, targetSHA, sourceSHA) + if err != nil { + return fmt.Errorf("merge tree failed: %w", err) + } + + if len(conflicts) > 0 { + return nil + } + + parents := make([]string, 0, 2) + parents = append(parents, targetSHA) + if !squash { + parents = append(parents, sourceSHA) + } + + mergeSHA, err = s.CommitTree(ctx, author, committer, treeSHA, message, false, parents...) + if err != nil { + return fmt.Errorf("commit tree failed: %w", err) + } + + return nil + }) + if err != nil { + return "", nil, fmt.Errorf("merge method=merge squash=%t: %w", squash, err) + } + + return mergeSHA, conflicts, nil +} + +// Rebase merges the two commits (targetSHA and sourceSHA) using the Rebase method. 
+func Rebase( + ctx context.Context, + repoPath, tmpDir string, + _, committer *types.Signature, // commit author isn't used here - it's copied from every commit + _ string, // commit message isn't used here + mergeBaseSHA, targetSHA, sourceSHA string, +) (mergeSHA string, conflicts []string, err error) { + err = runInSharedRepo(ctx, tmpDir, repoPath, func(s *sharedrepo.SharedRepo) error { + sourceSHAs, err := s.CommitSHAList(ctx, mergeBaseSHA, sourceSHA) + if err != nil { + return fmt.Errorf("failed to find commit list in rebase merge: %w", err) + } + + lastCommitSHA := targetSHA + + for i := len(sourceSHAs) - 1; i >= 0; i-- { + commitSHA := sourceSHAs[i] + + var treeSHA string + var commitConflicts []string + + commitInfo, err := adapter.GetCommit(ctx, s.Directory(), commitSHA, "") + if err != nil { + return fmt.Errorf("failed to get commit data in rebase merge: %w", err) + } + + // rebase merge preserves the commit author (and date) and the commit message, but changes the committer. + author := &commitInfo.Author + message := commitInfo.Title + if commitInfo.Message != "" { + message += "\n\n" + commitInfo.Message + } + + treeSHA, commitConflicts, err = s.MergeTree(ctx, mergeBaseSHA, lastCommitSHA, commitSHA) + if err != nil { + return fmt.Errorf("failed to merge tree in rebase merge: %w", err) + } + + if len(commitConflicts) > 0 { + _, _, conflicts, err = FindConflicts(ctx, s.Directory(), targetSHA, sourceSHA) + if err != nil { + return fmt.Errorf("failed to find conflicts in rebase merge: %w", err) + } + + if len(conflicts) == 0 { + return fmt.Errorf("expected to find conflicts after rebase merge between %s and %s, but couldn't", + mergeBaseSHA, sourceSHA) + } + + return nil + } + + lastCommitSHA, err = s.CommitTree(ctx, author, committer, treeSHA, message, false, lastCommitSHA) + if err != nil { + return fmt.Errorf("failed to commit tree in rebase merge: %w", err) + } + } + + mergeSHA = lastCommitSHA + + return nil + }) + if err != nil { + return "", nil, 
fmt.Errorf("merge method=rebase: %w", err) + } + + return mergeSHA, conflicts, nil +} + +// runInSharedRepo is a helper function used to run the provided function inside a shared repository. +func runInSharedRepo( + ctx context.Context, + tmpDir, repoPath string, + fn func(s *sharedrepo.SharedRepo) error, +) error { + s, err := sharedrepo.NewSharedRepo(tmpDir, repoPath) + if err != nil { + return err + } + + defer s.Close(ctx) + + err = s.InitAsBare(ctx) + if err != nil { + return err + } + + err = fn(s) + if err != nil { + return err + } + + err = s.MoveObjects(ctx) + if err != nil { + return err + } + + return nil +} diff --git a/git/sharedrepo/sharedrepo.go b/git/sharedrepo/sharedrepo.go new file mode 100644 index 000000000..77236d233 --- /dev/null +++ b/git/sharedrepo/sharedrepo.go @@ -0,0 +1,527 @@ +// Copyright 2023 Harness, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package sharedrepo + +import ( + "bufio" + "bytes" + "context" + "crypto/rand" + "encoding/base32" + "fmt" + "io" + "io/fs" + "os" + "path" + "path/filepath" + "regexp" + "sort" + "strings" + + "github.com/harness/gitness/errors" + "github.com/harness/gitness/git/command" + "github.com/harness/gitness/git/tempdir" + "github.com/harness/gitness/git/types" + + "github.com/rs/zerolog/log" +) + +type SharedRepo struct { + temporaryPath string + repositoryPath string +} + +// NewSharedRepo creates a new temporary bare repository. 
+func NewSharedRepo( + baseTmpDir string, + repositoryPath string, +) (*SharedRepo, error) { + var buf [5]byte + _, _ = rand.Read(buf[:]) + id := base32.StdEncoding.EncodeToString(buf[:]) + + temporaryPath, err := tempdir.CreateTemporaryPath(baseTmpDir, id) + if err != nil { + return nil, fmt.Errorf("failed to create shared repository directory: %w", err) + } + + t := &SharedRepo{ + temporaryPath: temporaryPath, + repositoryPath: repositoryPath, + } + + return t, nil +} + +func (r *SharedRepo) Close(ctx context.Context) { + if err := tempdir.RemoveTemporaryPath(r.temporaryPath); err != nil { + log.Ctx(ctx).Err(err). + Str("path", r.temporaryPath). + Msg("Failed to remove temporary shared directory") + } +} + +func (r *SharedRepo) InitAsBare(ctx context.Context) error { + cmd := command.New("init", command.WithFlag("--bare")) + + if err := cmd.Run(ctx, command.WithDir(r.temporaryPath)); err != nil { + return fmt.Errorf("failed to initialize bare git repository directory: %w", err) + } + + if err := func() error { + alternates := filepath.Join(r.temporaryPath, "objects", "info", "alternates") + f, err := os.OpenFile(alternates, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o600) + if err != nil { + return fmt.Errorf("failed to create alternates file: %w", err) + } + + defer func() { _ = f.Close() }() + + data := filepath.Join(r.repositoryPath, "objects") + if _, err = fmt.Fprintln(f, data); err != nil { + return fmt.Errorf("failed to write alternates file: %w", err) + } + + return nil + }(); err != nil { + return fmt.Errorf("failed to make the alternates file in shared repository: %w", err) + } + + return nil +} + +func (r *SharedRepo) Directory() string { + return r.temporaryPath +} + +// SetDefaultIndex sets the git index to our HEAD. 
+func (r *SharedRepo) SetDefaultIndex(ctx context.Context) error { + cmd := command.New("read-tree", command.WithArg("HEAD")) + + if err := cmd.Run(ctx, command.WithDir(r.temporaryPath)); err != nil { + return fmt.Errorf("failed to initialize shared repository index to HEAD: %w", err) + } + + return nil +} + +// SetIndex sets the git index to the provided treeish. +func (r *SharedRepo) SetIndex(ctx context.Context, treeish string) error { + cmd := command.New("read-tree", command.WithArg(treeish)) + + if err := cmd.Run(ctx, command.WithDir(r.temporaryPath)); err != nil { + return fmt.Errorf("failed to initialize shared repository index to %q: %w", treeish, err) + } + + return nil +} + +// ClearIndex clears the git index. +func (r *SharedRepo) ClearIndex(ctx context.Context) error { + cmd := command.New("read-tree", command.WithFlag("--empty")) + + if err := cmd.Run(ctx, command.WithDir(r.temporaryPath)); err != nil { + return fmt.Errorf("failed to clear shared repository index: %w", err) + } + + return nil +} + +// LsFiles checks if the given filename arguments are in the index. +func (r *SharedRepo) LsFiles( + ctx context.Context, + filenames ...string, +) ([]string, error) { + cmd := command.New("ls-files", command.WithFlag("-z"), command.WithPostSepArg(filenames...)) + + stdout := bytes.NewBuffer(nil) + + err := cmd.Run(ctx, command.WithDir(r.temporaryPath), command.WithStdout(stdout)) + if err != nil { + return nil, fmt.Errorf("failed to list files in shared repository's git index: %w", err) + } + + files := make([]string, 0) + for _, line := range bytes.Split(stdout.Bytes(), []byte{'\000'}) { + files = append(files, string(line)) + } + + return files, nil +} + +// RemoveFilesFromIndex removes the given files from the index. 
+func (r *SharedRepo) RemoveFilesFromIndex( + ctx context.Context, + filenames ...string, +) error { + cmd := command.New("update-index", + command.WithFlag("--remove"), + command.WithFlag("-z"), + command.WithFlag("--index-info")) + + stdin := bytes.NewBuffer(nil) + for _, file := range filenames { + if file != "" { + stdin.WriteString("0 0000000000000000000000000000000000000000\t") + stdin.WriteString(file) + stdin.WriteByte('\000') + } + } + + if err := cmd.Run(ctx, command.WithDir(r.temporaryPath), command.WithStdin(stdin)); err != nil { + return fmt.Errorf("failed to update-index in shared repo: %w", err) + } + + return nil +} + +// WriteGitObject writes the provided content to the object db and returns its hash. +func (r *SharedRepo) WriteGitObject( + ctx context.Context, + content io.Reader, +) (string, error) { + cmd := command.New("hash-object", + command.WithFlag("-w"), + command.WithFlag("--stdin")) + + stdout := bytes.NewBuffer(nil) + + err := cmd.Run(ctx, + command.WithDir(r.temporaryPath), + command.WithStdin(content), + command.WithStdout(stdout)) + if err != nil { + return "", fmt.Errorf("failed to hash-object in shared repo: %w", err) + } + + return strings.TrimSpace(stdout.String()), nil +} + +// ShowFile writes the content of the file at the given revision to the provided io.Writer. +func (r *SharedRepo) ShowFile( + ctx context.Context, + filePath string, + rev string, + writer io.Writer, +) error { + file := strings.TrimSpace(rev) + ":" + strings.TrimSpace(filePath) + + cmd := command.New("show", command.WithArg(file)) + + if err := cmd.Run(ctx, command.WithDir(r.temporaryPath), command.WithStdout(writer)); err != nil { + return fmt.Errorf("failed to show file in shared repo: %w", err) + } + + return nil +} + +// AddObjectToIndex adds the provided object hash to the index with the provided mode and path. 
+func (r *SharedRepo) AddObjectToIndex( + ctx context.Context, + mode string, + objectHash string, + objectPath string, +) error { + cmd := command.New("update-index", + command.WithFlag("--add"), + command.WithFlag("--replace"), + command.WithFlag("--cacheinfo", mode, objectHash, objectPath)) + + if err := cmd.Run(ctx, command.WithDir(r.temporaryPath)); err != nil { + if matched, _ := regexp.MatchString(".*Invalid path '.*", err.Error()); matched { + return errors.InvalidArgument("invalid path '%s'", objectPath) + } + return fmt.Errorf("failed to add object to index in shared repo (path=%s): %w", objectPath, err) + } + + return nil +} + +// WriteTree writes the current index as a tree to the object db and returns its hash. +func (r *SharedRepo) WriteTree(ctx context.Context) (string, error) { + cmd := command.New("write-tree") + + stdout := bytes.NewBuffer(nil) + + if err := cmd.Run(ctx, command.WithDir(r.temporaryPath), command.WithStdout(stdout)); err != nil { + return "", fmt.Errorf("failed to write-tree in shared repo: %w", err) + } + + return strings.TrimSpace(stdout.String()), nil +} + +// MergeTree merges commits in git index. +func (r *SharedRepo) MergeTree( + ctx context.Context, + commitMergeBase, commitTarget, commitSource string, +) (string, []string, error) { + cmd := command.New("merge-tree", + command.WithFlag("--write-tree"), + command.WithFlag("--name-only"), + command.WithFlag("--no-messages"), + command.WithArg(commitTarget), + command.WithArg(commitSource)) + + if commitMergeBase != "" { + cmd.Add(command.WithFlag("--merge-base=" + commitMergeBase)) + } + + stdout := bytes.NewBuffer(nil) + + err := cmd.Run(ctx, + command.WithDir(r.temporaryPath), + command.WithStdout(stdout)) + + // no error: the output is just the tree object SHA + if err == nil { + return strings.TrimSpace(stdout.String()), nil, nil + } + + // exit code=1: the output is the tree object SHA, and list of files in conflict. 
+ if cErr := command.AsError(err); cErr != nil && cErr.ExitCode() == 1 { + output := strings.TrimSpace(stdout.String()) + lines := strings.Split(output, "\n") + if len(lines) < 2 { + log.Ctx(ctx).Err(err).Str("output", output).Msg("unexpected output of merge-tree in shared repo") + return "", nil, fmt.Errorf("unexpected output of merge-tree in shared repo: %w", err) + } + return lines[0], lines[1:], nil + } + + return "", nil, fmt.Errorf("failed to merge-tree in shared repo: %w", err) +} + +// CommitTree creates a commit from a given tree for the user with provided message. +func (r *SharedRepo) CommitTree( + ctx context.Context, + author, committer *types.Signature, + treeHash, message string, + signoff bool, + parentCommits ...string, +) (string, error) { + cmd := command.New("commit-tree", + command.WithArg(treeHash), + command.WithAuthorAndDate( + author.Identity.Name, + author.Identity.Email, + author.When, + ), + command.WithCommitterAndDate( + committer.Identity.Name, + committer.Identity.Email, + committer.When, + ), + ) + + for _, parentCommit := range parentCommits { + cmd.Add(command.WithFlag("-p", parentCommit)) + } + + // temporary no signing + cmd.Add(command.WithFlag("--no-gpg-sign")) + + messageBytes := new(bytes.Buffer) + _, _ = messageBytes.WriteString(message) + _, _ = messageBytes.WriteString("\n") + + if signoff { + // Signed-off-by + _, _ = messageBytes.WriteString("\n") + _, _ = messageBytes.WriteString("Signed-off-by: ") + _, _ = messageBytes.WriteString(fmt.Sprintf("%s <%s>", committer.Identity.Name, committer.Identity.Email)) + } + + stdout := bytes.NewBuffer(nil) + + err := cmd.Run(ctx, + command.WithDir(r.temporaryPath), + command.WithStdout(stdout), + command.WithStdin(messageBytes)) + if err != nil { + return "", fmt.Errorf("failed to commit-tree in shared repo: %w", err) + } + + return strings.TrimSpace(stdout.String()), nil +} + +// CommitSHAList returns list of SHAs of the commits between the two git revisions. 
+func (r *SharedRepo) CommitSHAList( + ctx context.Context, + start, end string, +) ([]string, error) { + cmd := command.New("rev-list", command.WithArg(start+".."+end)) + + stdout := bytes.NewBuffer(nil) + + if err := cmd.Run(ctx, command.WithDir(r.temporaryPath), command.WithStdout(stdout)); err != nil { + return nil, fmt.Errorf("failed to rev-list in shared repo: %w", err) + } + + var commitSHAs []string + + scan := bufio.NewScanner(stdout) + for scan.Scan() { + commitSHA := scan.Text() + commitSHAs = append(commitSHAs, commitSHA) + } + if err := scan.Err(); err != nil { + return nil, fmt.Errorf("failed to scan rev-list output in shared repo: %w", err) + } + + return commitSHAs, nil +} + +// MergeBase returns number of commits between the two git revisions. +func (r *SharedRepo) MergeBase( + ctx context.Context, + rev1, rev2 string, +) (string, error) { + cmd := command.New("merge-base", command.WithArg(rev1), command.WithArg(rev2)) + + stdout := bytes.NewBuffer(nil) + + if err := cmd.Run(ctx, command.WithDir(r.temporaryPath), command.WithStdout(stdout)); err != nil { + return "", fmt.Errorf("failed to merge-base in shared repo: %w", err) + } + + return strings.TrimSpace(stdout.String()), nil +} + +// MoveObjects moves git object from the shared repository to the original repository. 
+func (r *SharedRepo) MoveObjects(ctx context.Context) error { + srcDir := path.Join(r.temporaryPath, "objects") + dstDir := path.Join(r.repositoryPath, "objects") + + var files []fileEntry + + err := filepath.WalkDir(srcDir, func(path string, d fs.DirEntry, err error) error { + if err != nil { + return err + } + + if d.IsDir() { + return nil + } + + relPath, err := filepath.Rel(srcDir, path) + if err != nil { + return fmt.Errorf("failed to get relative path: %w", err) + } + + // avoid coping anything in the info/ + if strings.HasPrefix(relPath, "info/") { + return nil + } + + fileName := filepath.Base(relPath) + + files = append(files, fileEntry{ + fileName: fileName, + fullPath: path, + relPath: relPath, + priority: filePriority(fileName), + }) + + return nil + }) + if err != nil { + return fmt.Errorf("failed to list files of shared repository directory: %w", err) + } + + sort.Slice(files, func(i, j int) bool { + return files[i].priority < files[j].priority // 0 is top priority, 5 is lowest priority + }) + + for _, f := range files { + dstPath := filepath.Join(dstDir, f.relPath) + + err = os.MkdirAll(filepath.Dir(dstPath), os.ModePerm) + if err != nil { + return fmt.Errorf("failed to create directory for git object: %w", err) + } + + // Try to move the file + + errRename := os.Rename(f.fullPath, dstPath) + if errRename == nil { + log.Ctx(ctx).Debug(). + Str("object", f.relPath). 
+ Msg("moved git object") + continue + } + + // Try to copy the file + + copyError := func() error { + srcFile, err := os.Open(f.fullPath) + if err != nil { + return fmt.Errorf("failed to open source file: %w", err) + } + defer func() { _ = srcFile.Close() }() + + dstFile, err := os.Create(dstPath) + if err != nil { + return fmt.Errorf("failed to create target file: %w", err) + } + defer func() { _ = dstFile.Close() }() + + _, err = io.Copy(dstFile, srcFile) + if err != nil { + return fmt.Errorf("failed to copy file content: %w", err) + } + + return nil + }() + if copyError != nil { + log.Ctx(ctx).Err(copyError). + Str("object", f.relPath). + Str("renameErr", errRename.Error()). + Msg("failed to move or copy git object") + return fmt.Errorf("failed to move or copy git object: %w", copyError) + } + + log.Ctx(ctx).Warn(). + Str("object", f.relPath). + Str("renameErr", errRename.Error()). + Msg("copied git object") + } + + return nil +} + +// filePriority is based on https://github.com/git/git/blob/master/tmp-objdir.c#L168 +func filePriority(name string) int { + switch { + case !strings.HasPrefix(name, "pack"): + return 0 + case strings.HasSuffix(name, ".keep"): + return 1 + case strings.HasSuffix(name, ".pack"): + return 2 + case strings.HasSuffix(name, ".rev"): + return 3 + case strings.HasSuffix(name, ".idx"): + return 4 + default: + return 5 + } +} + +type fileEntry struct { + fileName string + fullPath string + relPath string + priority int +}