drone/git/adapter/commit.go

650 lines
17 KiB
Go

// Copyright 2023 Harness, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package adapter
import (
"bytes"
"context"
"fmt"
"strconv"
"strings"
"time"
"github.com/harness/gitness/errors"
"github.com/harness/gitness/git/command"
"github.com/harness/gitness/git/enum"
"github.com/harness/gitness/git/types"
gitea "code.gitea.io/gitea/modules/git"
"github.com/rs/zerolog/log"
)
// GetLatestCommit returns the most recent commit reachable from rev that
// touched treePath. The tree path is normalized with cleanTreePath before the
// lookup is delegated to the package-level GetCommit.
//
// ErrRepositoryPathEmpty is returned when repoPath is empty.
func (a Adapter) GetLatestCommit(
	ctx context.Context,
	repoPath string,
	rev string,
	treePath string,
) (*types.Commit, error) {
	if len(repoPath) == 0 {
		return nil, ErrRepositoryPathEmpty
	}

	return GetCommit(ctx, repoPath, rev, cleanTreePath(treePath))
}
// getGiteaCommits resolves every ID in commitIDs to its gitea commit object,
// keeping the input order. Resolution stops at the first ID that cannot be
// loaded; the returned error wraps the cause and names the offending ID.
// An empty commitIDs yields a nil slice.
func getGiteaCommits(
	giteaRepo *gitea.Repository,
	commitIDs []string,
) ([]*gitea.Commit, error) {
	var resolved []*gitea.Commit

	for _, id := range commitIDs {
		c, err := giteaRepo.GetCommit(id)
		if err != nil {
			return nil, fmt.Errorf("failed to get commit '%s': %w", id, err)
		}
		resolved = append(resolved, c)
	}

	return resolved, nil
}
// listCommitSHAs runs `git rev-list` in repoPath and returns one entry per
// line of its output.
//
// The walk starts at ref. The filter narrows the result:
//   - AfterRef: excluded via the ^REF syntax (commits reachable from it are left out)
//   - Path: passed after the argument separator
//   - Since / Until: passed as --since / --until together with --date unix
//   - Committer: passed as --committer
//
// Pagination is only applied when limit > 0: limit becomes --max-count and,
// for page > 1, (page-1)*limit commits are skipped.
func (a Adapter) listCommitSHAs(
	ctx context.Context,
	repoPath string,
	ref string,
	page int,
	limit int,
	filter types.CommitFilter,
) ([]string, error) {
	revList := command.New("rev-list")

	// stop at AfterRef if one was requested (^REF excludes what REF can reach)
	if after := filter.AfterRef; after != "" {
		revList.Add(command.WithArg("^" + after))
	}

	// starting point of the walk
	revList.Add(command.WithArg(ref))

	if filter.Path != "" {
		revList.Add(command.WithPostSepArg(filter.Path))
	}

	// pagination, only if a limit was provided
	// TODO: we should add absolute limits to protect git (return error)
	if limit > 0 {
		revList.Add(command.WithFlag("--max-count", strconv.Itoa(limit)))
		if page > 1 {
			skip := (page - 1) * limit
			revList.Add(command.WithFlag("--skip", strconv.Itoa(skip)))
		}
	}

	hasSince, hasUntil := filter.Since > 0, filter.Until > 0
	if hasSince || hasUntil {
		revList.Add(command.WithFlag("--date", "unix"))
	}
	if hasSince {
		revList.Add(command.WithFlag("--since", strconv.FormatInt(filter.Since, 10)))
	}
	if hasUntil {
		revList.Add(command.WithFlag("--until", strconv.FormatInt(filter.Until, 10)))
	}

	if filter.Committer != "" {
		revList.Add(command.WithFlag("--committer", filter.Committer))
	}

	var stdout bytes.Buffer
	if err := revList.Run(ctx, command.WithDir(repoPath), command.WithStdout(&stdout)); err != nil {
		// TODO: handle error in case they don't have a common merge base!
		return nil, processGiteaErrorf(err, "failed to trigger rev-list command")
	}

	return parseLinesToSlice(stdout.Bytes()), nil
}
// ListCommitSHAs returns the SHAs of the commits reachable from ref.
// It is the exported entry point for listCommitSHAs and forwards all of its
// arguments unchanged.
// Note: ref & filter.AfterRef can be Branch / Tag / CommitSHA.
// Note: commits returned are [ref->...->afterRef).
func (a Adapter) ListCommitSHAs(
	ctx context.Context,
	repoPath string,
	ref string,
	page int,
	limit int,
	filter types.CommitFilter,
) ([]string, error) {
	shas, err := a.listCommitSHAs(ctx, repoPath, ref, page, limit, filter)
	return shas, err
}
// ListCommits lists the commits reachable from ref.
// Note: ref & filter.AfterRef can be Branch / Tag / CommitSHA.
// Note: commits returned are [ref->...->afterRef).
//
// page and limit paginate the underlying rev-list call (see listCommitSHAs).
// When includeFileStats is true, FileStats is populated on every returned
// commit.
//
// When filter.Path is set, rename details for that path are returned as the
// second result, and the first commit is dropped if it is the commit in which
// the path was renamed away (see cleanupCommitsForRename). Without a path
// filter the second result is nil.
//
// ErrRepositoryPathEmpty is returned when repoPath is empty.
func (a Adapter) ListCommits(
	ctx context.Context,
	repoPath string,
	ref string,
	page int,
	limit int,
	includeFileStats bool,
	filter types.CommitFilter,
) ([]types.Commit, []types.PathRenameDetails, error) {
	if repoPath == "" {
		return nil, nil, ErrRepositoryPathEmpty
	}

	giteaRepo, err := gitea.OpenRepository(ctx, repoPath)
	if err != nil {
		return nil, nil, processGiteaErrorf(err, "failed to open repository")
	}
	defer giteaRepo.Close()

	commitSHAs, err := a.listCommitSHAs(ctx, repoPath, ref, page, limit, filter)
	if err != nil {
		return nil, nil, err
	}

	giteaCommits, err := getGiteaCommits(giteaRepo, commitSHAs)
	if err != nil {
		return nil, nil, err
	}

	commits := make([]types.Commit, len(giteaCommits))
	for i := range giteaCommits {
		var commit *types.Commit
		commit, err = mapGiteaCommit(giteaCommits[i])
		if err != nil {
			return nil, nil, err
		}
		commits[i] = *commit
	}

	// File stats are collected once, after every commit has been mapped.
	// Doing this inside the loop above would re-query all commits on every
	// iteration and would also query the not-yet-mapped entries, whose SHA is
	// still empty.
	if includeFileStats {
		if err = includeFileStatsInCommits(ctx, giteaRepo, commits); err != nil {
			return nil, nil, err
		}
	}

	if len(filter.Path) != 0 {
		renameDetailsList, err := getRenameDetails(ctx, giteaRepo, commits, filter.Path)
		if err != nil {
			return nil, nil, err
		}
		cleanedUpCommits := cleanupCommitsForRename(commits, renameDetailsList, filter.Path)
		return cleanedUpCommits, renameDetailsList, nil
	}

	return commits, nil, nil
}
// includeFileStatsInCommits fills the FileStats field of every element of
// commits in place, looking the stats up by the commit's SHA. It aborts on the
// first lookup failure and returns the wrapped error; entries processed before
// the failure keep their stats.
func includeFileStatsInCommits(
	ctx context.Context,
	giteaRepo *gitea.Repository,
	commits []types.Commit,
) error {
	for idx := range commits {
		stats, err := getFileStats(ctx, giteaRepo, commits[idx].SHA)
		if err != nil {
			return fmt.Errorf("failed to get file stat: %w", err)
		}
		commits[idx].FileStats = stats
	}

	return nil
}
// getFileStats groups the paths changed by commit sha into added, removed and
// modified files.
//
// Classification (by the entry's Path):
//   - modified or renamed -> Modified
//   - deleted             -> Removed
//   - added or copied     -> Added
//
// Entries with an undefined change type are skipped silently; any other
// change type is skipped with a warning. The three result slices are always
// non-nil.
func getFileStats(
	ctx context.Context,
	giteaRepo *gitea.Repository,
	sha string,
) (types.CommitFileStats, error) {
	changes, err := getChangeInfos(ctx, giteaRepo, sha)
	if err != nil {
		return types.CommitFileStats{}, fmt.Errorf("failed to get change infos: %w", err)
	}

	stats := types.CommitFileStats{
		Added:    []string{},
		Removed:  []string{},
		Modified: []string{},
	}

	for _, change := range changes {
		switch change.ChangeType {
		case enum.FileDiffStatusModified, enum.FileDiffStatusRenamed:
			stats.Modified = append(stats.Modified, change.Path)
		case enum.FileDiffStatusDeleted:
			stats.Removed = append(stats.Removed, change.Path)
		case enum.FileDiffStatusAdded, enum.FileDiffStatusCopied:
			stats.Added = append(stats.Added, change.Path)
		case enum.FileDiffStatusUndefined:
			// nothing to record for undefined entries
		default:
			log.Ctx(ctx).Warn().Msgf("unknown change type %q for path %q",
				change.ChangeType, change.Path)
		}
	}

	return stats, nil
}
// cleanupCommitsForRename removes the duplicate listing of a rename commit.
//
// A commit that renames a file shows up in the history of both the old and the
// new path. By convention it is only listed for the new path: if the first
// commit is recorded as CommitSHABefore of a rename whose OldPath equals path,
// that first commit is dropped. Otherwise commits is returned unchanged.
func cleanupCommitsForRename(
	commits []types.Commit,
	renameDetails []types.PathRenameDetails,
	path string,
) []types.Commit {
	if len(commits) == 0 {
		return commits
	}

	firstSHA := commits[0].SHA
	for i := range renameDetails {
		detail := &renameDetails[i]
		if detail.OldPath != path || detail.CommitSHABefore != firstSHA {
			continue
		}
		return commits[1:]
	}

	return commits
}
// getRenameDetails inspects the first and the last commit of commits for a
// rename involving path and returns what it finds (at most two entries).
//
// A rename found in the first commit is tagged with CommitSHABefore, one found
// in the last commit with CommitSHAAfter. With a single commit only the first
// check is performed; with no commits an empty, non-nil slice is returned.
func getRenameDetails(
	ctx context.Context,
	giteaRepo *gitea.Repository,
	commits []types.Commit,
	path string,
) ([]types.PathRenameDetails, error) {
	total := len(commits)
	if total == 0 {
		return []types.PathRenameDetails{}, nil
	}

	found := make([]types.PathRenameDetails, 0, 2)

	firstSHA := commits[0].SHA
	first, err := giteaGetRenameDetails(ctx, giteaRepo, firstSHA, path)
	if err != nil {
		return nil, err
	}
	if !(first.NewPath == "" && first.OldPath == "") {
		first.CommitSHABefore = firstSHA
		found = append(found, *first)
	}

	if total == 1 {
		return found, nil
	}

	lastSHA := commits[total-1].SHA
	last, err := giteaGetRenameDetails(ctx, giteaRepo, lastSHA, path)
	if err != nil {
		return nil, err
	}
	if !(last.NewPath == "" && last.OldPath == "") {
		last.CommitSHAAfter = lastSHA
		found = append(found, *last)
	}

	return found, nil
}
// giteaGetRenameDetails looks for a rename in commit ref whose old or new path
// equals path. The first matching entry is returned with OldPath and NewPath
// set; when nothing matches, an empty PathRenameDetails is returned.
// The returned pointer is never nil, not even alongside an error.
func giteaGetRenameDetails(
	ctx context.Context,
	giteaRepo *gitea.Repository,
	ref string,
	path string,
) (*types.PathRenameDetails, error) {
	changes, err := getChangeInfos(ctx, giteaRepo, ref)
	if err != nil {
		return &types.PathRenameDetails{}, fmt.Errorf("failed to get change infos %w", err)
	}

	for _, change := range changes {
		if change.ChangeType != enum.FileDiffStatusRenamed {
			continue
		}
		if change.Path != path && change.NewPath != path {
			continue
		}
		details := types.PathRenameDetails{
			OldPath: change.Path,
			NewPath: change.NewPath,
		}
		return &details, nil
	}

	return &types.PathRenameDetails{}, nil
}
// getChangeInfos runs `git log --name-status` for ref inside the repository
// and parses the output into changeInfo entries.
//
// The git command runs with the repository's own context and path
// (giteaRepo.Ctx / giteaRepo.Path); ctx is only handed to the parser.
func getChangeInfos(
	ctx context.Context,
	giteaRepo *gitea.Repository,
	ref string,
) ([]changeInfo, error) {
	// NOTE(review): "-1" is passed as a value of the --pretty=format: flag;
	// presumably it ends up as a separate argument limiting the log to a
	// single commit - confirm against command.WithFlag.
	logCmd := command.New("log",
		command.WithArg(ref),
		command.WithFlag("--name-status"),
		command.WithFlag("--pretty=format:", "-1"),
	)

	var stdout bytes.Buffer
	runErr := logCmd.Run(giteaRepo.Ctx, command.WithDir(giteaRepo.Path), command.WithStdout(&stdout))
	if runErr != nil {
		return nil, fmt.Errorf("failed to trigger log command: %w", runErr)
	}

	parsed, err := getFileChangeTypeFromLog(ctx, parseLinesToSlice(stdout.Bytes()))
	if err != nil {
		return nil, err
	}

	return parsed, nil
}
// changeInfo is one parsed line of `git log --name-status` output
// (see getFileChangeTypeFromLog).
type changeInfo struct {
	// ChangeType is the status derived from the first column of the line.
	ChangeType enum.FileDiffStatus
	// Path is the second column of the line.
	Path string
	// NewPath is the third column; it is only populated when the line has
	// exactly three columns (e.g. in case of renames).
	NewPath string
}
// getFileChangeTypeFromLog parses lines of `git log --name-status` output into
// changeInfo entries.
//
// Every line is expected to be tab separated: "<status>\t<path>", or
// "<status>\t<path>\t<new path>" for entries that carry two paths (e.g.
// renames). Blank lines carry no change and are skipped. A non-blank line with
// fewer than two columns yields an error and a nil result.
//
// The returned slice is non-nil (possibly empty) on success.
func getFileChangeTypeFromLog(
	ctx context.Context,
	changeStrings []string,
) ([]changeInfo, error) {
	changeInfos := make([]changeInfo, 0, len(changeStrings))

	for _, changeString := range changeStrings {
		if strings.TrimSpace(changeString) == "" {
			continue
		}

		changeStringSplit := strings.Split(changeString, "\t")
		// strings.Split always returns at least one element, so both the
		// status and the path column have to be checked for here - otherwise
		// indexing the path below panics on a line without a tab.
		if len(changeStringSplit) < 2 {
			return nil, fmt.Errorf("could not parse changeString %q", changeString)
		}

		c := changeInfo{
			ChangeType: convertChangeType(ctx, changeStringSplit[0]),
			Path:       changeStringSplit[1],
		}
		if len(changeStringSplit) == 3 {
			c.NewPath = changeStringSplit[2]
		}

		changeInfos = append(changeInfos, c)
	}

	return changeInfos, nil
}
// convertChangeType maps a status column to a file diff status by looking at
// its first character only (so e.g. "R100" is a rename):
// A -> added, C -> copied, D -> deleted, M -> modified, R -> renamed.
// Anything else, including an empty string, is logged as a warning and mapped
// to FileDiffStatusUndefined.
func convertChangeType(ctx context.Context, c string) enum.FileDiffStatus {
	var lead byte
	if len(c) > 0 {
		lead = c[0]
	}

	switch lead {
	case 'A':
		return enum.FileDiffStatusAdded
	case 'C':
		return enum.FileDiffStatusCopied
	case 'D':
		return enum.FileDiffStatusDeleted
	case 'M':
		return enum.FileDiffStatusModified
	case 'R':
		return enum.FileDiffStatusRenamed
	}

	log.Ctx(ctx).Warn().Msgf("encountered unknown change type %s", c)
	return enum.FileDiffStatusUndefined
}
// GetCommit returns the (latest) commit for a specific revision.
// It delegates to the package-level GetCommit without a path restriction.
// ErrRepositoryPathEmpty is returned when repoPath is empty.
func (a Adapter) GetCommit(
	ctx context.Context,
	repoPath string,
	rev string,
) (*types.Commit, error) {
	if len(repoPath) == 0 {
		return nil, ErrRepositoryPathEmpty
	}

	const noPath = ""
	return GetCommit(ctx, repoPath, rev, noPath)
}
// GetFullCommitID resolves shortID via `git rev-parse` in repoPath and returns
// the trimmed command output.
//
// A failure whose message contains "exit status 128" is reported as a
// not-found error; any other failure is returned as is.
// ErrRepositoryPathEmpty is returned when repoPath is empty.
func (a Adapter) GetFullCommitID(
	ctx context.Context,
	repoPath string,
	shortID string,
) (string, error) {
	if len(repoPath) == 0 {
		return "", ErrRepositoryPathEmpty
	}

	revParse := command.New("rev-parse", command.WithArg(shortID))

	var stdout bytes.Buffer
	err := revParse.Run(ctx, command.WithDir(repoPath), command.WithStdout(&stdout))
	switch {
	case err == nil:
		return strings.TrimSpace(stdout.String()), nil
	case strings.Contains(err.Error(), "exit status 128"):
		return "", errors.NotFound("commit not found %s", shortID)
	default:
		return "", err
	}
}
// GetCommits returns the (latest) commits for a specific list of refs, in the
// order of refs. The first ref that cannot be resolved or mapped aborts the
// call with an error.
// Note: ref can be Branch / Tag / CommitSHA.
// ErrRepositoryPathEmpty is returned when repoPath is empty.
func (a Adapter) GetCommits(
	ctx context.Context,
	repoPath string,
	refs []string,
) ([]types.Commit, error) {
	if len(repoPath) == 0 {
		return nil, ErrRepositoryPathEmpty
	}

	giteaRepo, err := gitea.OpenRepository(ctx, repoPath)
	if err != nil {
		return nil, processGiteaErrorf(err, "failed to open repository")
	}
	defer giteaRepo.Close()

	result := make([]types.Commit, 0, len(refs))
	for _, ref := range refs {
		giteaCommit, getErr := giteaRepo.GetCommit(ref)
		if getErr != nil {
			return nil, processGiteaErrorf(getErr, "error getting commit '%s'", ref)
		}

		mapped, mapErr := mapGiteaCommit(giteaCommit)
		if mapErr != nil {
			return nil, mapErr
		}

		result = append(result, *mapped)
	}

	return result, nil
}
// GetCommitDivergences returns the count of the diverging commits for all
// branch pairs, one result per request and in request order.
// A request that fails with a not-found error is reported as
// {Ahead: -1, Behind: -1}; any other error aborts the whole call.
// IMPORTANT: If a max is provided it limits the overall count of diverging commits
// (max 10 could lead to (0, 10) while it's actually (2, 12)).
// ErrRepositoryPathEmpty is returned when repoPath is empty.
func (a Adapter) GetCommitDivergences(
	ctx context.Context,
	repoPath string,
	requests []types.CommitDivergenceRequest,
	max int32,
) ([]types.CommitDivergence, error) {
	if len(repoPath) == 0 {
		return nil, ErrRepositoryPathEmpty
	}

	divergences := make([]types.CommitDivergence, 0, len(requests))
	for _, request := range requests {
		divergence, err := a.getCommitDivergence(ctx, repoPath, request, max)
		switch {
		case types.IsNotFoundError(err):
			divergence = types.CommitDivergence{Ahead: -1, Behind: -1}
		case err != nil:
			return nil, err
		}
		divergences = append(divergences, divergence)
	}

	return divergences, nil
}
// getCommitDivergence returns the count of diverging commits for a pair of
// branches: Ahead is the left count and Behind the right count reported by
// `git rev-list --count --left-right <from>...<to>`.
// IMPORTANT: If a max is provided it limits the overall count of diverging commits
// (max 10 could lead to (0, 10) while it's actually (2, 12)).
// NOTE: Gitea implementation makes two git cli calls, but it can be done with one
// (downside is the max behavior explained above).
func (a Adapter) getCommitDivergence(
	ctx context.Context,
	repoPath string,
	req types.CommitDivergenceRequest,
	max int32,
) (types.CommitDivergence, error) {
	args := []string{"rev-list", "--count", "--left-right"}

	// cap the count only when a positive max was given
	if max > 0 {
		args = append(args, "--max-count", strconv.FormatInt(int64(max), 10))
	}

	// symmetric difference: commits that are not shared by both sides
	args = append(args, fmt.Sprintf("%s...%s", req.From, req.To))

	// NOTE(review): err is declared up front so that the := below reuses an
	// error-typed variable - presumably required by RunStdString's return
	// type; confirm before removing.
	var err error
	stdOut, stdErr, err := gitea.NewCommand(ctx, args...).RunStdString(&gitea.RunOpts{Dir: repoPath})
	if err != nil {
		return types.CommitDivergence{},
			processGiteaErrorf(err, "git rev-list failed for '%s...%s' (stdErr: '%s')", req.From, req.To, stdErr)
	}

	// the two counts are tab separated, e.g.: "1\t2\n"
	leftPart, rightPart, found := strings.Cut(stdOut, "\t")
	if !found {
		return types.CommitDivergence{}, fmt.Errorf("git rev-list returned unexpected output '%s'", stdOut)
	}

	// strip trailing whitespace that would break number parsing
	leftPart = strings.TrimRight(leftPart, " \t")
	rightPart = strings.TrimRight(rightPart, " \t\n")

	ahead, err := strconv.ParseInt(leftPart, 10, 32)
	if err != nil {
		return types.CommitDivergence{},
			fmt.Errorf("failed to parse git rev-list output for ahead '%s' (full: '%s')): %w", leftPart, stdOut, err)
	}

	behind, err := strconv.ParseInt(rightPart, 10, 32)
	if err != nil {
		return types.CommitDivergence{},
			fmt.Errorf("failed to parse git rev-list output for behind '%s' (full: '%s')): %w", rightPart, stdOut, err)
	}

	return types.CommitDivergence{
		Ahead:  int32(ahead),
		Behind: int32(behind),
	}, nil
}
// parseLinesToSlice splits command output into its lines.
//
// Leading and trailing whitespace of the whole output is removed first, then
// the remainder is split on '\n'. Output that is empty or consists only of
// whitespace yields nil (rather than a slice holding one empty string), so
// callers never receive a bogus empty entry.
func parseLinesToSlice(output []byte) []string {
	trimmed := bytes.TrimSpace(output)
	if len(trimmed) == 0 {
		return nil
	}

	return strings.Split(string(trimmed), "\n")
}
// GetCommit returns info about a commit.
//
// It runs `git log --max-count 1` for rev in repoPath, optionally restricted
// to path, with a format that emits nine columns joined by fmtZero, and maps
// those columns onto a types.Commit.
//
// Errors:
//   - not found, when git reports an "ambiguous argument" for rev
//   - invalid argument, when git produces no output (path not found in rev)
//   - a plain error when git fails otherwise or the column count is unexpected
//
// TODO: Move this function outside of the adapter package.
func GetCommit(
	ctx context.Context,
	repoPath string,
	rev string,
	path string,
) (*types.Commit, error) {
	// positions of the columns within the formatted output
	const (
		colSHA = iota
		colAuthorName
		colAuthorEmail
		colAuthorTime
		colCommitterName
		colCommitterEmail
		colCommitterTime
		colSubject
		colBody
		columnCount
	)

	const format = "" +
		fmtCommitHash + fmtZero +
		fmtAuthorName + fmtZero +
		fmtAuthorEmail + fmtZero +
		fmtAuthorTime + fmtZero +
		fmtCommitterName + fmtZero +
		fmtCommitterEmail + fmtZero +
		fmtCommitterTime + fmtZero +
		fmtSubject + fmtZero +
		fmtBody

	logCmd := command.New("log",
		command.WithFlag("--max-count", "1"),
		command.WithFlag("--format="+format),
		command.WithArg(rev),
	)
	if path != "" {
		logCmd.Add(command.WithPostSepArg(path))
	}

	var stdout bytes.Buffer
	if err := logCmd.Run(ctx, command.WithDir(repoPath), command.WithStdout(&stdout)); err != nil {
		if strings.Contains(err.Error(), "ambiguous argument") {
			return nil, errors.NotFound("revision %q not found", rev)
		}
		return nil, fmt.Errorf("failed to run git to get commit data: %w", err)
	}

	raw := stdout.String()
	if raw == "" {
		return nil, errors.InvalidArgument("path %q not found in %s", path, rev)
	}

	fields := strings.Split(strings.TrimSpace(raw), separatorZero)
	if len(fields) != columnCount {
		return nil, fmt.Errorf(
			"unexpected git log formatted output, expected %d, but got %d columns", columnCount, len(fields))
	}

	// Timestamp parse errors are ignored on purpose here; a timestamp that
	// does not parse leaves the corresponding time at its zero value.
	authorWhen, _ := time.Parse(time.RFC3339Nano, fields[colAuthorTime])
	committerWhen, _ := time.Parse(time.RFC3339Nano, fields[colCommitterTime])

	author := types.Signature{
		Identity: types.Identity{
			Name:  fields[colAuthorName],
			Email: fields[colAuthorEmail],
		},
		When: authorWhen,
	}
	committer := types.Signature{
		Identity: types.Identity{
			Name:  fields[colCommitterName],
			Email: fields[colCommitterEmail],
		},
		When: committerWhen,
	}

	return &types.Commit{
		SHA:       fields[colSHA],
		Title:     fields[colSubject],
		Message:   fields[colBody],
		Author:    author,
		Committer: committer,
	}, nil
}