drone/git/api/diff.go

717 lines
18 KiB
Go

// Copyright 2023 Harness, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package api
import (
"bufio"
"bytes"
"context"
"fmt"
"io"
"math"
"regexp"
"strconv"
"strings"
"github.com/harness/gitness/errors"
"github.com/harness/gitness/git/command"
"github.com/harness/gitness/git/parser"
"github.com/harness/gitness/git/sha"
"github.com/harness/gitness/types"
)
// FileDiffRequest selects a single file, and optionally a line range inside
// its diff, for RawDiff.
type FileDiffRequest struct {
// Path is passed to git diff as a path argument after the "--" separator.
Path string `json:"path"`
// StartLine is the first diff line to return; values <= 0 mean "no lower bound".
StartLine int `json:"start_line"`
// EndLine is the last diff line to return; values <= 0 mean "no upper bound".
// It is excluded from JSON (de)serialization.
EndLine int `json:"-"` // warning: changes are possible and this field may not exist in the future
}
// FileDiffRequests is a list of FileDiffRequest values.
type FileDiffRequests []FileDiffRequest
// DiffShortStat holds the counters reported by `git diff --shortstat`.
type DiffShortStat struct {
// Files is the number of changed files.
Files int
// Additions is the number of inserted lines.
Additions int
// Deletions is the number of deleted lines.
Deletions int
}
// modifyHeader builds a replacement hunk header ("@@ -a,b +c,d @@") whose
// start lines and spans are narrowed to the requested [startLine, endLine] range.
//
// Example: for a hunk header "-100,50 +100,50" with startLine=120 and
// endLine=140 the result is "@@ -120,20 +120,20 @@".
//
// The old and the new side of the hunk are adjusted independently using the
// same rules:
//   - the start line is replaced by startLine when startLine is positive and
//     lies before the end of the side (line + span);
//   - when endLine is positive, the span becomes the distance from startLine
//     to endLine, or to the end of the side if that comes first and lies
//     after startLine.
//
// warning: changes are possible and param endLine may not exist in the future.
func modifyHeader(hunk parser.HunkHeader, startLine, endLine int) []byte {
	// narrow applies the range to one side of the hunk and returns the
	// adjusted (line, span) pair.
	narrow := func(line, span int) (int, int) {
		// The end is always derived from the untouched header values.
		end := line + span

		if startLine > 0 && startLine < end {
			line = startLine
		}

		if endLine > 0 {
			switch {
			case endLine < end:
				span = endLine - startLine
			case end > startLine:
				span = end - startLine
			}
		}

		return line, span
	}

	oldLine, oldSpan := narrow(hunk.OldLine, hunk.OldSpan)
	newLine, newSpan := narrow(hunk.NewLine, hunk.NewSpan)

	header := fmt.Sprintf("@@ -%d,%d +%d,%d @@", oldLine, oldSpan, newLine, newSpan)

	return []byte(header)
}
// cutLinesFromFullFileDiff copies a diff from r to w, keeping everything that
// precedes the first hunk header (the file headers), the hunk header itself
// (rewritten by modifyHeader) and only those hunk body lines whose position,
// counted from 1 right after the hunk header, lies within [startLine, endLine].
//
// Range normalization:
//   - negative values are treated as 0;
//   - startLine == 0 with a positive endLine means "from the first line";
//   - endLine < startLine means "no upper bound";
//   - if both end up 0 the input is copied verbatim with io.Copy.
//
// In the line-by-line mode every kept line is written followed by a single
// '\n' (the scanner strips the original line terminator).
//
// The function can return before r is exhausted (end of range reached, or an
// error occurred). A caller that feeds r from a pipe must close the read side
// afterwards, otherwise the writer stays blocked.
//
// warning: changes are possible and param endLine may not exist in the future
//
//nolint:gocognit
func cutLinesFromFullFileDiff(w io.Writer, r io.Reader, startLine, endLine int) error {
	// Upper bound for the length of a single diff line. bufio.Scanner's
	// default limit (bufio.MaxScanTokenSize, 64 KiB) is easily exceeded by
	// minified or generated files and would abort the diff with
	// bufio.ErrTooLong.
	const maxLineSize = 16 * 1024 * 1024

	if startLine < 0 {
		startLine = 0
	}
	if endLine < 0 {
		endLine = 0
	}
	if startLine == 0 && endLine > 0 {
		startLine = 1
	}
	if endLine < startLine {
		endLine = 0
	}

	// no need for processing lines just copy the data
	if startLine == 0 && endLine == 0 {
		_, err := io.Copy(w, r)
		return err
	}

	var (
		hunkPrefix = []byte("@@")
		newline    = []byte{'\n'}
	)

	linePos := 0   // position of the current line inside the hunk body
	start := false // set once the first hunk header has been seen

	scanner := bufio.NewScanner(r)
	// Start small, like the default scanner does, but allow the buffer to
	// grow up to maxLineSize.
	scanner.Buffer(make([]byte, 0, 4096), maxLineSize)

	for scanner.Scan() {
		line := scanner.Bytes()

		if start {
			linePos++
		}

		// Past the requested range: nothing else needs to be written.
		if endLine > 0 && linePos > endLine {
			break
		}

		// Inside the hunk body but before the requested range.
		if linePos > 0 && startLine > 0 && linePos < startLine {
			continue
		}

		if bytes.HasPrefix(line, hunkPrefix) {
			hunk, ok := parser.ParseDiffHunkHeader(string(line)) // TBD: maybe reader?
			if !ok {
				return fmt.Errorf("failed to extract lines from diff, range [%d,%d] : %w",
					startLine, endLine, ErrParseDiffHunkHeader)
			}

			line = modifyHeader(hunk, startLine, endLine)
			start = true
		}

		if _, err := w.Write(line); err != nil {
			return err
		}
		if _, err := w.Write(newline); err != nil {
			return err
		}
	}

	return scanner.Err()
}
// RawDiff writes the output of `git diff -M --full-index` between baseRef and
// headRef to w.
//
// Parameters:
//   - repoPath: directory in which git is executed; must not be empty.
//   - baseRef, headRef: revisions to compare. If a ref resolves to an
//     annotated tag, the SHA the tag points to is used instead.
//   - mergeBase: adds the --merge-base flag.
//   - alternates: alternate object directories passed to the command.
//   - files: optional list of paths (and line ranges) to restrict the diff to.
//
// If none of the files carries a positive StartLine or EndLine, a single git
// diff is executed for all paths and its output is copied unchanged.
// Otherwise one git diff is executed per requested file, in order; for files
// with a line range the diff is produced with maximum context
// (-U<math.MaxInt32>) and trimmed by cutLinesFromFullFileDiff.
//
// Returns ErrRepositoryPathEmpty for an empty repoPath, or the first error
// encountered while producing or copying a diff.
//
//nolint:gocognit
func (g *Git) RawDiff(
	ctx context.Context,
	w io.Writer,
	repoPath string,
	baseRef string,
	headRef string,
	mergeBase bool,
	alternates []string,
	files ...FileDiffRequest,
) error {
	if repoPath == "" {
		return ErrRepositoryPathEmpty
	}

	// A failed tag lookup is not an error here: the ref is then used as is.
	baseTag, err := g.GetAnnotatedTag(ctx, repoPath, baseRef)
	if err == nil {
		baseRef = baseTag.TargetSha.String()
	}

	headTag, err := g.GetAnnotatedTag(ctx, repoPath, headRef)
	if err == nil {
		headRef = headTag.TargetSha.String()
	}

	cmd := command.New("diff",
		command.WithFlag("-M"),
		command.WithFlag("--full-index"),
		command.WithAlternateObjectDirs(alternates...),
	)
	if mergeBase {
		cmd.Add(command.WithFlag("--merge-base"))
	}

	perFileDiffRequired := false
	paths := make([]string, 0, len(files))
	for _, file := range files {
		paths = append(paths, file.Path)
		if file.StartLine > 0 || file.EndLine > 0 {
			perFileDiffRequired = true
		}
	}

	// runDiff executes one git diff restricted to diffPaths and copies its
	// output, cut to [startLine, endLine], to w.
	runDiff := func(diffPaths []string, startLine, endLine int) error {
		newCmd := cmd.Clone()
		if startLine > 0 || endLine > 0 {
			// Ask for the whole file as context so that any line can be cut out.
			newCmd.Add(command.WithFlag("-U" + strconv.Itoa(math.MaxInt32)))
		}
		newCmd.Add(command.WithArg(baseRef, headRef))
		if len(diffPaths) > 0 {
			newCmd.Add(command.WithPostSepArg(diffPaths...))
		}

		pipeRead, pipeWrite := io.Pipe()
		// cutLinesFromFullFileDiff may stop reading before EOF. Closing the
		// read side makes pending and future writes fail, so the goroutine
		// below (and the git process) can terminate instead of blocking forever.
		defer func() {
			_ = pipeRead.Close()
		}()

		go func() {
			var err error

			defer func() {
				// If running of the command below fails, make the pipe reader also fail with the same error.
				_ = pipeWrite.CloseWithError(err)
			}()

			err = newCmd.Run(ctx, command.WithDir(repoPath), command.WithStdout(pipeWrite))
			if err != nil {
				err = processGitErrorf(err, "git diff failed between %q and %q", baseRef, headRef)
				if cErr := command.AsError(err); cErr != nil {
					if cErr.IsExitCode(128) && cErr.IsBadObject() {
						err = errors.NotFound("commit not found")
					}
				}
			}
		}()

		return cutLinesFromFullFileDiff(w, pipeRead, startLine, endLine)
	}

	if !perFileDiffRequired {
		// No file requests a line range: one diff for all paths, copied verbatim.
		return runDiff(paths, 0, 0)
	}

	for _, file := range files {
		if err = runDiff([]string{file.Path}, file.StartLine, file.EndLine); err != nil {
			return err
		}
	}

	return nil
}
// CommitDiff streams the output of `git show --full-index --pretty=format:%b <rev>`,
// executed in repoPath, to w.
//
// It returns ErrRepositoryPathEmpty when repoPath is empty, an invalid
// argument error when rev is empty, and the processed git error when the
// command fails.
func (g *Git) CommitDiff(
	ctx context.Context,
	repoPath string,
	rev string,
	w io.Writer,
) error {
	switch {
	case repoPath == "":
		return ErrRepositoryPathEmpty
	case rev == "":
		return errors.InvalidArgument("git revision cannot be empty")
	}

	showCmd := command.New("show",
		command.WithFlag("--full-index"),
		command.WithFlag("--pretty=format:%b"),
		command.WithArg(rev),
	)

	runErr := showCmd.Run(ctx, command.WithDir(repoPath), command.WithStdout(w))
	if runErr != nil {
		return processGitErrorf(runErr, "commit diff error")
	}

	return nil
}
// DiffShortStat returns the number of changed files, additions and deletions
// between baseRef and headRef.
//
// The revisions are compared as "base..head", or as "base...head" when
// useMergeBase is set. If baseRef is empty or equals types.NilSHA, headRef is
// compared against the empty tree instead.
//
// It returns ErrRepositoryPathEmpty when repoPath is empty.
func (g *Git) DiffShortStat(
	ctx context.Context,
	repoPath string,
	baseRef string,
	headRef string,
	useMergeBase bool,
) (DiffShortStat, error) {
	if repoPath == "" {
		return DiffShortStat{}, ErrRepositoryPathEmpty
	}

	var revArgs []string
	switch {
	case baseRef == "" || baseRef == types.NilSHA:
		// No base revision: everything in headRef counts as a change.
		revArgs = []string{sha.EmptyTree.String(), headRef}
	case useMergeBase:
		revArgs = []string{baseRef + "..." + headRef}
	default:
		revArgs = []string{baseRef + ".." + headRef}
	}

	result, err := GetDiffShortStat(ctx, repoPath, revArgs...)
	if err != nil {
		return DiffShortStat{}, processGitErrorf(err, "failed to get diff short stat between %s and %s",
			baseRef, headRef)
	}

	return result, nil
}
// GetDiffHunkHeaders for each file in diff output returns file name (old and new to detect renames),
// and all hunk headers. The diffs are generated with unified=0 parameter to create minimum sized hunks.
// Hunks' body is ignored.
// The purpose of this function is to get data based on which code comments could be repositioned.
//
// The diff is executed as `git diff --patch --full-index --no-color --unified=0 <sourceRef> <targetRef>`
// and its output is streamed into parser.GetHunkHeaders. Anything git writes
// to stderr takes precedence over a parsing error.
//
// It returns ErrRepositoryPathEmpty when repoPath is empty.
func (g *Git) GetDiffHunkHeaders(
	ctx context.Context,
	repoPath string,
	targetRef string,
	sourceRef string,
) ([]*parser.DiffFileHunkHeaders, error) {
	if repoPath == "" {
		return nil, ErrRepositoryPathEmpty
	}

	pipeRead, pipeWrite := io.Pipe()
	// Should the parser return before reaching EOF, closing the read side
	// makes the pending write fail so that the goroutine below does not
	// block forever.
	defer func() {
		_ = pipeRead.Close()
	}()

	stderr := &bytes.Buffer{}

	go func() {
		var err error

		defer func() {
			// If running of the command below fails, make the pipe reader also fail with the same error.
			_ = pipeWrite.CloseWithError(err)
		}()

		cmd := command.New("diff",
			command.WithFlag("--patch"),
			command.WithFlag("--full-index"),
			command.WithFlag("--no-color"),
			command.WithFlag("--unified=0"),
			command.WithArg(sourceRef),
			command.WithArg(targetRef),
		)
		err = cmd.Run(ctx,
			command.WithDir(repoPath),
			command.WithStdout(pipeWrite),
			command.WithStderr(stderr), // We capture stderr output in a buffer.
		)
	}()

	fileHunkHeaders, err := parser.GetHunkHeaders(pipeRead)

	// First check if there's something in the stderr buffer, if yes that's the error
	if errStderr := parseDiffStderr(stderr); errStderr != nil {
		return nil, errStderr
	}

	// Next check if reading the git diff output caused an error
	if err != nil {
		return nil, err
	}

	return fileHunkHeaders, nil
}
// DiffCut parses full file git diff output and returns lines specified with the parameters.
// The purpose of this function is to get diff data with which code comments could be generated.
//
// Steps:
//  1. List the changed files with
//     `git diff --raw --merge-base -z --find-renames <targetRef> <sourceRef>`.
//  2. Find the entry that matches path. For renamed/copied entries the new
//     path must be given when params.LineStartNew is set and the old path
//     otherwise.
//  3. Reject anything that is not a blob (submodules, directories).
//  4. Extract the hunk: from a blob-to-blob diff for modified, renamed and
//     copied files; directly from the blob content for added, deleted and
//     type-changed files.
//
// Lines of the returned hunk are limited to 200 characters.
//
// It returns ErrRepositoryPathEmpty for an empty repoPath, an invalid
// argument error for unsupported paths and a not found error when path is
// not part of the diff.
//
//nolint:gocognit
func (g *Git) DiffCut(
	ctx context.Context,
	repoPath string,
	targetRef string,
	sourceRef string,
	path string,
	params parser.DiffCutParams,
) (parser.HunkHeader, parser.Hunk, error) {
	if repoPath == "" {
		return parser.HunkHeader{}, parser.Hunk{}, ErrRepositoryPathEmpty
	}

	// first fetch the list of the changed files

	pipeRead, pipeWrite := io.Pipe()
	go func() {
		var err error

		defer func() {
			// If running of the command below fails, make the pipe reader also fail with the same error.
			_ = pipeWrite.CloseWithError(err)
		}()

		cmd := command.New("diff",
			command.WithFlag("--raw"),
			command.WithFlag("--merge-base"),
			command.WithFlag("-z"),
			command.WithFlag("--find-renames"),
			command.WithArg(targetRef),
			command.WithArg(sourceRef))
		err = cmd.Run(ctx, command.WithDir(repoPath), command.WithStdout(pipeWrite))
	}()

	diffEntries, err := parser.DiffRaw(pipeRead)
	// The parser is done with the stream. Closing the read side guarantees
	// that the goroutine above cannot stay blocked in a write if the parser
	// returned before reaching EOF.
	_ = pipeRead.Close()
	if err != nil {
		return parser.HunkHeader{}, parser.Hunk{}, fmt.Errorf("failed to find the list of changed files: %w", err)
	}

	for _, entry := range diffEntries {
		if entry.Status == parser.DiffStatusRenamed || entry.Status == parser.DiffStatusCopied {
			// Entries with the status 'R' and 'C' output two paths: the old path and the new path.
			// Using the params.LineStartNew flag to match the path with the entry's old or new path.

			if entry.Path != path && entry.OldPath != path {
				continue
			}

			if params.LineStartNew && path == entry.OldPath {
				msg := "for renamed files provide the new file name if commenting the changed lines"
				return parser.HunkHeader{}, parser.Hunk{}, errors.InvalidArgument(msg)
			}

			if !params.LineStartNew && path == entry.Path {
				msg := "for renamed files provide the old file name if commenting the old lines"
				return parser.HunkHeader{}, parser.Hunk{}, errors.InvalidArgument(msg)
			}
		} else if entry.Path != path {
			// All other statuses output just one path: If the path doesn't match it, proceed with the next entry.
			continue
		}

		// The file mode of the side that is being commented decides whether
		// the path is a regular blob.
		rawFileMode := entry.OldFileMode
		if params.LineStartNew {
			rawFileMode = entry.NewFileMode
		}

		fileType, _, err := parseTreeNodeMode(rawFileMode)
		if err != nil {
			return parser.HunkHeader{}, parser.Hunk{},
				fmt.Errorf("failed to parse file mode %s for path %s: %w", rawFileMode, path, err)
		}

		switch fileType {
		default:
			return parser.HunkHeader{}, parser.Hunk{}, errors.Internal(nil, "unrecognized object type")
		case TreeNodeTypeCommit:
			msg := "code comment is not allowed on a submodule"
			return parser.HunkHeader{}, parser.Hunk{}, errors.InvalidArgument(msg)
		case TreeNodeTypeTree:
			msg := "code comment is not allowed on a directory"
			return parser.HunkHeader{}, parser.Hunk{}, errors.InvalidArgument(msg)
		case TreeNodeTypeBlob:
			// a blob is what we want
		}

		var hunkHeader parser.HunkHeader
		var hunk parser.Hunk

		switch entry.Status {
		case parser.DiffStatusRenamed, parser.DiffStatusCopied, parser.DiffStatusModified:
			// For modified and renamed compare file blob SHAs directly.
			hunkHeader, hunk, err = g.diffCutFromHunk(ctx, repoPath, entry.OldBlobSHA, entry.NewBlobSHA, params)
		case parser.DiffStatusAdded, parser.DiffStatusDeleted, parser.DiffStatusType:
			// for added and deleted files read the file content directly
			if params.LineStartNew {
				hunkHeader, hunk, err = g.diffCutFromBlob(ctx, repoPath, true, entry.NewBlobSHA, params)
			} else {
				hunkHeader, hunk, err = g.diffCutFromBlob(ctx, repoPath, false, entry.OldBlobSHA, params)
			}
		default:
			return parser.HunkHeader{}, parser.Hunk{},
				fmt.Errorf("unrecognized git diff file status=%c for path=%s", entry.Status, path)
		}
		if err != nil {
			return parser.HunkHeader{}, parser.Hunk{},
				fmt.Errorf("failed to extract hunk for git diff file status=%c path=%s: %w", entry.Status, path, err)
		}

		// The returned diff hunk will be stored in the DB and will only be used for display, as a reference.
		// Therefore, we trim too long lines to protect the system and the DB.
		const maxLineLen = 200
		parser.LimitLineLen(&hunk.Lines, maxLineLen)

		return hunkHeader, hunk, nil
	}

	return parser.HunkHeader{}, parser.Hunk{}, errors.NotFound("path not found")
}
// diffCutFromHunk diffs the two blobs oldSHA and newSHA with
// `git diff --patch --full-index --no-color --unified=100000000` (i.e. with
// the whole file as context) and lets parser.DiffCut extract the lines
// described by params from the streamed output.
//
// Anything git writes to stderr takes precedence over a parsing error.
func (g *Git) diffCutFromHunk(
	ctx context.Context,
	repoPath string,
	oldSHA string,
	newSHA string,
	params parser.DiffCutParams,
) (parser.HunkHeader, parser.Hunk, error) {
	pipeRead, pipeWrite := io.Pipe()
	// Should the parser return before reaching EOF, closing the read side
	// makes the pending write fail so that the goroutine below does not
	// block forever.
	defer func() {
		_ = pipeRead.Close()
	}()

	stderr := bytes.NewBuffer(nil)

	go func() {
		var err error

		defer func() {
			// If running of the command below fails, make the pipe reader also fail with the same error.
			_ = pipeWrite.CloseWithError(err)
		}()

		cmd := command.New("diff",
			command.WithFlag("--patch"),
			command.WithFlag("--full-index"),
			command.WithFlag("--no-color"),
			command.WithFlag("--unified=100000000"),
			command.WithArg(oldSHA),
			command.WithArg(newSHA))
		err = cmd.Run(ctx,
			command.WithDir(repoPath),
			command.WithStdout(pipeWrite),
			command.WithStderr(stderr))
	}()

	diffCutHeader, linesHunk, err := parser.DiffCut(pipeRead, params)
	if errStderr := parseDiffStderr(stderr); errStderr != nil {
		// First check if there's something in the stderr buffer, if yes that's the error
		return parser.HunkHeader{}, parser.Hunk{}, errStderr
	}
	if err != nil {
		// Next check if reading the git diff output caused an error
		return parser.HunkHeader{}, parser.Hunk{}, err
	}

	return diffCutHeader, linesHunk, nil
}
// diffCutFromBlob reads the blob sha with `git cat-file -p` and lets
// parser.BlobCut extract the lines described by params from its content.
//
// The result is converted to hunk form: with asAdded set every line is
// prefixed with "+" and the line/span values fill the "new" side of the
// headers; otherwise every line is prefixed with "-" and the "old" side is
// filled.
func (g *Git) diffCutFromBlob(
	ctx context.Context,
	repoPath string,
	asAdded bool,
	sha string,
	params parser.DiffCutParams,
) (parser.HunkHeader, parser.Hunk, error) {
	pipeRead, pipeWrite := io.Pipe()
	// Should the parser return before reaching EOF, closing the read side
	// makes the pending write fail so that the goroutine below does not
	// block forever.
	defer func() {
		_ = pipeRead.Close()
	}()

	go func() {
		var err error

		defer func() {
			// If running of the command below fails, make the pipe reader also fail with the same error.
			_ = pipeWrite.CloseWithError(err)
		}()

		cmd := command.New("cat-file",
			command.WithFlag("-p"),
			command.WithArg(sha))
		err = cmd.Run(ctx,
			command.WithDir(repoPath),
			command.WithStdout(pipeWrite))
	}()

	cutHeader, cut, err := parser.BlobCut(pipeRead, params)
	if err != nil {
		// Reading or cutting the blob content failed.
		return parser.HunkHeader{}, parser.Hunk{}, err
	}

	// Convert parser.CutHeader to parser.HunkHeader and parser.Cut to parser.Hunk.

	var hunkHeader parser.HunkHeader
	var hunk parser.Hunk

	if asAdded {
		for i := range cut.Lines {
			cut.Lines[i] = "+" + cut.Lines[i]
		}

		hunkHeader = parser.HunkHeader{
			NewLine: cutHeader.Line,
			NewSpan: cutHeader.Span,
		}
		hunk = parser.Hunk{
			HunkHeader: parser.HunkHeader{NewLine: cut.Line, NewSpan: cut.Span},
			Lines:      cut.Lines,
		}
	} else {
		for i := range cut.Lines {
			cut.Lines[i] = "-" + cut.Lines[i]
		}

		hunkHeader = parser.HunkHeader{
			OldLine: cutHeader.Line,
			OldSpan: cutHeader.Span,
		}
		hunk = parser.Hunk{
			HunkHeader: parser.HunkHeader{OldLine: cut.Line, OldSpan: cut.Span},
			Lines:      cut.Lines,
		}
	}

	return hunkHeader, hunk, nil
}
// DiffFileName returns the paths of the files that differ between baseRef and
// headRef, as printed by `git diff --name-only` (one entry per output line).
// With mergeBase set the --merge-base flag is added to the command.
//
// It returns ErrRepositoryPathEmpty when repoPath is empty, like the other
// diff methods of Git.
func (g *Git) DiffFileName(ctx context.Context,
	repoPath string,
	baseRef string,
	headRef string,
	mergeBase bool,
) ([]string, error) {
	if repoPath == "" {
		return nil, ErrRepositoryPathEmpty
	}

	cmd := command.New("diff", command.WithFlag("--name-only"))
	if mergeBase {
		cmd.Add(command.WithFlag("--merge-base"))
	}
	cmd.Add(command.WithArg(baseRef, headRef))

	stdout := &bytes.Buffer{}
	err := cmd.Run(ctx,
		command.WithDir(repoPath),
		command.WithStdout(stdout),
	)
	if err != nil {
		return nil, processGitErrorf(err, "failed to trigger diff command")
	}

	return parseLinesToSlice(stdout.Bytes()), nil
}
// GetDiffShortStat counts number of changed files, number of additions and deletions.
//
// It runs `git diff --shortstat <args...>` in repoPath and parses the single
// summary line git prints, for example:
//
//	$ git diff --shortstat 1ebb35b98889ff77299f24d82da426b434b0cca0...788b8b1440462d477f45b0088875
//	 9902 files changed, 2034198 insertions(+), 298800 deletions(-)
//
// The error of the git command is returned unprocessed.
func GetDiffShortStat(
	ctx context.Context,
	repoPath string,
	args ...string,
) (DiffShortStat, error) {
	var output bytes.Buffer

	shortstatCmd := command.New("diff",
		command.WithFlag("--shortstat"),
		command.WithArg(args...),
	)

	runErr := shortstatCmd.Run(ctx,
		command.WithDir(repoPath),
		command.WithStdout(&output),
	)
	if runErr != nil {
		return DiffShortStat{}, runErr
	}

	return parseDiffStat(output.String())
}
// shortStatFormat matches the summary line of `git diff --shortstat`.
// Capture groups: 1 = files changed, 2 = insertions (optional),
// 3 = deletions (optional).
var shortStatFormat = regexp.MustCompile(
	`\s*(\d+) files? changed(?:, (\d+) insertions?\(\+\))?(?:, (\d+) deletions?\(-\))?`)

// parseDiffStat converts the output of `git diff --shortstat` into a
// DiffShortStat.
//
// Empty output (or a lone newline) means "no changes" and yields a zero
// value. Missing insertion or deletion parts are reported as 0. An error is
// returned when the output does not match shortStatFormat or one of the
// numbers cannot be converted to an int.
func parseDiffStat(stdout string) (stat DiffShortStat, err error) {
	if stdout == "" || stdout == "\n" {
		return DiffShortStat{}, nil
	}

	match := shortStatFormat.FindStringSubmatch(stdout)
	if len(match) != 4 {
		return DiffShortStat{}, fmt.Errorf("unable to parse shortstat: %s groups: %s", stdout, match)
	}

	// The number of files is always present in a matching line.
	files, err := strconv.Atoi(match[1])
	if err != nil {
		return DiffShortStat{}, fmt.Errorf("unable to parse shortstat: %s. Error parsing NumFiles %w",
			stdout, err)
	}

	// Insertions and deletions are optional; an unmatched group is empty.
	var additions, deletions int

	if raw := match[2]; raw != "" {
		if additions, err = strconv.Atoi(raw); err != nil {
			return DiffShortStat{}, fmt.Errorf("unable to parse shortstat: %s. Error parsing NumAdditions %w",
				stdout, err)
		}
	}

	if raw := match[3]; raw != "" {
		if deletions, err = strconv.Atoi(raw); err != nil {
			return DiffShortStat{}, fmt.Errorf("unable to parse shortstat: %s. Error parsing NumDeletions %w",
				stdout, err)
		}
	}

	return DiffShortStat{Files: files, Additions: additions, Deletions: deletions}, nil
}
func parseDiffStderr(stderr *bytes.Buffer) error {
errRaw := stderr.String() // assume there will never be a lot of output to stdout
if len(errRaw) == 0 {
return nil
}
if idx := strings.IndexByte(errRaw, '\n'); idx > 0 {
errRaw = errRaw[:idx] // get only the first line of the output
}
errRaw = strings.TrimPrefix(errRaw, "fatal: ") // git errors start with the "fatal: " prefix
if strings.Contains(errRaw, "bad revision") {
return parser.ErrSHADoesNotMatch
}
return errors.New(errRaw)
}