// Copyright 2023 Harness, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package api import ( "bufio" "bytes" "context" "fmt" "io" "math" "regexp" "strconv" "strings" "github.com/harness/gitness/errors" "github.com/harness/gitness/git/command" "github.com/harness/gitness/git/parser" "github.com/harness/gitness/git/sha" "github.com/harness/gitness/types" ) type FileDiffRequest struct { Path string `json:"path"` StartLine int `json:"start_line"` EndLine int `json:"-"` // warning: changes are possible and this field may not exist in the future } type FileDiffRequests []FileDiffRequest type DiffShortStat struct { Files int Additions int Deletions int } // modifyHeader needs to modify diff hunk header with the new start line // and end line with calculated span. // if diff hunk header is -100, 50 +100, 50 and startLine = 120, endLine=140 // then we need to modify header to -120,20 +120,20. // warning: changes are possible and param endLine may not exist in the future. func modifyHeader(hunk parser.HunkHeader, startLine, endLine int) []byte { oldStartLine := hunk.OldLine newStartLine := hunk.NewLine oldSpan := hunk.OldSpan newSpan := hunk.NewSpan oldEndLine := oldStartLine + oldSpan newEndLine := newStartLine + newSpan if startLine > 0 { if startLine < oldEndLine { oldStartLine = startLine } if startLine < newEndLine { newStartLine = startLine } } if endLine > 0 { if endLine < oldEndLine { oldSpan = endLine - startLine } else if oldEndLine > startLine { oldSpan = oldEndLine - startLine } if endLine < newEndLine { newSpan = endLine - startLine } else if newEndLine > startLine { newSpan = newEndLine - startLine } } return []byte(fmt.Sprintf("@@ -%d,%d +%d,%d @@", oldStartLine, oldSpan, newStartLine, newSpan)) } // cutLinesFromFullFileDiff reads from r and writes to w headers and between // startLine and endLine. if startLine and endLine is equal to 0 then it uses io.Copy // warning: changes are possible and param endLine may not exist in the future // //nolint:gocognit func cutLinesFromFullFileDiff(w io.Writer, r io.Reader, startLine, endLine int) error { if startLine < 0 { startLine = 0 } if endLine < 0 { endLine = 0 } if startLine == 0 && endLine > 0 { startLine = 1 } if endLine < startLine { endLine = 0 } // no need for processing lines just copy the data if startLine == 0 && endLine == 0 { _, err := io.Copy(w, r) return err } linePos := 0 start := false scanner := bufio.NewScanner(r) for scanner.Scan() { line := scanner.Bytes() if start { linePos++ } if endLine > 0 && linePos > endLine { break } if linePos > 0 && (startLine > 0 && linePos < startLine) { continue } if len(line) >= 2 && bytes.HasPrefix(line, []byte{'@', '@'}) { hunk, ok := parser.ParseDiffHunkHeader(string(line)) // TBD: maybe reader? if !ok { return fmt.Errorf("failed to extract lines from diff, range [%d,%d] : %w", startLine, endLine, ErrParseDiffHunkHeader) } line = modifyHeader(hunk, startLine, endLine) start = true } if _, err := w.Write(line); err != nil { return err } if _, err := w.Write([]byte{'\n'}); err != nil { return err } } return scanner.Err() } func (g *Git) RawDiff( ctx context.Context, w io.Writer, repoPath string, baseRef string, headRef string, mergeBase bool, alternates []string, files ...FileDiffRequest, ) error { if repoPath == "" { return ErrRepositoryPathEmpty } baseTag, err := g.GetAnnotatedTag(ctx, repoPath, baseRef) if err == nil { baseRef = baseTag.TargetSha.String() } headTag, err := g.GetAnnotatedTag(ctx, repoPath, headRef) if err == nil { headRef = headTag.TargetSha.String() } cmd := command.New("diff", command.WithFlag("-M"), command.WithFlag("--full-index"), command.WithAlternateObjectDirs(alternates...), ) if mergeBase { cmd.Add(command.WithFlag("--merge-base")) } perFileDiffRequired := false paths := make([]string, 0, len(files)) if len(files) > 0 { for _, file := range files { paths = append(paths, file.Path) if file.StartLine > 0 || file.EndLine > 0 { perFileDiffRequired = true } } } processed := 0 again: startLine := 0 endLine := 0 newCmd := cmd.Clone() if len(files) > 0 { startLine = files[processed].StartLine endLine = files[processed].EndLine } if perFileDiffRequired { if startLine > 0 || endLine > 0 { newCmd.Add(command.WithFlag("-U" + strconv.Itoa(math.MaxInt32))) } paths = []string{files[processed].Path} } newCmd.Add(command.WithArg(baseRef, headRef)) if len(paths) > 0 { newCmd.Add(command.WithPostSepArg(paths...)) } pipeRead, pipeWrite := io.Pipe() go func() { var err error defer func() { // If running of the command below fails, make the pipe reader also fail with the same error. _ = pipeWrite.CloseWithError(err) }() if err = newCmd.Run(ctx, command.WithDir(repoPath), command.WithStdout(pipeWrite), ); err != nil { err = processGitErrorf(err, "git diff failed between %q and %q", baseRef, headRef) } }() if err = cutLinesFromFullFileDiff(w, pipeRead, startLine, endLine); err != nil { return err } if perFileDiffRequired { processed++ if processed < len(files) { goto again } } return nil } // CommitDiff will stream diff for provided ref. func (g *Git) CommitDiff( ctx context.Context, repoPath string, rev string, w io.Writer, ) error { if repoPath == "" { return ErrRepositoryPathEmpty } if rev == "" { return errors.InvalidArgument("git revision cannot be empty") } cmd := command.New("show", command.WithFlag("--full-index"), command.WithFlag("--pretty=format:%b"), command.WithArg(rev), ) if err := cmd.Run(ctx, command.WithDir(repoPath), command.WithStdout(w), ); err != nil { return processGitErrorf(err, "commit diff error") } return nil } func (g *Git) DiffShortStat( ctx context.Context, repoPath string, baseRef string, headRef string, useMergeBase bool, ) (DiffShortStat, error) { if repoPath == "" { return DiffShortStat{}, ErrRepositoryPathEmpty } separator := ".." if useMergeBase { separator = "..." } shortstatArgs := []string{baseRef + separator + headRef} if len(baseRef) == 0 || baseRef == types.NilSHA { shortstatArgs = []string{sha.EmptyTree, headRef} } stat, err := GetDiffShortStat(ctx, repoPath, shortstatArgs...) if err != nil { return DiffShortStat{}, processGitErrorf(err, "failed to get diff short stat between %s and %s", baseRef, headRef) } return stat, nil } // GetDiffHunkHeaders for each file in diff output returns file name (old and new to detect renames), // and all hunk headers. The diffs are generated with unified=0 parameter to create minimum sized hunks. // Hunks' body is ignored. // The purpose of this function is to get data based on which code comments could be repositioned. func (g *Git) GetDiffHunkHeaders( ctx context.Context, repoPath string, targetRef string, sourceRef string, ) ([]*parser.DiffFileHunkHeaders, error) { if repoPath == "" { return nil, ErrRepositoryPathEmpty } pipeRead, pipeWrite := io.Pipe() stderr := &bytes.Buffer{} go func() { var err error defer func() { // If running of the command below fails, make the pipe reader also fail with the same error. _ = pipeWrite.CloseWithError(err) }() cmd := command.New("diff", command.WithFlag("--patch"), command.WithFlag("--no-color"), command.WithFlag("--unified=0"), command.WithArg(sourceRef), command.WithArg(targetRef), ) err = cmd.Run(ctx, command.WithDir(repoPath), command.WithStdout(pipeWrite), command.WithStderr(stderr), // We capture stderr output in a buffer. ) }() fileHunkHeaders, err := parser.GetHunkHeaders(pipeRead) // First check if there's something in the stderr buffer, if yes that's the error if errStderr := parseDiffStderr(stderr); errStderr != nil { return nil, errStderr } // Next check if reading the git diff output caused an error if err != nil { return nil, err } return fileHunkHeaders, nil } // DiffCut parses full file git diff output and returns lines specified with the parameters. // The purpose of this function is to get diff data with which code comments could be generated. // //nolint:gocognit func (g *Git) DiffCut( ctx context.Context, repoPath string, targetRef string, sourceRef string, path string, params parser.DiffCutParams, ) (parser.HunkHeader, parser.Hunk, error) { if repoPath == "" { return parser.HunkHeader{}, parser.Hunk{}, ErrRepositoryPathEmpty } // first fetch the list of the changed files pipeRead, pipeWrite := io.Pipe() go func() { var err error defer func() { // If running of the command below fails, make the pipe reader also fail with the same error. _ = pipeWrite.CloseWithError(err) }() cmd := command.New("diff", command.WithFlag("--raw"), command.WithFlag("--merge-base"), command.WithFlag("-z"), command.WithFlag("--find-renames"), command.WithArg(targetRef), command.WithArg(sourceRef)) err = cmd.Run(ctx, command.WithDir(repoPath), command.WithStdout(pipeWrite)) }() diffEntries, err := parser.DiffRaw(pipeRead) if err != nil { return parser.HunkHeader{}, parser.Hunk{}, fmt.Errorf("failed to find the list of changed files: %w", err) } var ( oldSHA, newSHA string filePath string ) for _, entry := range diffEntries { switch entry.Status { case parser.DiffStatusRenamed, parser.DiffStatusCopied: if entry.Path != path && entry.OldPath != path { continue } if params.LineStartNew && path == entry.OldPath { msg := "for renamed files provide the new file name if commenting the changed lines" return parser.HunkHeader{}, parser.Hunk{}, errors.InvalidArgument(msg) } if !params.LineStartNew && path == entry.Path { msg := "for renamed files provide the old file name if commenting the old lines" return parser.HunkHeader{}, parser.Hunk{}, errors.InvalidArgument(msg) } default: if entry.Path != path { continue } } switch entry.Status { case parser.DiffStatusRenamed, parser.DiffStatusCopied, parser.DiffStatusModified: // For modified and renamed compare file blobs directly. oldSHA = entry.OldBlobSHA newSHA = entry.NewBlobSHA case parser.DiffStatusAdded, parser.DiffStatusDeleted: // For added and deleted compare commits, but with the provided path. oldSHA = targetRef newSHA = sourceRef filePath = entry.Path } break } if newSHA == "" { return parser.HunkHeader{}, parser.Hunk{}, errors.NotFound("file %s not found in the diff", path) } // next pull the diff cut for the requested file pipeRead, pipeWrite = io.Pipe() stderr := bytes.NewBuffer(nil) go func() { var err error defer func() { // If running of the command below fails, make the pipe reader also fail with the same error. _ = pipeWrite.CloseWithError(err) }() cmd := command.New("diff", command.WithFlag("--patch"), command.WithFlag("--no-color"), command.WithFlag("--unified=100000000"), command.WithArg(oldSHA), command.WithArg(newSHA)) if filePath != "" { cmd.Add( command.WithFlag("--merge-base"), command.WithPostSepArg(filePath)) } err = cmd.Run(ctx, command.WithDir(repoPath), command.WithStdout(pipeWrite), command.WithStderr(stderr)) }() diffCutHeader, linesHunk, err := parser.DiffCut(pipeRead, params) if errStderr := parseDiffStderr(stderr); errStderr != nil { // First check if there's something in the stderr buffer, if yes that's the error return parser.HunkHeader{}, parser.Hunk{}, errStderr } if err != nil { // Next check if reading the git diff output caused an error return parser.HunkHeader{}, parser.Hunk{}, err } return diffCutHeader, linesHunk, nil } func (g *Git) DiffFileName(ctx context.Context, repoPath string, baseRef string, headRef string, mergeBase bool, ) ([]string, error) { cmd := command.New("diff", command.WithFlag("--name-only")) if mergeBase { cmd.Add(command.WithFlag("--merge-base")) } cmd.Add(command.WithArg(baseRef, headRef)) stdout := &bytes.Buffer{} err := cmd.Run(ctx, command.WithDir(repoPath), command.WithStdout(stdout), ) if err != nil { return nil, processGitErrorf(err, "failed to trigger diff command") } return parseLinesToSlice(stdout.Bytes()), nil } // GetDiffShortStat counts number of changed files, number of additions and deletions. func GetDiffShortStat( ctx context.Context, repoPath string, args ...string, ) (DiffShortStat, error) { // Now if we call: // $ git diff --shortstat 1ebb35b98889ff77299f24d82da426b434b0cca0...788b8b1440462d477f45b0088875 // we get: // " 9902 files changed, 2034198 insertions(+), 298800 deletions(-)\n" cmd := command.New("diff", command.WithFlag("--shortstat"), command.WithArg(args...), ) stdout := &bytes.Buffer{} if err := cmd.Run(ctx, command.WithDir(repoPath), command.WithStdout(stdout), ); err != nil { return DiffShortStat{}, err } return parseDiffStat(stdout.String()) } var shortStatFormat = regexp.MustCompile( `\s*(\d+) files? changed(?:, (\d+) insertions?\(\+\))?(?:, (\d+) deletions?\(-\))?`) func parseDiffStat(stdout string) (stat DiffShortStat, err error) { if len(stdout) == 0 || stdout == "\n" { return DiffShortStat{}, nil } groups := shortStatFormat.FindStringSubmatch(stdout) if len(groups) != 4 { return DiffShortStat{}, fmt.Errorf("unable to parse shortstat: %s groups: %s", stdout, groups) } stat.Files, err = strconv.Atoi(groups[1]) if err != nil { return DiffShortStat{}, fmt.Errorf("unable to parse shortstat: %s. Error parsing NumFiles %w", stdout, err) } if len(groups[2]) != 0 { stat.Additions, err = strconv.Atoi(groups[2]) if err != nil { return DiffShortStat{}, fmt.Errorf("unable to parse shortstat: %s. Error parsing NumAdditions %w", stdout, err) } } if len(groups[3]) != 0 { stat.Deletions, err = strconv.Atoi(groups[3]) if err != nil { return DiffShortStat{}, fmt.Errorf("unable to parse shortstat: %s. Error parsing NumDeletions %w", stdout, err) } } return stat, nil } func parseDiffStderr(stderr *bytes.Buffer) error { errRaw := stderr.String() // assume there will never be a lot of output to stdout if len(errRaw) == 0 { return nil } if idx := strings.IndexByte(errRaw, '\n'); idx > 0 { errRaw = errRaw[:idx] // get only the first line of the output } errRaw = strings.TrimPrefix(errRaw, "fatal: ") // git errors start with the "fatal: " prefix if strings.Contains(errRaw, "bad revision") { return parser.ErrSHADoesNotMatch } return errors.New(errRaw) }