improve create code comment API (#1040)

pull/3484/head
Marko Gacesa 2024-02-14 11:10:52 +00:00 committed by Harness
parent fc9e77c91c
commit 2a17b5f243
5 changed files with 236 additions and 32 deletions

View File

@ -25,6 +25,7 @@ import (
"strings"
"github.com/harness/gitness/errors"
"github.com/harness/gitness/git/command"
"github.com/harness/gitness/git/parser"
"github.com/harness/gitness/git/types"
@ -365,6 +366,8 @@ func (a Adapter) GetDiffHunkHeaders(
// DiffCut parses full file git diff output and returns lines specified with the parameters.
// The purpose of this function is to get diff data with which code comments could be generated.
//
//nolint:gocognit
func (a Adapter) DiffCut(
ctx context.Context,
repoPath string,
@ -376,8 +379,10 @@ func (a Adapter) DiffCut(
if repoPath == "" {
return types.HunkHeader{}, types.Hunk{}, ErrRepositoryPathEmpty
}
// first fetch the list of the changed files
pipeRead, pipeWrite := io.Pipe()
stderr := &bytes.Buffer{}
go func() {
var err error
@ -386,25 +391,103 @@ func (a Adapter) DiffCut(
_ = pipeWrite.CloseWithError(err)
}()
cmd := git.NewCommand(ctx,
"diff", "--merge-base", "--patch", "--no-color", "--unified=100000000",
targetRef, sourceRef, "--", path)
err = cmd.Run(&git.RunOpts{
Dir: repoPath,
Stdout: pipeWrite,
Stderr: stderr, // We capture stderr output in a buffer.
})
cmd := command.New("diff",
command.WithFlag("--raw"),
command.WithFlag("--merge-base"),
command.WithFlag("-z"),
command.WithFlag("--find-renames"),
command.WithArg(targetRef),
command.WithArg(sourceRef))
err = cmd.Run(ctx, command.WithDir(repoPath), command.WithStdout(pipeWrite))
}()
diffEntries, err := parser.DiffRaw(pipeRead)
if err != nil {
return types.HunkHeader{}, types.Hunk{}, fmt.Errorf("failed to find the list of changed files: %w", err)
}
var (
oldSHA, newSHA string
filePath string
)
for _, entry := range diffEntries {
switch entry.Status {
case parser.DiffStatusRenamed, parser.DiffStatusCopied:
if entry.Path != path && entry.OldPath != path {
continue
}
if params.LineStartNew && path == entry.OldPath {
msg := "for renamed files provide the new file name if commenting the changed lines"
return types.HunkHeader{}, types.Hunk{}, errors.InvalidArgument(msg)
}
if !params.LineStartNew && path == entry.Path {
msg := "for renamed files provide the old file name if commenting the old lines"
return types.HunkHeader{}, types.Hunk{}, errors.InvalidArgument(msg)
}
default:
if entry.Path != path {
continue
}
}
switch entry.Status {
case parser.DiffStatusRenamed, parser.DiffStatusCopied, parser.DiffStatusModified:
// For modified and renamed compare file blobs directly.
oldSHA = entry.OldBlobSHA
newSHA = entry.NewBlobSHA
case parser.DiffStatusAdded, parser.DiffStatusDeleted:
// For added and deleted compare commits, but with the provided path.
oldSHA = targetRef
newSHA = sourceRef
filePath = entry.Path
}
break
}
if newSHA == "" {
return types.HunkHeader{}, types.Hunk{}, errors.NotFound("file %s not found in the diff", path)
}
// next pull the diff cut for the requested file
pipeRead, pipeWrite = io.Pipe()
stderr := bytes.NewBuffer(nil)
go func() {
var err error
defer func() {
// If running of the command below fails, make the pipe reader also fail with the same error.
_ = pipeWrite.CloseWithError(err)
}()
cmd := command.New("diff",
command.WithFlag("--patch"),
command.WithFlag("--no-color"),
command.WithFlag("--unified=100000000"),
command.WithArg(oldSHA),
command.WithArg(newSHA))
if filePath != "" {
cmd.Add(
command.WithFlag("--merge-base"),
command.WithPostSepArg(filePath))
}
err = cmd.Run(ctx,
command.WithDir(repoPath),
command.WithStdout(pipeWrite),
command.WithStderr(stderr))
}()
diffCutHeader, linesHunk, err := parser.DiffCut(pipeRead, params)
// First check if there's something in the stderr buffer, if yes that's the error
if errStderr := parseDiffStderr(stderr); errStderr != nil {
// First check if there's something in the stderr buffer, if yes that's the error
return types.HunkHeader{}, types.Hunk{}, errStderr
}
// Next check if reading the git diff output caused an error
if err != nil {
// Next check if reading the git diff output caused an error
return types.HunkHeader{}, types.Hunk{}, err
}

View File

@ -26,6 +26,7 @@ import (
"github.com/harness/gitness/errors"
"github.com/harness/gitness/git/command"
"github.com/harness/gitness/git/parser"
"github.com/harness/gitness/git/types"
"github.com/rs/zerolog/log"
@ -53,19 +54,6 @@ func parseTreeNodeMode(s string) (types.TreeNodeType, types.TreeNodeMode, error)
}
}
// scanZeroSeparated is a split function for bufio.Scanner that yields tokens
// terminated by a zero byte. The terminator is consumed but not included in the token.
// If the input ends without a trailing zero byte, the remaining data is returned as the last token.
func scanZeroSeparated(data []byte, atEOF bool) (advance int, token []byte, err error) {
	// Nothing buffered and nothing more to come: there is no token to emit.
	if len(data) == 0 && atEOF {
		return 0, nil, nil
	}

	sep := strings.IndexByte(string(data), 0)
	if sep < 0 {
		if !atEOF {
			// No terminator in the buffer yet: ask the scanner for more data.
			return 0, nil, nil
		}
		// Input ended without a terminator: emit whatever is left.
		return len(data), data, nil
	}

	// Emit everything before the zero byte and skip over the zero byte itself.
	return sep + 1, data[:sep], nil
}
// regexpLsTreeColumns is a regular expression that is used to parse a single line
// of a "git ls-tree" output (which uses the NULL character as the line break).
// The single line mode must be used because output might contain the EOL and other control characters.
@ -105,7 +93,7 @@ func lsTree(
list := make([]types.TreeNode, 0, n)
scan := bufio.NewScanner(output)
scan.Split(scanZeroSeparated)
scan.Split(parser.ScanZeroSeparated)
for scan.Scan() {
line := scan.Text()

101
git/parser/diff_raw.go Normal file
View File

@ -0,0 +1,101 @@
// Copyright 2023 Harness, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package parser
import (
"bufio"
"fmt"
"io"
"regexp"
)
// DiffStatus is a byte-sized type for the one-letter status of a raw git diff entry.
// NOTE(review): the constants below are untyped character constants, not DiffStatus values,
// and DiffRawFile.Status is declared as a plain byte - confirm whether this type is meant to be used.
type DiffStatus byte
// Status letters accepted by DiffRaw. Any other status letter makes DiffRaw return an error.
const (
// DiffStatusModified marks a modified file.
DiffStatusModified = 'M'
// DiffStatusAdded marks an added file.
DiffStatusAdded = 'A'
// DiffStatusDeleted marks a deleted file.
DiffStatusDeleted = 'D'
// DiffStatusRenamed marks a renamed file; the entry carries both the old and the new path.
DiffStatusRenamed = 'R'
// DiffStatusCopied marks a copied file; the entry carries both the old and the new path.
DiffStatusCopied = 'C'
// DiffStatusType presumably marks a change of the file type (git's "T" status) - confirm against git docs.
DiffStatusType = 'T'
)
// DiffRawFile is a single changed-file entry produced by DiffRaw from raw git diff output.
type DiffRawFile struct {
// OldBlobSHA is the first object hash of the entry (the old side of the change).
OldBlobSHA string
// NewBlobSHA is the second object hash of the entry (the new side of the change).
NewBlobSHA string
// Status is the one-letter status of the entry (one of the DiffStatus* constants).
Status byte
// OldPath is the previous path. DiffRaw sets it only for renamed and copied entries; otherwise it is empty.
OldPath string
// Path is the file path of the entry. For renamed and copied entries it is the new path.
Path string
}
// regexpDiffRaw matches the header token of a raw diff entry: a colon, two six-digit modes,
// two hexadecimal object hashes, a single status character and an optional run of digits
// (the similarity index that follows the status of renamed and copied entries).
// Capture groups: 1 - old hash, 2 - new hash, 3 - status character, 4 - the optional digits.
// The pattern is not anchored, so it matches anywhere inside the token.
var regexpDiffRaw = regexp.MustCompile(`:\d{6} \d{6} ([0-9a-f]+) ([0-9a-f]+) (\w)(\d*)`)
// DiffRaw parses NUL-separated raw git diff output (git diff --raw -z).
// Each entry describes one changed file. The format is:
//
//	:100644 100644 <old-hash> <new-hash> <status>NULL<file-name>NULL
//
// Old-hash and new-hash are the file object hashes. Status can be A added, D deleted, M modified,
// T type changed, R renamed, C copied.
// When the status is A then the old-hash is the zero hash, when the status is D the new-hash is the zero hash.
// If the status is R or C then the output is:
//
//	:100644 100644 <old-hash> <new-hash> R<similarity index>NULL<old-name>NULL<new-name>NULL
//
// Tokens that do not look like an entry header are skipped. An error is returned if the reader fails,
// if an entry has an unknown status, or if the input ends before all paths of an entry have been read
// (in which case the returned error wraps io.ErrUnexpectedEOF).
func DiffRaw(r io.Reader) ([]DiffRawFile, error) {
	var result []DiffRawFile

	scan := bufio.NewScanner(r)
	scan.Split(ScanZeroSeparated)

	// scanErr explains why the scanner stopped in the middle of an entry.
	// The scanner reports a clean end of input as a nil error, and wrapping nil with %w
	// would produce a malformed message, so a truncated input is reported explicitly.
	scanErr := func() error {
		if err := scan.Err(); err != nil {
			return err
		}
		return io.ErrUnexpectedEOF
	}

	for scan.Scan() {
		s := scan.Text()

		groups := regexpDiffRaw.FindStringSubmatch(s)
		if groups == nil {
			continue
		}

		var oldPath, path string

		// The token following the header is always a path.
		if !scan.Scan() {
			return nil, fmt.Errorf("failed to get path for the entry: %q; err=%w", s, scanErr())
		}
		path = scan.Text()

		// Group 3 always holds exactly one character, so indexing the first byte is safe.
		status := groups[3][0]
		switch status {
		case DiffStatusRenamed, DiffStatusCopied:
			// Renames and copies carry two paths: the one already read is the old path,
			// the next token is the new one.
			if !scan.Scan() {
				return nil, fmt.Errorf("failed to get new path for the entry: %q; err=%w", s, scanErr())
			}
			oldPath, path = path, scan.Text()
		case DiffStatusAdded, DiffStatusDeleted, DiffStatusModified, DiffStatusType:
		default:
			return nil, fmt.Errorf("got invalid raw diff status=%c for entry %s %s", status, s, path)
		}

		result = append(result, DiffRawFile{
			OldBlobSHA: groups[1],
			NewBlobSHA: groups[2],
			Status:     status,
			OldPath:    oldPath,
			Path:       path,
		})
	}
	if err := scan.Err(); err != nil {
		return nil, fmt.Errorf("failed to scan raw diff: %w", err)
	}

	return result, nil
}

32
git/parser/scanner.go Normal file
View File

@ -0,0 +1,32 @@
// Copyright 2023 Harness, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package parser
import (
"bytes"
)
// ScanZeroSeparated is a split function for bufio.Scanner that breaks the input
// into tokens terminated by a zero byte. The zero byte is consumed but is not part of the token.
// Data left at the end of the input without a terminating zero byte is returned as the final token.
func ScanZeroSeparated(data []byte, atEOF bool) (advance int, token []byte, err error) {
	// Input exhausted and nothing buffered: no token to report.
	if len(data) == 0 && atEOF {
		return 0, nil, nil
	}

	end := bytes.IndexByte(data, 0)
	switch {
	case end >= 0:
		// Found a terminator: emit the bytes before it and step over it.
		return end + 1, data[:end], nil
	case atEOF:
		// No terminator, but no more input either: emit the remainder.
		return len(data), data, nil
	default:
		// No terminator yet: request more data.
		return 0, nil, nil
	}
}

View File

@ -26,11 +26,11 @@ const (
)
var (
ErrAlreadyExists = errors.New("already exists")
ErrInvalidPath = errors.New("path is invalid")
ErrSHADoesNotMatch = errors.New("sha does not match")
ErrNoDefaultBranch = errors.New("no default branch")
ErrHunkNotFound = errors.New("hunk not found")
ErrAlreadyExists = errors.Conflict("already exists")
ErrInvalidPath = errors.NotFound("path is invalid")
ErrSHADoesNotMatch = errors.InvalidArgument("sha does not match")
ErrNoDefaultBranch = errors.NotFound("no default branch")
ErrHunkNotFound = errors.NotFound("hunk not found")
)
type NotFoundError struct {