mirror of https://github.com/harness/drone.git
Handle symlinks and submodules in diffcut (#1185)
parent
f879f74774
commit
ad5989dae4
164
git/api/diff.go
164
git/api/diff.go
|
@ -407,14 +407,13 @@ func (g *Git) DiffCut(
|
|||
return parser.HunkHeader{}, parser.Hunk{}, fmt.Errorf("failed to find the list of changed files: %w", err)
|
||||
}
|
||||
|
||||
var (
|
||||
oldSHA, newSHA string
|
||||
filePath string
|
||||
)
|
||||
var oldSHA, newSHA string
|
||||
|
||||
for _, entry := range diffEntries {
|
||||
switch entry.Status {
|
||||
case parser.DiffStatusRenamed, parser.DiffStatusCopied:
|
||||
if entry.Status == parser.DiffStatusRenamed || entry.Status == parser.DiffStatusCopied {
|
||||
// Entries with the status 'R' and 'C' output two paths: the old path and the new path.
|
||||
// Using the params.LineStartNew flag to match the path with the entry's old or new path.
|
||||
|
||||
if entry.Path != path && entry.OldPath != path {
|
||||
continue
|
||||
}
|
||||
|
@ -427,34 +426,79 @@ func (g *Git) DiffCut(
|
|||
msg := "for renamed files provide the old file name if commenting the old lines"
|
||||
return parser.HunkHeader{}, parser.Hunk{}, errors.InvalidArgument(msg)
|
||||
}
|
||||
default:
|
||||
if entry.Path != path {
|
||||
continue
|
||||
}
|
||||
} else if entry.Path != path {
|
||||
// All other statuses output just one path: If the path doesn't match it, proceed with the next entry.
|
||||
continue
|
||||
}
|
||||
|
||||
rawFileMode := entry.OldFileMode
|
||||
if params.LineStartNew {
|
||||
rawFileMode = entry.NewFileMode
|
||||
}
|
||||
|
||||
fileType, _, err := parseTreeNodeMode(rawFileMode)
|
||||
if err != nil {
|
||||
return parser.HunkHeader{}, parser.Hunk{},
|
||||
fmt.Errorf("failed to parse file mode %s for path %s: %w", rawFileMode, path, err)
|
||||
}
|
||||
|
||||
switch fileType {
|
||||
default:
|
||||
return parser.HunkHeader{}, parser.Hunk{}, errors.Internal(nil, "unrecognized object type")
|
||||
case TreeNodeTypeCommit:
|
||||
msg := "code comment is not allowed on a submodule"
|
||||
return parser.HunkHeader{}, parser.Hunk{}, errors.InvalidArgument(msg)
|
||||
case TreeNodeTypeTree:
|
||||
msg := "code comment is not allowed on a directory"
|
||||
return parser.HunkHeader{}, parser.Hunk{}, errors.InvalidArgument(msg)
|
||||
case TreeNodeTypeBlob:
|
||||
// a blob is what we want
|
||||
}
|
||||
|
||||
var hunkHeader parser.HunkHeader
|
||||
var hunk parser.Hunk
|
||||
|
||||
switch entry.Status {
|
||||
case parser.DiffStatusRenamed, parser.DiffStatusCopied, parser.DiffStatusModified:
|
||||
// For modified and renamed compare file blobs directly.
|
||||
// For modified and renamed compare file blob SHAs directly.
|
||||
oldSHA = entry.OldBlobSHA
|
||||
newSHA = entry.NewBlobSHA
|
||||
case parser.DiffStatusAdded, parser.DiffStatusDeleted:
|
||||
// For added and deleted compare commits, but with the provided path.
|
||||
oldSHA = targetRef
|
||||
newSHA = sourceRef
|
||||
filePath = entry.Path
|
||||
hunkHeader, hunk, err = g.diffCutFromHunk(ctx, repoPath, oldSHA, newSHA, params)
|
||||
case parser.DiffStatusAdded, parser.DiffStatusDeleted, parser.DiffStatusType:
|
||||
// for added and deleted files read the file content directly
|
||||
if params.LineStartNew {
|
||||
hunkHeader, hunk, err = g.diffCutFromBlob(ctx, repoPath, true, entry.NewBlobSHA, params)
|
||||
} else {
|
||||
hunkHeader, hunk, err = g.diffCutFromBlob(ctx, repoPath, false, entry.OldBlobSHA, params)
|
||||
}
|
||||
default:
|
||||
return parser.HunkHeader{}, parser.Hunk{},
|
||||
fmt.Errorf("unrecognized git diff file status=%c for path=%s", entry.Status, path)
|
||||
}
|
||||
if err != nil {
|
||||
return parser.HunkHeader{}, parser.Hunk{},
|
||||
fmt.Errorf("failed to extract hunk for git diff file status=%c path=%s: %w", entry.Status, path, err)
|
||||
}
|
||||
|
||||
break
|
||||
// The returned diff hunk will be stored in the DB and will only be used for display, as a reference.
|
||||
// Therefore, we trim too long lines to protect the system and the DB.
|
||||
const maxLineLen = 200
|
||||
parser.LimitLineLen(&hunk.Lines, maxLineLen)
|
||||
|
||||
return hunkHeader, hunk, nil
|
||||
}
|
||||
|
||||
if newSHA == "" {
|
||||
return parser.HunkHeader{}, parser.Hunk{}, errors.NotFound("file %s not found in the diff", path)
|
||||
}
|
||||
return parser.HunkHeader{}, parser.Hunk{}, errors.NotFound("path not found")
|
||||
}
|
||||
|
||||
// next pull the diff cut for the requested file
|
||||
|
||||
pipeRead, pipeWrite = io.Pipe()
|
||||
func (g *Git) diffCutFromHunk(
|
||||
ctx context.Context,
|
||||
repoPath string,
|
||||
oldSHA string,
|
||||
newSHA string,
|
||||
params parser.DiffCutParams,
|
||||
) (parser.HunkHeader, parser.Hunk, error) {
|
||||
pipeRead, pipeWrite := io.Pipe()
|
||||
stderr := bytes.NewBuffer(nil)
|
||||
go func() {
|
||||
var err error
|
||||
|
@ -470,11 +514,6 @@ func (g *Git) DiffCut(
|
|||
command.WithFlag("--unified=100000000"),
|
||||
command.WithArg(oldSHA),
|
||||
command.WithArg(newSHA))
|
||||
if filePath != "" {
|
||||
cmd.Add(
|
||||
command.WithFlag("--merge-base"),
|
||||
command.WithPostSepArg(filePath))
|
||||
}
|
||||
|
||||
err = cmd.Run(ctx,
|
||||
command.WithDir(repoPath),
|
||||
|
@ -495,6 +534,75 @@ func (g *Git) DiffCut(
|
|||
return diffCutHeader, linesHunk, nil
|
||||
}
|
||||
|
||||
func (g *Git) diffCutFromBlob(
|
||||
ctx context.Context,
|
||||
repoPath string,
|
||||
asAdded bool,
|
||||
sha string,
|
||||
params parser.DiffCutParams,
|
||||
) (parser.HunkHeader, parser.Hunk, error) {
|
||||
pipeRead, pipeWrite := io.Pipe()
|
||||
go func() {
|
||||
var err error
|
||||
|
||||
defer func() {
|
||||
// If running of the command below fails, make the pipe reader also fail with the same error.
|
||||
_ = pipeWrite.CloseWithError(err)
|
||||
}()
|
||||
|
||||
cmd := command.New("cat-file",
|
||||
command.WithFlag("-p"),
|
||||
command.WithArg(sha))
|
||||
|
||||
err = cmd.Run(ctx,
|
||||
command.WithDir(repoPath),
|
||||
command.WithStdout(pipeWrite))
|
||||
}()
|
||||
|
||||
cutHeader, cut, err := parser.BlobCut(pipeRead, params)
|
||||
if err != nil {
|
||||
// Next check if reading the git diff output caused an error
|
||||
return parser.HunkHeader{}, parser.Hunk{}, err
|
||||
}
|
||||
|
||||
// Convert parser.CutHeader to parser.HunkHeader and parser.Cut to parser.Hunk.
|
||||
|
||||
var hunkHeader parser.HunkHeader
|
||||
var hunk parser.Hunk
|
||||
|
||||
if asAdded {
|
||||
for i := range cut.Lines {
|
||||
cut.Lines[i] = "+ " + cut.Lines[i]
|
||||
}
|
||||
|
||||
hunkHeader = parser.HunkHeader{
|
||||
NewLine: cutHeader.Line,
|
||||
NewSpan: cutHeader.Span,
|
||||
}
|
||||
|
||||
hunk = parser.Hunk{
|
||||
HunkHeader: parser.HunkHeader{NewLine: cut.Line, NewSpan: cut.Span},
|
||||
Lines: cut.Lines,
|
||||
}
|
||||
} else {
|
||||
for i := range cut.Lines {
|
||||
cut.Lines[i] = "- " + cut.Lines[i]
|
||||
}
|
||||
|
||||
hunkHeader = parser.HunkHeader{
|
||||
OldLine: cutHeader.Line,
|
||||
OldSpan: cutHeader.Span,
|
||||
}
|
||||
|
||||
hunk = parser.Hunk{
|
||||
HunkHeader: parser.HunkHeader{OldLine: cut.Line, OldSpan: cut.Span},
|
||||
Lines: cut.Lines,
|
||||
}
|
||||
}
|
||||
|
||||
return hunkHeader, hunk, nil
|
||||
}
|
||||
|
||||
func (g *Git) DiffFileName(ctx context.Context,
|
||||
repoPath string,
|
||||
baseRef string,
|
||||
|
|
|
@ -17,7 +17,9 @@ package parser
|
|||
import (
|
||||
"bufio"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
type DiffFileHeader struct {
|
||||
|
@ -239,6 +241,97 @@ again:
|
|||
return
|
||||
}
|
||||
|
||||
// BlobCut parses raw file and returns lines specified with the parameter.
|
||||
func BlobCut(r io.Reader, params DiffCutParams) (CutHeader, Cut, error) {
|
||||
scanner := bufio.NewScanner(r)
|
||||
|
||||
var (
|
||||
err error
|
||||
lineNumber int
|
||||
inCut bool
|
||||
cutStart, cutSpan int
|
||||
cutLines []string
|
||||
)
|
||||
|
||||
extStart := params.LineStart - params.BeforeLines
|
||||
extEnd := params.LineEnd + params.AfterLines
|
||||
linesNeeded := params.LineEnd - params.LineStart + 1
|
||||
|
||||
for {
|
||||
if !scanner.Scan() {
|
||||
err = scanner.Err()
|
||||
break
|
||||
}
|
||||
|
||||
lineNumber++
|
||||
line := scanner.Text()
|
||||
|
||||
if !utf8.ValidString(line) {
|
||||
return CutHeader{}, Cut{}, ErrBinaryFile
|
||||
}
|
||||
|
||||
if lineNumber > extEnd {
|
||||
break // exceeded the requested line range
|
||||
}
|
||||
|
||||
if lineNumber < extStart {
|
||||
// not yet in the requested line range
|
||||
continue
|
||||
}
|
||||
|
||||
if !inCut {
|
||||
cutStart = lineNumber
|
||||
inCut = true
|
||||
}
|
||||
cutLines = append(cutLines, line)
|
||||
cutSpan++
|
||||
|
||||
if lineNumber >= params.LineStart && lineNumber <= params.LineEnd {
|
||||
linesNeeded--
|
||||
}
|
||||
|
||||
if len(cutLines) >= params.LineLimit {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if errors.Is(err, bufio.ErrTooLong) {
|
||||
// By default, the max token size is 65536 (bufio.MaxScanTokenSize).
|
||||
// If the file contains a line that is longer than this we treat it as a binary file.
|
||||
return CutHeader{}, Cut{}, ErrBinaryFile
|
||||
}
|
||||
|
||||
if err != nil && !errors.Is(err, io.EOF) {
|
||||
return CutHeader{}, Cut{}, fmt.Errorf("failed to parse blob cut: %w", err)
|
||||
}
|
||||
|
||||
if !inCut || linesNeeded > 0 {
|
||||
return CutHeader{}, Cut{}, ErrHunkNotFound
|
||||
}
|
||||
|
||||
// the cut header is hunk-like header (with Line and Span) that describes the requested lines exactly
|
||||
ch := CutHeader{Line: params.LineStart, Span: params.LineEnd - params.LineStart + 1}
|
||||
|
||||
// the cut includes the requested lines and few more lines specified with the BeforeLines and AfterLines.
|
||||
c := Cut{CutHeader: CutHeader{Line: cutStart, Span: cutSpan}, Lines: cutLines}
|
||||
|
||||
return ch, c, nil
|
||||
}
|
||||
|
||||
func LimitLineLen(lines *[]string, maxLen int) {
|
||||
outer:
|
||||
for idxLine, line := range *lines {
|
||||
var l int
|
||||
for idxRune := range line {
|
||||
l++
|
||||
if l > maxLen {
|
||||
(*lines)[idxLine] = line[:idxRune] + "…" // append the ellipsis to indicate that the line was trimmed.
|
||||
continue outer
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type strCircBuf struct {
|
||||
head int
|
||||
entries []string
|
||||
|
|
|
@ -237,6 +237,88 @@ index af7864ba..541cb64f 100644
|
|||
}
|
||||
}
|
||||
|
||||
func TestBlobCut(t *testing.T) {
|
||||
const input = `1
|
||||
2
|
||||
3
|
||||
4
|
||||
5
|
||||
6`
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
params DiffCutParams
|
||||
expCutHeader CutHeader
|
||||
expCut Cut
|
||||
expError error
|
||||
}{
|
||||
{
|
||||
name: "first 3 lines",
|
||||
params: DiffCutParams{LineStart: 1, LineEnd: 3, LineLimit: 40},
|
||||
expCutHeader: CutHeader{Line: 1, Span: 3},
|
||||
expCut: Cut{CutHeader: CutHeader{Line: 1, Span: 3}, Lines: []string{"1", "2", "3"}},
|
||||
},
|
||||
{
|
||||
name: "last 2 lines",
|
||||
params: DiffCutParams{LineStart: 5, LineEnd: 6, LineLimit: 40},
|
||||
expCutHeader: CutHeader{Line: 5, Span: 2},
|
||||
expCut: Cut{CutHeader: CutHeader{Line: 5, Span: 2}, Lines: []string{"5", "6"}},
|
||||
},
|
||||
{
|
||||
name: "first 3 lines with 1 more",
|
||||
params: DiffCutParams{LineStart: 1, LineEnd: 3, BeforeLines: 1, AfterLines: 1, LineLimit: 40},
|
||||
expCutHeader: CutHeader{Line: 1, Span: 3},
|
||||
expCut: Cut{CutHeader: CutHeader{Line: 1, Span: 4}, Lines: []string{"1", "2", "3", "4"}},
|
||||
},
|
||||
{
|
||||
name: "last 2 lines with 2 more",
|
||||
params: DiffCutParams{LineStart: 5, LineEnd: 6, BeforeLines: 2, AfterLines: 2, LineLimit: 40},
|
||||
expCutHeader: CutHeader{Line: 5, Span: 2},
|
||||
expCut: Cut{CutHeader: CutHeader{Line: 3, Span: 4}, Lines: []string{"3", "4", "5", "6"}},
|
||||
},
|
||||
{
|
||||
name: "mid range",
|
||||
params: DiffCutParams{LineStart: 2, LineEnd: 4, BeforeLines: 100, AfterLines: 100, LineLimit: 40},
|
||||
expCutHeader: CutHeader{Line: 2, Span: 3},
|
||||
expCut: Cut{CutHeader: CutHeader{Line: 1, Span: 6}, Lines: []string{"1", "2", "3", "4", "5", "6"}},
|
||||
},
|
||||
{
|
||||
name: "out of range 1",
|
||||
params: DiffCutParams{LineStart: 15, LineEnd: 17, LineLimit: 40},
|
||||
expError: ErrHunkNotFound,
|
||||
},
|
||||
{
|
||||
name: "out of range 2",
|
||||
params: DiffCutParams{LineStart: 5, LineEnd: 7, BeforeLines: 1, AfterLines: 1, LineLimit: 40},
|
||||
expError: ErrHunkNotFound,
|
||||
},
|
||||
{
|
||||
name: "limited",
|
||||
params: DiffCutParams{LineStart: 3, LineEnd: 4, BeforeLines: 1, AfterLines: 1, LineLimit: 3},
|
||||
expCutHeader: CutHeader{Line: 3, Span: 2},
|
||||
expCut: Cut{CutHeader: CutHeader{Line: 2, Span: 3}, Lines: []string{"2", "3", "4"}},
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
ch, c, err := BlobCut(strings.NewReader(input), test.params)
|
||||
if err != test.expError { //nolint:errorlint // it's a unit test and no errors are wrapped
|
||||
t.Errorf("test failed with error: %s", err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
if want, got := test.expCutHeader, ch; want != got {
|
||||
t.Errorf("cut header: want=%+v got: %+v", want, got)
|
||||
}
|
||||
|
||||
if want, got := test.expCut, c; !reflect.DeepEqual(want, got) {
|
||||
t.Errorf("cut: want=%+v got: %+v", want, got)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestStrCircBuf(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
|
|
|
@ -24,23 +24,29 @@ import (
|
|||
type DiffStatus byte
|
||||
|
||||
const (
|
||||
DiffStatusModified = 'M'
|
||||
DiffStatusAdded = 'A'
|
||||
DiffStatusDeleted = 'D'
|
||||
DiffStatusRenamed = 'R'
|
||||
DiffStatusCopied = 'C'
|
||||
DiffStatusType = 'T'
|
||||
DiffStatusModified DiffStatus = 'M'
|
||||
DiffStatusAdded DiffStatus = 'A'
|
||||
DiffStatusDeleted DiffStatus = 'D'
|
||||
DiffStatusRenamed DiffStatus = 'R'
|
||||
DiffStatusCopied DiffStatus = 'C'
|
||||
DiffStatusType DiffStatus = 'T'
|
||||
)
|
||||
|
||||
type DiffRawFile struct {
|
||||
OldBlobSHA string
|
||||
NewBlobSHA string
|
||||
Status byte
|
||||
OldPath string
|
||||
Path string
|
||||
func (s DiffStatus) String() string {
|
||||
return fmt.Sprintf("%c", s)
|
||||
}
|
||||
|
||||
var regexpDiffRaw = regexp.MustCompile(`:\d{6} \d{6} ([0-9a-f]+) ([0-9a-f]+) (\w)(\d*)`)
|
||||
type DiffRawFile struct {
|
||||
OldFileMode string
|
||||
NewFileMode string
|
||||
OldBlobSHA string
|
||||
NewBlobSHA string
|
||||
Status DiffStatus
|
||||
OldPath string
|
||||
Path string
|
||||
}
|
||||
|
||||
var regexpDiffRaw = regexp.MustCompile(`:(\d{6}) (\d{6}) ([0-9a-f]+) ([0-9a-f]+) (\w)(\d*)`)
|
||||
|
||||
// DiffRaw parses raw git diff output (git diff --raw). Each entry (a line) is a changed file.
|
||||
// The format is:
|
||||
|
@ -72,7 +78,7 @@ func DiffRaw(r io.Reader) ([]DiffRawFile, error) {
|
|||
|
||||
path = scan.Text()
|
||||
|
||||
status := groups[3][0]
|
||||
status := DiffStatus(groups[5][0])
|
||||
switch status {
|
||||
case DiffStatusRenamed, DiffStatusCopied:
|
||||
if !scan.Scan() {
|
||||
|
@ -86,11 +92,13 @@ func DiffRaw(r io.Reader) ([]DiffRawFile, error) {
|
|||
}
|
||||
|
||||
result = append(result, DiffRawFile{
|
||||
OldBlobSHA: groups[1],
|
||||
NewBlobSHA: groups[2],
|
||||
Status: status,
|
||||
OldPath: oldPath,
|
||||
Path: path,
|
||||
OldFileMode: groups[1],
|
||||
NewFileMode: groups[2],
|
||||
OldBlobSHA: groups[3],
|
||||
NewBlobSHA: groups[4],
|
||||
Status: status,
|
||||
OldPath: oldPath,
|
||||
Path: path,
|
||||
})
|
||||
}
|
||||
if err := scan.Err(); err != nil {
|
||||
|
|
|
@ -14,9 +14,10 @@
|
|||
|
||||
package parser
|
||||
|
||||
import "errors"
|
||||
import "github.com/harness/gitness/errors"
|
||||
|
||||
var (
|
||||
ErrSHADoesNotMatch = errors.New("sha does not match")
|
||||
ErrHunkNotFound = errors.New("hunk not found")
|
||||
ErrSHADoesNotMatch = errors.InvalidArgument("sha does not match")
|
||||
ErrHunkNotFound = errors.NotFound("hunk not found")
|
||||
ErrBinaryFile = errors.InvalidArgument("can't handle a binary file")
|
||||
)
|
||||
|
|
|
@ -33,6 +33,16 @@ type HunkHeader struct {
|
|||
Text string
|
||||
}
|
||||
|
||||
type Cut struct {
|
||||
CutHeader
|
||||
Lines []string
|
||||
}
|
||||
|
||||
type CutHeader struct {
|
||||
Line int
|
||||
Span int
|
||||
}
|
||||
|
||||
var regExpHunkHeader = regexp.MustCompile(`^@@ -([0-9]+)(,([0-9]+))? \+([0-9]+)(,([0-9]+))? @@( (.+))?$`)
|
||||
|
||||
func (h *HunkHeader) IsZero() bool {
|
||||
|
|
Loading…
Reference in New Issue