diff --git a/git/parser/diff_headers.go b/git/parser/diff_headers.go index 9bdb4a3f0..5db2deb77 100644 --- a/git/parser/diff_headers.go +++ b/git/parser/diff_headers.go @@ -43,13 +43,22 @@ func ParseDiffFileHeader(line string) (DiffFileHeader, bool) { // GetHunkHeaders parses git diff output and returns all diff headers for all files. // See for documentation: https://git-scm.com/docs/git-diff#generate_patch_text_with_p func GetHunkHeaders(r io.Reader) ([]*DiffFileHunkHeaders, error) { - scanner := bufio.NewScanner(r) + bufrd := bufio.NewReader(r) var currentFile *DiffFileHunkHeaders var result []*DiffFileHunkHeaders - for scanner.Scan() { - line := scanner.Text() + for { + // Consume the line but get only the first 4K of it... + // We're interested only in the hunk headers anyway, and they are never longer than this. + line, err := readLinePrefix(bufrd, 4096) + if err != nil && err != io.EOF { //nolint:errorlint + return nil, err + } + + if len(line) == 0 { + break + } if h, ok := ParseDiffFileHeader(line); ok { if currentFile != nil { @@ -79,13 +88,38 @@ func GetHunkHeaders(r io.Reader) ([]*DiffFileHunkHeaders, error) { } } - if err := scanner.Err(); err != nil { - return nil, err - } - if currentFile != nil { result = append(result, currentFile) } return result, nil } + +// readLinePrefix will consume the entire line from the reader, +// but will return only the first maxLen bytes from it - the rest is discarded. +// Returns io.EOF when the end of the input has been reached. +func readLinePrefix(br *bufio.Reader, maxLen int) (line string, err error) { + for { + var raw []byte + var isPrefix bool + + raw, isPrefix, err = br.ReadLine() + if err != nil && err != io.EOF { //nolint:errorlint + return "", err + } + + if needMore := maxLen - len(line); needMore > 0 { + if len(raw) > needMore { + line += string(raw[:needMore]) + } else { + line += string(raw) + } + } + + if !isPrefix || len(raw) == 0 { + break + } + } + + return line, err +} diff --git a/git/parser/diff_headers_test.go b/git/parser/diff_headers_test.go index f61bd58d7..f76b9c42f 100644 --- a/git/parser/diff_headers_test.go +++ b/git/parser/diff_headers_test.go @@ -15,6 +15,8 @@ package parser import ( + "bufio" + "io" "strings" "testing" @@ -106,3 +108,120 @@ index f043b93..0000000 t.Errorf(diff) } } + +func TestReadLinePrefix(t *testing.T) { + const maxLen = 256 + tests := []struct { + name string + wf func(w io.Writer) + expLens []int + }{ + { + name: "empty", + wf: func(io.Writer) {}, + expLens: nil, + }, + { + name: "single", + wf: func(w io.Writer) { + _, _ = w.Write([]byte("aaa")) + }, + expLens: []int{3}, + }, + { + name: "single-eol", + wf: func(w io.Writer) { + _, _ = w.Write([]byte("aaa\n")) + }, + expLens: []int{3}, + }, + { + name: "two-lines", + wf: func(w io.Writer) { + _, _ = w.Write([]byte("aa\nbb")) + }, + expLens: []int{2, 2}, + }, + { + name: "two-lines-crlf", + wf: func(w io.Writer) { + _, _ = w.Write([]byte("aa\r\nbb\r\n")) + }, + expLens: []int{2, 2}, + }, + { + name: "empty-line", + wf: func(w io.Writer) { + _, _ = w.Write([]byte("aa\n\ncc")) + }, + expLens: []int{2, 0, 2}, + }, + { + name: "too-long", + wf: func(w io.Writer) { + for i := 0; i < maxLen; i++ { + _, _ = w.Write([]byte("a")) + } + _, _ = w.Write([]byte("\n")) + for i := 0; i < maxLen*2; i++ { + _, _ = w.Write([]byte("b")) + } + _, _ = w.Write([]byte("\n")) + for i := 0; i < maxLen/2; i++ { + _, _ = w.Write([]byte("c")) + } + _, _ = w.Write([]byte("\n")) + }, + expLens: []int{maxLen, maxLen, maxLen / 2}, + }, + { + name: "overflow-buffer", + wf: func(w io.Writer) { + for i := 0; i < bufio.MaxScanTokenSize+1; i++ { + _, _ = w.Write([]byte("a")) + } + _, _ = w.Write([]byte("\n")) + for i := 0; i < bufio.MaxScanTokenSize*2; i++ { + _, _ = w.Write([]byte("b")) + } + _, _ = w.Write([]byte("\n")) + for i := 0; i < bufio.MaxScanTokenSize; i++ { + _, _ = w.Write([]byte("c")) + } + }, + expLens: []int{maxLen, maxLen, maxLen}, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + pr, pw := io.Pipe() + defer pr.Close() + + go func() { + test.wf(pw) + _ = pw.Close() + }() + + br := bufio.NewReader(pr) + + for i, expLen := range test.expLens { + expLine := strings.Repeat(string(rune('a'+i)), expLen) + line, err := readLinePrefix(br, maxLen) + if err != nil && err != io.EOF { //nolint:errorlint + t.Errorf("got error: %s", err.Error()) + return + } + if want, got := expLine, line; want != got { + t.Errorf("line %d mismatch want=%s got=%s", i, want, got) + return + } + } + + line, err := readLinePrefix(br, maxLen) + if line != "" || err != io.EOF { //nolint:errorlint + t.Errorf("expected empty line and EOF but got: line=%s err=%v", line, err) + } + }) + } +}