mirror of https://github.com/harness/drone.git
144 lines
3.7 KiB
Go
144 lines
3.7 KiB
Go
// Copyright 2023 Harness, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
|
|
package parser
|
|
|
|
import (
|
|
"bufio"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"unicode/utf8"
|
|
)
|
|
|
|
// ErrLineTooLong is a sentinel error carrying the message "line too long".
//
// NOTE(review): nothing in this file returns it; presumably it is used
// elsewhere in the package - confirm before relying on it.
var ErrLineTooLong = errors.New("line too long")
|
|
|
|
func newUTF8Scanner(inner Scanner, modifier func([]byte) []byte) *utf8Scanner {
|
|
return &utf8Scanner{
|
|
scanner: inner,
|
|
modifier: modifier,
|
|
}
|
|
}
|
|
|
|
// utf8Scanner is wrapping the provided scanner with UTF-8 checks and a modifier function.
|
|
type utf8Scanner struct {
|
|
nextLine []byte
|
|
nextErr error
|
|
|
|
modifier func([]byte) []byte
|
|
scanner Scanner
|
|
}
|
|
|
|
func (s *utf8Scanner) Scan() bool {
|
|
scanOut := s.scanner.Scan()
|
|
if !scanOut {
|
|
s.nextLine = nil
|
|
s.nextErr = s.scanner.Err()
|
|
|
|
// to stay consistent with diff parser, treat bufio.ErrTooLong as binary file
|
|
if errors.Is(s.nextErr, bufio.ErrTooLong) {
|
|
s.nextErr = ErrBinaryFile
|
|
}
|
|
|
|
return false
|
|
}
|
|
|
|
// finalize next bytes
|
|
original := s.scanner.Bytes()
|
|
|
|
// Git is using first 8000 chars, but for now we stay consistent with diff parser
|
|
// https://git.kernel.org/pub/scm/git/git.git/tree/xdiff-interface.c?h=v2.30.0#n187
|
|
if !utf8.Valid(original) {
|
|
s.nextLine = nil
|
|
s.nextErr = ErrBinaryFile
|
|
|
|
return false
|
|
}
|
|
|
|
// copy bytes to ensure nothing happens during modification
|
|
cpy := make([]byte, len(original))
|
|
copy(cpy, original)
|
|
if s.modifier != nil {
|
|
cpy = s.modifier(cpy)
|
|
}
|
|
|
|
s.nextLine = cpy
|
|
s.nextErr = nil
|
|
|
|
return true
|
|
}
|
|
|
|
func (s *utf8Scanner) Err() error {
|
|
return s.nextErr
|
|
}
|
|
|
|
func (s *utf8Scanner) Bytes() []byte {
|
|
return s.nextLine
|
|
}
|
|
|
|
func (s *utf8Scanner) Text() string {
|
|
return string(s.nextLine)
|
|
}
|
|
|
|
// ReadTextFile returns a Scanner that reads the provided text file line by line.
|
|
//
|
|
// The returned Scanner fulfills the following:
|
|
// - If any line is larger than 64kb, the scanning fails with ErrBinaryFile
|
|
// - If the reader returns invalid UTF-8, the scanning fails with ErrBinaryFile
|
|
// - Line endings are returned as-is, unless overwriteLE is provided
|
|
func ReadTextFile(r io.Reader, overwriteLE *string) (Scanner, string, error) {
|
|
scanner := NewScannerWithPeek(r, ScanLinesWithEOF)
|
|
peekOut := scanner.Peek()
|
|
if !peekOut && scanner.Err() != nil {
|
|
return nil, "", fmt.Errorf("unknown error while peeking first line: %w", scanner.Err())
|
|
}
|
|
|
|
// get raw bytes as we don't modify the slice
|
|
firstLine := scanner.Bytes()
|
|
|
|
// Heuristic - get line ending of file by first line, default to LF if there's no line endings in the file
|
|
lineEnding := "\n"
|
|
if HasLineEndingCRLF(firstLine) {
|
|
lineEnding = "\r\n"
|
|
}
|
|
|
|
return newUTF8Scanner(scanner, func(line []byte) []byte {
|
|
// overwrite line ending if requested (unless there's no line ending - e.g. last line)
|
|
if overwriteLE != nil {
|
|
if HasLineEndingCRLF(line) {
|
|
return append(line[:len(line)-2], []byte(*overwriteLE)...)
|
|
} else if HasLineEndingLF(line) {
|
|
return append(line[:len(line)-1], []byte(*overwriteLE)...)
|
|
}
|
|
}
|
|
|
|
return line
|
|
}), lineEnding, nil
|
|
}
|
|
|
|
func HasLineEnding(line []byte) bool {
|
|
// HasLineEndingLF is superset of HasLineEndingCRLF
|
|
return HasLineEndingLF(line)
|
|
}
|
|
|
|
// HasLineEndingLF reports whether line is non-empty and its last byte is '\n'.
// This is also true for lines terminated by CRLF.
func HasLineEndingLF(line []byte) bool {
	n := len(line)
	if n == 0 {
		return false
	}

	return line[n-1] == '\n'
}
|
|
|
|
// HasLineEndingCRLF reports whether line has at least two bytes and its last
// two bytes are "\r\n".
func HasLineEndingCRLF(line []byte) bool {
	n := len(line)
	if n < 2 {
		return false
	}

	return string(line[n-2:]) == "\r\n"
}
|