// Copyright 2023 Harness, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package parser

import (
	"bufio"
	"errors"
	"fmt"
	"io"
	"unicode/utf8"
)

var (
	// ErrLineTooLong is the sentinel error for a line that is too long.
	// NOTE(review): it isn't referenced anywhere in this file - over-long lines reported by the
	// inner scanner as bufio.ErrTooLong are surfaced as ErrBinaryFile instead; confirm usage elsewhere.
	ErrLineTooLong = errors.New("line too long")
)

// newUTF8Scanner creates a utf8Scanner that reads lines from inner and, if modifier is non-nil,
// passes a copy of every valid line through modifier before exposing it.
func newUTF8Scanner(inner Scanner, modifier func([]byte) []byte) *utf8Scanner {
	wrapped := new(utf8Scanner)
	wrapped.scanner = inner
	wrapped.modifier = modifier

	return wrapped
}

// utf8Scanner is wrapping the provided scanner with UTF-8 checks and a modifier function.
//
// Fields:
//   - nextLine: the line made available by the latest successful Scan; nil after a failed Scan.
//   - nextErr: the error recorded by the latest Scan; nil after a successful Scan.
//   - modifier: optional hook that is applied to a copy of each valid line before it is stored.
//   - scanner: the wrapped scanner the raw lines are read from.
type utf8Scanner struct {
	nextLine []byte
	nextErr  error

	modifier func([]byte) []byte
	scanner  Scanner
}

// Scan advances the scanner to the next line and reports whether a line is available.
//
// On success the line (a private copy, passed through the optional modifier) is available
// via Bytes or Text and Err returns nil.
//
// On failure Bytes returns nil and Err returns:
//   - the Err() value of the inner scanner (bufio.ErrTooLong is mapped to ErrBinaryFile), or
//   - ErrBinaryFile if the line isn't valid UTF-8.
//
// Errors are terminal: once Scan failed with a non-nil error, every further call returns false
// and keeps reporting that error.
func (s *utf8Scanner) Scan() bool {
	// A recorded error is final. Without this guard, calling Scan again after an invalid
	// UTF-8 line would resume the inner scanner on the following line and reset Err() to nil,
	// silently hiding the fact that the file was detected as binary.
	if s.nextErr != nil {
		s.nextLine = nil

		return false
	}

	if !s.scanner.Scan() {
		s.nextLine = nil
		s.nextErr = s.scanner.Err()

		// to stay consistent with diff parser, treat bufio.ErrTooLong as binary file
		if errors.Is(s.nextErr, bufio.ErrTooLong) {
			s.nextErr = ErrBinaryFile
		}

		return false
	}

	// finalize next bytes
	original := s.scanner.Bytes()

	// Git is using first 8000 chars, but for now we stay consistent with diff parser
	// https://git.kernel.org/pub/scm/git/git.git/tree/xdiff-interface.c?h=v2.30.0#n187
	if !utf8.Valid(original) {
		s.nextLine = nil
		s.nextErr = ErrBinaryFile

		return false
	}

	// copy bytes so the modifier can't touch the buffer owned by the inner scanner
	cpy := make([]byte, len(original))
	copy(cpy, original)
	if s.modifier != nil {
		cpy = s.modifier(cpy)
	}

	s.nextLine = cpy
	s.nextErr = nil

	return true
}

// Err returns the error recorded by the latest call to Scan, or nil if none was recorded.
func (s *utf8Scanner) Err() error {
	return s.nextErr
}

// Bytes returns the line made available by the latest call to Scan.
// It returns nil if that call to Scan returned false.
func (s *utf8Scanner) Bytes() []byte {
	return s.nextLine
}

// Text returns the line made available by the latest call to Scan as a newly created string.
// It returns the empty string if that call to Scan returned false.
func (s *utf8Scanner) Text() string {
	return string(s.nextLine)
}

// ReadTextFile wraps the provided reader in a Scanner that yields the text file line by line.
//
// Next to the Scanner, the line ending of the file is returned. It is detected from the first
// line only: "\r\n" if the first line ends with CRLF, otherwise "\n" (which is also the default
// for files without any line ending).
//
// The returned Scanner fulfills the following:
//   - If any line is larger than 64kb, the scanning fails with ErrBinaryFile
//   - If the reader returns invalid UTF-8, the scanning fails with ErrBinaryFile
//   - Line endings are returned as-is, unless overwriteLE is provided - then the trailing
//     LF / CRLF of each line is replaced with *overwriteLE
//
// An error is returned if peeking the first line fails and the scanner reports an error.
func ReadTextFile(r io.Reader, overwriteLE *string) (Scanner, string, error) {
	peekable := NewScannerWithPeek(r, ScanLinesWithEOF)
	if !peekable.Peek() {
		if peekErr := peekable.Err(); peekErr != nil {
			return nil, "", fmt.Errorf("unknown error while peeking first line: %w", peekErr)
		}
	}

	// Heuristic - the first line decides the line ending of the whole file.
	// The raw bytes are only inspected (never modified), so no copy is needed.
	detectedLE := "\n"
	if HasLineEndingCRLF(peekable.Bytes()) {
		detectedLE = "\r\n"
	}

	// rewriteLE swaps the line ending of a line for the requested one. Lines without any
	// line ending (e.g. the last line of the file) are handed back untouched.
	rewriteLE := func(line []byte) []byte {
		if overwriteLE == nil {
			return line
		}

		switch {
		case HasLineEndingCRLF(line):
			return append(line[:len(line)-2], *overwriteLE...)
		case HasLineEndingLF(line):
			return append(line[:len(line)-1], *overwriteLE...)
		default:
			return line
		}
	}

	return newUTF8Scanner(peekable, rewriteLE), detectedLE, nil
}

// HasLineEnding reports whether the line ends with a line ending (LF or CRLF).
func HasLineEnding(line []byte) bool {
	// HasLineEndingLF is superset of HasLineEndingCRLF
	return HasLineEndingLF(line)
}

// HasLineEndingLF reports whether the last byte of the line is a line feed ('\n').
// This is also true for lines ending with CRLF. Empty lines have no line ending.
func HasLineEndingLF(line []byte) bool {
	n := len(line)
	if n == 0 {
		return false
	}

	return line[n-1] == '\n'
}

// HasLineEndingCRLF reports whether the line ends with a carriage return directly followed
// by a line feed ("\r\n"). Lines shorter than two bytes never match.
func HasLineEndingCRLF(line []byte) bool {
	n := len(line)
	if n < 2 {
		return false
	}

	return line[n-2] == '\r' && line[n-1] == '\n'
}