added git FS; removed clone from generate pipeline (#1219)

ritik/code-1773
Marko Gacesa 2024-04-16 08:02:21 +00:00 committed by Harness
parent 671413ae64
commit c28a618dd4
7 changed files with 292 additions and 38 deletions

240
git/api/fs.go Normal file
View File

@ -0,0 +1,240 @@
// Copyright 2023 Harness, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package api
import (
"context"
"fmt"
"io"
"io/fs"
"time"
"github.com/harness/gitness/errors"
"github.com/harness/gitness/git/command"
"github.com/harness/gitness/git/sha"
"github.com/bmatcuk/doublestar/v4"
)
// FS exposes the content of a git repository, as seen at one revision,
// through the standard io/fs interfaces.
type FS struct {
	// ctx is passed to every git call made on behalf of this filesystem.
	// It is kept on the struct because the io/fs method signatures
	// have no context parameter.
	ctx context.Context
	// rev is the revision against which all paths are resolved.
	rev string
	// dir is the repository directory in which git commands are run.
	dir string
}

// NewFS returns a git filesystem that resolves paths against revision rev
// of the repository located in dir. The provided context is used
// for all git calls issued through the returned filesystem.
func NewFS(ctx context.Context, rev, dir string) *FS {
	gitFS := new(FS)
	gitFS.ctx = ctx
	gitFS.rev = rev
	gitFS.dir = dir

	return gitFS
}
// Open opens the blob found at the given path of the filesystem's revision.
// It is part of the fs.FS interface.
//
// Only blobs can be opened: directories, symbolic links and submodules are
// rejected with an invalid argument error. Errors from the tree node lookup
// are returned unchanged.
//
// The content is streamed from a "git cat-file" process through an in-memory
// pipe. The caller must Close the returned file: closing cancels the context
// the git process runs under.
func (f *FS) Open(path string) (fs.File, error) {
	treeNode, err := GetTreeNode(f.ctx, f.dir, f.rev, path, true)
	if err != nil {
		return nil, err
	}

	if treeNode.IsDir() {
		return nil, errors.InvalidArgument("can't open a directory")
	}
	if treeNode.IsLink() {
		return nil, errors.InvalidArgument("can't open a link")
	}
	if treeNode.IsSubmodule() {
		return nil, errors.InvalidArgument("can't open a submodule")
	}

	pipeRead, pipeWrite := io.Pipe()

	// The per-file context is cancelled by the file's Close method.
	ctx, cancelFn := context.WithCancel(f.ctx)

	go func() {
		var runErr error

		defer func() {
			// If the command below fails, make the pipe reader fail with the same error.
			// A nil error makes the reader return io.EOF once all data has been consumed.
			_ = pipeWrite.CloseWithError(runErr)
		}()

		cmd := command.New("cat-file", command.WithFlag("-p"), command.WithArg(treeNode.SHA.String()))

		// Run under the per-file context (not the filesystem-wide one),
		// so that closing the file also stops the git process.
		runErr = cmd.Run(ctx, command.WithDir(f.dir), command.WithStdout(pipeWrite))
	}()

	pathFile := &file{
		ctx:      ctx,
		cancelFn: cancelFn,
		path:     path,
		blobSHA:  treeNode.SHA,
		mode:     treeNode.Mode,
		size:     treeNode.Size,
		reader:   pipeRead,
	}

	return pathFile, nil
}
// ReadDir lists the tree nodes found directly under the directory name
// and returns them as directory entries. Blob sizes are requested
// as part of the listing.
// It is part of the fs.ReadDirFS interface.
func (f *FS) ReadDir(name string) ([]fs.DirEntry, error) {
	nodes, err := ListTreeNodes(f.ctx, f.dir, f.rev, name, true)
	if err != nil {
		return nil, fmt.Errorf("failed to read git directory: %w", err)
	}

	entries := make([]fs.DirEntry, 0, len(nodes))
	for idx := range nodes {
		entries = append(entries, entry{TreeNode: nodes[idx]})
	}

	return entries, nil
}
// Glob returns the names of all files matching the pattern.
// Matching is delegated to doublestar.Glob, which walks this filesystem.
// It is part of the fs.GlobFS interface.
func (f *FS) Glob(pattern string) ([]string, error) {
	matches, err := doublestar.Glob(f, pattern)
	return matches, err
}
// Stat returns the file info of the tree node found at the path name.
// The node size is requested as part of the lookup.
// It is part of the fs.StatFS interface.
func (f *FS) Stat(name string) (fs.FileInfo, error) {
	treeInfo, err := GetTreeNode(f.ctx, f.dir, f.rev, name, true)
	if err != nil {
		// The lookup error stays wrapped, so callers can still inspect its cause.
		return nil, fmt.Errorf("failed to stat git path: %w", err)
	}

	return entry{*treeInfo}, nil
}
// file is an opened git blob.
// It implements fs.File and serves as its own fs.FileInfo.
type file struct {
	// ctx is the per-file context; cancelFn cancels it.
	ctx      context.Context
	cancelFn context.CancelFunc

	// path is the path the file was opened with.
	path string
	// blobSHA, mode and size are copied from the git tree node of the blob.
	blobSHA sha.SHA
	mode    TreeNodeMode
	size    int64

	// reader provides the blob content.
	reader io.Reader
}

// Stat returns the file itself, because it also implements fs.FileInfo.
// It is part of the fs.File interface.
func (f *file) Stat() (fs.FileInfo, error) {
	return f, nil
}

// Read reads bytes from the file.
// It is part of the fs.File interface.
func (f *file) Read(bytes []byte) (int, error) {
	return f.reader.Read(bytes)
}

// Close closes the file: it cancels the file's context and, if the underlying
// reader can be closed, closes it too.
// It is part of the fs.File interface.
func (f *file) Close() error {
	f.cancelFn()

	// Closing the reader makes a producer that is still writing the content
	// fail instead of blocking forever when the file is closed before
	// it has been read to the end.
	if closer, ok := f.reader.(io.Closer); ok {
		return closer.Close()
	}

	return nil
}

// Name returns the path the file was opened with.
// It is part of the fs.FileInfo interface.
//
// NOTE(review): fs.FileInfo documents Name as the base name of the file, while
// this returns the whole path, which may contain directories.
// Confirm whether any caller relies on that before changing it.
func (f *file) Name() string {
	return f.path
}

// Size returns file size - the size of the git blob object.
// It is part of the fs.FileInfo interface.
func (f *file) Size() int64 {
	return f.size
}

// Mode always returns 0 because a git blob is an ordinary file.
// It is part of the fs.FileInfo interface.
func (f *file) Mode() fs.FileMode {
	return 0
}

// ModTime is unimplemented and always returns the zero time.
// It is part of the fs.FileInfo interface.
func (f *file) ModTime() time.Time {
	// Git doesn't store file modification time directly.
	// It's possible to find the last commit (and thus the commit time)
	// that touched the file, but this is out of scope for this implementation.
	return time.Time{}
}

// IsDir implementation for the file struct always returns false.
// It is part of the fs.FileInfo interface.
func (f *file) IsDir() bool {
	return false
}

// Sys is unimplemented and always returns nil.
// It is part of the fs.FileInfo interface.
func (f *file) Sys() any {
	return nil
}
// entry wraps a git tree node, so that it can be used
// both as fs.DirEntry and as fs.FileInfo.
type entry struct {
	TreeNode
}

// Name returns name of a git tree entry.
// It is part of the fs.FileInfo and fs.DirEntry interfaces.
func (e entry) Name() string {
	return e.TreeNode.Name
}

// IsDir returns if a git tree entry is a directory.
// It is part of the fs.FileInfo and fs.DirEntry interfaces.
func (e entry) IsDir() bool {
	return e.TreeNode.IsDir()
}

// Type returns the type of git tree entry:
// fs.ModeDir for a directory and 0 for everything else.
// It is part of the fs.DirEntry interface.
func (e entry) Type() fs.FileMode {
	if e.TreeNode.IsDir() {
		return fs.ModeDir
	}

	return 0
}

// Info returns FileInfo for a git tree entry, which is the entry itself.
// It is part of the fs.DirEntry interface.
func (e entry) Info() (fs.FileInfo, error) {
	return e, nil
}

// Size returns file size - the size of the git blob object.
// It is part of the fs.FileInfo interface.
func (e entry) Size() int64 {
	return e.TreeNode.Size
}

// Mode returns the same type bits as Type and no permission bits:
// fs.ModeDir for a directory and 0 for everything else.
// Keeping the two in sync guarantees that Mode().IsDir() agrees with IsDir()
// and that Info().Mode().Type() agrees with Type().
// It is part of the fs.FileInfo interface.
func (e entry) Mode() fs.FileMode {
	return e.Type()
}

// ModTime is unimplemented and always returns the zero time.
// Git doesn't store file modification time directly.
// It's possible to find the last commit (and thus the commit time)
// that touched the file, but this is out of scope for this implementation.
// It is part of the fs.FileInfo interface.
func (e entry) ModTime() time.Time { return time.Time{} }

// Sys is unimplemented and always returns nil.
// It is part of the fs.FileInfo interface.
func (e entry) Sys() any { return nil }

View File

@ -35,7 +35,7 @@ func (g *Git) MatchFiles(
pattern string,
maxSize int,
) ([]FileContent, error) {
nodes, err := lsDirectory(ctx, repoPath, rev, treePath)
nodes, err := lsDirectory(ctx, repoPath, rev, treePath, false)
if err != nil {
return nil, fmt.Errorf("failed to list files in match files: %w", err)
}

View File

@ -44,6 +44,7 @@ type TreeNode struct {
SHA sha.SHA
Name string
Path string
Size int64
}
func (n *TreeNode) IsExecutable() bool {
@ -58,6 +59,10 @@ func (n *TreeNode) IsLink() bool {
return n.Mode == TreeNodeModeSymlink
}
func (n *TreeNode) IsSubmodule() bool {
return n.Mode == TreeNodeModeCommit
}
// TreeNodeType specifies the different types of nodes in a git tree.
// IMPORTANT: has to be consistent with rpc.TreeNodeType (proto).
type TreeNodeType int
@ -122,13 +127,14 @@ func parseTreeNodeMode(s string) (TreeNodeType, TreeNodeMode, error) {
// regexpLsTreeColumns is a regular expression that is used to parse a single line
// of a "git ls-tree" output (which uses the NULL character as the line break).
// The single line mode must be used because output might contain the EOL and other control characters.
var regexpLsTreeColumns = regexp.MustCompile(`(?s)^(\d{6})\s+(\w+)\s+(\w+)\t(.+)`)
var regexpLsTreeColumns = regexp.MustCompile(`(?s)^(\d{6})\s+(\w+)\s+(\w+)(?:\s+(\d+|-))?\t(.+)`)
func lsTree(
ctx context.Context,
repoPath string,
rev string,
treePath string,
fetchSizes bool,
) ([]TreeNode, error) {
if repoPath == "" {
return nil, ErrRepositoryPathEmpty
@ -138,6 +144,10 @@ func lsTree(
command.WithArg(rev),
command.WithArg(treePath),
)
if fetchSizes {
cmd.Add(command.WithFlag("-l"))
}
output := &bytes.Buffer{}
err := cmd.Run(ctx,
command.WithDir(repoPath),
@ -183,7 +193,19 @@ func lsTree(
}
nodeSha := sha.Must(columns[3])
nodePath := columns[4]
var size int64
if columns[4] != "" && columns[4] != "-" {
size, err = strconv.ParseInt(columns[4], 10, 64)
if err != nil {
log.Ctx(ctx).Error().
Str("line", line).
Msg("failed to parse file size")
return nil, fmt.Errorf("failed to parse file size in the git directory listing: %q", line)
}
}
nodePath := columns[5]
nodeName := path.Base(nodePath)
list = append(list, TreeNode{
@ -192,6 +214,7 @@ func lsTree(
SHA: nodeSha,
Name: nodeName,
Path: nodePath,
Size: size,
})
}
@ -204,6 +227,7 @@ func lsDirectory(
repoPath string,
rev string,
treePath string,
fetchSizes bool,
) ([]TreeNode, error) {
treePath = path.Clean(treePath)
if treePath == "" {
@ -212,7 +236,7 @@ func lsDirectory(
treePath += "/"
}
return lsTree(ctx, repoPath, rev, treePath)
return lsTree(ctx, repoPath, rev, treePath, fetchSizes)
}
// lsFile returns one tree node entry.
@ -221,10 +245,11 @@ func lsFile(
repoPath string,
rev string,
treePath string,
fetchSize bool,
) (TreeNode, error) {
treePath = cleanTreePath(treePath)
list, err := lsTree(ctx, repoPath, rev, treePath)
list, err := lsTree(ctx, repoPath, rev, treePath, fetchSize)
if err != nil {
return TreeNode{}, fmt.Errorf("failed to ls file: %w", err)
}
@ -237,18 +262,18 @@ func lsFile(
// GetTreeNode returns the tree node at the given path as found for the provided reference.
func (g *Git) GetTreeNode(ctx context.Context, repoPath, rev, treePath string) (*TreeNode, error) {
return GetTreeNode(ctx, repoPath, rev, treePath)
return GetTreeNode(ctx, repoPath, rev, treePath, false)
}
// GetTreeNode returns the tree node at the given path as found for the provided reference.
func GetTreeNode(ctx context.Context, repoPath, rev, treePath string) (*TreeNode, error) {
func GetTreeNode(ctx context.Context, repoPath, rev, treePath string, fetchSize bool) (*TreeNode, error) {
if repoPath == "" {
return nil, ErrRepositoryPathEmpty
}
// anything that's not the root path is a simple call
if treePath != "" {
treeNode, err := lsFile(ctx, repoPath, rev, treePath)
treeNode, err := lsFile(ctx, repoPath, rev, treePath, fetchSize)
if err != nil {
return nil, fmt.Errorf("failed to get tree node: %w", err)
}
@ -285,7 +310,12 @@ func GetTreeNode(ctx context.Context, repoPath, rev, treePath string) (*TreeNode
// ListTreeNodes lists the child nodes of a tree reachable from ref via the specified path.
func (g *Git) ListTreeNodes(ctx context.Context, repoPath, rev, treePath string) ([]TreeNode, error) {
list, err := lsDirectory(ctx, repoPath, rev, treePath)
return ListTreeNodes(ctx, repoPath, rev, treePath, false)
}
// ListTreeNodes lists the child nodes of a tree reachable from ref via the specified path.
func ListTreeNodes(ctx context.Context, repoPath, rev, treePath string, fetchSizes bool) ([]TreeNode, error) {
list, err := lsDirectory(ctx, repoPath, rev, treePath, fetchSizes)
if err != nil {
return nil, fmt.Errorf("failed to list tree nodes: %w", err)
}

View File

@ -17,13 +17,10 @@ package git
import (
"context"
"fmt"
"os"
"github.com/harness/gitness/git/api"
"github.com/drone/go-generate/builder"
"github.com/drone/go-generate/chroot"
"github.com/rs/zerolog/log"
)
type GeneratePipelineParams struct {
@ -42,34 +39,20 @@ func (s *Service) GeneratePipeline(ctx context.Context,
}
repoPath := getFullPathForRepo(s.reposRoot, params.RepoUID)
tempDir, err := os.MkdirTemp(s.tmpDir, "*-"+params.RepoUID)
sha, err := s.git.ResolveRev(ctx, repoPath, "HEAD")
if err != nil {
return GeneratePipelinesOutput{}, fmt.Errorf("error creating temp dir for repo %s: %w", params.RepoUID, err)
}
defer func(path string) {
// when repo is successfully created remove temp dir
errRm := os.RemoveAll(path)
if errRm != nil {
log.Err(errRm).Msg("failed to cleanup temporary dir.")
}
}(tempDir)
// Clone repository to temp dir
if err = s.git.Clone(ctx, repoPath, tempDir, api.CloneRepoOptions{Depth: 1}); err != nil {
return GeneratePipelinesOutput{}, fmt.Errorf("failed to clone repo: %w", err)
return GeneratePipelinesOutput{}, fmt.Errorf("failed to resolve HEAD revision: %w", err)
}
// create a chroot virtual filesystem that we
// pass to the builder for isolation purposes.
chroot, err := chroot.New(tempDir)
if err != nil {
return GeneratePipelinesOutput{}, fmt.Errorf("failed to set the temp directory as active directory: %w", err)
}
ctxFS, cancelFn := context.WithCancel(ctx)
defer cancelFn()
gitFS := api.NewFS(ctxFS, sha.String(), repoPath)
// builds the pipeline configuration based on
// the contents of the virtual filesystem.
builder := builder.New()
out, err := builder.Build(chroot)
out, err := builder.Build(gitFS)
if err != nil {
return GeneratePipelinesOutput{}, fmt.Errorf("failed to build pipeline: %w", err)
}

View File

@ -91,7 +91,8 @@ func (s *SHA) UnmarshalJSON(content []byte) error {
if err != nil {
return err
}
sha, err := New(str)
sha, err := NewOrEmpty(str)
if err != nil {
return err
}

View File

@ -76,10 +76,10 @@ func TestSHA_UnmarshalJSON(t *testing.T) {
wantErr: false,
},
{
name: "empty content return error",
name: "empty content returns empty",
input: []byte("\"\""),
expected: SHA{},
wantErr: true,
wantErr: false,
},
}
for _, tt := range tests {

View File

@ -581,7 +581,7 @@ func (r *SharedRepo) getFileEntry(
objectSHA sha.SHA,
path string,
) (*api.TreeNode, error) {
entry, err := api.GetTreeNode(ctx, r.repoPath, treeishSHA.String(), path)
entry, err := api.GetTreeNode(ctx, r.repoPath, treeishSHA.String(), path, false)
if errors.IsNotFound(err) {
return nil, errors.NotFound("path %s not found", path)
}
@ -615,7 +615,7 @@ func (r *SharedRepo) checkPathAvailability(
for index, part := range parts {
subTreePath = path.Join(subTreePath, part)
entry, err := api.GetTreeNode(ctx, r.repoPath, treeishSHA.String(), subTreePath)
entry, err := api.GetTreeNode(ctx, r.repoPath, treeishSHA.String(), subTreePath, false)
if err != nil {
if errors.IsNotFound(err) {
// Means there is no item with that name, so we're good