diff --git a/git/api/fs.go b/git/api/fs.go
new file mode 100644
index 000000000..31dc44147
--- /dev/null
+++ b/git/api/fs.go
@@ -0,0 +1,256 @@
+// Copyright 2023 Harness, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package api
+
+import (
+	"context"
+	"fmt"
+	"io"
+	"io/fs"
+	"time"
+
+	"github.com/harness/gitness/errors"
+	"github.com/harness/gitness/git/command"
+	"github.com/harness/gitness/git/sha"
+
+	"github.com/bmatcuk/doublestar/v4"
+)
+
+// FS represents a git filesystem: a read-only view of the tree of revision rev
+// in the repository directory dir, usable through the standard io/fs interfaces.
+type FS struct {
+	ctx context.Context
+	rev string
+	dir string
+}
+
+// NewFS creates a new git filesystem for revision rev of the repository in dir.
+// The context is stored and used by every git call the filesystem makes.
+func NewFS(ctx context.Context, rev, dir string) *FS {
+	return &FS{
+		ctx: ctx,
+		rev: rev,
+		dir: dir,
+	}
+}
+
+// Open opens the blob at the given path for reading.
+// It is part of the fs.FS interface.
+//
+// Directories, symbolic links and submodules can't be opened. The content is
+// streamed from a "git cat-file" command running in the background, so the
+// caller must Close the returned file to stop the command and free its resources.
+func (f *FS) Open(path string) (fs.File, error) {
+	treeNode, err := GetTreeNode(f.ctx, f.dir, f.rev, path, true)
+	if err != nil {
+		return nil, err
+	}
+
+	if treeNode.IsDir() {
+		return nil, errors.InvalidArgument("can't open a directory")
+	}
+	if treeNode.IsLink() {
+		return nil, errors.InvalidArgument("can't open a link")
+	}
+	if treeNode.IsSubmodule() {
+		return nil, errors.InvalidArgument("can't open a submodule")
+	}
+
+	pipeRead, pipeWrite := io.Pipe()
+	ctx, cancelFn := context.WithCancel(f.ctx)
+	go func() {
+		var err error
+
+		defer func() {
+			// If running the command below fails, make the pipe reader fail with the same error.
+			_ = pipeWrite.CloseWithError(err)
+		}()
+
+		// Run with the file's own context (not f.ctx) so that Close can stop the command.
+		cmd := command.New("cat-file", command.WithFlag("-p"), command.WithArg(treeNode.SHA.String()))
+		err = cmd.Run(ctx, command.WithDir(f.dir), command.WithStdout(pipeWrite))
+	}()
+
+	pathFile := &file{
+		ctx:      ctx,
+		cancelFn: cancelFn,
+		path:     path,
+		blobSHA:  treeNode.SHA,
+		mode:     treeNode.Mode,
+		size:     treeNode.Size,
+		reader:   pipeRead,
+	}
+
+	return pathFile, nil
+}
+
+// ReadDir returns all entries for a directory.
+// It is part of the fs.ReadDirFS interface.
+func (f *FS) ReadDir(name string) ([]fs.DirEntry, error) {
+	treeNodes, err := ListTreeNodes(f.ctx, f.dir, f.rev, name, true)
+	if err != nil {
+		return nil, fmt.Errorf("failed to read git directory: %w", err)
+	}
+
+	result := make([]fs.DirEntry, len(treeNodes))
+	for i, treeNode := range treeNodes {
+		result[i] = entry{treeNode}
+	}
+
+	return result, nil
+}
+
+// Glob returns all file names that match the pattern.
+// It is part of the fs.GlobFS interface.
+func (f *FS) Glob(pattern string) ([]string, error) {
+	return doublestar.Glob(f, pattern)
+}
+
+// Stat returns entry file info for a file path.
+// It is part of the fs.StatFS interface.
+func (f *FS) Stat(name string) (fs.FileInfo, error) {
+	treeInfo, err := GetTreeNode(f.ctx, f.dir, f.rev, name, true)
+	if err != nil {
+		return nil, fmt.Errorf("failed to read git directory: %w", err)
+	}
+
+	return entry{*treeInfo}, nil
+}
+
+// file is an open git blob. It implements fs.File and serves as its own fs.FileInfo.
+type file struct {
+	ctx      context.Context
+	cancelFn context.CancelFunc
+
+	path    string
+	blobSHA sha.SHA
+	mode    TreeNodeMode
+	size    int64
+
+	// reader is the read end of the pipe that the git command writes the content to.
+	reader io.ReadCloser
+}
+
+// Stat returns the file itself, because it also implements fs.FileInfo.
+// It is part of the fs.File interface.
+func (f *file) Stat() (fs.FileInfo, error) {
+	return f, nil
+}
+
+// Read reads the next bytes of the blob content.
+// It is part of the fs.File interface.
+func (f *file) Read(bytes []byte) (int, error) {
+	return f.reader.Read(bytes)
+}
+
+// Close cancels the git command that produces the content and closes the read
+// end of the pipe, so that a write blocked on unread content fails instead of hanging.
+// It is part of the fs.File interface.
+func (f *file) Close() error {
+	f.cancelFn()
+	return f.reader.Close()
+}
+
+// Name returns the name of the file.
+// It is part of the fs.FileInfo interface.
+func (f *file) Name() string {
+	return f.path
+}
+
+// Size returns file size - the size of the git blob object.
+// It is part of the fs.FileInfo interface.
+func (f *file) Size() int64 {
+	return f.size
+}
+
+// Mode always returns 0 because a git blob is an ordinary file.
+// It is part of the fs.FileInfo interface.
+func (f *file) Mode() fs.FileMode {
+	return 0
+}
+
+// ModTime is unimplemented.
+// It is part of the fs.FileInfo interface.
+func (f *file) ModTime() time.Time {
+	// Git doesn't store file modification time directly.
+	// It's possible to find the last commit (and thus the commit time)
+	// that touched the file, but this is out of scope for this implementation.
+	return time.Time{}
+}
+
+// IsDir implementation for the file struct always returns false.
+// It is part of the fs.FileInfo interface.
+func (f *file) IsDir() bool {
+	return false
+}
+
+// Sys is unimplemented.
+// It is part of the fs.FileInfo interface.
+func (f *file) Sys() any {
+	return nil
+}
+
+// entry adapts a git TreeNode to the fs.DirEntry and fs.FileInfo interfaces.
+type entry struct {
+	TreeNode
+}
+
+// Name returns name of a git tree entry.
+// It is part of the fs.DirEntry interface.
+func (e entry) Name() string {
+	return e.TreeNode.Name
+}
+
+// IsDir returns if a git tree entry is a directory.
+// It is part of the fs.FileInfo and fs.DirEntry interfaces.
+func (e entry) IsDir() bool {
+	return e.TreeNode.IsDir()
+}
+
+// Type returns the type of git tree entry.
+// It is part of the fs.DirEntry interface.
+func (e entry) Type() fs.FileMode {
+	if e.TreeNode.IsDir() {
+		return fs.ModeDir
+	}
+	return 0
+}
+
+// Info returns FileInfo for a git tree entry.
+// It is part of the fs.DirEntry interface.
+func (e entry) Info() (fs.FileInfo, error) {
+	return e, nil
+}
+
+// Size returns file size - the size of the git blob object.
+// It is part of the fs.FileInfo interface.
+func (e entry) Size() int64 {
+	return e.TreeNode.Size
+}
+
+// Mode returns the same bits as Type: fs.ModeDir for a directory, 0 otherwise.
+// This keeps Mode().IsDir() consistent with IsDir, as fs.FileInfo documents.
+// It is part of the fs.FileInfo interface.
+func (e entry) Mode() fs.FileMode { return e.Type() }
+
+// ModTime is unimplemented. Git doesn't store file modification time directly.
+// It's possible to find the last commit (and thus the commit time)
+// that touched the file, but this is out of scope for this implementation.
+// It is part of the fs.FileInfo interface.
+func (e entry) ModTime() time.Time { return time.Time{} }
+
+// Sys is unimplemented.
+// It is part of the fs.FileInfo interface.
+func (e entry) Sys() any { return nil }
diff --git a/git/api/match_files.go b/git/api/match_files.go
index f1e2c3bf7..dfef27b9b 100644
--- a/git/api/match_files.go
+++ b/git/api/match_files.go
@@ -35,7 +35,7 @@ func (g *Git) MatchFiles(
 	pattern string,
 	maxSize int,
 ) ([]FileContent, error) {
-	nodes, err := lsDirectory(ctx, repoPath, rev, treePath)
+	nodes, err := lsDirectory(ctx, repoPath, rev, treePath, false)
 	if err != nil {
 		return nil, fmt.Errorf("failed to list files in match files: %w", err)
 	}
diff --git a/git/api/tree.go b/git/api/tree.go
index 8ca5adfbc..c21a51178 100644
--- a/git/api/tree.go
+++ b/git/api/tree.go
@@ -44,6 +44,7 @@ type TreeNode struct {
 	SHA      sha.SHA
 	Name     string
 	Path     string
+	Size     int64
 }
 
 func (n *TreeNode) IsExecutable() bool {
@@ -58,6 +59,10 @@ func (n *TreeNode) IsLink() bool {
 	return n.Mode == TreeNodeModeSymlink
 }
 
+func (n *TreeNode) IsSubmodule() bool {
+	return n.Mode == TreeNodeModeCommit
+}
+
 // TreeNodeType specifies the different types of nodes in a git tree.
 // IMPORTANT: has to be consistent with rpc.TreeNodeType (proto).
 type TreeNodeType int
@@ -122,13 +127,14 @@ func parseTreeNodeMode(s string) (TreeNodeType, TreeNodeMode, error) {
 // regexpLsTreeColumns is a regular expression that is used to parse a single line
 // of a "git ls-tree" output (which uses the NULL character as the line break).
 // The single line mode must be used because output might contain the EOL and other control characters.
-var regexpLsTreeColumns = regexp.MustCompile(`(?s)^(\d{6})\s+(\w+)\s+(\w+)\t(.+)`)
+var regexpLsTreeColumns = regexp.MustCompile(`(?s)^(\d{6})\s+(\w+)\s+(\w+)(?: +(\d+|-))?\t(.+)`)
 
 func lsTree(
 	ctx context.Context,
 	repoPath string,
 	rev string,
 	treePath string,
+	fetchSizes bool,
 ) ([]TreeNode, error) {
 	if repoPath == "" {
 		return nil, ErrRepositoryPathEmpty
@@ -138,6 +144,10 @@ func lsTree(
 		command.WithArg(rev),
 		command.WithArg(treePath),
 	)
+	if fetchSizes {
+		cmd.Add(command.WithFlag("-l"))
+	}
+
 	output := &bytes.Buffer{}
 	err := cmd.Run(ctx,
 		command.WithDir(repoPath),
@@ -183,7 +193,19 @@ func lsTree(
 		}
 
 		nodeSha := sha.Must(columns[3])
-		nodePath := columns[4]
+
+		var size int64
+		if columns[4] != "" && columns[4] != "-" {
+			size, err = strconv.ParseInt(columns[4], 10, 64)
+			if err != nil {
+				log.Ctx(ctx).Error().
+					Str("line", line).
+					Msg("failed to parse file size")
+				return nil, fmt.Errorf("failed to parse file size in the git directory listing %q: %w", line, err)
+			}
+		}
+
+		nodePath := columns[5]
 		nodeName := path.Base(nodePath)
 
 		list = append(list, TreeNode{
@@ -192,6 +214,7 @@ func lsTree(
 			SHA:      nodeSha,
 			Name:     nodeName,
 			Path:     nodePath,
+			Size:     size,
 		})
 	}
 
@@ -204,6 +227,7 @@ func lsDirectory(
 	repoPath string,
 	rev string,
 	treePath string,
+	fetchSizes bool,
 ) ([]TreeNode, error) {
 	treePath = path.Clean(treePath)
 	if treePath == "" {
@@ -212,7 +236,7 @@ func lsDirectory(
 		treePath += "/"
 	}
 
-	return lsTree(ctx, repoPath, rev, treePath)
+	return lsTree(ctx, repoPath, rev, treePath, fetchSizes)
 }
 
 // lsFile returns one tree node entry.
@@ -221,10 +245,11 @@ func lsFile(
 	repoPath string,
 	rev string,
 	treePath string,
+	fetchSize bool,
 ) (TreeNode, error) {
 	treePath = cleanTreePath(treePath)
 
-	list, err := lsTree(ctx, repoPath, rev, treePath)
+	list, err := lsTree(ctx, repoPath, rev, treePath, fetchSize)
 	if err != nil {
 		return TreeNode{}, fmt.Errorf("failed to ls file: %w", err)
 	}
@@ -237,18 +262,18 @@ func lsFile(
 
 // GetTreeNode returns the tree node at the given path as found for the provided reference.
 func (g *Git) GetTreeNode(ctx context.Context, repoPath, rev, treePath string) (*TreeNode, error) {
-	return GetTreeNode(ctx, repoPath, rev, treePath)
+	return GetTreeNode(ctx, repoPath, rev, treePath, false)
 }
 
-// GetTreeNode returns the tree node at the given path as found for the provided reference.
+// GetTreeNode returns the tree node at treePath for rev; fetchSize is forwarded to lsFile for non-root paths.
-func GetTreeNode(ctx context.Context, repoPath, rev, treePath string) (*TreeNode, error) {
+func GetTreeNode(ctx context.Context, repoPath, rev, treePath string, fetchSize bool) (*TreeNode, error) {
 	if repoPath == "" {
 		return nil, ErrRepositoryPathEmpty
 	}
 
 	// anything that's not the root path is a simple call
 	if treePath != "" {
-		treeNode, err := lsFile(ctx, repoPath, rev, treePath)
+		treeNode, err := lsFile(ctx, repoPath, rev, treePath, fetchSize)
 		if err != nil {
 			return nil, fmt.Errorf("failed to get tree node: %w", err)
 		}
@@ -285,7 +310,12 @@ func GetTreeNode(ctx context.Context, repoPath, rev, treePath string) (*TreeNode
 
 // ListTreeNodes lists the child nodes of a tree reachable from ref via the specified path.
 func (g *Git) ListTreeNodes(ctx context.Context, repoPath, rev, treePath string) ([]TreeNode, error) {
-	list, err := lsDirectory(ctx, repoPath, rev, treePath)
+	return ListTreeNodes(ctx, repoPath, rev, treePath, false)
+}
+
+// ListTreeNodes lists the child nodes of the tree at treePath in rev; fetchSizes is forwarded to lsDirectory.
+func ListTreeNodes(ctx context.Context, repoPath, rev, treePath string, fetchSizes bool) ([]TreeNode, error) {
+	list, err := lsDirectory(ctx, repoPath, rev, treePath, fetchSizes)
 	if err != nil {
 		return nil, fmt.Errorf("failed to list tree nodes: %w", err)
 	}
diff --git a/git/pipeline.go b/git/pipeline.go
index 6e9aa457f..df34289ac 100644
--- a/git/pipeline.go
+++ b/git/pipeline.go
@@ -17,13 +17,10 @@ package git
 import (
 	"context"
 	"fmt"
-	"os"
 
 	"github.com/harness/gitness/git/api"
 
 	"github.com/drone/go-generate/builder"
-	"github.com/drone/go-generate/chroot"
-	"github.com/rs/zerolog/log"
 )
 
 type GeneratePipelineParams struct {
@@ -42,34 +39,23 @@ func (s *Service) GeneratePipeline(ctx context.Context,
 	}
 	repoPath := getFullPathForRepo(s.reposRoot, params.RepoUID)
 
-	tempDir, err := os.MkdirTemp(s.tmpDir, "*-"+params.RepoUID)
+	// Resolve HEAD once; the resulting SHA is the fixed revision the git filesystem reads from.
+	sha, err := s.git.ResolveRev(ctx, repoPath, "HEAD")
 	if err != nil {
-		return GeneratePipelinesOutput{}, fmt.Errorf("error creating temp dir for repo %s: %w", params.RepoUID, err)
-	}
-	defer func(path string) {
-		// when repo is successfully created remove temp dir
-		errRm := os.RemoveAll(path)
-		if errRm != nil {
-			log.Err(errRm).Msg("failed to cleanup temporary dir.")
-		}
-	}(tempDir)
-
-	// Clone repository to temp dir
-	if err = s.git.Clone(ctx, repoPath, tempDir, api.CloneRepoOptions{Depth: 1}); err != nil {
-		return GeneratePipelinesOutput{}, fmt.Errorf("failed to clone repo: %w", err)
+		return GeneratePipelinesOutput{}, fmt.Errorf("failed to resolve HEAD revision: %w", err)
 	}
 
-	// create a chroot virtual filesystem that we
-	// pass to the builder for isolation purposes.
-	chroot, err := chroot.New(tempDir)
-	if err != nil {
-		return GeneratePipelinesOutput{}, fmt.Errorf("failed to set the temp directory as active directory: %w", err)
-	}
+	// The git filesystem runs all of its git calls with ctxFS, so cancelling it
+	// on return signals any call that is still in flight to stop.
+	ctxFS, cancelFn := context.WithCancel(ctx)
+	defer cancelFn()
+
+	gitFS := api.NewFS(ctxFS, sha.String(), repoPath)
 
 	// builds the pipeline configuration based on
 	// the contents of the virtual filesystem.
 	builder := builder.New()
-	out, err := builder.Build(chroot)
+	out, err := builder.Build(gitFS)
 	if err != nil {
 		return GeneratePipelinesOutput{}, fmt.Errorf("failed to build pipeline: %w", err)
 	}
diff --git a/git/sha/sha.go b/git/sha/sha.go
index dc0c2f1db..746a4e989 100644
--- a/git/sha/sha.go
+++ b/git/sha/sha.go
@@ -91,7 +91,8 @@ func (s *SHA) UnmarshalJSON(content []byte) error {
 	if err != nil {
 		return err
 	}
-	sha, err := New(str)
+
+	sha, err := NewOrEmpty(str)
 	if err != nil {
 		return err
 	}
diff --git a/git/sha/sha_test.go b/git/sha/sha_test.go
index c90f5ff33..23564f8df 100644
--- a/git/sha/sha_test.go
+++ b/git/sha/sha_test.go
@@ -76,10 +76,10 @@ func TestSHA_UnmarshalJSON(t *testing.T) {
 			wantErr:  false,
 		},
 		{
-			name:     "empty content return error",
+			name:     "empty content returns empty",
 			input:    []byte("\"\""),
 			expected: SHA{},
-			wantErr:  true,
+			wantErr:  false,
 		},
 	}
 	for _, tt := range tests {
diff --git a/git/sharedrepo/sharedrepo.go b/git/sharedrepo/sharedrepo.go
index a13d4a58d..e634ca294 100644
--- a/git/sharedrepo/sharedrepo.go
+++ b/git/sharedrepo/sharedrepo.go
@@ -581,7 +581,7 @@ func (r *SharedRepo) getFileEntry(
 	objectSHA sha.SHA,
 	path string,
 ) (*api.TreeNode, error) {
-	entry, err := api.GetTreeNode(ctx, r.repoPath, treeishSHA.String(), path)
+	entry, err := api.GetTreeNode(ctx, r.repoPath, treeishSHA.String(), path, false)
 	if errors.IsNotFound(err) {
 		return nil, errors.NotFound("path %s not found", path)
 	}
@@ -615,7 +615,7 @@ func (r *SharedRepo) checkPathAvailability(
 	for index, part := range parts {
 		subTreePath = path.Join(subTreePath, part)
 
-		entry, err := api.GetTreeNode(ctx, r.repoPath, treeishSHA.String(), subTreePath)
+		entry, err := api.GetTreeNode(ctx, r.repoPath, treeishSHA.String(), subTreePath, false)
 		if err != nil {
 			if errors.IsNotFound(err) {
 				// Means there is no item with that name, so we're good