diff --git a/app/api/controller/githook/pre_receive_scan_secrets.go b/app/api/controller/githook/pre_receive_scan_secrets.go index 18fcdd1b3..74b3fe137 100644 --- a/app/api/controller/githook/pre_receive_scan_secrets.go +++ b/app/api/controller/githook/pre_receive_scan_secrets.go @@ -21,7 +21,6 @@ import ( "github.com/harness/gitness/app/services/settings" "github.com/harness/gitness/git" - "github.com/harness/gitness/git/api" "github.com/harness/gitness/git/hook" "github.com/harness/gitness/logging" "github.com/harness/gitness/types" @@ -31,7 +30,7 @@ import ( ) type secretFinding struct { - api.Finding + git.ScanSecretsFinding Ref string } @@ -136,8 +135,9 @@ func scanSecretsInternal(ctx context.Context, RepoUID: repo.GitUID, AlternateObjectDirs: in.Environment.AlternateObjectDirs, }, - BaseRev: baseRev, - Rev: rev, + BaseRev: baseRev, + Rev: rev, + GitleaksIgnorePath: git.DefaultGitleaksIgnorePath, }) if err != nil { return nil, fmt.Errorf("failed to detect secret leaks: %w", err) @@ -152,8 +152,8 @@ func scanSecretsInternal(ctx context.Context, for _, finding := range scanSecretsOut.Findings { findings = append(findings, secretFinding{ - Finding: finding, - Ref: refUpdate.Ref, + ScanSecretsFinding: finding, + Ref: refUpdate.Ref, }) } } diff --git a/app/api/controller/githook/print.go b/app/api/controller/githook/print.go index 5de720872..af6689fc4 100644 --- a/app/api/controller/githook/print.go +++ b/app/api/controller/githook/print.go @@ -70,9 +70,10 @@ func printScanSecretsFindings( output.Messages = append( output.Messages, fmt.Sprintf(" %s", headerTxt), - fmt.Sprintf(" Secret: %s", finding.Secret), - fmt.Sprintf(" Commit: %s", finding.Commit), - fmt.Sprintf(" Details: %s", finding.Description), + fmt.Sprintf(" Secret: %s", finding.Secret), + fmt.Sprintf(" Commit: %s", finding.Commit), + fmt.Sprintf(" Details: %s", finding.Description), + fmt.Sprintf(" Fingerprint: %s", finding.Fingerprint), "", // add empty line for making it visually more consumable ) } 
diff --git a/git/api/scan_secrets.go b/git/api/scan_secrets.go deleted file mode 100644 index 6106b7462..000000000 --- a/git/api/scan_secrets.go +++ /dev/null @@ -1,109 +0,0 @@ -// Copyright 2023 Harness, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package api - -import ( - "fmt" - - "github.com/zricethezav/gitleaks/v8/detect" - "github.com/zricethezav/gitleaks/v8/report" - "github.com/zricethezav/gitleaks/v8/sources" -) - -// Finding contains gitleaks report.finding information. -type Finding struct { - Description string `json:"description"` - StartLine int64 `json:"start_line"` - EndLine int64 `json:"end_line"` - StartColumn int64 `json:"start_column"` - EndColumn int64 `json:"end_column"` - Match string `json:"match"` - Secret string `json:"secret"` - File string `json:"file"` - SymlinkFile string `json:"symlink_file"` - Commit string `json:"commit"` - Entropy float64 `json:"entropy"` - Author string `json:"author"` - Email string `json:"email"` - Date string `json:"date"` - Message string `json:"message"` - Tags []string `json:"tags"` - RuleID string `json:"rule_id"` - // unique identifier - Fingerprint string `json:"fingerprint"` -} - -// ScanSecrets detects secret leakage using gitleaks on the provided revision, -// or on the diff with baseRev if provided (baseRev is optional). 
-func (g *Git) ScanSecrets( - repoPath string, - baseRev string, - rev string, -) ([]Finding, error) { - if repoPath == "" { - return nil, ErrRepositoryPathEmpty - } - - detector, err := detect.NewDetectorDefaultConfig() - if err != nil { - return nil, fmt.Errorf("failed to create a new gitleaks detector with default config: %w", err) - } - - var logOpts string - logOpts = fmt.Sprintf("--no-merges --first-parent %s", rev) - - if baseRev != "" { - logOpts = fmt.Sprintf("--no-merges --first-parent %s..%s", baseRev, rev) - } - - gitCmd, err := sources.NewGitLogCmd(repoPath, logOpts) - if err != nil { - return nil, fmt.Errorf("failed to create a new git log cmd with diff: %w", err) - } - - res, err := detector.DetectGit(gitCmd) - if err != nil { - return nil, fmt.Errorf("failed to detect git leaks on diff: %w", err) - } - - return mapFindings(res), nil -} - -func mapFindings(reports []report.Finding) []Finding { - var findings []Finding - for _, f := range reports { - findings = append(findings, Finding{ - Description: f.Description, - StartLine: int64(f.StartLine), - EndLine: int64(f.EndLine), - StartColumn: int64(f.StartColumn), - EndColumn: int64(f.EndColumn), - Match: f.Match, - Secret: f.Secret, - File: f.File, - SymlinkFile: f.SymlinkFile, - Commit: f.Commit, - Entropy: float64(f.Entropy), - Author: f.Author, - Email: f.Email, - Date: f.Date, - Message: f.Message, - Tags: f.Tags, - RuleID: f.RuleID, - Fingerprint: f.Fingerprint, - }) - } - return findings -} diff --git a/git/scan_secrets.go b/git/scan_secrets.go index 4ba4e2a62..9aa55040d 100644 --- a/git/scan_secrets.go +++ b/git/scan_secrets.go @@ -17,9 +17,22 @@ package git import ( "context" "fmt" + "io" + "os" + "path" + "github.com/harness/gitness/errors" "github.com/harness/gitness/git/api" "github.com/harness/gitness/git/sharedrepo" + + "github.com/rs/zerolog/log" + "github.com/zricethezav/gitleaks/v8/detect" + "github.com/zricethezav/gitleaks/v8/report" + "github.com/zricethezav/gitleaks/v8/sources" +) + 
+const (
+	DefaultGitleaksIgnorePath = ".gitleaksignore"
 )
 
 type ScanSecretsParams struct {
@@ -27,10 +40,36 @@ type ScanSecretsParams struct {
 
 	BaseRev string // optional to scan for secrets on diff between baseRev and Rev
 	Rev     string
+
+	GitleaksIgnorePath string // optional, keep empty to skip using .gitleaksignore file.
 }
 
+// ScanSecretsOutput holds the findings returned by ScanSecrets.
 type ScanSecretsOutput struct {
-	Findings []api.Finding
+	Findings []ScanSecretsFinding
+}
+
+// ScanSecretsFinding contains the information of a single gitleaks report.Finding.
+type ScanSecretsFinding struct {
+	Description string   `json:"description"`
+	StartLine   int64    `json:"start_line"`
+	EndLine     int64    `json:"end_line"`
+	StartColumn int64    `json:"start_column"`
+	EndColumn   int64    `json:"end_column"`
+	Match       string   `json:"match"`
+	Secret      string   `json:"secret"`
+	File        string   `json:"file"`
+	SymlinkFile string   `json:"symlink_file"`
+	Commit      string   `json:"commit"`
+	Entropy     float64  `json:"entropy"`
+	Author      string   `json:"author"`
+	Email       string   `json:"email"`
+	Date        string   `json:"date"`
+	Message     string   `json:"message"`
+	Tags        []string `json:"tags"`
+	RuleID      string   `json:"rule_id"`
+	// Fingerprint is the unique identifier of the secret that can be used in .gitleaksignore files.
+	Fingerprint string `json:"fingerprint"`
 }
 
 func (s *Service) ScanSecrets(ctx context.Context, params *ScanSecretsParams) (*ScanSecretsOutput, error) {
@@ -40,12 +79,47 @@ func (s *Service) ScanSecrets(ctx context.Context, params *ScanSecretsParams) (*
 
 	repoPath := getFullPathForRepo(s.reposRoot, params.RepoUID)
 
-	var findings []api.Finding
-
+	var findings []ScanSecretsFinding
 	err := sharedrepo.Run(ctx, nil, s.tmpDir, repoPath, func(sharedRepo *sharedrepo.SharedRepo) error {
-		var err error
-		findings, err = s.git.ScanSecrets(sharedRepo.Directory(), params.BaseRev, params.Rev)
-		return err
+		fsGitleaksIgnorePath, err := s.setupGitleaksIgnoreInSharedRepo(
+			ctx,
+			sharedRepo,
+			params.Rev,
+			params.GitleaksIgnorePath,
+		)
+		if err != nil {
+			return fmt.Errorf("failed to setup .gitleaksignore file in shared repo: %w", err)
+		}
+
+		detector, err := detect.NewDetectorDefaultConfig()
+		if err != nil {
+			return fmt.Errorf("failed to create a new gitleaks detector with default config: %w", err)
+		}
+		if fsGitleaksIgnorePath != "" {
+			if err := detector.AddGitleaksIgnore(fsGitleaksIgnorePath); err != nil {
+				return fmt.Errorf("failed to load .gitleaksignore file from path %q: %w", fsGitleaksIgnorePath, err)
+			}
+		}
+
+		// TODO: fix issue where secrets in second-parent commits are not detected
+		logOpts := fmt.Sprintf("--no-merges --first-parent %s", params.Rev)
+		if params.BaseRev != "" {
+			logOpts = fmt.Sprintf("--no-merges --first-parent %s..%s", params.BaseRev, params.Rev)
+		}
+
+		gitCmd, err := sources.NewGitLogCmd(sharedRepo.Directory(), logOpts)
+		if err != nil {
+			return fmt.Errorf("failed to create a new git log cmd with diff: %w", err)
+		}
+
+		res, err := detector.DetectGit(gitCmd)
+		if err != nil {
+			return fmt.Errorf("failed to detect git leaks on diff: %w", err)
+		}
+
+		findings = mapFindings(res)
+
+		return nil
 	}, params.AlternateObjectDirs...)
 	if err != nil {
 		return nil, fmt.Errorf("failed to get leaks on diff: %w", err)
@@ -55,3 +129,116 @@ func (s *Service) ScanSecrets(ctx context.Context, params *ScanSecretsParams) (*
 		Findings: findings,
 	}, nil
 }
+
+// setupGitleaksIgnoreInSharedRepo copies the blob found at treePath in revision rev into the directory
+// of the shared repo and returns the filesystem path of the copy.
+// It returns an empty path and a nil error if treePath is empty, doesn't exist in the tree of rev,
+// or points to a node that isn't a blob - the caller is expected to scan without .gitleaksignore then.
+func (s *Service) setupGitleaksIgnoreInSharedRepo(
+	ctx context.Context,
+	sharedRepo *sharedrepo.SharedRepo,
+	rev string,
+	treePath string,
+) (string, error) {
+	if treePath == "" {
+		log.Ctx(ctx).Debug().Msg("no path to .gitleaksignore file provided, run without")
+		return "", nil
+	}
+
+	// ensure file exists in tree for the provided revision, otherwise ignore and continue
+	node, err := s.git.GetTreeNode(ctx, sharedRepo.Directory(), rev, treePath)
+	if errors.IsNotFound(err) {
+		log.Ctx(ctx).Debug().Msgf("no .gitleaksignore file found at %q, run without", treePath)
+		return "", nil
+	}
+	if err != nil {
+		return "", fmt.Errorf("failed to get tree node for .gitleaksignore file at path %q: %w", treePath, err)
+	}
+
+	// if node isn't of type blob it won't contain any gitleaks related content - run without .gitleaksignore.
+	// NOTE: We don't do any further checks for binary files or handle symlinks.
+	if node.NodeType != api.TreeNodeTypeBlob {
+		log.Ctx(ctx).Warn().Msgf(
+			"tree node at path %q is of type %d instead of %d (blob), run without .gitleaksignore",
+			treePath,
+			node.NodeType,
+			api.TreeNodeTypeBlob,
+		)
+		return "", nil
+	}
+
+	log.Ctx(ctx).Debug().Msgf(".gitleaksignore file found in tree at %q", treePath)
+
+	// read blob data from bare repo
+	blob, err := api.GetBlob(
+		ctx,
+		sharedRepo.Directory(),
+		nil,
+		node.SHA,
+		0,
+	)
+	if err != nil {
+		return "", fmt.Errorf(
+			"failed to get blob for .gitleaksignore file at path %q sha %q: %w", treePath, node.SHA, err)
+	}
+	defer func() {
+		if err := blob.Content.Close(); err != nil {
+			log.Ctx(ctx).Warn().Err(err).Msg("failed to close blob content reader for .gitleaksignore file")
+		}
+	}()
+
+	// write file to root of .git folder of the shared repo for easy cleanup (it's a bare repo so otherwise no impact)
+	// O_TRUNC guarantees that no stale bytes of an already existing file survive behind the new content.
+	filePath := path.Join(sharedRepo.Directory(), DefaultGitleaksIgnorePath)
+	f, err := os.OpenFile(filePath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0o600)
+	if err != nil {
+		return "", fmt.Errorf("failed to create tmp .gitleaksignore file: %w", err)
+	}
+
+	// close explicitly instead of deferring: a failed close can mean the content never fully reached the file,
+	// in which case the detector would silently run with an incomplete ignore list.
+	_, copyErr := io.Copy(f, blob.Content)
+	closeErr := f.Close()
+	if copyErr != nil {
+		return "", fmt.Errorf("failed to copy .gitleaksignore file from blob to %q: %w", filePath, copyErr)
+	}
+	if closeErr != nil {
+		return "", fmt.Errorf("failed to close tmp .gitleaksignore file at %q: %w", filePath, closeErr)
+	}
+
+	return filePath, nil
+}
+
+// mapFindings converts gitleaks findings into ScanSecretsFinding values, preserving their order.
+// It returns nil if reports is empty.
+func mapFindings(reports []report.Finding) []ScanSecretsFinding {
+	if len(reports) == 0 {
+		return nil
+	}
+
+	findings := make([]ScanSecretsFinding, 0, len(reports))
+	for i := range reports {
+		f := &reports[i] // index instead of ranging by value to avoid copying every finding
+		findings = append(findings, ScanSecretsFinding{
+			Description: f.Description,
+			StartLine:   int64(f.StartLine),
+			EndLine:     int64(f.EndLine),
+			StartColumn: int64(f.StartColumn),
+			EndColumn:   int64(f.EndColumn),
+			Match:       f.Match,
+			Secret:      f.Secret,
+			File:        f.File,
+			SymlinkFile: f.SymlinkFile,
+			Commit:      f.Commit,
+			Entropy:     float64(f.Entropy),
+			Author:      f.Author,
+			Email:       f.Email,
+			Date:        f.Date,
+			Message:     f.Message,
+			Tags:        f.Tags,
+			RuleID:      f.RuleID,
+			Fingerprint: f.Fingerprint,
+		})
+	}
+	return findings
+}