diff --git a/app/api/controller/keywordsearch/controller.go b/app/api/controller/keywordsearch/controller.go
new file mode 100644
index 000000000..4cc8dec46
--- /dev/null
+++ b/app/api/controller/keywordsearch/controller.go
@@ -0,0 +1,45 @@
+// Copyright 2023 Harness, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package keywordsearch
+
+import (
+	"github.com/harness/gitness/app/api/controller/repo"
+	"github.com/harness/gitness/app/api/controller/space"
+	"github.com/harness/gitness/app/auth/authz"
+	"github.com/harness/gitness/app/services/keywordsearch"
+)
+
+// Controller serves keyword search requests on top of the repo and space controllers.
+type Controller struct {
+	authorizer authz.Authorizer
+	repoCtrl   *repo.Controller
+	searcher   keywordsearch.Searcher
+	spaceCtrl  *space.Controller
+}
+
+// NewController returns a Controller that uses the given dependencies.
+func NewController(
+	authorizer authz.Authorizer,
+	searcher keywordsearch.Searcher,
+	repoCtrl *repo.Controller,
+	spaceCtrl *space.Controller,
+) *Controller {
+	return &Controller{
+		authorizer: authorizer,
+		searcher:   searcher,
+		repoCtrl:   repoCtrl,
+		spaceCtrl:  spaceCtrl,
+	}
+}
diff --git a/app/api/controller/keywordsearch/search.go b/app/api/controller/keywordsearch/search.go
new file mode 100644
index 000000000..64a62ce41
--- /dev/null
+++ b/app/api/controller/keywordsearch/search.go
@@ -0,0 +1,155 @@
+// Copyright 2023 Harness, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package keywordsearch
+
+import (
+	"context"
+	"fmt"
+	"math"
+
+	"github.com/harness/gitness/app/api/usererror"
+	"github.com/harness/gitness/app/auth"
+	"github.com/harness/gitness/types"
+	"github.com/harness/gitness/types/enum"
+)
+
+// Search runs the keyword query against the repositories selected by the input.
+// Repositories are resolved from in.RepoPaths first; in.SpacePaths is only
+// consulted when no repository was resolved from the repo paths. Every file
+// match in the result is annotated with the path of its repository.
+func (c *Controller) Search(
+	ctx context.Context,
+	session *auth.Session,
+	in types.SearchInput,
+) (types.SearchResult, error) {
+	if in.Query == "" {
+		return types.SearchResult{}, usererror.BadRequest("query cannot be empty.")
+	}
+
+	if len(in.RepoPaths) == 0 && len(in.SpacePaths) == 0 {
+		return types.SearchResult{}, usererror.BadRequest(
+			"either repo paths or space paths need to be set.")
+	}
+
+	repoIDToPathMap, err := c.getReposByPath(ctx, session, in.RepoPaths)
+	if err != nil {
+		return types.SearchResult{}, fmt.Errorf("failed to search repos by path: %w", err)
+	}
+
+	if len(repoIDToPathMap) == 0 {
+		repoIDToPathMap, err = c.getReposBySpacePaths(ctx, session, in.SpacePaths)
+		if err != nil {
+			return types.SearchResult{}, fmt.Errorf("failed to search repos by space path: %w", err)
+		}
+	}
+
+	if len(repoIDToPathMap) == 0 {
+		// The supplied paths resolved to no repository. Report that as a user
+		// error, like the input checks above, rather than as a plain error.
+		return types.SearchResult{}, usererror.BadRequest(
+			"no repositories found for the given paths")
+	}
+
+	repoIDs := make([]int64, 0, len(repoIDToPathMap))
+	for repoID := range repoIDToPathMap {
+		repoIDs = append(repoIDs, repoID)
+	}
+
+	result, err := c.searcher.Search(ctx, repoIDs, in.Query, in.MaxResultCount)
+	if err != nil {
+		return types.SearchResult{}, fmt.Errorf("failed to search: %w", err)
+	}
+
+	for idx, fileMatch := range result.FileMatches {
+		result.FileMatches[idx].RepoPath = repoIDToPathMap[fileMatch.RepoID]
+	}
+	return result, nil
+}
+
+// getReposByPath looks up every non-empty repo path through the repo controller
+// and returns a map from repository ID to the path it was requested with.
+// The first failed lookup aborts the resolution and is returned as the error.
+func (c *Controller) getReposByPath(
+	ctx context.Context,
+	session *auth.Session,
+	repoPaths []string,
+) (map[int64]string, error) {
+	repoIDToPathMap := make(map[int64]string, len(repoPaths))
+	for _, repoPath := range repoPaths {
+		if repoPath == "" {
+			continue
+		}
+
+		repo, err := c.repoCtrl.Find(ctx, session, repoPath)
+		if err != nil {
+			return nil, fmt.Errorf("failed to find repository: %w", err)
+		}
+		repoIDToPathMap[repo.ID] = repoPath
+	}
+	return repoIDToPathMap, nil
+}
+
+// getReposBySpacePaths merges the repositories of all given space paths into a
+// single map from repository ID to repository path.
+func (c *Controller) getReposBySpacePaths(
+	ctx context.Context,
+	session *auth.Session,
+	spacePaths []string,
+) (map[int64]string, error) {
+	repoIDToPathMap := make(map[int64]string)
+	for _, spacePath := range spacePaths {
+		m, err := c.getReposBySpacePath(ctx, session, spacePath)
+		if err != nil {
+			return nil, fmt.Errorf("failed to resolve repos of space %q: %w", spacePath, err)
+		}
+
+		for repoID, repoPath := range m {
+			repoIDToPathMap[repoID] = repoPath
+		}
+	}
+	return repoIDToPathMap, nil
+}
+
+// getReposBySpacePath asks the space controller for the repositories of
+// spacePath and maps their IDs to their paths. An empty space path yields an
+// empty map without calling the space controller.
+func (c *Controller) getReposBySpacePath(
+	ctx context.Context,
+	session *auth.Session,
+	spacePath string,
+) (map[int64]string, error) {
+	repoIDToPathMap := make(map[int64]string)
+	if spacePath == "" {
+		return repoIDToPathMap, nil
+	}
+
+	// Single page of maximum size, presumably to fetch all repositories at once.
+	filter := &types.RepoFilter{
+		Page:  1,
+		Size:  math.MaxInt,
+		Query: "",
+		Order: enum.OrderAsc,
+		Sort:  enum.RepoAttrNone,
+	}
+	repos, _, err := c.spaceCtrl.ListRepositories(ctx, session, spacePath, filter)
+	if err != nil {
+		return nil, fmt.Errorf("failed to list space repositories: %w", err)
+	}
+
+	for _, repo := range repos {
+		repoIDToPathMap[repo.ID] = repo.Path
+	}
+	return repoIDToPathMap, nil
+}
diff --git a/app/api/controller/keywordsearch/wire.go
b/app/api/controller/keywordsearch/wire.go new file mode 100644 index 000000000..b08912672 --- /dev/null +++ b/app/api/controller/keywordsearch/wire.go @@ -0,0 +1,38 @@ +// Copyright 2023 Harness, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package keywordsearch + +import ( + "github.com/harness/gitness/app/api/controller/repo" + "github.com/harness/gitness/app/api/controller/space" + "github.com/harness/gitness/app/auth/authz" + "github.com/harness/gitness/app/services/keywordsearch" + + "github.com/google/wire" +) + +// WireSet provides a wire set for this package. 
+var WireSet = wire.NewSet( + ProvideController, +) + +func ProvideController( + authorizer authz.Authorizer, + searcher keywordsearch.Searcher, + repoCtrl *repo.Controller, + spaceCtrl *space.Controller, +) *Controller { + return NewController(authorizer, searcher, repoCtrl, spaceCtrl) +} diff --git a/app/api/controller/repo/controller.go b/app/api/controller/repo/controller.go index a7e744066..e531fdac5 100644 --- a/app/api/controller/repo/controller.go +++ b/app/api/controller/repo/controller.go @@ -27,6 +27,7 @@ import ( repoevents "github.com/harness/gitness/app/events/repo" "github.com/harness/gitness/app/services/codeowners" "github.com/harness/gitness/app/services/importer" + "github.com/harness/gitness/app/services/keywordsearch" "github.com/harness/gitness/app/services/protection" "github.com/harness/gitness/app/store" "github.com/harness/gitness/app/url" @@ -53,6 +54,7 @@ type Controller struct { importer *importer.Repository codeOwners *codeowners.Service eventReporter *repoevents.Reporter + indexer keywordsearch.Indexer } func NewController( @@ -71,6 +73,7 @@ func NewController( importer *importer.Repository, codeOwners *codeowners.Service, eventReporter *repoevents.Reporter, + indexer keywordsearch.Indexer, ) *Controller { return &Controller{ defaultBranch: defaultBranch, @@ -88,6 +91,7 @@ func NewController( importer: importer, codeOwners: codeOwners, eventReporter: eventReporter, + indexer: indexer, } } diff --git a/app/api/controller/repo/create.go b/app/api/controller/repo/create.go index 87af9d1c7..729f5cb40 100644 --- a/app/api/controller/repo/create.go +++ b/app/api/controller/repo/create.go @@ -94,6 +94,14 @@ func (c *Controller) Create(ctx context.Context, session *auth.Session, in *Crea // backfil GitURL repo.GitURL = c.urlProvider.GenerateGITCloneURL(repo.Path) + // index repository if files are created + if in.Readme || in.GitIgnore != "" || (in.License != "" && in.License != "none") { + err = c.indexer.Index(ctx, repo) + if err != nil { + 
log.Ctx(ctx).Warn().Err(err).Int64("repo_id", repo.ID).Msg("failed to index repo") + } + } + return repo, nil } diff --git a/app/api/controller/repo/wire.go b/app/api/controller/repo/wire.go index 5e65cae3b..b75b778d4 100644 --- a/app/api/controller/repo/wire.go +++ b/app/api/controller/repo/wire.go @@ -19,6 +19,7 @@ import ( repoevents "github.com/harness/gitness/app/events/repo" "github.com/harness/gitness/app/services/codeowners" "github.com/harness/gitness/app/services/importer" + "github.com/harness/gitness/app/services/keywordsearch" "github.com/harness/gitness/app/services/protection" "github.com/harness/gitness/app/store" "github.com/harness/gitness/app/url" @@ -51,10 +52,11 @@ func ProvideController( importer *importer.Repository, codeOwners *codeowners.Service, reporeporter *repoevents.Reporter, + indexer keywordsearch.Indexer, ) *Controller { return NewController(config.Git.DefaultBranch, tx, urlProvider, uidCheck, authorizer, repoStore, spaceStore, pipelineStore, principalStore, ruleStore, protectionManager, - rpcClient, importer, codeOwners, reporeporter) + rpcClient, importer, codeOwners, reporeporter, indexer) } diff --git a/app/api/handler/keywordsearch/search.go b/app/api/handler/keywordsearch/search.go new file mode 100644 index 000000000..b34dd7186 --- /dev/null +++ b/app/api/handler/keywordsearch/search.go @@ -0,0 +1,48 @@ +// Copyright 2023 Harness, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package keywordsearch + +import ( + "encoding/json" + "net/http" + + "github.com/harness/gitness/app/api/controller/keywordsearch" + "github.com/harness/gitness/app/api/render" + "github.com/harness/gitness/app/api/request" + "github.com/harness/gitness/types" +) + +// HandleSearch returns keyword search results on repositories. +func HandleSearch(ctrl *keywordsearch.Controller) http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + ctx := r.Context() + session, _ := request.AuthSessionFrom(ctx) + + searchInput := types.SearchInput{} + err := json.NewDecoder(r.Body).Decode(&searchInput) + if err != nil { + render.BadRequestf(w, "invalid Request Body: %s.", err) + return + } + + result, err := ctrl.Search(ctx, session, searchInput) + if err != nil { + render.TranslatedUserError(w, err) + return + } + + render.JSON(w, http.StatusOK, result) + } +} diff --git a/app/router/api.go b/app/router/api.go index fb5af9c7e..e25fc81d9 100644 --- a/app/router/api.go +++ b/app/router/api.go @@ -23,6 +23,7 @@ import ( "github.com/harness/gitness/app/api/controller/connector" "github.com/harness/gitness/app/api/controller/execution" controllergithook "github.com/harness/gitness/app/api/controller/githook" + "github.com/harness/gitness/app/api/controller/keywordsearch" "github.com/harness/gitness/app/api/controller/logs" "github.com/harness/gitness/app/api/controller/pipeline" "github.com/harness/gitness/app/api/controller/plugin" @@ -43,6 +44,7 @@ import ( handlerconnector "github.com/harness/gitness/app/api/handler/connector" handlerexecution "github.com/harness/gitness/app/api/handler/execution" handlergithook "github.com/harness/gitness/app/api/handler/githook" + handlerkeywordsearch "github.com/harness/gitness/app/api/handler/keywordsearch" handlerlogs "github.com/harness/gitness/app/api/handler/logs" handlerpipeline "github.com/harness/gitness/app/api/handler/pipeline" handlerplugin "github.com/harness/gitness/app/api/handler/plugin" @@ -112,6 +114,7 
@@ func NewAPIHandler( checkCtrl *check.Controller, sysCtrl *system.Controller, uploadCtrl *upload.Controller, + searchCtrl *keywordsearch.Controller, ) APIHandler { // Use go-chi router for inner routing. r := chi.NewRouter() @@ -136,7 +139,8 @@ func NewAPIHandler( r.Route("/v1", func(r chi.Router) { setupRoutesV1(r, appCtx, config, repoCtrl, executionCtrl, triggerCtrl, logCtrl, pipelineCtrl, connectorCtrl, templateCtrl, pluginCtrl, secretCtrl, spaceCtrl, pullreqCtrl, - webhookCtrl, githookCtrl, saCtrl, userCtrl, principalCtrl, checkCtrl, sysCtrl, uploadCtrl) + webhookCtrl, githookCtrl, saCtrl, userCtrl, principalCtrl, checkCtrl, sysCtrl, uploadCtrl, + searchCtrl) }) // wrap router in terminatedPath encoder. @@ -179,6 +183,7 @@ func setupRoutesV1(r chi.Router, checkCtrl *check.Controller, sysCtrl *system.Controller, uploadCtrl *upload.Controller, + searchCtrl *keywordsearch.Controller, ) { setupSpaces(r, appCtx, spaceCtrl) setupRepos(r, repoCtrl, pipelineCtrl, executionCtrl, triggerCtrl, logCtrl, pullreqCtrl, webhookCtrl, checkCtrl, @@ -195,6 +200,7 @@ func setupRoutesV1(r chi.Router, setupSystem(r, sysCtrl) setupResources(r) setupPlugins(r, pluginCtrl) + setupKeywordSearch(r, searchCtrl) } // nolint: revive // it's the app context, it shouldn't be the first argument @@ -632,6 +638,10 @@ func setupPrincipals(r chi.Router, principalCtrl principal.Controller) { }) } +func setupKeywordSearch(r chi.Router, searchCtrl *keywordsearch.Controller) { + r.Post("/search", handlerkeywordsearch.HandleSearch(searchCtrl)) +} + func setupAdmin(r chi.Router, userCtrl *user.Controller) { r.Route("/admin", func(r chi.Router) { r.Use(middlewareprincipal.RestrictToAdmin()) diff --git a/app/router/wire.go b/app/router/wire.go index b262e7612..e86c13198 100644 --- a/app/router/wire.go +++ b/app/router/wire.go @@ -22,6 +22,7 @@ import ( "github.com/harness/gitness/app/api/controller/connector" "github.com/harness/gitness/app/api/controller/execution" 
"github.com/harness/gitness/app/api/controller/githook" + "github.com/harness/gitness/app/api/controller/keywordsearch" "github.com/harness/gitness/app/api/controller/logs" "github.com/harness/gitness/app/api/controller/pipeline" "github.com/harness/gitness/app/api/controller/plugin" @@ -107,11 +108,12 @@ func ProvideAPIHandler( checkCtrl *check.Controller, sysCtrl *system.Controller, blobCtrl *upload.Controller, + searchCtrl *keywordsearch.Controller, ) APIHandler { return NewAPIHandler(appCtx, config, authenticator, repoCtrl, executionCtrl, logCtrl, spaceCtrl, pipelineCtrl, secretCtrl, triggerCtrl, connectorCtrl, templateCtrl, pluginCtrl, pullreqCtrl, webhookCtrl, - githookCtrl, saCtrl, userCtrl, principalCtrl, checkCtrl, sysCtrl, blobCtrl) + githookCtrl, saCtrl, userCtrl, principalCtrl, checkCtrl, sysCtrl, blobCtrl, searchCtrl) } func ProvideWebHandler(config *types.Config) WebHandler { diff --git a/app/services/importer/repository.go b/app/services/importer/repository.go index 2a97ea88e..229e11dbb 100644 --- a/app/services/importer/repository.go +++ b/app/services/importer/repository.go @@ -27,6 +27,7 @@ import ( "github.com/harness/gitness/app/bootstrap" "github.com/harness/gitness/app/githook" "github.com/harness/gitness/app/services/job" + "github.com/harness/gitness/app/services/keywordsearch" "github.com/harness/gitness/app/sse" "github.com/harness/gitness/app/store" gitnessurl "github.com/harness/gitness/app/url" @@ -61,6 +62,7 @@ type Repository struct { encrypter encrypt.Encrypter scheduler *job.Scheduler sseStreamer sse.Streamer + indexer keywordsearch.Indexer } var _ job.Handler = (*Repository)(nil) @@ -316,6 +318,11 @@ func (r *Repository) Handle(ctx context.Context, data string, _ job.ProgressRepo log.Warn().Err(err).Msg("failed to publish import completion SSE") } + err = r.indexer.Index(ctx, repo) + if err != nil { + log.Warn().Err(err).Msg("failed to index repository") + } + log.Info().Msg("completed repository import") return "", nil diff --git 
a/app/services/importer/wire.go b/app/services/importer/wire.go index e10d5cea4..e4debeb8d 100644 --- a/app/services/importer/wire.go +++ b/app/services/importer/wire.go @@ -16,6 +16,7 @@ package importer import ( "github.com/harness/gitness/app/services/job" + "github.com/harness/gitness/app/services/keywordsearch" "github.com/harness/gitness/app/sse" "github.com/harness/gitness/app/store" "github.com/harness/gitness/app/url" @@ -43,6 +44,7 @@ func ProvideRepoImporter( scheduler *job.Scheduler, executor *job.Executor, sseStreamer sse.Streamer, + indexer keywordsearch.Indexer, ) (*Repository, error) { importer := &Repository{ defaultBranch: config.Git.DefaultBranch, @@ -55,6 +57,7 @@ func ProvideRepoImporter( encrypter: encrypter, scheduler: scheduler, sseStreamer: sseStreamer, + indexer: indexer, } err := executor.Register(jobType, importer)
diff --git a/app/services/keywordsearch/handler_branch.go b/app/services/keywordsearch/handler_branch.go
new file mode 100644
index 000000000..d018e82d0
--- /dev/null
+++ b/app/services/keywordsearch/handler_branch.go
@@ -0,0 +1,85 @@
+// Copyright 2023 Harness, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package keywordsearch
+
+import (
+	"context"
+	"fmt"
+	"strings"
+
+	gitevents "github.com/harness/gitness/app/events/git"
+	"github.com/harness/gitness/events"
+)
+
+// handleEventBranchCreated forwards a branch created event to indexRepo.
+func (s *Service) handleEventBranchCreated(ctx context.Context,
+	event *events.Event[*gitevents.BranchCreatedPayload]) error {
+	return s.indexRepo(ctx, event.Payload.RepoID, event.Payload.Ref)
+}
+
+// handleEventBranchUpdated forwards a branch updated event to indexRepo.
+func (s *Service) handleEventBranchUpdated(ctx context.Context,
+	event *events.Event[*gitevents.BranchUpdatedPayload]) error {
+	return s.indexRepo(ctx, event.Payload.RepoID, event.Payload.Ref)
+}
+
+// indexRepo updates the search index of the repository if ref points to its
+// default branch. Events whose ref is not a valid branch ref are discarded;
+// events for any other branch are ignored.
+func (s *Service) indexRepo(
+	ctx context.Context,
+	repoID int64,
+	ref string,
+) error {
+	// Validate the ref before touching the store: a malformed ref can never
+	// succeed, so discard the event without paying for a lookup first.
+	branch, err := getBranchFromRef(ref)
+	if err != nil {
+		return events.NewDiscardEventError(
+			fmt.Errorf("failed to parse branch name from ref: %w", err))
+	}
+
+	repo, err := s.repoStore.Find(ctx, repoID)
+	if err != nil {
+		return fmt.Errorf("failed to find repository in db: %w", err)
+	}
+
+	// we only maintain the index on the default branch
+	if repo.DefaultBranch != branch {
+		return nil
+	}
+
+	err = s.indexer.Index(ctx, repo)
+	if err != nil {
+		return fmt.Errorf("index update failed for repo %d: %w", repo.ID, err)
+	}
+
+	return nil
+}
+
+// getBranchFromRef extracts the branch name from a fully qualified branch ref
+// ("refs/heads/" followed by the name). It fails if the prefix is missing or
+// the remaining name is empty.
+func getBranchFromRef(ref string) (string, error) {
+	const refPrefix = "refs/heads/"
+	if !strings.HasPrefix(ref, refPrefix) {
+		return "", fmt.Errorf("failed to get branch name from branch ref %s", ref)
+	}
+
+	branch := strings.TrimPrefix(ref, refPrefix)
+	if branch == "" {
+		return "", fmt.Errorf("got an empty branch name from branch ref %s", ref)
+	}
+	return branch, nil
+}
diff --git a/app/services/keywordsearch/index_searcher.go b/app/services/keywordsearch/index_searcher.go
new file mode 100644
index 000000000..4cc7ef08c
--- /dev/null
+++ b/app/services/keywordsearch/index_searcher.go
@@ -0,0 +1,33 @@
+// Copyright 2023 Harness, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package keywordsearch
+
+import (
+	"context"
+
+	"github.com/harness/gitness/types"
+)
+
+// Indexer indexes a repository for keyword search.
+type Indexer interface {
+	Index(ctx context.Context, repo *types.Repository) error
+}
+
+// Searcher runs a keyword query against the repositories with the given IDs,
+// taking an upper bound for the number of results.
+type Searcher interface {
+	Search(ctx context.Context, repoIDs []int64, query string, maxResultCount int) (
+		types.SearchResult, error)
+}
diff --git a/app/services/keywordsearch/local_index_searcher.go b/app/services/keywordsearch/local_index_searcher.go
new file mode 100644
index 000000000..819a338c7
--- /dev/null
+++ b/app/services/keywordsearch/local_index_searcher.go
@@ -0,0 +1,42 @@
+// Copyright 2023 Harness, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package keywordsearch
+
+import (
+	"context"
+	"fmt"
+
+	"github.com/harness/gitness/types"
+)
+
+type LocalIndexSearcher struct {
+}
+
+func NewLocalIndexSearcher() *LocalIndexSearcher {
+	return &LocalIndexSearcher{}
+}
+
+func (s *LocalIndexSearcher) Search(
+	_ context.Context,
+	_ []int64,
+	_ string,
+	_ int,
+) (types.SearchResult, error) {
+	return types.SearchResult{}, fmt.Errorf("not implemented")
+}
+
+func (s *LocalIndexSearcher) Index(_ context.Context, _ *types.Repository) error {
+	return fmt.Errorf("not implemented")
+}
diff --git a/app/services/keywordsearch/service.go b/app/services/keywordsearch/service.go
new file mode 100644
index 000000000..fcab3d40b
--- /dev/null
+++ b/app/services/keywordsearch/service.go
@@ -0,0 +1,106 @@
+// Copyright 2023 Harness, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package keywordsearch
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"time"
+
+	gitevents "github.com/harness/gitness/app/events/git"
+	"github.com/harness/gitness/app/store"
+	"github.com/harness/gitness/events"
+	"github.com/harness/gitness/stream"
+)
+
+const (
+	// eventsReaderGroupName is the group name passed when launching the git event reader.
+	eventsReaderGroupName = "gitness:keywordsearch"
+)
+
+// Config holds the settings of the keyword search Service.
+type Config struct {
+	EventReaderName string
+	Concurrency     int
+	MaxRetries      int
+}
+
+// Prepare validates the config and returns an error describing the first
+// invalid setting. A nil config is rejected.
+func (c *Config) Prepare() error {
+	if c == nil {
+		return errors.New("config is required")
+	}
+	if c.EventReaderName == "" {
+		return errors.New("config.EventReaderName is required")
+	}
+	if c.Concurrency < 1 {
+		return errors.New("config.Concurrency has to be a positive number")
+	}
+	if c.MaxRetries < 0 {
+		return errors.New("config.MaxRetries can't be negative")
+	}
+	return nil
+}
+
+// Service is responsible for indexing of repository for keyword search.
+type Service struct {
+	config    Config
+	indexer   Indexer
+	repoStore store.RepoStore
+}
+
+// NewService validates the config, creates the Service and launches a git event
+// reader that feeds branch created/updated events into the service's handlers.
+func NewService(
+	ctx context.Context,
+	config Config,
+	gitReaderFactory *events.ReaderFactory[*gitevents.Reader],
+	repoStore store.RepoStore,
+	indexer Indexer,
+) (*Service, error) {
+	if err := config.Prepare(); err != nil {
+		return nil, fmt.Errorf("provided keyword search service config is invalid: %w", err)
+	}
+	service := &Service{
+		config:    config,
+		repoStore: repoStore,
+		indexer:   indexer,
+	}
+
+	_, err := gitReaderFactory.Launch(ctx, eventsReaderGroupName, config.EventReaderName,
+		func(r *gitevents.Reader) error {
+			const idleTimeout = 1 * time.Minute
+			r.Configure(
+				stream.WithConcurrency(config.Concurrency),
+				stream.WithHandlerOptions(
+					stream.WithIdleTimeout(idleTimeout),
+					stream.WithMaxRetries(config.MaxRetries),
+				))
+
+			// register events
+			// NOTE(review): the Register* results are discarded - confirm nothing needs handling.
+			_ = r.RegisterBranchCreated(service.handleEventBranchCreated)
+			_ = r.RegisterBranchUpdated(service.handleEventBranchUpdated)
+
+			return nil
+		})
+	if err != nil {
+		return nil, fmt.Errorf("failed to launch git event reader for keyword search: %w", err)
+	}
+
+	return service, nil
+}
diff --git a/app/services/keywordsearch/wire.go b/app/services/keywordsearch/wire.go
new file mode 100644
index 000000000..9b0f0a7d9
--- /dev/null
+++ b/app/services/keywordsearch/wire.go
@@ -0,0 +1,58 @@
+// Copyright 2023 Harness, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package keywordsearch
+
+import (
+	"context"
+
+	gitevents "github.com/harness/gitness/app/events/git"
+	"github.com/harness/gitness/app/store"
+	"github.com/harness/gitness/events"
+
+	"github.com/google/wire"
+)
+
+// WireSet provides a wire set for this package.
+var WireSet = wire.NewSet( + ProvideLocalIndexSearcher, + ProvideIndexer, + ProvideSearcher, + ProvideService, +) + +func ProvideService(ctx context.Context, + config Config, + gitReaderFactory *events.ReaderFactory[*gitevents.Reader], + repoStore store.RepoStore, + indexer Indexer, +) (*Service, error) { + return NewService(ctx, + config, + gitReaderFactory, + repoStore, + indexer) +} + +func ProvideLocalIndexSearcher() *LocalIndexSearcher { + return NewLocalIndexSearcher() +} + +func ProvideIndexer(l *LocalIndexSearcher) Indexer { + return l +} + +func ProvideSearcher(l *LocalIndexSearcher) Searcher { + return l +} diff --git a/app/services/wire.go b/app/services/wire.go index 4c21887a9..fa7319a9c 100644 --- a/app/services/wire.go +++ b/app/services/wire.go @@ -17,6 +17,7 @@ package services import ( "github.com/harness/gitness/app/services/cleanup" "github.com/harness/gitness/app/services/job" + "github.com/harness/gitness/app/services/keywordsearch" "github.com/harness/gitness/app/services/metric" "github.com/harness/gitness/app/services/pullreq" "github.com/harness/gitness/app/services/trigger" @@ -36,6 +37,7 @@ type Services struct { JobScheduler *job.Scheduler MetricCollector *metric.Collector Cleanup *cleanup.Service + Keywordsearch *keywordsearch.Service } func ProvideServices( @@ -45,6 +47,7 @@ func ProvideServices( jobScheduler *job.Scheduler, metricCollector *metric.Collector, cleanupSvc *cleanup.Service, + keywordsearchSvc *keywordsearch.Service, ) Services { return Services{ Webhook: webhooksSvc, @@ -53,5 +56,6 @@ func ProvideServices( JobScheduler: jobScheduler, MetricCollector: metricCollector, Cleanup: cleanupSvc, + Keywordsearch: keywordsearchSvc, } } diff --git a/cli/server/config.go b/cli/server/config.go index ef8d47da8..43244c541 100644 --- a/cli/server/config.go +++ b/cli/server/config.go @@ -24,6 +24,7 @@ import ( "github.com/harness/gitness/app/services/cleanup" "github.com/harness/gitness/app/services/codeowners" + 
"github.com/harness/gitness/app/services/keywordsearch" "github.com/harness/gitness/app/services/trigger" "github.com/harness/gitness/app/services/webhook" "github.com/harness/gitness/blob" @@ -326,3 +327,12 @@ func ProvideCodeOwnerConfig(config *types.Config) codeowners.Config { FilePaths: config.CodeOwners.FilePaths, } } + +// ProvideKeywordSearchConfig loads the keyword search service config from the main config. +func ProvideKeywordSearchConfig(config *types.Config) keywordsearch.Config { + return keywordsearch.Config{ + EventReaderName: config.InstanceID, + Concurrency: config.KeywordSearch.Concurrency, + MaxRetries: config.KeywordSearch.MaxRetries, + } +} diff --git a/cmd/gitness/wire.go b/cmd/gitness/wire.go index 3b29c97f7..75bad0f59 100644 --- a/cmd/gitness/wire.go +++ b/cmd/gitness/wire.go @@ -14,6 +14,7 @@ import ( "github.com/harness/gitness/app/api/controller/connector" "github.com/harness/gitness/app/api/controller/execution" "github.com/harness/gitness/app/api/controller/githook" + controllerkeywordsearch "github.com/harness/gitness/app/api/controller/keywordsearch" controllerlogs "github.com/harness/gitness/app/api/controller/logs" "github.com/harness/gitness/app/api/controller/pipeline" "github.com/harness/gitness/app/api/controller/plugin" @@ -53,6 +54,7 @@ import ( "github.com/harness/gitness/app/services/exporter" "github.com/harness/gitness/app/services/importer" "github.com/harness/gitness/app/services/job" + "github.com/harness/gitness/app/services/keywordsearch" "github.com/harness/gitness/app/services/metric" "github.com/harness/gitness/app/services/protection" pullreqservice "github.com/harness/gitness/app/services/pullreq" @@ -159,6 +161,9 @@ func initSystem(ctx context.Context, config *types.Config) (*cliserver.System, e metric.WireSet, cliserver.ProvideCodeOwnerConfig, codeowners.WireSet, + cliserver.ProvideKeywordSearchConfig, + keywordsearch.WireSet, + controllerkeywordsearch.WireSet, ) return &cliserver.System{}, nil } diff --git 
a/cmd/gitness/wire_gen.go b/cmd/gitness/wire_gen.go index a4a21e69f..075ab6f2d 100644 --- a/cmd/gitness/wire_gen.go +++ b/cmd/gitness/wire_gen.go @@ -13,6 +13,7 @@ import ( "github.com/harness/gitness/app/api/controller/connector" "github.com/harness/gitness/app/api/controller/execution" "github.com/harness/gitness/app/api/controller/githook" + keywordsearch2 "github.com/harness/gitness/app/api/controller/keywordsearch" logs2 "github.com/harness/gitness/app/api/controller/logs" "github.com/harness/gitness/app/api/controller/pipeline" "github.com/harness/gitness/app/api/controller/plugin" @@ -52,6 +53,7 @@ import ( "github.com/harness/gitness/app/services/exporter" "github.com/harness/gitness/app/services/importer" "github.com/harness/gitness/app/services/job" + "github.com/harness/gitness/app/services/keywordsearch" "github.com/harness/gitness/app/services/metric" "github.com/harness/gitness/app/services/protection" "github.com/harness/gitness/app/services/pullreq" @@ -152,7 +154,9 @@ func initSystem(ctx context.Context, config *types.Config) (*server.System, erro return nil, err } streamer := sse.ProvideEventsStreaming(pubSub) - repository, err := importer.ProvideRepoImporter(config, provider, gitInterface, transactor, repoStore, pipelineStore, triggerStore, encrypter, jobScheduler, executor, streamer) + localIndexSearcher := keywordsearch.ProvideLocalIndexSearcher() + indexer := keywordsearch.ProvideIndexer(localIndexSearcher) + repository, err := importer.ProvideRepoImporter(config, provider, gitInterface, transactor, repoStore, pipelineStore, triggerStore, encrypter, jobScheduler, executor, streamer, indexer) if err != nil { return nil, err } @@ -167,7 +171,7 @@ func initSystem(ctx context.Context, config *types.Config) (*server.System, erro if err != nil { return nil, err } - repoController := repo.ProvideController(config, transactor, provider, pathUID, authorizer, repoStore, spaceStore, pipelineStore, principalStore, ruleStore, protectionManager, 
gitInterface, repository, codeownersService, reporter) + repoController := repo.ProvideController(config, transactor, provider, pathUID, authorizer, repoStore, spaceStore, pipelineStore, principalStore, ruleStore, protectionManager, gitInterface, repository, codeownersService, reporter, indexer) executionStore := database.ProvideExecutionStore(db) checkStore := database.ProvideCheckStore(db, principalInfoCache) stageStore := database.ProvideStageStore(db) @@ -251,7 +255,9 @@ func initSystem(ctx context.Context, config *types.Config) (*server.System, erro return nil, err } uploadController := upload.ProvideController(authorizer, repoStore, blobStore) - apiHandler := router.ProvideAPIHandler(ctx, config, authenticator, repoController, executionController, logsController, spaceController, pipelineController, secretController, triggerController, connectorController, templateController, pluginController, pullreqController, webhookController, githookController, serviceaccountController, controller, principalController, checkController, systemController, uploadController) + searcher := keywordsearch.ProvideSearcher(localIndexSearcher) + keywordsearchController := keywordsearch2.ProvideController(authorizer, searcher, repoController, spaceController) + apiHandler := router.ProvideAPIHandler(ctx, config, authenticator, repoController, executionController, logsController, spaceController, pipelineController, secretController, triggerController, connectorController, templateController, pluginController, pullreqController, webhookController, githookController, serviceaccountController, controller, principalController, checkController, systemController, uploadController, keywordsearchController) gitHandler := router.ProvideGitHandler(provider, authenticator, repoController) webHandler := router.ProvideWebHandler(config) routerRouter := router.ProvideRouter(apiHandler, gitHandler, webHandler, provider) @@ -278,7 +284,12 @@ func initSystem(ctx context.Context, config 
*types.Config) (*server.System, erro if err != nil { return nil, err } - servicesServices := services.ProvideServices(webhookService, pullreqService, triggerService, jobScheduler, collector, cleanupService) + keywordsearchConfig := server.ProvideKeywordSearchConfig(config) + keywordsearchService, err := keywordsearch.ProvideService(ctx, keywordsearchConfig, readerFactory, repoStore, indexer) + if err != nil { + return nil, err + } + servicesServices := services.ProvideServices(webhookService, pullreqService, triggerService, jobScheduler, collector, cleanupService, keywordsearchService) serverSystem := server.NewSystem(bootstrapBootstrap, serverServer, poller, pluginManager, servicesServices) return serverSystem, nil } diff --git a/types/config.go b/types/config.go index 870550926..b9ddb1992 100644 --- a/types/config.go +++ b/types/config.go @@ -320,4 +320,9 @@ type Config struct { FromMail string `envconfig:"GITNESS_SMTP_FROM_MAIL"` Insecure bool `envconfig:"GITNESS_SMTP_INSECURE"` } + + KeywordSearch struct { + Concurrency int `envconfig:"GITNESS_KEYWORD_SEARCH_CONCURRENCY" default:"4"` + MaxRetries int `envconfig:"GITNESS_KEYWORD_SEARCH_MAX_RETRIES" default:"3"` + } } diff --git a/types/search.go b/types/search.go new file mode 100644 index 000000000..bbc33f853 --- /dev/null +++ b/types/search.go @@ -0,0 +1,71 @@ +// Copyright 2023 Harness, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
// SearchInput describes a keyword search request.
type SearchInput struct {
	// Query is the search query string.
	Query string `json:"query"`

	// RepoPaths lists the paths of the repositories to search in.
	// When both repo paths and space paths are provided, the repo paths are used.
	RepoPaths []string `json:"repo_paths"`

	// SpacePaths lists the paths of the spaces to search in.
	SpacePaths []string `json:"space_paths"`

	// MaxResultCount is the upper bound on the number of results to return.
	MaxResultCount int `json:"max_result_count"`
}

// SearchResult is the outcome of a search: the per-file matches plus summary statistics.
type SearchResult struct {
	// FileMatches holds one entry per file that produced matches.
	FileMatches []FileMatch `json:"file_matches"`

	// Stats holds the counters reported for the search.
	Stats SearchStats `json:"stats"`
}

// SearchStats holds the counters reported alongside a search result.
type SearchStats struct {
	// TotalFiles is the reported file count (presumably files with matches; confirm against the searcher).
	TotalFiles int `json:"total_files"`

	// TotalMatches is the reported match count (presumably across all files; confirm against the searcher).
	TotalMatches int `json:"total_matches"`
}

// FileMatch groups the matches found within a single file.
type FileMatch struct {
	// FileName is the name of the file containing the matches.
	FileName string `json:"file_name"`

	// RepoID is the ID of the repository; the "-" tag keeps it out of the JSON encoding.
	RepoID int64 `json:"-"`

	// RepoPath is the path of the repository the file belongs to.
	RepoPath string `json:"repo_path"`

	// Language is the language reported for the file.
	Language string `json:"language"`

	// Matches holds the per-line matches within the file.
	Matches []Match `json:"matches"`
}

// Match holds the data of a single matching line.
type Match struct {
	// LineNum is the line number at which the match was found.
	LineNum int `json:"line_num"`

	// Fragments holds the matched fragments within the line.
	Fragments []Fragment `json:"fragments"`

	// Before holds the content of the line immediately preceding the matching line.
	Before string `json:"before"`

	// After holds the content of the line immediately following the matching line.
	After string `json:"after"`
}

// Fragment holds the data of a single contiguous match within a line.
type Fragment struct {
	// Pre is the text before the match within the line.
	Pre string `json:"pre"`

	// Match is the matched text itself.
	Match string `json:"match"`

	// Post is the text after the match within the line.
	Post string `json:"post"`
}