Add cache for common package queries (#22491)

This adds a cache for common package queries in `GetPackageDescriptor`.
Code which needs to process a list of packages benefits from this
change. This skips 350 queries in the package integration tests for
example.

---------

Co-authored-by: wxiaoguang <wxiaoguang@gmail.com>
This commit is contained in:
KN4CK3R 2025-04-13 11:40:36 +02:00 committed by GitHub
parent 4dca869ed1
commit a2651c14ce
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 150 additions and 208 deletions

View File

@ -11,6 +11,7 @@ import (
repo_model "code.gitea.io/gitea/models/repo" repo_model "code.gitea.io/gitea/models/repo"
user_model "code.gitea.io/gitea/models/user" user_model "code.gitea.io/gitea/models/user"
"code.gitea.io/gitea/modules/cache"
"code.gitea.io/gitea/modules/json" "code.gitea.io/gitea/modules/json"
"code.gitea.io/gitea/modules/packages/alpine" "code.gitea.io/gitea/modules/packages/alpine"
"code.gitea.io/gitea/modules/packages/arch" "code.gitea.io/gitea/modules/packages/arch"
@ -102,22 +103,26 @@ func (pd *PackageDescriptor) CalculateBlobSize() int64 {
// GetPackageDescriptor gets the package description for a version // GetPackageDescriptor gets the package description for a version
func GetPackageDescriptor(ctx context.Context, pv *PackageVersion) (*PackageDescriptor, error) { func GetPackageDescriptor(ctx context.Context, pv *PackageVersion) (*PackageDescriptor, error) {
p, err := GetPackageByID(ctx, pv.PackageID) return getPackageDescriptor(ctx, pv, cache.NewEphemeralCache())
}
func getPackageDescriptor(ctx context.Context, pv *PackageVersion, c *cache.EphemeralCache) (*PackageDescriptor, error) {
p, err := cache.GetWithEphemeralCache(ctx, c, "package", pv.PackageID, GetPackageByID)
if err != nil { if err != nil {
return nil, err return nil, err
} }
o, err := user_model.GetUserByID(ctx, p.OwnerID) o, err := cache.GetWithEphemeralCache(ctx, c, "user", p.OwnerID, user_model.GetUserByID)
if err != nil { if err != nil {
return nil, err return nil, err
} }
var repository *repo_model.Repository var repository *repo_model.Repository
if p.RepoID > 0 { if p.RepoID > 0 {
repository, err = repo_model.GetRepositoryByID(ctx, p.RepoID) repository, err = cache.GetWithEphemeralCache(ctx, c, "repo", p.RepoID, repo_model.GetRepositoryByID)
if err != nil && !repo_model.IsErrRepoNotExist(err) { if err != nil && !repo_model.IsErrRepoNotExist(err) {
return nil, err return nil, err
} }
} }
creator, err := user_model.GetUserByID(ctx, pv.CreatorID) creator, err := cache.GetWithEphemeralCache(ctx, c, "user", pv.CreatorID, user_model.GetUserByID)
if err != nil { if err != nil {
if errors.Is(err, util.ErrNotExist) { if errors.Is(err, util.ErrNotExist) {
creator = user_model.NewGhostUser() creator = user_model.NewGhostUser()
@ -145,10 +150,14 @@ func GetPackageDescriptor(ctx context.Context, pv *PackageVersion) (*PackageDesc
return nil, err return nil, err
} }
pfds, err := GetPackageFileDescriptors(ctx, pfs) pfds := make([]*PackageFileDescriptor, 0, len(pfs))
for _, pf := range pfs {
pfd, err := getPackageFileDescriptor(ctx, pf, c)
if err != nil { if err != nil {
return nil, err return nil, err
} }
pfds = append(pfds, pfd)
}
var metadata any var metadata any
switch p.Type { switch p.Type {
@ -221,7 +230,11 @@ func GetPackageDescriptor(ctx context.Context, pv *PackageVersion) (*PackageDesc
// GetPackageFileDescriptor gets a package file descriptor for a package file // GetPackageFileDescriptor gets a package file descriptor for a package file
func GetPackageFileDescriptor(ctx context.Context, pf *PackageFile) (*PackageFileDescriptor, error) { func GetPackageFileDescriptor(ctx context.Context, pf *PackageFile) (*PackageFileDescriptor, error) {
pb, err := GetBlobByID(ctx, pf.BlobID) return getPackageFileDescriptor(ctx, pf, cache.NewEphemeralCache())
}
func getPackageFileDescriptor(ctx context.Context, pf *PackageFile, c *cache.EphemeralCache) (*PackageFileDescriptor, error) {
pb, err := cache.GetWithEphemeralCache(ctx, c, "package_file_blob", pf.BlobID, GetBlobByID)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -251,9 +264,13 @@ func GetPackageFileDescriptors(ctx context.Context, pfs []*PackageFile) ([]*Pack
// GetPackageDescriptors gets the package descriptions for the versions // GetPackageDescriptors gets the package descriptions for the versions
func GetPackageDescriptors(ctx context.Context, pvs []*PackageVersion) ([]*PackageDescriptor, error) { func GetPackageDescriptors(ctx context.Context, pvs []*PackageVersion) ([]*PackageDescriptor, error) {
return getPackageDescriptors(ctx, pvs, cache.NewEphemeralCache())
}
func getPackageDescriptors(ctx context.Context, pvs []*PackageVersion, c *cache.EphemeralCache) ([]*PackageDescriptor, error) {
pds := make([]*PackageDescriptor, 0, len(pvs)) pds := make([]*PackageDescriptor, 0, len(pvs))
for _, pv := range pvs { for _, pv := range pvs {
pd, err := GetPackageDescriptor(ctx, pv) pd, err := getPackageDescriptor(ctx, pv, c)
if err != nil { if err != nil {
return nil, err return nil, err
} }

View File

@ -5,176 +5,39 @@ package cache
import ( import (
"context" "context"
"sync"
"time" "time"
"code.gitea.io/gitea/modules/log"
) )
// cacheContext is a context that can be used to cache data in a request level context type cacheContextKeyType struct{}
// This is useful for caching data that is expensive to calculate and is likely to be
// used multiple times in a request.
type cacheContext struct {
data map[any]map[any]any
lock sync.RWMutex
created time.Time
discard bool
}
func (cc *cacheContext) Get(tp, key any) any { var cacheContextKey = cacheContextKeyType{}
cc.lock.RLock()
defer cc.lock.RUnlock()
return cc.data[tp][key]
}
func (cc *cacheContext) Put(tp, key, value any) { // contextCacheLifetime is the max lifetime of context cache.
cc.lock.Lock() // Since context cache is used to cache data in a request level context, 5 minutes is enough.
defer cc.lock.Unlock() // If a context cache is used more than 5 minutes, it's probably abused.
const contextCacheLifetime = 5 * time.Minute
if cc.discard {
return
}
d := cc.data[tp]
if d == nil {
d = make(map[any]any)
cc.data[tp] = d
}
d[key] = value
}
func (cc *cacheContext) Delete(tp, key any) {
cc.lock.Lock()
defer cc.lock.Unlock()
delete(cc.data[tp], key)
}
func (cc *cacheContext) Discard() {
cc.lock.Lock()
defer cc.lock.Unlock()
cc.data = nil
cc.discard = true
}
func (cc *cacheContext) isDiscard() bool {
cc.lock.RLock()
defer cc.lock.RUnlock()
return cc.discard
}
// cacheContextLifetime is the max lifetime of cacheContext.
// Since cacheContext is used to cache data in a request level context, 5 minutes is enough.
// If a cacheContext is used more than 5 minutes, it's probably misuse.
const cacheContextLifetime = 5 * time.Minute
var timeNow = time.Now
func (cc *cacheContext) Expired() bool {
return timeNow().Sub(cc.created) > cacheContextLifetime
}
var cacheContextKey = struct{}{}
/*
Since there are both WithCacheContext and WithNoCacheContext,
it may be confusing when there is nesting.
Some cases to explain the design:
When:
- A, B or C means a cache context.
- A', B' or C' means a discard cache context.
- ctx means context.Backgrand().
- A(ctx) means a cache context with ctx as the parent context.
- B(A(ctx)) means a cache context with A(ctx) as the parent context.
- With is alias of WithCacheContext.
- WithNo is alias of WithNoCacheContext.
So:
- With(ctx) -> A(ctx)
- With(With(ctx)) -> A(ctx), not B(A(ctx)), always reuse parent cache context if possible.
- With(With(With(ctx))) -> A(ctx), not C(B(A(ctx))), ditto.
- WithNo(ctx) -> ctx, not A'(ctx), don't create new cache context if we don't have to.
- WithNo(With(ctx)) -> A'(ctx)
- WithNo(WithNo(With(ctx))) -> A'(ctx), not B'(A'(ctx)), don't create new cache context if we don't have to.
- With(WithNo(With(ctx))) -> B(A'(ctx)), not A(ctx), never reuse a discard cache context.
- WithNo(With(WithNo(With(ctx)))) -> B'(A'(ctx))
- With(WithNo(With(WithNo(With(ctx))))) -> C(B'(A'(ctx))), so there's always only one not-discard cache context.
*/
func WithCacheContext(ctx context.Context) context.Context { func WithCacheContext(ctx context.Context) context.Context {
if c, ok := ctx.Value(cacheContextKey).(*cacheContext); ok { if c := GetContextCache(ctx); c != nil {
if !c.isDiscard() {
// reuse parent context
return ctx return ctx
} }
} return context.WithValue(ctx, cacheContextKey, NewEphemeralCache(contextCacheLifetime))
// FIXME: review the use of this nolint directive
return context.WithValue(ctx, cacheContextKey, &cacheContext{ //nolint:staticcheck
data: make(map[any]map[any]any),
created: timeNow(),
})
} }
func WithNoCacheContext(ctx context.Context) context.Context { func GetContextCache(ctx context.Context) *EphemeralCache {
if c, ok := ctx.Value(cacheContextKey).(*cacheContext); ok { c, _ := ctx.Value(cacheContextKey).(*EphemeralCache)
// The caller want to run long-life tasks, but the parent context is a cache context. return c
// So we should disable and clean the cache data, or it will be kept in memory for a long time.
c.Discard()
return ctx
}
return ctx
}
func GetContextData(ctx context.Context, tp, key any) any {
if c, ok := ctx.Value(cacheContextKey).(*cacheContext); ok {
if c.Expired() {
// The warning means that the cache context is misused for long-life task,
// it can be resolved with WithNoCacheContext(ctx).
log.Warn("cache context is expired, is highly likely to be misused for long-life tasks: %v", c)
return nil
}
return c.Get(tp, key)
}
return nil
}
func SetContextData(ctx context.Context, tp, key, value any) {
if c, ok := ctx.Value(cacheContextKey).(*cacheContext); ok {
if c.Expired() {
// The warning means that the cache context is misused for long-life task,
// it can be resolved with WithNoCacheContext(ctx).
log.Warn("cache context is expired, is highly likely to be misused for long-life tasks: %v", c)
return
}
c.Put(tp, key, value)
return
}
}
func RemoveContextData(ctx context.Context, tp, key any) {
if c, ok := ctx.Value(cacheContextKey).(*cacheContext); ok {
if c.Expired() {
// The warning means that the cache context is misused for long-life task,
// it can be resolved with WithNoCacheContext(ctx).
log.Warn("cache context is expired, is highly likely to be misused for long-life tasks: %v", c)
return
}
c.Delete(tp, key)
}
} }
// GetWithContextCache returns the cache value of the given key in the given context. // GetWithContextCache returns the cache value of the given key in the given context.
// FIXME: in some cases, the "context cache" should not be used, because it has uncontrollable behaviors
// For example, these calls:
// * GetWithContextCache(TargetID) -> OtherCodeCreateModel(TargetID) -> GetWithContextCache(TargetID)
// Will cause the second call is not able to get the correct created target.
// UNLESS it is certain that the target won't be changed during the request, DO NOT use it.
func GetWithContextCache[T, K any](ctx context.Context, groupKey string, targetKey K, f func(context.Context, K) (T, error)) (T, error) { func GetWithContextCache[T, K any](ctx context.Context, groupKey string, targetKey K, f func(context.Context, K) (T, error)) (T, error) {
v := GetContextData(ctx, groupKey, targetKey) if c := GetContextCache(ctx); c != nil {
if vv, ok := v.(T); ok { return GetWithEphemeralCache(ctx, c, groupKey, targetKey, f)
return vv, nil
} }
t, err := f(ctx, targetKey) return f(ctx, targetKey)
if err != nil {
return t, err
}
SetContextData(ctx, groupKey, targetKey, t)
return t, nil
} }

View File

@ -8,27 +8,29 @@ import (
"testing" "testing"
"time" "time"
"code.gitea.io/gitea/modules/test"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
) )
func TestWithCacheContext(t *testing.T) { func TestWithCacheContext(t *testing.T) {
ctx := WithCacheContext(t.Context()) ctx := WithCacheContext(t.Context())
c := GetContextCache(ctx)
v := GetContextData(ctx, "empty_field", "my_config1") v, _ := c.Get("empty_field", "my_config1")
assert.Nil(t, v) assert.Nil(t, v)
const field = "system_setting" const field = "system_setting"
v = GetContextData(ctx, field, "my_config1") v, _ = c.Get(field, "my_config1")
assert.Nil(t, v) assert.Nil(t, v)
SetContextData(ctx, field, "my_config1", 1) c.Put(field, "my_config1", 1)
v = GetContextData(ctx, field, "my_config1") v, _ = c.Get(field, "my_config1")
assert.NotNil(t, v) assert.NotNil(t, v)
assert.Equal(t, 1, v.(int)) assert.Equal(t, 1, v.(int))
RemoveContextData(ctx, field, "my_config1") c.Delete(field, "my_config1")
RemoveContextData(ctx, field, "my_config2") // remove a non-exist key c.Delete(field, "my_config2") // remove a non-exist key
v = GetContextData(ctx, field, "my_config1") v, _ = c.Get(field, "my_config1")
assert.Nil(t, v) assert.Nil(t, v)
vInt, err := GetWithContextCache(ctx, field, "my_config1", func(context.Context, string) (int, error) { vInt, err := GetWithContextCache(ctx, field, "my_config1", func(context.Context, string) (int, error) {
@ -37,42 +39,12 @@ func TestWithCacheContext(t *testing.T) {
assert.NoError(t, err) assert.NoError(t, err)
assert.Equal(t, 1, vInt) assert.Equal(t, 1, vInt)
v = GetContextData(ctx, field, "my_config1") v, _ = c.Get(field, "my_config1")
assert.EqualValues(t, 1, v) assert.EqualValues(t, 1, v)
now := timeNow defer test.MockVariableValue(&timeNow, func() time.Time {
defer func() { return time.Now().Add(5 * time.Minute)
timeNow = now })()
}() v, _ = c.Get(field, "my_config1")
timeNow = func() time.Time {
return now().Add(5 * time.Minute)
}
v = GetContextData(ctx, field, "my_config1")
assert.Nil(t, v) assert.Nil(t, v)
} }
func TestWithNoCacheContext(t *testing.T) {
ctx := t.Context()
const field = "system_setting"
v := GetContextData(ctx, field, "my_config1")
assert.Nil(t, v)
SetContextData(ctx, field, "my_config1", 1)
v = GetContextData(ctx, field, "my_config1")
assert.Nil(t, v) // still no cache
ctx = WithCacheContext(ctx)
v = GetContextData(ctx, field, "my_config1")
assert.Nil(t, v)
SetContextData(ctx, field, "my_config1", 1)
v = GetContextData(ctx, field, "my_config1")
assert.NotNil(t, v)
ctx = WithNoCacheContext(ctx)
v = GetContextData(ctx, field, "my_config1")
assert.Nil(t, v)
SetContextData(ctx, field, "my_config1", 1)
v = GetContextData(ctx, field, "my_config1")
assert.Nil(t, v) // still no cache
}

90
modules/cache/ephemeral.go vendored Normal file
View File

@ -0,0 +1,90 @@
// Copyright 2025 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package cache
import (
"context"
"sync"
"time"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/util"
)
// EphemeralCache is a cache that can be used to store data in a request level context
// This is useful for caching data that is expensive to calculate and is likely to be
// used multiple times in a request.
type EphemeralCache struct {
data map[any]map[any]any
lock sync.RWMutex
created time.Time
checkLifeTime time.Duration
}
var timeNow = time.Now
func NewEphemeralCache(checkLifeTime ...time.Duration) *EphemeralCache {
return &EphemeralCache{
data: make(map[any]map[any]any),
created: timeNow(),
checkLifeTime: util.OptionalArg(checkLifeTime, 0),
}
}
func (cc *EphemeralCache) checkExceededLifeTime(tp, key any) bool {
if cc.checkLifeTime > 0 && timeNow().Sub(cc.created) > cc.checkLifeTime {
log.Warn("EphemeralCache is expired, is highly likely to be abused for long-life tasks: %v, %v", tp, key)
return true
}
return false
}
func (cc *EphemeralCache) Get(tp, key any) (any, bool) {
if cc.checkExceededLifeTime(tp, key) {
return nil, false
}
cc.lock.RLock()
defer cc.lock.RUnlock()
ret, ok := cc.data[tp][key]
return ret, ok
}
func (cc *EphemeralCache) Put(tp, key, value any) {
if cc.checkExceededLifeTime(tp, key) {
return
}
cc.lock.Lock()
defer cc.lock.Unlock()
d := cc.data[tp]
if d == nil {
d = make(map[any]any)
cc.data[tp] = d
}
d[key] = value
}
func (cc *EphemeralCache) Delete(tp, key any) {
if cc.checkExceededLifeTime(tp, key) {
return
}
cc.lock.Lock()
defer cc.lock.Unlock()
delete(cc.data[tp], key)
}
func GetWithEphemeralCache[T, K any](ctx context.Context, c *EphemeralCache, groupKey string, targetKey K, f func(context.Context, K) (T, error)) (T, error) {
v, has := c.Get(groupKey, targetKey)
if vv, ok := v.(T); has && ok {
return vv, nil
}
t, err := f(ctx, targetKey)
if err != nil {
return t, err
}
c.Put(groupKey, targetKey, t)
return t, nil
}