From a2651c14ce5d9752c043b50157e83e30bf46d958 Mon Sep 17 00:00:00 2001 From: KN4CK3R Date: Sun, 13 Apr 2025 11:40:36 +0200 Subject: [PATCH] Add cache for common package queries (#22491) This adds a cache for common package queries in `GetPackageDescriptor`. Code which needs to process a list of packages benefits from this change. This skips 350 queries in the package integration tests for example. --------- Co-authored-by: wxiaoguang --- models/packages/descriptor.go | 35 +++++-- modules/cache/context.go | 175 ++++------------------------------ modules/cache/context_test.go | 58 +++-------- modules/cache/ephemeral.go | 90 +++++++++++++++++ 4 files changed, 150 insertions(+), 208 deletions(-) create mode 100644 modules/cache/ephemeral.go diff --git a/models/packages/descriptor.go b/models/packages/descriptor.go index c97bd46c9e..1ea181c723 100644 --- a/models/packages/descriptor.go +++ b/models/packages/descriptor.go @@ -11,6 +11,7 @@ import ( repo_model "code.gitea.io/gitea/models/repo" user_model "code.gitea.io/gitea/models/user" + "code.gitea.io/gitea/modules/cache" "code.gitea.io/gitea/modules/json" "code.gitea.io/gitea/modules/packages/alpine" "code.gitea.io/gitea/modules/packages/arch" @@ -102,22 +103,26 @@ func (pd *PackageDescriptor) CalculateBlobSize() int64 { // GetPackageDescriptor gets the package description for a version func GetPackageDescriptor(ctx context.Context, pv *PackageVersion) (*PackageDescriptor, error) { - p, err := GetPackageByID(ctx, pv.PackageID) + return getPackageDescriptor(ctx, pv, cache.NewEphemeralCache()) +} + +func getPackageDescriptor(ctx context.Context, pv *PackageVersion, c *cache.EphemeralCache) (*PackageDescriptor, error) { + p, err := cache.GetWithEphemeralCache(ctx, c, "package", pv.PackageID, GetPackageByID) if err != nil { return nil, err } - o, err := user_model.GetUserByID(ctx, p.OwnerID) + o, err := cache.GetWithEphemeralCache(ctx, c, "user", p.OwnerID, user_model.GetUserByID) if err != nil { return nil, err } var repository *repo_model.Repository if p.RepoID > 0 { - repository, err = repo_model.GetRepositoryByID(ctx, p.RepoID) + repository, err = cache.GetWithEphemeralCache(ctx, c, "repo", p.RepoID, repo_model.GetRepositoryByID) if err != nil && !repo_model.IsErrRepoNotExist(err) { return nil, err } } - creator, err := user_model.GetUserByID(ctx, pv.CreatorID) + creator, err := cache.GetWithEphemeralCache(ctx, c, "user", pv.CreatorID, user_model.GetUserByID) if err != nil { if errors.Is(err, util.ErrNotExist) { creator = user_model.NewGhostUser() @@ -145,9 +150,13 @@ func GetPackageDescriptor(ctx context.Context, pv *PackageVersion) (*PackageDesc return nil, err } - pfds, err := GetPackageFileDescriptors(ctx, pfs) - if err != nil { - return nil, err + pfds := make([]*PackageFileDescriptor, 0, len(pfs)) + for _, pf := range pfs { + pfd, err := getPackageFileDescriptor(ctx, pf, c) + if err != nil { + return nil, err + } + pfds = append(pfds, pfd) } var metadata any @@ -221,7 +230,11 @@ func GetPackageDescriptor(ctx context.Context, pv *PackageVersion) (*PackageDesc // GetPackageFileDescriptor gets a package file descriptor for a package file func GetPackageFileDescriptor(ctx context.Context, pf *PackageFile) (*PackageFileDescriptor, error) { - pb, err := GetBlobByID(ctx, pf.BlobID) + return getPackageFileDescriptor(ctx, pf, cache.NewEphemeralCache()) +} + +func getPackageFileDescriptor(ctx context.Context, pf *PackageFile, c *cache.EphemeralCache) (*PackageFileDescriptor, error) { + pb, err := cache.GetWithEphemeralCache(ctx, c, "package_file_blob", pf.BlobID, GetBlobByID) if err != nil { return nil, err } @@ -251,9 +264,13 @@ func GetPackageFileDescriptors(ctx context.Context, pfs []*PackageFile) ([]*Pack // GetPackageDescriptors gets the package descriptions for the versions func GetPackageDescriptors(ctx context.Context, pvs []*PackageVersion) ([]*PackageDescriptor, error) { + return getPackageDescriptors(ctx, pvs, cache.NewEphemeralCache()) +} + +func getPackageDescriptors(ctx context.Context, pvs []*PackageVersion, c *cache.EphemeralCache) ([]*PackageDescriptor, error) { pds := make([]*PackageDescriptor, 0, len(pvs)) for _, pv := range pvs { - pd, err := GetPackageDescriptor(ctx, pv) + pd, err := getPackageDescriptor(ctx, pv, c) if err != nil { return nil, err } diff --git a/modules/cache/context.go b/modules/cache/context.go index 85eb9e6790..23f7c23a52 100644 --- a/modules/cache/context.go +++ b/modules/cache/context.go @@ -5,176 +5,39 @@ package cache import ( "context" - "sync" "time" - - "code.gitea.io/gitea/modules/log" ) -// cacheContext is a context that can be used to cache data in a request level context -// This is useful for caching data that is expensive to calculate and is likely to be -// used multiple times in a request. -type cacheContext struct { - data map[any]map[any]any - lock sync.RWMutex - created time.Time - discard bool -} +type cacheContextKeyType struct{} -func (cc *cacheContext) Get(tp, key any) any { - cc.lock.RLock() - defer cc.lock.RUnlock() - return cc.data[tp][key] -} +var cacheContextKey = cacheContextKeyType{} -func (cc *cacheContext) Put(tp, key, value any) { - cc.lock.Lock() - defer cc.lock.Unlock() - - if cc.discard { - return - } - - d := cc.data[tp] - if d == nil { - d = make(map[any]any) - cc.data[tp] = d - } - d[key] = value -} - -func (cc *cacheContext) Delete(tp, key any) { - cc.lock.Lock() - defer cc.lock.Unlock() - delete(cc.data[tp], key) -} - -func (cc *cacheContext) Discard() { - cc.lock.Lock() - defer cc.lock.Unlock() - cc.data = nil - cc.discard = true -} - -func (cc *cacheContext) isDiscard() bool { - cc.lock.RLock() - defer cc.lock.RUnlock() - return cc.discard -} - -// cacheContextLifetime is the max lifetime of cacheContext. -// Since cacheContext is used to cache data in a request level context, 5 minutes is enough. -// If a cacheContext is used more than 5 minutes, it's probably misuse. -const cacheContextLifetime = 5 * time.Minute - -var timeNow = time.Now - -func (cc *cacheContext) Expired() bool { - return timeNow().Sub(cc.created) > cacheContextLifetime -} - -var cacheContextKey = struct{}{} - -/* -Since there are both WithCacheContext and WithNoCacheContext, -it may be confusing when there is nesting. - -Some cases to explain the design: - -When: -- A, B or C means a cache context. -- A', B' or C' means a discard cache context. -- ctx means context.Backgrand(). -- A(ctx) means a cache context with ctx as the parent context. -- B(A(ctx)) means a cache context with A(ctx) as the parent context. -- With is alias of WithCacheContext. -- WithNo is alias of WithNoCacheContext. - -So: -- With(ctx) -> A(ctx) -- With(With(ctx)) -> A(ctx), not B(A(ctx)), always reuse parent cache context if possible. -- With(With(With(ctx))) -> A(ctx), not C(B(A(ctx))), ditto. -- WithNo(ctx) -> ctx, not A'(ctx), don't create new cache context if we don't have to. -- WithNo(With(ctx)) -> A'(ctx) -- WithNo(WithNo(With(ctx))) -> A'(ctx), not B'(A'(ctx)), don't create new cache context if we don't have to. -- With(WithNo(With(ctx))) -> B(A'(ctx)), not A(ctx), never reuse a discard cache context. -- WithNo(With(WithNo(With(ctx)))) -> B'(A'(ctx)) -- With(WithNo(With(WithNo(With(ctx))))) -> C(B'(A'(ctx))), so there's always only one not-discard cache context. -*/ +// contextCacheLifetime is the max lifetime of context cache. +// Since context cache is used to cache data in a request level context, 5 minutes is enough. +// If a context cache is used more than 5 minutes, it's probably abused. +const contextCacheLifetime = 5 * time.Minute func WithCacheContext(ctx context.Context) context.Context { - if c, ok := ctx.Value(cacheContextKey).(*cacheContext); ok { - if !c.isDiscard() { - // reuse parent context - return ctx - } - } - // FIXME: review the use of this nolint directive - return context.WithValue(ctx, cacheContextKey, &cacheContext{ //nolint:staticcheck - data: make(map[any]map[any]any), - created: timeNow(), - }) -} - -func WithNoCacheContext(ctx context.Context) context.Context { - if c, ok := ctx.Value(cacheContextKey).(*cacheContext); ok { - // The caller want to run long-life tasks, but the parent context is a cache context. - // So we should disable and clean the cache data, or it will be kept in memory for a long time. - c.Discard() + if c := GetContextCache(ctx); c != nil { return ctx } - - return ctx + return context.WithValue(ctx, cacheContextKey, NewEphemeralCache(contextCacheLifetime)) } -func GetContextData(ctx context.Context, tp, key any) any { - if c, ok := ctx.Value(cacheContextKey).(*cacheContext); ok { - if c.Expired() { - // The warning means that the cache context is misused for long-life task, - // it can be resolved with WithNoCacheContext(ctx). - log.Warn("cache context is expired, is highly likely to be misused for long-life tasks: %v", c) - return nil - } - return c.Get(tp, key) - } - return nil -} - -func SetContextData(ctx context.Context, tp, key, value any) { - if c, ok := ctx.Value(cacheContextKey).(*cacheContext); ok { - if c.Expired() { - // The warning means that the cache context is misused for long-life task, - // it can be resolved with WithNoCacheContext(ctx). - log.Warn("cache context is expired, is highly likely to be misused for long-life tasks: %v", c) - return - } - c.Put(tp, key, value) - return - } -} - -func RemoveContextData(ctx context.Context, tp, key any) { - if c, ok := ctx.Value(cacheContextKey).(*cacheContext); ok { - if c.Expired() { - // The warning means that the cache context is misused for long-life task, - // it can be resolved with WithNoCacheContext(ctx). - log.Warn("cache context is expired, is highly likely to be misused for long-life tasks: %v", c) - return - } - c.Delete(tp, key) - } +func GetContextCache(ctx context.Context) *EphemeralCache { + c, _ := ctx.Value(cacheContextKey).(*EphemeralCache) + return c } // GetWithContextCache returns the cache value of the given key in the given context. +// FIXME: in some cases, the "context cache" should not be used, because it has uncontrollable behaviors +// For example, these calls: +// * GetWithContextCache(TargetID) -> OtherCodeCreateModel(TargetID) -> GetWithContextCache(TargetID) +// Will cause the second call is not able to get the correct created target. +// UNLESS it is certain that the target won't be changed during the request, DO NOT use it. func GetWithContextCache[T, K any](ctx context.Context, groupKey string, targetKey K, f func(context.Context, K) (T, error)) (T, error) { - v := GetContextData(ctx, groupKey, targetKey) - if vv, ok := v.(T); ok { - return vv, nil + if c := GetContextCache(ctx); c != nil { + return GetWithEphemeralCache(ctx, c, groupKey, targetKey, f) } - t, err := f(ctx, targetKey) - if err != nil { - return t, err - } - SetContextData(ctx, groupKey, targetKey, t) - return t, nil + return f(ctx, targetKey) } diff --git a/modules/cache/context_test.go b/modules/cache/context_test.go index 23dd789dbc..8371c2b908 100644 --- a/modules/cache/context_test.go +++ b/modules/cache/context_test.go @@ -8,27 +8,29 @@ import ( "testing" "time" + "code.gitea.io/gitea/modules/test" + "github.com/stretchr/testify/assert" ) func TestWithCacheContext(t *testing.T) { ctx := WithCacheContext(t.Context()) - - v := GetContextData(ctx, "empty_field", "my_config1") + c := GetContextCache(ctx) + v, _ := c.Get("empty_field", "my_config1") assert.Nil(t, v) const field = "system_setting" - v = GetContextData(ctx, field, "my_config1") + v, _ = c.Get(field, "my_config1") assert.Nil(t, v) - SetContextData(ctx, field, "my_config1", 1) - v = GetContextData(ctx, field, "my_config1") + c.Put(field, "my_config1", 1) + v, _ = c.Get(field, "my_config1") assert.NotNil(t, v) assert.Equal(t, 1, v.(int)) - RemoveContextData(ctx, field, "my_config1") - RemoveContextData(ctx, field, "my_config2") // remove a non-exist key + c.Delete(field, "my_config1") + c.Delete(field, "my_config2") // remove a non-exist key - v = GetContextData(ctx, field, "my_config1") + v, _ = c.Get(field, "my_config1") assert.Nil(t, v) vInt, err := GetWithContextCache(ctx, field, "my_config1", func(context.Context, string) (int, error) { @@ -37,42 +39,12 @@ func TestWithCacheContext(t *testing.T) { assert.NoError(t, err) assert.Equal(t, 1, vInt) - v = GetContextData(ctx, field, "my_config1") + v, _ = c.Get(field, "my_config1") assert.EqualValues(t, 1, v) - now := timeNow - defer func() { - timeNow = now - }() - timeNow = func() time.Time { - return now().Add(5 * time.Minute) - } - v = GetContextData(ctx, field, "my_config1") + defer test.MockVariableValue(&timeNow, func() time.Time { + return time.Now().Add(5 * time.Minute) + })() + v, _ = c.Get(field, "my_config1") assert.Nil(t, v) } - -func TestWithNoCacheContext(t *testing.T) { - ctx := t.Context() - - const field = "system_setting" - - v := GetContextData(ctx, field, "my_config1") - assert.Nil(t, v) - SetContextData(ctx, field, "my_config1", 1) - v = GetContextData(ctx, field, "my_config1") - assert.Nil(t, v) // still no cache - - ctx = WithCacheContext(ctx) - v = GetContextData(ctx, field, "my_config1") - assert.Nil(t, v) - SetContextData(ctx, field, "my_config1", 1) - v = GetContextData(ctx, field, "my_config1") - assert.NotNil(t, v) - - ctx = WithNoCacheContext(ctx) - v = GetContextData(ctx, field, "my_config1") - assert.Nil(t, v) - SetContextData(ctx, field, "my_config1", 1) - v = GetContextData(ctx, field, "my_config1") - assert.Nil(t, v) // still no cache -} diff --git a/modules/cache/ephemeral.go b/modules/cache/ephemeral.go new file mode 100644 index 0000000000..6996010ac4 --- /dev/null +++ b/modules/cache/ephemeral.go @@ -0,0 +1,90 @@ +// Copyright 2025 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package cache + +import ( + "context" + "sync" + "time" + + "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/util" +) + +// EphemeralCache is a cache that can be used to store data in a request level context +// This is useful for caching data that is expensive to calculate and is likely to be +// used multiple times in a request. +type EphemeralCache struct { + data map[any]map[any]any + lock sync.RWMutex + created time.Time + checkLifeTime time.Duration +} + +var timeNow = time.Now + +func NewEphemeralCache(checkLifeTime ...time.Duration) *EphemeralCache { + return &EphemeralCache{ + data: make(map[any]map[any]any), + created: timeNow(), + checkLifeTime: util.OptionalArg(checkLifeTime, 0), + } +} + +func (cc *EphemeralCache) checkExceededLifeTime(tp, key any) bool { + if cc.checkLifeTime > 0 && timeNow().Sub(cc.created) > cc.checkLifeTime { + log.Warn("EphemeralCache is expired, is highly likely to be abused for long-life tasks: %v, %v", tp, key) + return true + } + return false +} + +func (cc *EphemeralCache) Get(tp, key any) (any, bool) { + if cc.checkExceededLifeTime(tp, key) { + return nil, false + } + cc.lock.RLock() + defer cc.lock.RUnlock() + ret, ok := cc.data[tp][key] + return ret, ok +} + +func (cc *EphemeralCache) Put(tp, key, value any) { + if cc.checkExceededLifeTime(tp, key) { + return + } + + cc.lock.Lock() + defer cc.lock.Unlock() + + d := cc.data[tp] + if d == nil { + d = make(map[any]any) + cc.data[tp] = d + } + d[key] = value +} + +func (cc *EphemeralCache) Delete(tp, key any) { + if cc.checkExceededLifeTime(tp, key) { + return + } + + cc.lock.Lock() + defer cc.lock.Unlock() + delete(cc.data[tp], key) +} + +func GetWithEphemeralCache[T, K any](ctx context.Context, c *EphemeralCache, groupKey string, targetKey K, f func(context.Context, K) (T, error)) (T, error) { + v, has := c.Get(groupKey, targetKey) + if vv, ok := v.(T); has && ok { + return vv, nil + } + t, err := f(ctx, targetKey) + if err != nil { + return t, err + } + c.Put(groupKey, targetKey, t) + return t, nil +}