Add cache for common package queries (#22491)

This adds a cache for common package queries in `GetPackageDescriptor`.
Code which needs to process a list of packages benefits from this
change. This skips 350 queries in the package integration tests for
example.

---------

Co-authored-by: wxiaoguang <wxiaoguang@gmail.com>
This commit is contained in:
KN4CK3R 2025-04-13 11:40:36 +02:00 committed by GitHub
parent 4dca869ed1
commit a2651c14ce
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 150 additions and 208 deletions

View File

@ -11,6 +11,7 @@ import (
repo_model "code.gitea.io/gitea/models/repo"
user_model "code.gitea.io/gitea/models/user"
"code.gitea.io/gitea/modules/cache"
"code.gitea.io/gitea/modules/json"
"code.gitea.io/gitea/modules/packages/alpine"
"code.gitea.io/gitea/modules/packages/arch"
@ -102,22 +103,26 @@ func (pd *PackageDescriptor) CalculateBlobSize() int64 {
// GetPackageDescriptor gets the package description for a version
func GetPackageDescriptor(ctx context.Context, pv *PackageVersion) (*PackageDescriptor, error) {
p, err := GetPackageByID(ctx, pv.PackageID)
return getPackageDescriptor(ctx, pv, cache.NewEphemeralCache())
}
func getPackageDescriptor(ctx context.Context, pv *PackageVersion, c *cache.EphemeralCache) (*PackageDescriptor, error) {
p, err := cache.GetWithEphemeralCache(ctx, c, "package", pv.PackageID, GetPackageByID)
if err != nil {
return nil, err
}
o, err := user_model.GetUserByID(ctx, p.OwnerID)
o, err := cache.GetWithEphemeralCache(ctx, c, "user", p.OwnerID, user_model.GetUserByID)
if err != nil {
return nil, err
}
var repository *repo_model.Repository
if p.RepoID > 0 {
repository, err = repo_model.GetRepositoryByID(ctx, p.RepoID)
repository, err = cache.GetWithEphemeralCache(ctx, c, "repo", p.RepoID, repo_model.GetRepositoryByID)
if err != nil && !repo_model.IsErrRepoNotExist(err) {
return nil, err
}
}
creator, err := user_model.GetUserByID(ctx, pv.CreatorID)
creator, err := cache.GetWithEphemeralCache(ctx, c, "user", pv.CreatorID, user_model.GetUserByID)
if err != nil {
if errors.Is(err, util.ErrNotExist) {
creator = user_model.NewGhostUser()
@ -145,9 +150,13 @@ func GetPackageDescriptor(ctx context.Context, pv *PackageVersion) (*PackageDesc
return nil, err
}
pfds, err := GetPackageFileDescriptors(ctx, pfs)
if err != nil {
return nil, err
pfds := make([]*PackageFileDescriptor, 0, len(pfs))
for _, pf := range pfs {
pfd, err := getPackageFileDescriptor(ctx, pf, c)
if err != nil {
return nil, err
}
pfds = append(pfds, pfd)
}
var metadata any
@ -221,7 +230,11 @@ func GetPackageDescriptor(ctx context.Context, pv *PackageVersion) (*PackageDesc
// GetPackageFileDescriptor gets a package file descriptor for a package file
func GetPackageFileDescriptor(ctx context.Context, pf *PackageFile) (*PackageFileDescriptor, error) {
pb, err := GetBlobByID(ctx, pf.BlobID)
return getPackageFileDescriptor(ctx, pf, cache.NewEphemeralCache())
}
func getPackageFileDescriptor(ctx context.Context, pf *PackageFile, c *cache.EphemeralCache) (*PackageFileDescriptor, error) {
pb, err := cache.GetWithEphemeralCache(ctx, c, "package_file_blob", pf.BlobID, GetBlobByID)
if err != nil {
return nil, err
}
@ -251,9 +264,13 @@ func GetPackageFileDescriptors(ctx context.Context, pfs []*PackageFile) ([]*Pack
// GetPackageDescriptors gets the package descriptions for the versions
func GetPackageDescriptors(ctx context.Context, pvs []*PackageVersion) ([]*PackageDescriptor, error) {
return getPackageDescriptors(ctx, pvs, cache.NewEphemeralCache())
}
func getPackageDescriptors(ctx context.Context, pvs []*PackageVersion, c *cache.EphemeralCache) ([]*PackageDescriptor, error) {
pds := make([]*PackageDescriptor, 0, len(pvs))
for _, pv := range pvs {
pd, err := GetPackageDescriptor(ctx, pv)
pd, err := getPackageDescriptor(ctx, pv, c)
if err != nil {
return nil, err
}

View File

@ -5,176 +5,39 @@ package cache
import (
"context"
"sync"
"time"
"code.gitea.io/gitea/modules/log"
)
// cacheContext is a context that can be used to cache data in a request level context
// This is useful for caching data that is expensive to calculate and is likely to be
// used multiple times in a request.
type cacheContext struct {
data map[any]map[any]any
lock sync.RWMutex
created time.Time
discard bool
}
type cacheContextKeyType struct{}
func (cc *cacheContext) Get(tp, key any) any {
cc.lock.RLock()
defer cc.lock.RUnlock()
return cc.data[tp][key]
}
var cacheContextKey = cacheContextKeyType{}
func (cc *cacheContext) Put(tp, key, value any) {
cc.lock.Lock()
defer cc.lock.Unlock()
if cc.discard {
return
}
d := cc.data[tp]
if d == nil {
d = make(map[any]any)
cc.data[tp] = d
}
d[key] = value
}
func (cc *cacheContext) Delete(tp, key any) {
cc.lock.Lock()
defer cc.lock.Unlock()
delete(cc.data[tp], key)
}
func (cc *cacheContext) Discard() {
cc.lock.Lock()
defer cc.lock.Unlock()
cc.data = nil
cc.discard = true
}
func (cc *cacheContext) isDiscard() bool {
cc.lock.RLock()
defer cc.lock.RUnlock()
return cc.discard
}
// cacheContextLifetime is the max lifetime of cacheContext.
// Since cacheContext is used to cache data in a request level context, 5 minutes is enough.
// If a cacheContext is used more than 5 minutes, it's probably misuse.
const cacheContextLifetime = 5 * time.Minute
var timeNow = time.Now
func (cc *cacheContext) Expired() bool {
return timeNow().Sub(cc.created) > cacheContextLifetime
}
var cacheContextKey = struct{}{}
/*
Since there are both WithCacheContext and WithNoCacheContext,
it may be confusing when there is nesting.
Some cases to explain the design:
When:
- A, B or C means a cache context.
- A', B' or C' means a discard cache context.
- ctx means context.Backgrand().
- A(ctx) means a cache context with ctx as the parent context.
- B(A(ctx)) means a cache context with A(ctx) as the parent context.
- With is alias of WithCacheContext.
- WithNo is alias of WithNoCacheContext.
So:
- With(ctx) -> A(ctx)
- With(With(ctx)) -> A(ctx), not B(A(ctx)), always reuse parent cache context if possible.
- With(With(With(ctx))) -> A(ctx), not C(B(A(ctx))), ditto.
- WithNo(ctx) -> ctx, not A'(ctx), don't create new cache context if we don't have to.
- WithNo(With(ctx)) -> A'(ctx)
- WithNo(WithNo(With(ctx))) -> A'(ctx), not B'(A'(ctx)), don't create new cache context if we don't have to.
- With(WithNo(With(ctx))) -> B(A'(ctx)), not A(ctx), never reuse a discard cache context.
- WithNo(With(WithNo(With(ctx)))) -> B'(A'(ctx))
- With(WithNo(With(WithNo(With(ctx))))) -> C(B'(A'(ctx))), so there's always only one not-discard cache context.
*/
// contextCacheLifetime is the max lifetime of context cache.
// Since context cache is used to cache data in a request level context, 5 minutes is enough.
// If a context cache is used more than 5 minutes, it's probably abused.
const contextCacheLifetime = 5 * time.Minute
func WithCacheContext(ctx context.Context) context.Context {
if c, ok := ctx.Value(cacheContextKey).(*cacheContext); ok {
if !c.isDiscard() {
// reuse parent context
return ctx
}
}
// FIXME: review the use of this nolint directive
return context.WithValue(ctx, cacheContextKey, &cacheContext{ //nolint:staticcheck
data: make(map[any]map[any]any),
created: timeNow(),
})
}
func WithNoCacheContext(ctx context.Context) context.Context {
if c, ok := ctx.Value(cacheContextKey).(*cacheContext); ok {
// The caller want to run long-life tasks, but the parent context is a cache context.
// So we should disable and clean the cache data, or it will be kept in memory for a long time.
c.Discard()
if c := GetContextCache(ctx); c != nil {
return ctx
}
return ctx
return context.WithValue(ctx, cacheContextKey, NewEphemeralCache(contextCacheLifetime))
}
func GetContextData(ctx context.Context, tp, key any) any {
if c, ok := ctx.Value(cacheContextKey).(*cacheContext); ok {
if c.Expired() {
// The warning means that the cache context is misused for long-life task,
// it can be resolved with WithNoCacheContext(ctx).
log.Warn("cache context is expired, is highly likely to be misused for long-life tasks: %v", c)
return nil
}
return c.Get(tp, key)
}
return nil
}
func SetContextData(ctx context.Context, tp, key, value any) {
if c, ok := ctx.Value(cacheContextKey).(*cacheContext); ok {
if c.Expired() {
// The warning means that the cache context is misused for long-life task,
// it can be resolved with WithNoCacheContext(ctx).
log.Warn("cache context is expired, is highly likely to be misused for long-life tasks: %v", c)
return
}
c.Put(tp, key, value)
return
}
}
func RemoveContextData(ctx context.Context, tp, key any) {
if c, ok := ctx.Value(cacheContextKey).(*cacheContext); ok {
if c.Expired() {
// The warning means that the cache context is misused for long-life task,
// it can be resolved with WithNoCacheContext(ctx).
log.Warn("cache context is expired, is highly likely to be misused for long-life tasks: %v", c)
return
}
c.Delete(tp, key)
}
func GetContextCache(ctx context.Context) *EphemeralCache {
c, _ := ctx.Value(cacheContextKey).(*EphemeralCache)
return c
}
// GetWithContextCache returns the cache value of the given key in the given context.
// FIXME: in some cases, the "context cache" should not be used, because it has uncontrollable behaviors
// For example, these calls:
// * GetWithContextCache(TargetID) -> OtherCodeCreateModel(TargetID) -> GetWithContextCache(TargetID)
// Will cause the second call is not able to get the correct created target.
// UNLESS it is certain that the target won't be changed during the request, DO NOT use it.
func GetWithContextCache[T, K any](ctx context.Context, groupKey string, targetKey K, f func(context.Context, K) (T, error)) (T, error) {
v := GetContextData(ctx, groupKey, targetKey)
if vv, ok := v.(T); ok {
return vv, nil
if c := GetContextCache(ctx); c != nil {
return GetWithEphemeralCache(ctx, c, groupKey, targetKey, f)
}
t, err := f(ctx, targetKey)
if err != nil {
return t, err
}
SetContextData(ctx, groupKey, targetKey, t)
return t, nil
return f(ctx, targetKey)
}

View File

@ -8,27 +8,29 @@ import (
"testing"
"time"
"code.gitea.io/gitea/modules/test"
"github.com/stretchr/testify/assert"
)
func TestWithCacheContext(t *testing.T) {
ctx := WithCacheContext(t.Context())
v := GetContextData(ctx, "empty_field", "my_config1")
c := GetContextCache(ctx)
v, _ := c.Get("empty_field", "my_config1")
assert.Nil(t, v)
const field = "system_setting"
v = GetContextData(ctx, field, "my_config1")
v, _ = c.Get(field, "my_config1")
assert.Nil(t, v)
SetContextData(ctx, field, "my_config1", 1)
v = GetContextData(ctx, field, "my_config1")
c.Put(field, "my_config1", 1)
v, _ = c.Get(field, "my_config1")
assert.NotNil(t, v)
assert.Equal(t, 1, v.(int))
RemoveContextData(ctx, field, "my_config1")
RemoveContextData(ctx, field, "my_config2") // remove a non-exist key
c.Delete(field, "my_config1")
c.Delete(field, "my_config2") // remove a non-exist key
v = GetContextData(ctx, field, "my_config1")
v, _ = c.Get(field, "my_config1")
assert.Nil(t, v)
vInt, err := GetWithContextCache(ctx, field, "my_config1", func(context.Context, string) (int, error) {
@ -37,42 +39,12 @@ func TestWithCacheContext(t *testing.T) {
assert.NoError(t, err)
assert.Equal(t, 1, vInt)
v = GetContextData(ctx, field, "my_config1")
v, _ = c.Get(field, "my_config1")
assert.EqualValues(t, 1, v)
now := timeNow
defer func() {
timeNow = now
}()
timeNow = func() time.Time {
return now().Add(5 * time.Minute)
}
v = GetContextData(ctx, field, "my_config1")
defer test.MockVariableValue(&timeNow, func() time.Time {
return time.Now().Add(5 * time.Minute)
})()
v, _ = c.Get(field, "my_config1")
assert.Nil(t, v)
}
func TestWithNoCacheContext(t *testing.T) {
ctx := t.Context()
const field = "system_setting"
v := GetContextData(ctx, field, "my_config1")
assert.Nil(t, v)
SetContextData(ctx, field, "my_config1", 1)
v = GetContextData(ctx, field, "my_config1")
assert.Nil(t, v) // still no cache
ctx = WithCacheContext(ctx)
v = GetContextData(ctx, field, "my_config1")
assert.Nil(t, v)
SetContextData(ctx, field, "my_config1", 1)
v = GetContextData(ctx, field, "my_config1")
assert.NotNil(t, v)
ctx = WithNoCacheContext(ctx)
v = GetContextData(ctx, field, "my_config1")
assert.Nil(t, v)
SetContextData(ctx, field, "my_config1", 1)
v = GetContextData(ctx, field, "my_config1")
assert.Nil(t, v) // still no cache
}

90
modules/cache/ephemeral.go vendored Normal file
View File

@ -0,0 +1,90 @@
// Copyright 2025 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package cache
import (
"context"
"sync"
"time"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/util"
)
// EphemeralCache is a cache that can be used to store data in a request level context
// This is useful for caching data that is expensive to calculate and is likely to be
// used multiple times in a request.
type EphemeralCache struct {
data map[any]map[any]any
lock sync.RWMutex
created time.Time
checkLifeTime time.Duration
}
var timeNow = time.Now
func NewEphemeralCache(checkLifeTime ...time.Duration) *EphemeralCache {
return &EphemeralCache{
data: make(map[any]map[any]any),
created: timeNow(),
checkLifeTime: util.OptionalArg(checkLifeTime, 0),
}
}
func (cc *EphemeralCache) checkExceededLifeTime(tp, key any) bool {
if cc.checkLifeTime > 0 && timeNow().Sub(cc.created) > cc.checkLifeTime {
log.Warn("EphemeralCache is expired, is highly likely to be abused for long-life tasks: %v, %v", tp, key)
return true
}
return false
}
func (cc *EphemeralCache) Get(tp, key any) (any, bool) {
if cc.checkExceededLifeTime(tp, key) {
return nil, false
}
cc.lock.RLock()
defer cc.lock.RUnlock()
ret, ok := cc.data[tp][key]
return ret, ok
}
func (cc *EphemeralCache) Put(tp, key, value any) {
if cc.checkExceededLifeTime(tp, key) {
return
}
cc.lock.Lock()
defer cc.lock.Unlock()
d := cc.data[tp]
if d == nil {
d = make(map[any]any)
cc.data[tp] = d
}
d[key] = value
}
func (cc *EphemeralCache) Delete(tp, key any) {
if cc.checkExceededLifeTime(tp, key) {
return
}
cc.lock.Lock()
defer cc.lock.Unlock()
delete(cc.data[tp], key)
}
func GetWithEphemeralCache[T, K any](ctx context.Context, c *EphemeralCache, groupKey string, targetKey K, f func(context.Context, K) (T, error)) (T, error) {
v, has := c.Get(groupKey, targetKey)
if vv, ok := v.(T); has && ok {
return vv, nil
}
t, err := f(ctx, targetKey)
if err != nil {
return t, err
}
c.Put(groupKey, targetKey, t)
return t, nil
}