This commit is contained in:
Lunny Xiao 2025-04-13 12:50:18 +08:00 committed by GitHub
commit 0ed20679d0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 65 additions and 77 deletions

View File

@ -4,7 +4,6 @@
package repo
import (
"errors"
"net/http"
"code.gitea.io/gitea/modules/templates"
@ -30,11 +29,7 @@ func CodeFrequency(ctx *context.Context) {
// CodeFrequencyData returns JSON of code frequency data
func CodeFrequencyData(ctx *context.Context) {
if contributorStats, err := contributors_service.GetContributorStats(ctx, ctx.Cache, ctx.Repo.Repository, ctx.Repo.Repository.DefaultBranch); err != nil {
if errors.Is(err, contributors_service.ErrAwaitGeneration) {
ctx.Status(http.StatusAccepted)
return
}
ctx.ServerError("GetContributorStats", err)
ctx.ServerError("GetCodeFrequencyData", err)
} else {
ctx.JSON(http.StatusOK, contributorStats["total"].Weeks)
}

View File

@ -4,7 +4,6 @@
package repo
import (
"errors"
"net/http"
"code.gitea.io/gitea/modules/templates"
@ -27,10 +26,6 @@ func Contributors(ctx *context.Context) {
// ContributorsData renders JSON of contributors along with their weekly commit statistics
func ContributorsData(ctx *context.Context) {
if contributorStats, err := contributors_service.GetContributorStats(ctx, ctx.Cache, ctx.Repo.Repository, ctx.Repo.Repository.DefaultBranch); err != nil {
if errors.Is(err, contributors_service.ErrAwaitGeneration) {
ctx.Status(http.StatusAccepted)
return
}
ctx.ServerError("GetContributorStats", err)
} else {
ctx.JSON(http.StatusOK, contributorStats)

View File

@ -6,12 +6,10 @@ package repository
import (
"bufio"
"context"
"errors"
"fmt"
"os"
"strconv"
"strings"
"sync"
"time"
"code.gitea.io/gitea/models/avatars"
@ -20,8 +18,9 @@ import (
"code.gitea.io/gitea/modules/cache"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/gitrepo"
"code.gitea.io/gitea/modules/graceful"
"code.gitea.io/gitea/modules/globallock"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
api "code.gitea.io/gitea/modules/structs"
)
@ -30,12 +29,6 @@ const (
contributorStatsCacheTimeout int64 = 60 * 10
)
var (
ErrAwaitGeneration = errors.New("generation took longer than ")
awaitGenerationTime = time.Second * 5
generateLock = sync.Map{}
)
type WeekData struct {
Week int64 `json:"week"` // Starting day of the week as Unix timestamp
Additions int `json:"additions"` // Number of additions in that week
@ -81,41 +74,46 @@ func findLastSundayBeforeDate(dateStr string) (string, error) {
func GetContributorStats(ctx context.Context, cache cache.StringCache, repo *repo_model.Repository, revision string) (map[string]*ContributorData, error) {
// as GetContributorStats is resource intensive we cache the result
cacheKey := fmt.Sprintf(contributorStatsCacheKey, repo.FullName(), revision)
if !cache.IsExist(cacheKey) {
genReady := make(chan struct{})
// dont start multiple async generations
_, run := generateLock.Load(cacheKey)
if run {
return nil, ErrAwaitGeneration
if cache.IsExist(cacheKey) {
// TODO: renew timeout of cache cache.UpdateTimeout(cacheKey, contributorStatsCacheTimeout)
var res map[string]*ContributorData
if _, cacheErr := cache.GetJSON(cacheKey, &res); cacheErr != nil {
return nil, fmt.Errorf("cached error: %w", cacheErr.ToError())
}
return res, nil
}
generateLock.Store(cacheKey, struct{}{})
// run generation async
go generateContributorStats(genReady, cache, cacheKey, repo, revision)
// dont start multiple generations for the same repository and same revision
releaser, err := globallock.Lock(ctx, cacheKey)
if err != nil {
return nil, err
}
defer releaser()
select {
case <-time.After(awaitGenerationTime):
return nil, ErrAwaitGeneration
case <-genReady:
// we got generation ready before timeout
break
// check if generation is already completed by other request when we were waiting for lock
if cache.IsExist(cacheKey) {
var res map[string]*ContributorData
if _, cacheErr := cache.GetJSON(cacheKey, &res); cacheErr != nil {
return nil, fmt.Errorf("cached error: %w", cacheErr.ToError())
}
return res, nil
}
// TODO: renew timeout of cache cache.UpdateTimeout(cacheKey, contributorStatsCacheTimeout)
var res map[string]*ContributorData
if _, cacheErr := cache.GetJSON(cacheKey, &res); cacheErr != nil {
return nil, fmt.Errorf("cached error: %w", cacheErr.ToError())
ctx, cancel := context.WithTimeout(ctx, time.Duration(setting.Git.Timeout.Default)*time.Second)
defer cancel()
res, err := generateContributorStats(ctx, repo, revision)
if err != nil {
return nil, err
}
cancel()
_ = cache.PutJSON(cacheKey, res, contributorStatsCacheTimeout)
return res, nil
}
// getExtendedCommitStats return the list of *ExtendedCommitStats for the given revision
func getExtendedCommitStats(repo *git.Repository, revision string /*, limit int */) ([]*ExtendedCommitStats, error) {
baseCommit, err := repo.GetCommit(revision)
if err != nil {
return nil, err
}
func getExtendedCommitStats(ctx context.Context, repoPath string, baseCommit *git.Commit) ([]*ExtendedCommitStats, error) {
stdoutReader, stdoutWriter, err := os.Pipe()
if err != nil {
return nil, err
@ -131,8 +129,8 @@ func getExtendedCommitStats(repo *git.Repository, revision string /*, limit int
var extendedCommitStats []*ExtendedCommitStats
stderr := new(strings.Builder)
err = gitCmd.Run(repo.Ctx, &git.RunOpts{
Dir: repo.Path,
err = gitCmd.Run(ctx, &git.RunOpts{
Dir: repoPath,
Stdout: stdoutWriter,
Stderr: stderr,
PipelineFunc: func(ctx context.Context, cancel context.CancelFunc) error {
@ -140,6 +138,12 @@ func getExtendedCommitStats(repo *git.Repository, revision string /*, limit int
scanner := bufio.NewScanner(stdoutReader)
for scanner.Scan() {
select {
case <-ctx.Done():
return ctx.Err()
default:
}
line := strings.TrimSpace(scanner.Text())
if line != "---" {
continue
@ -193,33 +197,32 @@ func getExtendedCommitStats(repo *git.Repository, revision string /*, limit int
},
})
if err != nil {
return nil, fmt.Errorf("Failed to get ContributorsCommitStats for repository.\nError: %w\nStderr: %s", err, stderr)
return nil, fmt.Errorf("failed to get ContributorsCommitStats for repository.\nError: %w\nStderr: %s", err, stderr)
}
return extendedCommitStats, nil
}
func generateContributorStats(genDone chan struct{}, cache cache.StringCache, cacheKey string, repo *repo_model.Repository, revision string) {
ctx := graceful.GetManager().HammerContext()
func generateContributorStats(ctx context.Context, repo *repo_model.Repository, revision string) (map[string]*ContributorData, error) {
gitRepo, closer, err := gitrepo.RepositoryFromContextOrOpen(ctx, repo)
if err != nil {
_ = cache.PutJSON(cacheKey, fmt.Errorf("OpenRepository: %w", err), contributorStatsCacheTimeout)
return
return nil, err
}
defer closer.Close()
if len(revision) == 0 {
revision = repo.DefaultBranch
}
extendedCommitStats, err := getExtendedCommitStats(gitRepo, revision)
baseCommit, err := gitRepo.GetCommit(revision)
if err != nil {
_ = cache.PutJSON(cacheKey, fmt.Errorf("ExtendedCommitStats: %w", err), contributorStatsCacheTimeout)
return
return nil, err
}
extendedCommitStats, err := getExtendedCommitStats(ctx, repo.RepoPath(), baseCommit)
if err != nil {
return nil, err
}
if len(extendedCommitStats) == 0 {
_ = cache.PutJSON(cacheKey, fmt.Errorf("no commit stats returned for revision '%s'", revision), contributorStatsCacheTimeout)
return
return nil, fmt.Errorf("no commit stats returned for revision '%s'", revision)
}
layout := time.DateOnly
@ -232,12 +235,17 @@ func generateContributorStats(genDone chan struct{}, cache cache.StringCache, ca
}
total := contributorsCommitStats["total"]
emailUserCache := make(map[string]*user_model.User)
for _, v := range extendedCommitStats {
userEmail := v.Author.Email
if len(userEmail) == 0 {
continue
}
u, _ := user_model.GetUserByEmail(ctx, userEmail)
u, ok := emailUserCache[userEmail]
if !ok {
u, _ = user_model.GetUserByEmail(ctx, userEmail)
emailUserCache[userEmail] = u
}
if u != nil {
// update userEmail with user's primary email address so
// that different mail addresses will linked to same account
@ -300,9 +308,5 @@ func generateContributorStats(genDone chan struct{}, cache cache.StringCache, ca
total.TotalCommits++
}
_ = cache.PutJSON(cacheKey, contributorsCommitStats, contributorStatsCacheTimeout)
generateLock.Delete(cacheKey)
if genDone != nil {
genDone <- struct{}{}
}
return contributorsCommitStats, nil
}

View File

@ -10,8 +10,6 @@ import (
"code.gitea.io/gitea/models/db"
repo_model "code.gitea.io/gitea/models/repo"
"code.gitea.io/gitea/models/unittest"
"code.gitea.io/gitea/modules/cache"
"code.gitea.io/gitea/modules/setting"
"github.com/stretchr/testify/assert"
)
@ -21,18 +19,14 @@ func TestRepository_ContributorsGraph(t *testing.T) {
repo := unittest.AssertExistsAndLoadBean(t, &repo_model.Repository{ID: 2})
assert.NoError(t, repo.LoadOwner(db.DefaultContext))
mockCache, err := cache.NewStringCache(setting.Cache{})
data, err := generateContributorStats(t.Context(), repo, "404ref")
assert.ErrorContains(t, err, "object does not exist")
assert.Nil(t, data)
data, err = generateContributorStats(t.Context(), repo, "master")
assert.NoError(t, err)
generateContributorStats(nil, mockCache, "key", repo, "404ref")
var data map[string]*ContributorData
_, getErr := mockCache.GetJSON("key", &data)
assert.NotNil(t, getErr)
assert.ErrorContains(t, getErr.ToError(), "object does not exist")
generateContributorStats(nil, mockCache, "key2", repo, "master")
exist, _ := mockCache.GetJSON("key2", &data)
assert.True(t, exist)
assert.NotNil(t, data)
var keys []string
for k := range data {
keys = append(keys, k)