From 0f3e717a1abb2b2161b87dac557beb6475224a2e Mon Sep 17 00:00:00 2001
From: wxiaoguang <wxiaoguang@gmail.com>
Date: Fri, 3 May 2024 17:13:48 +0800
Subject: [PATCH] Improve grep search (#30843)

Reduce the context line number to 1, make "git grep" search respect the
include/exclude patterns, and fix #30785
---
 modules/git/grep.go             |  2 ++
 modules/git/grep_test.go        | 20 ++++++++++++++++++++
 modules/setting/glob.go         | 32 ++++++++++++++++++++++++++++++++
 modules/setting/indexer.go      | 12 +++++-------
 routers/web/repo/search.go      | 18 +++++++++++++++++-
 routers/web/repo/search_test.go | 19 +++++++++++++++++++
 6 files changed, 95 insertions(+), 8 deletions(-)
 create mode 100644 modules/setting/glob.go
 create mode 100644 routers/web/repo/search_test.go

diff --git a/modules/git/grep.go b/modules/git/grep.go
index e7d238e586..bf6b41a886 100644
--- a/modules/git/grep.go
+++ b/modules/git/grep.go
@@ -29,6 +29,7 @@ type GrepOptions struct {
 	ContextLineNumber int
 	IsFuzzy           bool
 	MaxLineLength     int // the maximum length of a line to parse, exceeding chars will be truncated
+	PathspecList      []string
 }
 
 func GrepSearch(ctx context.Context, repo *Repository, search string, opts GrepOptions) ([]*GrepResult, error) {
@@ -62,6 +63,7 @@ func GrepSearch(ctx context.Context, repo *Repository, search string, opts GrepO
 		cmd.AddOptionValues("-e", strings.TrimLeft(search, "-"))
 	}
 	cmd.AddDynamicArguments(util.IfZero(opts.RefName, "HEAD"))
+	cmd.AddDashesAndList(opts.PathspecList...)
 	opts.MaxResultLimit = util.IfZero(opts.MaxResultLimit, 50)
 	stderr := bytes.Buffer{}
 	err = cmd.Run(&RunOpts{
diff --git a/modules/git/grep_test.go b/modules/git/grep_test.go
index 7f4ded478f..6a99f80407 100644
--- a/modules/git/grep_test.go
+++ b/modules/git/grep_test.go
@@ -31,6 +31,26 @@ func TestGrepSearch(t *testing.T) {
 		},
 	}, res)
 
+	res, err = GrepSearch(context.Background(), repo, "void", GrepOptions{PathspecList: []string{":(glob)java-hello/*"}})
+	assert.NoError(t, err)
+	assert.Equal(t, []*GrepResult{
+		{
+			Filename:    "java-hello/main.java",
+			LineNumbers: []int{3},
+			LineCodes:   []string{" public static void main(String[] args)"},
+		},
+	}, res)
+
+	res, err = GrepSearch(context.Background(), repo, "void", GrepOptions{PathspecList: []string{":(glob,exclude)java-hello/*"}})
+	assert.NoError(t, err)
+	assert.Equal(t, []*GrepResult{
+		{
+			Filename:    "main.vendor.java",
+			LineNumbers: []int{3},
+			LineCodes:   []string{" public static void main(String[] args)"},
+		},
+	}, res)
+
 	res, err = GrepSearch(context.Background(), repo, "void", GrepOptions{MaxResultLimit: 1})
 	assert.NoError(t, err)
 	assert.Equal(t, []*GrepResult{
diff --git a/modules/setting/glob.go b/modules/setting/glob.go
new file mode 100644
index 0000000000..8f1d24dea4
--- /dev/null
+++ b/modules/setting/glob.go
@@ -0,0 +1,32 @@
+// Copyright 2024 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package setting
+
+import "github.com/gobwas/glob"
+
+type GlobMatcher struct {
+	compiledGlob  glob.Glob
+	patternString string
+}
+
+var _ glob.Glob = (*GlobMatcher)(nil)
+
+func (g *GlobMatcher) Match(s string) bool {
+	return g.compiledGlob.Match(s)
+}
+
+func (g *GlobMatcher) PatternString() string {
+	return g.patternString
+}
+
+func GlobMatcherCompile(pattern string, separators ...rune) (*GlobMatcher, error) {
+	g, err := glob.Compile(pattern, separators...)
+	if err != nil {
+		return nil, err
+	}
+	return &GlobMatcher{
+		compiledGlob:  g,
+		patternString: pattern,
+	}, nil
+}
diff --git a/modules/setting/indexer.go b/modules/setting/indexer.go
index 6877d70e3c..18585602c3 100644
--- a/modules/setting/indexer.go
+++ b/modules/setting/indexer.go
@@ -10,8 +10,6 @@ import (
 	"time"
 
 	"code.gitea.io/gitea/modules/log"
-
-	"github.com/gobwas/glob"
 )
 
 // Indexer settings
@@ -30,8 +28,8 @@ var Indexer = struct {
 	RepoConnStr          string
 	RepoIndexerName      string
 	MaxIndexerFileSize   int64
-	IncludePatterns      []glob.Glob
-	ExcludePatterns      []glob.Glob
+	IncludePatterns      []*GlobMatcher
+	ExcludePatterns      []*GlobMatcher
 	ExcludeVendored      bool
 }{
 	IssueType:        "bleve",
@@ -93,12 +91,12 @@ func loadIndexerFrom(rootCfg ConfigProvider) {
 }
 
 // IndexerGlobFromString parses a comma separated list of patterns and returns a glob.Glob slice suited for repo indexing
-func IndexerGlobFromString(globstr string) []glob.Glob {
-	extarr := make([]glob.Glob, 0, 10)
+func IndexerGlobFromString(globstr string) []*GlobMatcher {
+	extarr := make([]*GlobMatcher, 0, 10)
 	for _, expr := range strings.Split(strings.ToLower(globstr), ",") {
 		expr = strings.TrimSpace(expr)
 		if expr != "" {
-			if g, err := glob.Compile(expr, '.', '/'); err != nil {
+			if g, err := GlobMatcherCompile(expr, '.', '/'); err != nil {
 				log.Info("Invalid glob expression '%s' (skipped): %v", expr, err)
 			} else {
 				extarr = append(extarr, g)
diff --git a/routers/web/repo/search.go b/routers/web/repo/search.go
index d7854b2499..920a865555 100644
--- a/routers/web/repo/search.go
+++ b/routers/web/repo/search.go
@@ -17,6 +17,16 @@ import (
 
 const tplSearch base.TplName = "repo/search"
 
+func indexSettingToGitGrepPathspecList() (list []string) {
+	for _, expr := range setting.Indexer.IncludePatterns {
+		list = append(list, ":(glob)"+expr.PatternString())
+	}
+	for _, expr := range setting.Indexer.ExcludePatterns {
+		list = append(list, ":(glob,exclude)"+expr.PatternString())
+	}
+	return list
+}
+
 // Search render repository search page
 func Search(ctx *context.Context) {
 	language := ctx.FormTrim("l")
@@ -65,8 +75,14 @@ func Search(ctx *context.Context) {
 			ctx.Data["CodeIndexerUnavailable"] = !code_indexer.IsAvailable(ctx)
 		}
 	} else {
-		res, err := git.GrepSearch(ctx, ctx.Repo.GitRepo, keyword, git.GrepOptions{ContextLineNumber: 3, IsFuzzy: isFuzzy})
+		res, err := git.GrepSearch(ctx, ctx.Repo.GitRepo, keyword, git.GrepOptions{
+			ContextLineNumber: 1,
+			IsFuzzy:           isFuzzy,
+			RefName:           git.RefNameFromBranch(ctx.Repo.BranchName).String(), // BranchName should be default branch or the first existing branch
+			PathspecList:      indexSettingToGitGrepPathspecList(),
+		})
 		if err != nil {
+			// TODO: if no branch exists, it reports: exit status 128, fatal: this operation must be run in a work tree.
 			ctx.ServerError("GrepSearch", err)
 			return
 		}
diff --git a/routers/web/repo/search_test.go b/routers/web/repo/search_test.go
new file mode 100644
index 0000000000..33a1610384
--- /dev/null
+++ b/routers/web/repo/search_test.go
@@ -0,0 +1,19 @@
+// Copyright 2024 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package repo
+
+import (
+	"testing"
+
+	"code.gitea.io/gitea/modules/setting"
+	"code.gitea.io/gitea/modules/test"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func TestIndexSettingToGitGrepPathspecList(t *testing.T) {
+	defer test.MockVariableValue(&setting.Indexer.IncludePatterns, setting.IndexerGlobFromString("a"))()
+	defer test.MockVariableValue(&setting.Indexer.ExcludePatterns, setting.IndexerGlobFromString("b"))()
+	assert.Equal(t, []string{":(glob)a", ":(glob,exclude)b"}, indexSettingToGitGrepPathspecList())
+}