forgejo/modules/git/grep.go
Radosław Piliszek 824dd6bc5d git-grep: set timeout to 2s by default and allow configuring it
We need to shorten the timeout to bound effectively for
computation size. This protects against "too big" repos.
This also protects to some extent against too long lines
if kept to very low values (basically so that grep cannot run out
of memory beforehand).

Docs-PR: forgejo/docs#812
2024-08-14 07:55:05 +02:00

181 lines
4.5 KiB
Go

// Copyright 2024 The Gitea Authors. All rights reserved.
// Copyright 2024 The Forgejo Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package git
import (
"bufio"
"bytes"
"cmp"
"context"
"errors"
"fmt"
"io"
"os"
"strconv"
"strings"
"time"
"code.gitea.io/gitea/modules/setting"
)
type GrepResult struct {
Filename string
LineNumbers []int
LineCodes []string
HighlightedRanges [][3]int
}
type GrepOptions struct {
RefName string
MaxResultLimit int
MatchesPerFile int
ContextLineNumber int
IsFuzzy bool
PathSpec []setting.Glob
}
func hasPrefixFold(s, t string) bool {
if len(s) < len(t) {
return false
}
return strings.EqualFold(s[:len(t)], t)
}
func GrepSearch(ctx context.Context, repo *Repository, search string, opts GrepOptions) ([]*GrepResult, error) {
stdoutReader, stdoutWriter, err := os.Pipe()
if err != nil {
return nil, fmt.Errorf("unable to create os pipe to grep: %w", err)
}
defer func() {
_ = stdoutReader.Close()
_ = stdoutWriter.Close()
}()
/*
The output is like this ("^@" means \x00; the first number denotes the line,
the second number denotes the column of the first match in line):
HEAD:.air.toml
6^@8^@bin = "gitea"
HEAD:.changelog.yml
2^@10^@repo: go-gitea/gitea
*/
var results []*GrepResult
cmd := NewCommand(ctx, "grep",
"--null", "--break", "--heading", "--column",
"--fixed-strings", "--line-number", "--ignore-case", "--full-name")
cmd.AddOptionValues("--context", fmt.Sprint(opts.ContextLineNumber))
if opts.MatchesPerFile > 0 {
cmd.AddOptionValues("--max-count", fmt.Sprint(opts.MatchesPerFile))
}
words := []string{search}
if opts.IsFuzzy {
words = strings.Fields(search)
}
for _, word := range words {
cmd.AddOptionValues("-e", strings.TrimLeft(word, "-"))
}
// pathspec
files := make([]string, 0,
len(setting.Indexer.IncludePatterns)+
len(setting.Indexer.ExcludePatterns)+
len(opts.PathSpec))
for _, expr := range append(setting.Indexer.IncludePatterns, opts.PathSpec...) {
files = append(files, ":"+expr.Pattern())
}
for _, expr := range setting.Indexer.ExcludePatterns {
files = append(files, ":^"+expr.Pattern())
}
cmd.AddDynamicArguments(cmp.Or(opts.RefName, "HEAD")).AddDashesAndList(files...)
opts.MaxResultLimit = cmp.Or(opts.MaxResultLimit, 50)
stderr := bytes.Buffer{}
err = cmd.Run(&RunOpts{
Timeout: time.Duration(setting.Git.Timeout.Grep) * time.Second,
Dir: repo.Path,
Stdout: stdoutWriter,
Stderr: &stderr,
PipelineFunc: func(ctx context.Context, cancel context.CancelFunc) error {
_ = stdoutWriter.Close()
defer stdoutReader.Close()
isInBlock := false
scanner := bufio.NewReader(stdoutReader)
var res *GrepResult
for {
line, err := scanner.ReadString('\n')
if err != nil {
if err == io.EOF {
return nil
}
return err
}
// Remove delimiter.
if len(line) > 0 {
line = line[:len(line)-1]
}
if !isInBlock {
if _ /* ref */, filename, ok := strings.Cut(line, ":"); ok {
isInBlock = true
res = &GrepResult{Filename: filename}
results = append(results, res)
}
continue
}
if line == "" {
if len(results) >= opts.MaxResultLimit {
cancel()
break
}
isInBlock = false
continue
}
if line == "--" {
continue
}
if lineNum, lineCode, ok := strings.Cut(line, "\x00"); ok {
lineNumInt, _ := strconv.Atoi(lineNum)
res.LineNumbers = append(res.LineNumbers, lineNumInt)
if lineCol, lineCode2, ok := strings.Cut(lineCode, "\x00"); ok {
lineColInt, _ := strconv.Atoi(lineCol)
start := lineColInt - 1
matchLen := len(lineCode2)
for _, word := range words {
if hasPrefixFold(lineCode2[start:], word) {
matchLen = len(word)
break
}
}
res.HighlightedRanges = append(res.HighlightedRanges, [3]int{
len(res.LineCodes),
start,
start + matchLen,
})
res.LineCodes = append(res.LineCodes, lineCode2)
continue
}
res.LineCodes = append(res.LineCodes, lineCode)
}
}
return nil
},
})
// git grep exits by cancel (killed), usually it is caused by the limit of results
if IsErrorExitCode(err, -1) && stderr.Len() == 0 {
return results, nil
}
// git grep exits with 1 if no results are found
if IsErrorExitCode(err, 1) && stderr.Len() == 0 {
return nil, nil
}
if err != nil && !errors.Is(err, context.Canceled) {
return nil, fmt.Errorf("unable to run git grep: %w, stderr: %s", err, stderr.String())
}
return results, nil
}