forgejo/modules/git/repo_attribute.go
forgejo-backport-action 9f80081795 [v7.0/forgejo] [REFACTOR] git attribute: test proper cancellation and unify nul-byte reader (#2939)
**Backport:** https://codeberg.org/forgejo/forgejo/pulls/2906

Following #2763 (refactor of git check-attr)
and #2866 (wrong log.Error format in check-attr)

- refactors the `nul-byte` reader to be used in both the streaming and one-off cases.
- add test for some failure cases
- don't log the error returned by `cmd.Run`, but return it to the `CheckPath` caller (which can then decide what to do with it).

This should solve the following flaky `log.Error` (or at least move it to the caller, instead of being inside a random goroutine):

https://codeberg.org/forgejo/forgejo/actions/runs/9541/jobs/5#jobstep-7-839

> FATAL ERROR: log.Error has been called: 2024/03/28 14:30:33 ...it/repo_attribute.go:313:func2() [E] Unable to open checker for 3fa2f829675543ecfc16b2891aebe8bf0608a8f4. Error: failed to run attr-check. Error: exit status 128
        Stderr: fatal: this operation must be run in a work tree

Co-authored-by: oliverpool <git@olivier.pfad.fr>
Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/2939
Reviewed-by: oliverpool <oliverpool@noreply.codeberg.org>
Co-authored-by: forgejo-backport-action <forgejo-backport-action@noreply.codeberg.org>
Co-committed-by: forgejo-backport-action <forgejo-backport-action@noreply.codeberg.org>
2024-04-01 06:44:46 +00:00

286 lines
8.3 KiB
Go

// Copyright 2019 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package git
import (
"bufio"
"bytes"
"context"
"fmt"
"io"
"os"
"strings"
"sync/atomic"
"code.gitea.io/gitea/modules/optional"
)
var LinguistAttributes = []string{"linguist-vendored", "linguist-generated", "linguist-language", "gitlab-language", "linguist-documentation", "linguist-detectable"}
// newCheckAttrStdoutReader parses the nul-byte separated output of git check-attr on each call of
// the returned function. The first reading error will stop the reading and be returned on all
// subsequent calls.
func newCheckAttrStdoutReader(r io.Reader, count int) func() (map[string]GitAttribute, error) {
scanner := bufio.NewScanner(r)
// adapted from bufio.ScanLines to split on nul-byte \x00
scanner.Split(func(data []byte, atEOF bool) (advance int, token []byte, err error) {
if atEOF && len(data) == 0 {
return 0, nil, nil
}
if i := bytes.IndexByte(data, '\x00'); i >= 0 {
// We have a full nul-terminated line.
return i + 1, data[0:i], nil
}
// If we're at EOF, we have a final, non-terminated line. Return it.
if atEOF {
return len(data), data, nil
}
// Request more data.
return 0, nil, nil
})
var err error
nextText := func() string {
if err != nil {
return ""
}
if !scanner.Scan() {
err = scanner.Err()
if err == nil {
err = io.ErrUnexpectedEOF
}
return ""
}
return scanner.Text()
}
nextAttribute := func() (string, GitAttribute, error) {
nextText() // discard filename
key := nextText()
value := GitAttribute(nextText())
return key, value, err
}
return func() (map[string]GitAttribute, error) {
values := make(map[string]GitAttribute, count)
for range count {
k, v, err := nextAttribute()
if err != nil {
return values, err
}
values[k] = v
}
return values, scanner.Err()
}
}
// GitAttribute exposes an attribute from the .gitattribute file
type GitAttribute string //nolint:revive
// IsSpecified returns true if the gitattribute is set and not empty
func (ca GitAttribute) IsSpecified() bool {
return ca != "" && ca != "unspecified"
}
// String returns the value of the attribute or "" if unspecified
func (ca GitAttribute) String() string {
if !ca.IsSpecified() {
return ""
}
return string(ca)
}
// Prefix returns the value of the attribute before any question mark '?'
//
// sometimes used within gitlab-language: https://docs.gitlab.com/ee/user/project/highlighting.html#override-syntax-highlighting-for-a-file-type
func (ca GitAttribute) Prefix() string {
s := ca.String()
if i := strings.IndexByte(s, '?'); i >= 0 {
return s[:i]
}
return s
}
// Bool returns true if "set"/"true", false if "unset"/"false", none otherwise
func (ca GitAttribute) Bool() optional.Option[bool] {
switch ca {
case "set", "true":
return optional.Some(true)
case "unset", "false":
return optional.Some(false)
}
return optional.None[bool]()
}
// gitCheckAttrCommand prepares the "git check-attr" command for later use as one-shot or streaming
// instanciation.
func (repo *Repository) gitCheckAttrCommand(treeish string, attributes ...string) (*Command, *RunOpts, context.CancelFunc, error) {
if len(attributes) == 0 {
return nil, nil, nil, fmt.Errorf("no provided attributes to check-attr")
}
env := os.Environ()
var removeTempFiles context.CancelFunc = func() {}
// git < 2.40 cannot run check-attr on bare repo, but needs INDEX + WORK_TREE
hasIndex := treeish == ""
if !hasIndex && !SupportCheckAttrOnBare {
indexFilename, worktree, cancel, err := repo.ReadTreeToTemporaryIndex(treeish)
if err != nil {
return nil, nil, nil, err
}
removeTempFiles = cancel
env = append(env, "GIT_INDEX_FILE="+indexFilename, "GIT_WORK_TREE="+worktree)
hasIndex = true
// clear treeish to read from provided index/work_tree
treeish = ""
}
cmd := NewCommand(repo.Ctx, "check-attr", "-z")
if hasIndex {
cmd.AddArguments("--cached")
}
if len(treeish) > 0 {
cmd.AddArguments("--source")
cmd.AddDynamicArguments(treeish)
}
cmd.AddDynamicArguments(attributes...)
// Version 2.43.1 has a bug where the behavior of `GIT_FLUSH` is flipped.
// Ref: https://lore.kernel.org/git/CABn0oJvg3M_kBW-u=j3QhKnO=6QOzk-YFTgonYw_UvFS1NTX4g@mail.gmail.com
if InvertedGitFlushEnv {
env = append(env, "GIT_FLUSH=0")
} else {
env = append(env, "GIT_FLUSH=1")
}
return cmd, &RunOpts{
Env: env,
Dir: repo.Path,
}, removeTempFiles, nil
}
// GitAttributeFirst returns the first specified attribute of the given filename.
//
// If treeish is empty, the gitattribute will be read from the current repo (which MUST be a working directory and NOT bare).
func (repo *Repository) GitAttributeFirst(treeish, filename string, attributes ...string) (GitAttribute, error) {
values, err := repo.GitAttributes(treeish, filename, attributes...)
if err != nil {
return "", err
}
for _, a := range attributes {
if values[a].IsSpecified() {
return values[a], nil
}
}
return "", nil
}
// GitAttributes returns the gitattribute of the given filename.
//
// If treeish is empty, the gitattribute will be read from the current repo (which MUST be a working directory and NOT bare).
func (repo *Repository) GitAttributes(treeish, filename string, attributes ...string) (map[string]GitAttribute, error) {
cmd, runOpts, removeTempFiles, err := repo.gitCheckAttrCommand(treeish, attributes...)
if err != nil {
return nil, err
}
defer removeTempFiles()
stdOut := new(bytes.Buffer)
runOpts.Stdout = stdOut
stdErr := new(bytes.Buffer)
runOpts.Stderr = stdErr
cmd.AddDashesAndList(filename)
if err := cmd.Run(runOpts); err != nil {
return nil, fmt.Errorf("failed to run check-attr: %w\n%s\n%s", err, stdOut.String(), stdErr.String())
}
return newCheckAttrStdoutReader(stdOut, len(attributes))()
}
// GitAttributeChecker creates an AttributeChecker for the given repository and provided commit ID
// to retrieve the attributes of multiple files. The AttributeChecker must be closed after use.
//
// If treeish is empty, the gitattribute will be read from the current repo (which MUST be a working directory and NOT bare).
func (repo *Repository) GitAttributeChecker(treeish string, attributes ...string) (AttributeChecker, error) {
cmd, runOpts, removeTempFiles, err := repo.gitCheckAttrCommand(treeish, attributes...)
if err != nil {
return AttributeChecker{}, err
}
cmd.AddArguments("--stdin")
// os.Pipe is needed (and not io.Pipe), otherwise cmd.Wait will wait for the stdinReader
// to be closed before returning (which would require another goroutine)
// https://go.dev/issue/23019
stdinReader, stdinWriter, err := os.Pipe() // reader closed in goroutine / writer closed on ac.Close
if err != nil {
return AttributeChecker{}, err
}
stdoutReader, stdoutWriter := io.Pipe() // closed in goroutine
ac := AttributeChecker{
removeTempFiles: removeTempFiles, // called on ac.Close
stdinWriter: stdinWriter,
readStdout: newCheckAttrStdoutReader(stdoutReader, len(attributes)),
err: &atomic.Value{},
}
go func() {
defer stdinReader.Close()
defer stdoutWriter.Close() // in case of a panic (no-op if already closed by CloseWithError at the end)
stdErr := new(bytes.Buffer)
runOpts.Stdin = stdinReader
runOpts.Stdout = stdoutWriter
runOpts.Stderr = stdErr
err := cmd.Run(runOpts)
// if the context was cancelled, Run error is irrelevant
if e := cmd.parentContext.Err(); e != nil {
err = e
}
if err != nil { // decorate the returned error
err = fmt.Errorf("git check-attr (stderr: %q): %w", strings.TrimSpace(stdErr.String()), err)
ac.err.Store(err)
}
stdoutWriter.CloseWithError(err)
}()
return ac, nil
}
type AttributeChecker struct {
removeTempFiles context.CancelFunc
stdinWriter io.WriteCloser
readStdout func() (map[string]GitAttribute, error)
err *atomic.Value
}
func (ac AttributeChecker) CheckPath(path string) (map[string]GitAttribute, error) {
if _, err := ac.stdinWriter.Write([]byte(path + "\x00")); err != nil {
// try to return the Run error if available, since it is likely more helpful
// than just "broken pipe"
if aerr, _ := ac.err.Load().(error); aerr != nil {
return nil, aerr
}
return nil, fmt.Errorf("git check-attr: %w", err)
}
return ac.readStdout()
}
func (ac AttributeChecker) Close() error {
ac.removeTempFiles()
return ac.stdinWriter.Close()
}