From 3c81f7478c491e284c72b07a14975dd14d0af1f8 Mon Sep 17 00:00:00 2001 From: oliverpool Date: Sun, 24 Mar 2024 12:44:30 +0100 Subject: [PATCH] [PERFORMANCE] git check-attr on bare repo if supported --- modules/git/git.go | 8 +- modules/git/repo_attribute.go | 431 +++++++++--------- modules/git/repo_attribute_test.go | 129 +++++- modules/git/repo_language_stats_nogogit.go | 59 +-- .../tests/repos/language_stats_repo/config | 2 +- .../tests/repos/language_stats_repo/logs/HEAD | 1 + .../logs/refs/heads/master | 1 + .../1e/ea60592b55dcb45c36029cc1202132e9fb756c | Bin 0 -> 63 bytes .../22/b6aa0588563508d8879f062470c8cbc7b2f2bb | Bin 0 -> 200 bytes .../34/1fca5b5ea3de596dc483e54c2db28633cd2f97 | Bin 0 -> 172 bytes .../language_stats_repo/refs/heads/master | 2 +- routers/web/repo/setting/lfs.go | 39 +- routers/web/repo/view.go | 17 +- services/gitdiff/gitdiff.go | 51 +-- services/repository/files/content.go | 29 +- services/repository/files/update.go | 8 +- services/repository/files/upload.go | 59 ++- 17 files changed, 450 insertions(+), 386 deletions(-) create mode 100644 modules/git/tests/repos/language_stats_repo/objects/1e/ea60592b55dcb45c36029cc1202132e9fb756c create mode 100644 modules/git/tests/repos/language_stats_repo/objects/22/b6aa0588563508d8879f062470c8cbc7b2f2bb create mode 100644 modules/git/tests/repos/language_stats_repo/objects/34/1fca5b5ea3de596dc483e54c2db28633cd2f97 diff --git a/modules/git/git.go b/modules/git/git.go index 13a3127498..583bd1f0ed 100644 --- a/modules/git/git.go +++ b/modules/git/git.go @@ -33,9 +33,10 @@ var ( // DefaultContext is the default context to run git commands in, must be initialized by git.InitXxx DefaultContext context.Context - SupportProcReceive bool // >= 2.29 - SupportHashSha256 bool // >= 2.42, SHA-256 repositories no longer an ‘experimental curiosity’ - InvertedGitFlushEnv bool // 2.43.1 + SupportProcReceive bool // >= 2.29 + SupportHashSha256 bool // >= 2.42, SHA-256 repositories no longer an ‘experimental curiosity’ + InvertedGitFlushEnv bool // 2.43.1 + SupportCheckAttrOnBare bool // >= 2.40 gitVersion *version.Version ) @@ -187,6 +188,7 @@ func InitFull(ctx context.Context) (err error) { } SupportProcReceive = CheckGitVersionAtLeast("2.29") == nil SupportHashSha256 = CheckGitVersionAtLeast("2.42") == nil && !isGogit + SupportCheckAttrOnBare = CheckGitVersionAtLeast("2.40") == nil if SupportHashSha256 { SupportedObjectFormats = append(SupportedObjectFormats, Sha256ObjectFormat) } else { diff --git a/modules/git/repo_attribute.go b/modules/git/repo_attribute.go index 197d626a4d..7651a4434c 100644 --- a/modules/git/repo_attribute.go +++ b/modules/git/repo_attribute.go @@ -7,65 +7,147 @@ import ( "bytes" "context" "fmt" - "io" "os" + "strings" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/optional" ) -// CheckAttributeOpts represents the possible options to CheckAttribute -type CheckAttributeOpts struct { - CachedOnly bool - AllAttributes bool - Attributes []string - Filenames []string - IndexFile string - WorkTree string +var LinguistAttributes = []string{"linguist-vendored", "linguist-generated", "linguist-language", "gitlab-language", "linguist-documentation", "linguist-detectable"} + +// GitAttribute exposes an attribute from the .gitattribute file +type GitAttribute string //nolint:revive + +// IsSpecified returns true if the gitattribute is set and not empty +func (ca GitAttribute) IsSpecified() bool { + return ca != "" && ca != "unspecified" } -// CheckAttribute return the Blame object of file -func (repo *Repository) CheckAttribute(opts CheckAttributeOpts) (map[string]map[string]string, error) { - env := []string{} - - if len(opts.IndexFile) > 0 { - env = append(env, "GIT_INDEX_FILE="+opts.IndexFile) +// String returns the value of the attribute or "" if unspecified +func (ca GitAttribute) String() string { + if !ca.IsSpecified() { + return "" } - if len(opts.WorkTree) > 0 { - env = append(env, "GIT_WORK_TREE="+opts.WorkTree) + return string(ca) +} + +// Prefix returns the value of the attribute before any question mark '?' +// +// sometimes used within gitlab-language: https://docs.gitlab.com/ee/user/project/highlighting.html#override-syntax-highlighting-for-a-file-type +func (ca GitAttribute) Prefix() string { + s := ca.String() + if i := strings.IndexByte(s, '?'); i >= 0 { + return s[:i] + } + return s +} + +// Bool returns true if "set"/"true", false if "unset"/"false", none otherwise +func (ca GitAttribute) Bool() optional.Option[bool] { + switch ca { + case "set", "true": + return optional.Some(true) + case "unset", "false": + return optional.Some(false) + } + return optional.None[bool]() +} + +// GitAttributeFirst returns the first specified attribute +// +// If treeish is empty, the gitattribute will be read from the current repo (which MUST be a working directory and NOT bare). +func (repo *Repository) GitAttributeFirst(treeish, filename string, attributes ...string) (GitAttribute, error) { + values, err := repo.GitAttributes(treeish, filename, attributes...) + if err != nil { + return "", err + } + for _, a := range attributes { + if values[a].IsSpecified() { + return values[a], nil + } + } + return "", nil +} + +func (repo *Repository) gitCheckAttrCommand(treeish string, attributes ...string) (*Command, *RunOpts, context.CancelFunc, error) { + if len(attributes) == 0 { + return nil, nil, nil, fmt.Errorf("no provided attributes to check-attr") } - if len(env) > 0 { - env = append(os.Environ(), env...) + env := os.Environ() + var deleteTemporaryFile context.CancelFunc + + // git < 2.40 cannot run check-attr on bare repo, but needs INDEX + WORK_TREE + hasIndex := treeish == "" + if !hasIndex && !SupportCheckAttrOnBare { + indexFilename, worktree, cancel, err := repo.ReadTreeToTemporaryIndex(treeish) + if err != nil { + return nil, nil, nil, err + } + deleteTemporaryFile = cancel + + env = append(env, "GIT_INDEX_FILE="+indexFilename, "GIT_WORK_TREE="+worktree) + + hasIndex = true + + // clear treeish to read from provided index/work_tree + treeish = "" } - - stdOut := new(bytes.Buffer) - stdErr := new(bytes.Buffer) - - cmd := NewCommand(repo.Ctx, "check-attr", "-z") - - if opts.AllAttributes { - cmd.AddArguments("-a") - } else { - for _, attribute := range opts.Attributes { - if attribute != "" { - cmd.AddDynamicArguments(attribute) - } + ctx, cancel := context.WithCancel(repo.Ctx) + if deleteTemporaryFile != nil { + ctxCancel := cancel + cancel = func() { + ctxCancel() + deleteTemporaryFile() } } - if opts.CachedOnly { + cmd := NewCommand(ctx, "check-attr", "-z") + + if hasIndex { cmd.AddArguments("--cached") } - cmd.AddDashesAndList(opts.Filenames...) + if len(treeish) > 0 { + cmd.AddArguments("--source") + cmd.AddDynamicArguments(treeish) + } + cmd.AddDynamicArguments(attributes...) - if err := cmd.Run(&RunOpts{ - Env: env, - Dir: repo.Path, - Stdout: stdOut, - Stderr: stdErr, - }); err != nil { + // Version 2.43.1 has a bug where the behavior of `GIT_FLUSH` is flipped. + // Ref: https://lore.kernel.org/git/CABn0oJvg3M_kBW-u=j3QhKnO=6QOzk-YFTgonYw_UvFS1NTX4g@mail.gmail.com + if InvertedGitFlushEnv { + env = append(env, "GIT_FLUSH=0") + } else { + env = append(env, "GIT_FLUSH=1") + } + + return cmd, &RunOpts{ + Env: env, + Dir: repo.Path, + }, cancel, nil +} + +// GitAttributes returns gitattribute. +// +// If treeish is empty, the gitattribute will be read from the current repo (which MUST be a working directory and NOT bare). +func (repo *Repository) GitAttributes(treeish, filename string, attributes ...string) (map[string]GitAttribute, error) { + cmd, runOpts, cancel, err := repo.gitCheckAttrCommand(treeish, attributes...) + if err != nil { + return nil, err + } + defer cancel() + + stdOut := new(bytes.Buffer) + runOpts.Stdout = stdOut + + stdErr := new(bytes.Buffer) + runOpts.Stderr = stdErr + + cmd.AddDashesAndList(filename) + + if err := cmd.Run(runOpts); err != nil { return nil, fmt.Errorf("failed to run check-attr: %w\n%s\n%s", err, stdOut.String(), stdErr.String()) } @@ -76,155 +158,14 @@ func (repo *Repository) CheckAttribute(opts CheckAttributeOpts) (map[string]map[ return nil, fmt.Errorf("wrong number of fields in return from check-attr") } - name2attribute2info := make(map[string]map[string]string) - - for i := 0; i < (len(fields) / 3); i++ { - filename := string(fields[3*i]) - attribute := string(fields[3*i+1]) - info := string(fields[3*i+2]) - attribute2info := name2attribute2info[filename] - if attribute2info == nil { - attribute2info = make(map[string]string) - } - attribute2info[attribute] = info - name2attribute2info[filename] = attribute2info + values := make(map[string]GitAttribute, len(attributes)) + for ; len(fields) >= 3; fields = fields[3:] { + // filename := string(fields[0]) + attribute := string(fields[1]) + value := string(fields[2]) + values[attribute] = GitAttribute(value) } - - return name2attribute2info, nil -} - -// CheckAttributeReader provides a reader for check-attribute content that can be long running -type CheckAttributeReader struct { - // params - Attributes []string - Repo *Repository - IndexFile string - WorkTree string - - stdinReader io.ReadCloser - stdinWriter *os.File - stdOut attributeWriter - cmd *Command - env []string - ctx context.Context - cancel context.CancelFunc -} - -// Init initializes the CheckAttributeReader -func (c *CheckAttributeReader) Init(ctx context.Context) error { - if len(c.Attributes) == 0 { - lw := new(nulSeparatedAttributeWriter) - lw.attributes = make(chan attributeTriple) - lw.closed = make(chan struct{}) - - c.stdOut = lw - c.stdOut.Close() - return fmt.Errorf("no provided Attributes to check") - } - - c.ctx, c.cancel = context.WithCancel(ctx) - c.cmd = NewCommand(c.ctx, "check-attr", "--stdin", "-z") - - if len(c.IndexFile) > 0 { - c.cmd.AddArguments("--cached") - c.env = append(c.env, "GIT_INDEX_FILE="+c.IndexFile) - } - - if len(c.WorkTree) > 0 { - c.env = append(c.env, "GIT_WORK_TREE="+c.WorkTree) - } - - // Version 2.43.1 has a bug where the behavior of `GIT_FLUSH` is flipped. - // Ref: https://lore.kernel.org/git/CABn0oJvg3M_kBW-u=j3QhKnO=6QOzk-YFTgonYw_UvFS1NTX4g@mail.gmail.com - if InvertedGitFlushEnv { - c.env = append(c.env, "GIT_FLUSH=0") - } else { - c.env = append(c.env, "GIT_FLUSH=1") - } - - c.cmd.AddDynamicArguments(c.Attributes...) - - var err error - - c.stdinReader, c.stdinWriter, err = os.Pipe() - if err != nil { - c.cancel() - return err - } - - lw := new(nulSeparatedAttributeWriter) - lw.attributes = make(chan attributeTriple, 5) - lw.closed = make(chan struct{}) - c.stdOut = lw - return nil -} - -// Run run cmd -func (c *CheckAttributeReader) Run() error { - defer func() { - _ = c.stdinReader.Close() - _ = c.stdOut.Close() - }() - stdErr := new(bytes.Buffer) - err := c.cmd.Run(&RunOpts{ - Env: c.env, - Dir: c.Repo.Path, - Stdin: c.stdinReader, - Stdout: c.stdOut, - Stderr: stdErr, - }) - if err != nil && // If there is an error we need to return but: - c.ctx.Err() != err && // 1. Ignore the context error if the context is cancelled or exceeds the deadline (RunWithContext could return c.ctx.Err() which is Canceled or DeadlineExceeded) - err.Error() != "signal: killed" { // 2. We should not pass up errors due to the program being killed - return fmt.Errorf("failed to run attr-check. Error: %w\nStderr: %s", err, stdErr.String()) - } - return nil -} - -// CheckPath check attr for given path -func (c *CheckAttributeReader) CheckPath(path string) (rs map[string]string, err error) { - defer func() { - if err != nil && err != c.ctx.Err() { - log.Error("Unexpected error when checking path %s in %s. Error: %v", path, c.Repo.Path, err) - } - }() - - select { - case <-c.ctx.Done(): - return nil, c.ctx.Err() - default: - } - - if _, err = c.stdinWriter.Write([]byte(path + "\x00")); err != nil { - defer c.Close() - return nil, err - } - - rs = make(map[string]string) - for range c.Attributes { - select { - case attr, ok := <-c.stdOut.ReadAttribute(): - if !ok { - return nil, c.ctx.Err() - } - rs[attr.Attribute] = attr.Value - case <-c.ctx.Done(): - return nil, c.ctx.Err() - } - } - return rs, nil -} - -// Close close pip after use -func (c *CheckAttributeReader) Close() error { - c.cancel() - err := c.stdinWriter.Close() - return err -} - -type attributeWriter interface { - io.WriteCloser - ReadAttribute() <-chan attributeTriple + return values, nil } type attributeTriple struct { @@ -275,10 +216,6 @@ func (wr *nulSeparatedAttributeWriter) Write(p []byte) (n int, err error) { return len(p), nil } -func (wr *nulSeparatedAttributeWriter) ReadAttribute() <-chan attributeTriple { - return wr.attributes -} - func (wr *nulSeparatedAttributeWriter) Close() error { select { case <-wr.closed: @@ -290,49 +227,87 @@ func (wr *nulSeparatedAttributeWriter) Close() error { return nil } -// Create a check attribute reader for the current repository and provided commit ID -func (repo *Repository) CheckAttributeReader(commitID string) (*CheckAttributeReader, context.CancelFunc) { - indexFilename, worktree, deleteTemporaryFile, err := repo.ReadTreeToTemporaryIndex(commitID) +// GitAttributeChecker creates an AttributeChecker for the given repository and provided commit ID. +// +// If treeish is empty, the gitattribute will be read from the current repo (which MUST be a working directory and NOT bare). +func (repo *Repository) GitAttributeChecker(treeish string, attributes ...string) (AttributeChecker, error) { + cmd, runOpts, cancel, err := repo.gitCheckAttrCommand(treeish, attributes...) if err != nil { - return nil, func() {} + return AttributeChecker{}, err } - checker := &CheckAttributeReader{ - Attributes: []string{"linguist-vendored", "linguist-generated", "linguist-language", "gitlab-language", "linguist-documentation", "linguist-detectable"}, - Repo: repo, - IndexFile: indexFilename, - WorkTree: worktree, - } - ctx, cancel := context.WithCancel(repo.Ctx) - if err := checker.Init(ctx); err != nil { - log.Error("Unable to open checker for %s. Error: %v", commitID, err) - } else { - go func() { - err := checker.Run() - if err != nil && err != ctx.Err() { - log.Error("Unable to open checker for %s. Error: %v", commitID, err) - } - cancel() - }() - } - deferable := func() { - _ = checker.Close() - cancel() - deleteTemporaryFile() + ac := AttributeChecker{ + attributeNumber: len(attributes), + ctx: cmd.parentContext, + cancel: cancel, // will be cancelled on Close } - return checker, deferable + stdinReader, stdinWriter, err := os.Pipe() + if err != nil { + ac.cancel() + return AttributeChecker{}, err + } + ac.stdinWriter = stdinWriter // will be closed on Close + + lw := new(nulSeparatedAttributeWriter) + lw.attributes = make(chan attributeTriple, len(attributes)) + lw.closed = make(chan struct{}) + ac.attributesCh = lw.attributes + + cmd.AddArguments("--stdin") + go func() { + defer stdinReader.Close() + defer lw.Close() + + stdErr := new(bytes.Buffer) + runOpts.Stdin = stdinReader + runOpts.Stdout = lw + runOpts.Stderr = stdErr + err := cmd.Run(runOpts) + + if err != nil && // If there is an error we need to return but: + cmd.parentContext.Err() != err && // 1. Ignore the context error if the context is cancelled or exceeds the deadline (RunWithContext could return c.ctx.Err() which is Canceled or DeadlineExceeded) + err.Error() != "signal: killed" { // 2. We should not pass up errors due to the program being killed + log.Error("failed to run attr-check. Error: %w\nStderr: %s", err, stdErr.String()) + } + }() + + return ac, nil } -// true if "set"/"true", false if "unset"/"false", none otherwise -func attributeToBool(attr map[string]string, name string) optional.Option[bool] { - if value, has := attr[name]; has && value != "unspecified" { - switch value { - case "set", "true": - return optional.Some(true) - case "unset", "false": - return optional.Some(false) +type AttributeChecker struct { + ctx context.Context + cancel context.CancelFunc + stdinWriter *os.File + attributeNumber int + attributesCh <-chan attributeTriple +} + +func (ac AttributeChecker) CheckPath(path string) (map[string]GitAttribute, error) { + if err := ac.ctx.Err(); err != nil { + return nil, err + } + + if _, err := ac.stdinWriter.Write([]byte(path + "\x00")); err != nil { + return nil, err + } + + rs := make(map[string]GitAttribute) + for i := 0; i < ac.attributeNumber; i++ { + select { + case attr, ok := <-ac.attributesCh: + if !ok { + return nil, ac.ctx.Err() + } + rs[attr.Attribute] = GitAttribute(attr.Value) + case <-ac.ctx.Done(): + return nil, ac.ctx.Err() } } - return optional.None[bool]() + return rs, nil +} + +func (ac AttributeChecker) Close() error { + ac.cancel() + return ac.stdinWriter.Close() } diff --git a/modules/git/repo_attribute_test.go b/modules/git/repo_attribute_test.go index ed16dccbe4..84d15bb983 100644 --- a/modules/git/repo_attribute_test.go +++ b/modules/git/repo_attribute_test.go @@ -4,10 +4,14 @@ package git import ( + "path/filepath" "testing" "time" + "code.gitea.io/gitea/modules/test" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func Test_nulSeparatedAttributeWriter_ReadAttribute(t *testing.T) { @@ -22,7 +26,7 @@ func Test_nulSeparatedAttributeWriter_ReadAttribute(t *testing.T) { assert.Len(t, testStr, n) assert.NoError(t, err) select { - case attr := <-wr.ReadAttribute(): + case attr := <-wr.attributes: assert.Equal(t, ".gitignore\"\n", attr.Filename) assert.Equal(t, "linguist-vendored", attr.Attribute) assert.Equal(t, "unspecified", attr.Value) @@ -36,7 +40,7 @@ func Test_nulSeparatedAttributeWriter_ReadAttribute(t *testing.T) { assert.NoError(t, err) select { - case attr := <-wr.ReadAttribute(): + case attr := <-wr.attributes: assert.Equal(t, ".gitignore\"\n", attr.Filename) assert.Equal(t, "linguist-vendored", attr.Attribute) assert.Equal(t, "unspecified", attr.Value) @@ -51,14 +55,14 @@ func Test_nulSeparatedAttributeWriter_ReadAttribute(t *testing.T) { assert.NoError(t, err) select { - case <-wr.ReadAttribute(): + case <-wr.attributes: assert.FailNow(t, "There should not be an attribute ready to read") case <-time.After(100 * time.Millisecond): } _, err = wr.Write([]byte("attribute\x00")) assert.NoError(t, err) select { - case <-wr.ReadAttribute(): + case <-wr.attributes: assert.FailNow(t, "There should not be an attribute ready to read") case <-time.After(100 * time.Millisecond): } @@ -66,28 +70,28 @@ func Test_nulSeparatedAttributeWriter_ReadAttribute(t *testing.T) { _, err = wr.Write([]byte("value\x00")) assert.NoError(t, err) - attr := <-wr.ReadAttribute() + attr := <-wr.attributes assert.Equal(t, "incomplete-filename", attr.Filename) assert.Equal(t, "attribute", attr.Attribute) assert.Equal(t, "value", attr.Value) _, err = wr.Write([]byte("shouldbe.vendor\x00linguist-vendored\x00set\x00shouldbe.vendor\x00linguist-generated\x00unspecified\x00shouldbe.vendor\x00linguist-language\x00unspecified\x00")) assert.NoError(t, err) - attr = <-wr.ReadAttribute() + attr = <-wr.attributes assert.NoError(t, err) assert.EqualValues(t, attributeTriple{ Filename: "shouldbe.vendor", Attribute: "linguist-vendored", Value: "set", }, attr) - attr = <-wr.ReadAttribute() + attr = <-wr.attributes assert.NoError(t, err) assert.EqualValues(t, attributeTriple{ Filename: "shouldbe.vendor", Attribute: "linguist-generated", Value: "unspecified", }, attr) - attr = <-wr.ReadAttribute() + attr = <-wr.attributes assert.NoError(t, err) assert.EqualValues(t, attributeTriple{ Filename: "shouldbe.vendor", @@ -95,3 +99,112 @@ func Test_nulSeparatedAttributeWriter_ReadAttribute(t *testing.T) { Value: "unspecified", }, attr) } + +func TestGitAttributeBareNonBare(t *testing.T) { + if !SupportCheckAttrOnBare { + t.Skip("git check-attr supported on bare repo starting with git 2.40") + } + + repoPath := filepath.Join(testReposDir, "language_stats_repo") + gitRepo, err := openRepositoryWithDefaultContext(repoPath) + require.NoError(t, err) + defer gitRepo.Close() + + for _, commitID := range []string{ + "8fee858da5796dfb37704761701bb8e800ad9ef3", + "341fca5b5ea3de596dc483e54c2db28633cd2f97", + } { + t.Run("GitAttributeChecker/"+commitID, func(t *testing.T) { + bareChecker, err := gitRepo.GitAttributeChecker(commitID, LinguistAttributes...) + assert.NoError(t, err) + t.Cleanup(func() { bareChecker.Close() }) + + bareStats, err := bareChecker.CheckPath("i-am-a-python.p") + assert.NoError(t, err) + + defer test.MockVariableValue(&SupportCheckAttrOnBare, false)() + cloneChecker, err := gitRepo.GitAttributeChecker(commitID, LinguistAttributes...) + assert.NoError(t, err) + t.Cleanup(func() { cloneChecker.Close() }) + cloneStats, err := cloneChecker.CheckPath("i-am-a-python.p") + assert.NoError(t, err) + + assert.EqualValues(t, cloneStats, bareStats) + }) + + t.Run("GitAttributes/"+commitID, func(t *testing.T) { + bareStats, err := gitRepo.GitAttributes(commitID, "i-am-a-python.p", LinguistAttributes...) + assert.NoError(t, err) + + defer test.MockVariableValue(&SupportCheckAttrOnBare, false)() + cloneStats, err := gitRepo.GitAttributes(commitID, "i-am-a-python.p", LinguistAttributes...) + assert.NoError(t, err) + + assert.EqualValues(t, cloneStats, bareStats) + }) + } +} + +func TestGitAttributes(t *testing.T) { + repoPath := filepath.Join(testReposDir, "language_stats_repo") + gitRepo, err := openRepositoryWithDefaultContext(repoPath) + require.NoError(t, err) + defer gitRepo.Close() + + attr, err := gitRepo.GitAttributes("8fee858da5796dfb37704761701bb8e800ad9ef3", "i-am-a-python.p", LinguistAttributes...) + assert.NoError(t, err) + assert.EqualValues(t, map[string]GitAttribute{ + "gitlab-language": "unspecified", + "linguist-detectable": "unspecified", + "linguist-documentation": "unspecified", + "linguist-generated": "unspecified", + "linguist-language": "Python", + "linguist-vendored": "unspecified", + }, attr) + + attr, err = gitRepo.GitAttributes("341fca5b5ea3de596dc483e54c2db28633cd2f97", "i-am-a-python.p", LinguistAttributes...) + assert.NoError(t, err) + assert.EqualValues(t, map[string]GitAttribute{ + "gitlab-language": "unspecified", + "linguist-detectable": "unspecified", + "linguist-documentation": "unspecified", + "linguist-generated": "unspecified", + "linguist-language": "Cobra", + "linguist-vendored": "unspecified", + }, attr) +} + +func TestGitAttributeFirst(t *testing.T) { + repoPath := filepath.Join(testReposDir, "language_stats_repo") + gitRepo, err := openRepositoryWithDefaultContext(repoPath) + require.NoError(t, err) + defer gitRepo.Close() + + t.Run("first is specified", func(t *testing.T) { + language, err := gitRepo.GitAttributeFirst("8fee858da5796dfb37704761701bb8e800ad9ef3", "i-am-a-python.p", "linguist-language", "gitlab-language") + assert.NoError(t, err) + assert.Equal(t, "Python", language.String()) + }) + + t.Run("second is specified", func(t *testing.T) { + language, err := gitRepo.GitAttributeFirst("8fee858da5796dfb37704761701bb8e800ad9ef3", "i-am-a-python.p", "gitlab-language", "linguist-language") + assert.NoError(t, err) + assert.Equal(t, "Python", language.String()) + }) + + t.Run("none is specified", func(t *testing.T) { + language, err := gitRepo.GitAttributeFirst("8fee858da5796dfb37704761701bb8e800ad9ef3", "i-am-a-python.p", "linguist-detectable", "gitlab-language", "non-existing") + assert.NoError(t, err) + assert.Equal(t, "", language.String()) + }) +} + +func TestGitAttributeStruct(t *testing.T) { + assert.Equal(t, "", GitAttribute("").String()) + assert.Equal(t, "", GitAttribute("unspecified").String()) + + assert.Equal(t, "python", GitAttribute("python").String()) + + assert.Equal(t, "text?token=Error", GitAttribute("text?token=Error").String()) + assert.Equal(t, "text", GitAttribute("text?token=Error").Prefix()) +} diff --git a/modules/git/repo_language_stats_nogogit.go b/modules/git/repo_language_stats_nogogit.go index 1a3fdbb1f8..672f7571d9 100644 --- a/modules/git/repo_language_stats_nogogit.go +++ b/modules/git/repo_language_stats_nogogit.go @@ -8,8 +8,8 @@ package git import ( "bytes" + "cmp" "io" - "strings" "code.gitea.io/gitea/modules/analyze" "code.gitea.io/gitea/modules/log" @@ -61,8 +61,11 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err return nil, err } - checker, deferable := repo.CheckAttributeReader(commitID) - defer deferable() + checker, err := repo.GitAttributeChecker(commitID, LinguistAttributes...) + if err != nil { + return nil, err + } + defer checker.Close() contentBuf := bytes.Buffer{} var content []byte @@ -102,41 +105,25 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err isDocumentation := optional.None[bool]() isDetectable := optional.None[bool]() - if checker != nil { - attrs, err := checker.CheckPath(f.Name()) - if err == nil { - isVendored = attributeToBool(attrs, "linguist-vendored") - isGenerated = attributeToBool(attrs, "linguist-generated") - isDocumentation = attributeToBool(attrs, "linguist-documentation") - isDetectable = attributeToBool(attrs, "linguist-detectable") - if language, has := attrs["linguist-language"]; has && language != "unspecified" && language != "" { - // group languages, such as Pug -> HTML; SCSS -> CSS - group := enry.GetLanguageGroup(language) - if len(group) != 0 { - language = group - } - - // this language will always be added to the size - sizes[language] += f.Size() - continue - } else if language, has := attrs["gitlab-language"]; has && language != "unspecified" && language != "" { - // strip off a ? if present - if idx := strings.IndexByte(language, '?'); idx >= 0 { - language = language[:idx] - } - if len(language) != 0 { - // group languages, such as Pug -> HTML; SCSS -> CSS - group := enry.GetLanguageGroup(language) - if len(group) != 0 { - language = group - } - - // this language will always be added to the size - sizes[language] += f.Size() - continue - } + attrs, err := checker.CheckPath(f.Name()) + if err == nil { + isVendored = attrs["linguist-vendored"].Bool() + isGenerated = attrs["linguist-generated"].Bool() + isDocumentation = attrs["linguist-documentation"].Bool() + isDetectable = attrs["linguist-detectable"].Bool() + if language := cmp.Or( + attrs["linguist-language"].String(), + attrs["gitlab-language"].Prefix(), + ); language != "" { + // group languages, such as Pug -> HTML; SCSS -> CSS + group := enry.GetLanguageGroup(language) + if len(group) != 0 { + language = group } + // this language will always be added to the size + sizes[language] += f.Size() + continue } } diff --git a/modules/git/tests/repos/language_stats_repo/config b/modules/git/tests/repos/language_stats_repo/config index 515f483629..a4ef456cbc 100644 --- a/modules/git/tests/repos/language_stats_repo/config +++ b/modules/git/tests/repos/language_stats_repo/config @@ -1,5 +1,5 @@ [core] repositoryformatversion = 0 filemode = true - bare = false + bare = true logallrefupdates = true diff --git a/modules/git/tests/repos/language_stats_repo/logs/HEAD b/modules/git/tests/repos/language_stats_repo/logs/HEAD index ce489502bb..9cedbb66a9 100644 --- a/modules/git/tests/repos/language_stats_repo/logs/HEAD +++ b/modules/git/tests/repos/language_stats_repo/logs/HEAD @@ -1 +1,2 @@ 0000000000000000000000000000000000000000 8fee858da5796dfb37704761701bb8e800ad9ef3 Andrew Thornton 1632140318 +0100 commit (initial): Add some test files for GetLanguageStats +8fee858da5796dfb37704761701bb8e800ad9ef3 341fca5b5ea3de596dc483e54c2db28633cd2f97 oliverpool 1711278775 +0100 push diff --git a/modules/git/tests/repos/language_stats_repo/logs/refs/heads/master b/modules/git/tests/repos/language_stats_repo/logs/refs/heads/master index ce489502bb..9cedbb66a9 100644 --- a/modules/git/tests/repos/language_stats_repo/logs/refs/heads/master +++ b/modules/git/tests/repos/language_stats_repo/logs/refs/heads/master @@ -1 +1,2 @@ 0000000000000000000000000000000000000000 8fee858da5796dfb37704761701bb8e800ad9ef3 Andrew Thornton 1632140318 +0100 commit (initial): Add some test files for GetLanguageStats +8fee858da5796dfb37704761701bb8e800ad9ef3 341fca5b5ea3de596dc483e54c2db28633cd2f97 oliverpool 1711278775 +0100 push diff --git a/modules/git/tests/repos/language_stats_repo/objects/1e/ea60592b55dcb45c36029cc1202132e9fb756c b/modules/git/tests/repos/language_stats_repo/objects/1e/ea60592b55dcb45c36029cc1202132e9fb756c new file mode 100644 index 0000000000000000000000000000000000000000..3c55bab91e0f9b29be108b34ab989900c6e68e08 GIT binary patch literal 63 zcmV-F0Korv0ZYosPf{>5WYE$pOU+BkFVf3OEK5|#$;?YH%`7g_g$SmmaB1lkAVhN# VfzpZTskYAfNkxfVTmV4s7y72Y9s2+P literal 0 HcmV?d00001 diff --git a/modules/git/tests/repos/language_stats_repo/objects/22/b6aa0588563508d8879f062470c8cbc7b2f2bb b/modules/git/tests/repos/language_stats_repo/objects/22/b6aa0588563508d8879f062470c8cbc7b2f2bb new file mode 100644 index 0000000000000000000000000000000000000000..947feecea9cd7632d10dbd328570b6e121568d07 GIT binary patch literal 200 zcmV;(05|`50V^p=O;s>5Fl8__FfcPQQP4}zEJ-XWDauSLElDkAkb9L7sU3P}ON<%Q zoP!FAMlXMt=0H_u>L%vuCh8VcmSp7T=@l?|onTJx^X{A~l4mrF^})1XI^_#&O$>lQ zAuF*gQ8yzsCnuj_)gtL1KHbVJ3BC3wtV=2!KDj2%gqoL|n3<|7 literal 0 HcmV?d00001 diff --git a/modules/git/tests/repos/language_stats_repo/objects/34/1fca5b5ea3de596dc483e54c2db28633cd2f97 b/modules/git/tests/repos/language_stats_repo/objects/34/1fca5b5ea3de596dc483e54c2db28633cd2f97 new file mode 100644 index 0000000000000000000000000000000000000000..9ce337e070c1d098809ccef501f8c27db034ffe7 GIT binary patch literal 172 zcmV;d08{^X0iBLPZUZ3<0DI;YKDW}!uz@lSBAWa>ocfAumcK@t ab2Pb4J;3|2L;Hbi|ApLTB=Z1IMNSlufK+Gz literal 0 HcmV?d00001 diff --git a/modules/git/tests/repos/language_stats_repo/refs/heads/master b/modules/git/tests/repos/language_stats_repo/refs/heads/master index 679afcc506..e89143e56b 100644 --- a/modules/git/tests/repos/language_stats_repo/refs/heads/master +++ b/modules/git/tests/repos/language_stats_repo/refs/heads/master @@ -1 +1 @@ -8fee858da5796dfb37704761701bb8e800ad9ef3 +341fca5b5ea3de596dc483e54c2db28633cd2f97 diff --git a/routers/web/repo/setting/lfs.go b/routers/web/repo/setting/lfs.go index 9b66af37b2..c18cb6a8c8 100644 --- a/routers/web/repo/setting/lfs.go +++ b/routers/web/repo/setting/lfs.go @@ -145,30 +145,13 @@ func LFSLocks(ctx *context.Context) { return } - name2attribute2info, err := gitRepo.CheckAttribute(git.CheckAttributeOpts{ - Attributes: []string{"lockable"}, - Filenames: filenames, - CachedOnly: true, - }) + ctx.Data["Lockables"], err = lockablesGitAttributes(gitRepo, lfsLocks) if err != nil { - log.Error("Unable to check attributes in %s (%v)", tmpBasePath, err) + log.Error("Unable to get lockablesGitAttributes in %s (%v)", tmpBasePath, err) ctx.ServerError("LFSLocks", err) return } - lockables := make([]bool, len(lfsLocks)) - for i, lock := range lfsLocks { - attribute2info, has := name2attribute2info[lock.Path] - if !has { - continue - } - if attribute2info["lockable"] != "set" { - continue - } - lockables[i] = true - } - ctx.Data["Lockables"] = lockables - filelist, err := gitRepo.LsFiles(filenames...) if err != nil { log.Error("Unable to lsfiles in %s (%v)", tmpBasePath, err) @@ -189,6 +172,24 @@ func LFSLocks(ctx *context.Context) { ctx.HTML(http.StatusOK, tplSettingsLFSLocks) } +func lockablesGitAttributes(gitRepo *git.Repository, lfsLocks []*git_model.LFSLock) ([]bool, error) { + checker, err := gitRepo.GitAttributeChecker("", "lockable") + if err != nil { + return nil, fmt.Errorf("could not GitAttributeChecker: %w", err) + } + defer checker.Close() + + lockables := make([]bool, len(lfsLocks)) + for i, lock := range lfsLocks { + attrs, err := checker.CheckPath(lock.Path) + if err != nil { + return nil, fmt.Errorf("could not CheckPath(%s): %w", lock.Path, err) + } + lockables[i] = attrs["lockable"].Bool().Value() + } + return lockables, nil +} + // LFSLockFile locks a file func LFSLockFile(ctx *context.Context) { if !setting.LFS.StartServer { diff --git a/routers/web/repo/view.go b/routers/web/repo/view.go index e4d7179f63..becb8748cd 100644 --- a/routers/web/repo/view.go +++ b/routers/web/repo/view.go @@ -643,17 +643,12 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry) { } if ctx.Repo.GitRepo != nil { - checker, deferable := ctx.Repo.GitRepo.CheckAttributeReader(ctx.Repo.CommitID) - if checker != nil { - defer deferable() - attrs, err := checker.CheckPath(ctx.Repo.TreePath) - if err == nil { - vendored, has := attrs["linguist-vendored"] - ctx.Data["IsVendored"] = has && (vendored == "set" || vendored == "true") - - generated, has := attrs["linguist-generated"] - ctx.Data["IsGenerated"] = has && (generated == "set" || generated == "true") - } + attrs, err := ctx.Repo.GitRepo.GitAttributes(ctx.Repo.CommitID, ctx.Repo.TreePath, "linguist-vendored", "linguist-generated") + if err != nil { + log.Error("GitAttributes(%s, %s) failed: %v", ctx.Repo.CommitID, ctx.Repo.TreePath, err) + } else { + ctx.Data["IsVendored"] = attrs["linguist-vendored"].Bool().Value() + ctx.Data["IsGenerated"] = attrs["linguist-generated"].Bool().Value() } } diff --git a/services/gitdiff/gitdiff.go b/services/gitdiff/gitdiff.go index 2b796181d8..0c89839edf 100644 --- a/services/gitdiff/gitdiff.go +++ b/services/gitdiff/gitdiff.go @@ -7,6 +7,7 @@ package gitdiff import ( "bufio" "bytes" + "cmp" "context" "fmt" "html" @@ -1172,38 +1173,32 @@ func GetDiff(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, fi } diff.Start = opts.SkipTo - checker, deferable := gitRepo.CheckAttributeReader(opts.AfterCommitID) - defer deferable() + checker, err := gitRepo.GitAttributeChecker(opts.AfterCommitID, git.LinguistAttributes...) + if err != nil { + return nil, fmt.Errorf("unable to GitAttributeChecker: %w", err) + } + defer checker.Close() for _, diffFile := range diff.Files { - gotVendor := false gotGenerated := false - if checker != nil { - attrs, err := checker.CheckPath(diffFile.Name) - if err == nil { - if vendored, has := attrs["linguist-vendored"]; has { - if vendored == "set" || vendored == "true" { - diffFile.IsVendored = true - gotVendor = true - } else { - gotVendor = vendored == "false" - } - } - if generated, has := attrs["linguist-generated"]; has { - if generated == "set" || generated == "true" { - diffFile.IsGenerated = true - gotGenerated = true - } else { - gotGenerated = generated == "false" - } - } - if language, has := attrs["linguist-language"]; has && language != "unspecified" && language != "" { - diffFile.Language = language - } else if language, has := attrs["gitlab-language"]; has && language != "unspecified" && language != "" { - diffFile.Language = language - } - } + + attrs, err := checker.CheckPath(diffFile.Name) + if err != nil { + log.Error("checker.CheckPath(%s) failed: %v", diffFile.Name, err) + } else { + vendored := attrs["linguist-vendored"].Bool() + diffFile.IsVendored = vendored.Value() + gotVendor = vendored.Has() + + generated := attrs["linguist-generated"].Bool() + diffFile.IsGenerated = generated.Value() + gotGenerated = generated.Has() + + diffFile.Language = cmp.Or( + attrs["linguist-language"].String(), + attrs["gitlab-language"].Prefix(), + ) } if !gotVendor { diff --git a/services/repository/files/content.go b/services/repository/files/content.go index 009e5cfa2a..8dfd8b846d 100644 --- a/services/repository/files/content.go +++ b/services/repository/files/content.go @@ -273,31 +273,6 @@ func GetBlobBySHA(ctx context.Context, repo *repo_model.Repository, gitRepo *git // TryGetContentLanguage tries to get the (linguist) language of the file content func TryGetContentLanguage(gitRepo *git.Repository, commitID, treePath string) (string, error) { - indexFilename, worktree, deleteTemporaryFile, err := gitRepo.ReadTreeToTemporaryIndex(commitID) - if err != nil { - return "", err - } - - defer deleteTemporaryFile() - - filename2attribute2info, err := gitRepo.CheckAttribute(git.CheckAttributeOpts{ - CachedOnly: true, - Attributes: []string{"linguist-language", "gitlab-language"}, - Filenames: []string{treePath}, - IndexFile: indexFilename, - WorkTree: worktree, - }) - if err != nil { - return "", err - } - - language := filename2attribute2info[treePath]["linguist-language"] - if language == "" || language == "unspecified" { - language = filename2attribute2info[treePath]["gitlab-language"] - } - if language == "unspecified" { - language = "" - } - - return language, nil + attribute, err := gitRepo.GitAttributeFirst(commitID, treePath, "linguist-language", "gitlab-language") + return attribute.Prefix(), err } diff --git a/services/repository/files/update.go b/services/repository/files/update.go index 4f7178184b..a6c813f024 100644 --- a/services/repository/files/update.go +++ b/services/repository/files/update.go @@ -400,16 +400,12 @@ func CreateOrUpdateFile(ctx context.Context, t *TemporaryUploadRepository, file var lfsMetaObject *git_model.LFSMetaObject if setting.LFS.StartServer && hasOldBranch { // Check there is no way this can return multiple infos - filename2attribute2info, err := t.gitRepo.CheckAttribute(git.CheckAttributeOpts{ - Attributes: []string{"filter"}, - Filenames: []string{file.Options.treePath}, - CachedOnly: true, - }) + filterAttribute, err := t.gitRepo.GitAttributeFirst("", file.Options.treePath, "filter") if err != nil { return err } - if filename2attribute2info[file.Options.treePath] != nil && filename2attribute2info[file.Options.treePath]["filter"] == "lfs" { + if filterAttribute == "lfs" { // OK so we are supposed to LFS this data! pointer, err := lfs.GeneratePointer(treeObjectContentReader) if err != nil { diff --git a/services/repository/files/upload.go b/services/repository/files/upload.go index e8c149f77f..1330116889 100644 --- a/services/repository/files/upload.go +++ b/services/repository/files/upload.go @@ -105,23 +105,9 @@ func UploadRepoFiles(ctx context.Context, repo *repo_model.Repository, doer *use } } - var filename2attribute2info map[string]map[string]string - if setting.LFS.StartServer { - filename2attribute2info, err = t.gitRepo.CheckAttribute(git.CheckAttributeOpts{ - Attributes: []string{"filter"}, - Filenames: names, - CachedOnly: true, - }) - if err != nil { - return err - } - } - // Copy uploaded files into repository. - for i := range infos { - if err := copyUploadedLFSFileIntoRepository(&infos[i], filename2attribute2info, t, opts.TreePath); err != nil { - return err - } + if err := copyUploadedLFSFilesIntoRepository(infos, t, opts.TreePath); err != nil { + return err } // Now write the tree @@ -169,7 +155,44 @@ func UploadRepoFiles(ctx context.Context, repo *repo_model.Repository, doer *use return repo_model.DeleteUploads(ctx, uploads...) } -func copyUploadedLFSFileIntoRepository(info *uploadInfo, filename2attribute2info map[string]map[string]string, t *TemporaryUploadRepository, treePath string) error { +func copyUploadedLFSFilesIntoRepository(infos []uploadInfo, t *TemporaryUploadRepository, treePath string) error { + var storeInLFSFunc func(string) (bool, error) + + if setting.LFS.StartServer { + checker, err := t.gitRepo.GitAttributeChecker("", "filter") + if err != nil { + return err + } + defer checker.Close() + + storeInLFSFunc = func(name string) (bool, error) { + attrs, err := checker.CheckPath(name) + if err != nil { + return false, fmt.Errorf("could not CheckPath(%s): %w", name, err) + } + return attrs["filter"] == "lfs", nil + } + } + + // Copy uploaded files into repository. + for i, info := range infos { + storeInLFS := false + if storeInLFSFunc != nil { + var err error + storeInLFS, err = storeInLFSFunc(info.upload.Name) + if err != nil { + return err + } + } + + if err := copyUploadedLFSFileIntoRepository(&infos[i], storeInLFS, t, treePath); err != nil { + return err + } + } + return nil +} + +func copyUploadedLFSFileIntoRepository(info *uploadInfo, storeInLFS bool, t *TemporaryUploadRepository, treePath string) error { file, err := os.Open(info.upload.LocalPath()) if err != nil { return err @@ -177,7 +200,7 @@ func copyUploadedLFSFileIntoRepository(info *uploadInfo, filename2attribute2info defer file.Close() var objectHash string - if setting.LFS.StartServer && filename2attribute2info[info.upload.Name] != nil && filename2attribute2info[info.upload.Name]["filter"] == "lfs" { + if storeInLFS { // Handle LFS // FIXME: Inefficient! this should probably happen in models.Upload pointer, err := lfs.GeneratePointer(file)