From 248b96d8a38b2d52a73d7091a82f688f4688295e Mon Sep 17 00:00:00 2001 From: zeripath Date: Thu, 9 Sep 2021 21:13:36 +0100 Subject: [PATCH] Use git attributes to determine generated and vendored status for language stats and diffs (#16773) Replaces #16262 Replaces #16250 Replaces #14833 This PR first implements a `git check-attr` pipe reader - using `git check-attr --stdin -z --cached` - taking account of the change in the output format in git 1.8.5 and creates a helper function to read a tree into a temporary index file for that pipe reader. It then wires this in to the language stats helper and into the git diff generation. Files which are marked generated will be folded by default. Fixes #14786 Fixes #12653 --- modules/analyze/generated.go | 28 ++ modules/git/repo_attribute.go | 285 ++++++++++++++++++++- modules/git/repo_attribute_test.go | 159 ++++++++++++ modules/git/repo_index.go | 39 ++- modules/git/repo_language_stats_gogit.go | 70 ++++- modules/git/repo_language_stats_nogogit.go | 71 ++++- options/locale/locale_en-US.ini | 2 + services/gitdiff/gitdiff.go | 78 ++++++ templates/repo/diff/box.tmpl | 18 +- web_src/js/index.js | 3 +- 10 files changed, 736 insertions(+), 17 deletions(-) create mode 100644 modules/analyze/generated.go create mode 100644 modules/git/repo_attribute_test.go diff --git a/modules/analyze/generated.go b/modules/analyze/generated.go new file mode 100644 index 0000000000..0f14d28545 --- /dev/null +++ b/modules/analyze/generated.go @@ -0,0 +1,28 @@ +// Copyright 2021 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package analyze + +import ( + "path/filepath" + "strings" + + "github.com/go-enry/go-enry/v2/data" +) + +// IsGenerated returns whether or not path is a generated path. +func IsGenerated(path string) bool { + ext := strings.ToLower(filepath.Ext(path)) + if _, ok := data.GeneratedCodeExtensions[ext]; ok { + return true + } + + for _, m := range data.GeneratedCodeNameMatchers { + if m(path) { + return true + } + } + + return false +} diff --git a/modules/git/repo_attribute.go b/modules/git/repo_attribute.go index aa5e4c10e7..0bd7d7e49c 100644 --- a/modules/git/repo_attribute.go +++ b/modules/git/repo_attribute.go @@ -6,7 +6,12 @@ package git import ( "bytes" + "context" "fmt" + "io" + "os" + "strconv" + "strings" ) // CheckAttributeOpts represents the possible options to CheckAttribute @@ -21,7 +26,7 @@ type CheckAttributeOpts struct { func (repo *Repository) CheckAttribute(opts CheckAttributeOpts) (map[string]map[string]string, error) { err := LoadGitVersion() if err != nil { - return nil, fmt.Errorf("Git version missing: %v", err) + return nil, fmt.Errorf("git version missing: %v", err) } stdOut := new(bytes.Buffer) @@ -55,13 +60,14 @@ func (repo *Repository) CheckAttribute(opts CheckAttributeOpts) (map[string]map[ cmd := NewCommand(cmdArgs...) if err := cmd.RunInDirPipeline(repo.Path, stdOut, stdErr); err != nil { - return nil, fmt.Errorf("Failed to run check-attr: %v\n%s\n%s", err, stdOut.String(), stdErr.String()) + return nil, fmt.Errorf("failed to run check-attr: %v\n%s\n%s", err, stdOut.String(), stdErr.String()) } + // FIXME: This is incorrect on versions < 1.8.5 fields := bytes.Split(stdOut.Bytes(), []byte{'\000'}) if len(fields)%3 != 1 { - return nil, fmt.Errorf("Wrong number of fields in return from check-attr") + return nil, fmt.Errorf("wrong number of fields in return from check-attr") } var name2attribute2info = make(map[string]map[string]string) @@ -80,3 +86,276 @@ func (repo *Repository) CheckAttribute(opts CheckAttributeOpts) (map[string]map[ return name2attribute2info, nil } + +// CheckAttributeReader provides a reader for check-attribute content that can be long running +type CheckAttributeReader struct { + // params + Attributes []string + Repo *Repository + IndexFile string + WorkTree string + + stdinReader io.ReadCloser + stdinWriter *os.File + stdOut attributeWriter + cmd *Command + env []string + ctx context.Context + cancel context.CancelFunc + running chan struct{} +} + +// Init initializes the cmd +func (c *CheckAttributeReader) Init(ctx context.Context) error { + c.running = make(chan struct{}) + cmdArgs := []string{"check-attr", "--stdin", "-z"} + + if len(c.IndexFile) > 0 && CheckGitVersionAtLeast("1.7.8") == nil { + cmdArgs = append(cmdArgs, "--cached") + c.env = []string{"GIT_INDEX_FILE=" + c.IndexFile} + } + + if len(c.WorkTree) > 0 && CheckGitVersionAtLeast("1.7.8") == nil { + c.env = []string{"GIT_WORK_TREE=" + c.WorkTree} + } + + if len(c.Attributes) > 0 { + cmdArgs = append(cmdArgs, c.Attributes...) + cmdArgs = append(cmdArgs, "--") + } else { + lw := new(nulSeparatedAttributeWriter) + lw.attributes = make(chan attributeTriple) + + c.stdOut = lw + c.stdOut.Close() + return fmt.Errorf("no provided Attributes to check") + } + + c.ctx, c.cancel = context.WithCancel(ctx) + c.cmd = NewCommandContext(c.ctx, cmdArgs...) + var err error + c.stdinReader, c.stdinWriter, err = os.Pipe() + if err != nil { + return err + } + + if CheckGitVersionAtLeast("1.8.5") == nil { + lw := new(nulSeparatedAttributeWriter) + lw.attributes = make(chan attributeTriple, 5) + + c.stdOut = lw + } else { + lw := new(lineSeparatedAttributeWriter) + lw.attributes = make(chan attributeTriple, 5) + + c.stdOut = lw + } + return nil +} + +// Run run cmd +func (c *CheckAttributeReader) Run() error { + stdErr := new(bytes.Buffer) + err := c.cmd.RunInDirTimeoutEnvFullPipelineFunc(c.env, -1, c.Repo.Path, c.stdOut, stdErr, c.stdinReader, func(_ context.Context, _ context.CancelFunc) error { + close(c.running) + return nil + }) + defer c.cancel() + _ = c.stdOut.Close() + if err != nil && c.ctx.Err() != nil && err.Error() != "signal: killed" { + return fmt.Errorf("failed to run attr-check. Error: %w\nStderr: %s", err, stdErr.String()) + } + + return nil +} + +// CheckPath check attr for given path +func (c *CheckAttributeReader) CheckPath(path string) (map[string]string, error) { + select { + case <-c.ctx.Done(): + return nil, c.ctx.Err() + case <-c.running: + } + + if _, err := c.stdinWriter.Write([]byte(path + "\x00")); err != nil { + defer c.cancel() + return nil, err + } + + if err := c.stdinWriter.Sync(); err != nil { + defer c.cancel() + return nil, err + } + + rs := make(map[string]string) + for range c.Attributes { + select { + case attr := <-c.stdOut.ReadAttribute(): + rs[attr.Attribute] = attr.Value + case <-c.ctx.Done(): + return nil, c.ctx.Err() + } + } + return rs, nil +} + +// Close close pip after use +func (c *CheckAttributeReader) Close() error { + select { + case <-c.running: + default: + close(c.running) + } + defer c.cancel() + return c.stdinWriter.Close() +} + +type attributeWriter interface { + io.WriteCloser + ReadAttribute() <-chan attributeTriple +} + +type attributeTriple struct { + Filename string + Attribute string + Value string +} + +type nulSeparatedAttributeWriter struct { + tmp []byte + attributes chan attributeTriple + working attributeTriple + pos int +} + +func (wr *nulSeparatedAttributeWriter) Write(p []byte) (n int, err error) { + l, read := len(p), 0 + + nulIdx := bytes.IndexByte(p, '\x00') + for nulIdx >= 0 { + wr.tmp = append(wr.tmp, p[:nulIdx]...) + switch wr.pos { + case 0: + wr.working = attributeTriple{ + Filename: string(wr.tmp), + } + case 1: + wr.working.Attribute = string(wr.tmp) + case 2: + wr.working.Value = string(wr.tmp) + } + wr.tmp = wr.tmp[:0] + wr.pos++ + if wr.pos > 2 { + wr.attributes <- wr.working + wr.pos = 0 + } + read += nulIdx + 1 + if l > read { + p = p[nulIdx+1:] + nulIdx = bytes.IndexByte(p, '\x00') + } else { + return l, nil + } + } + wr.tmp = append(wr.tmp, p...) + return len(p), nil +} + +func (wr *nulSeparatedAttributeWriter) ReadAttribute() <-chan attributeTriple { + return wr.attributes +} + +func (wr *nulSeparatedAttributeWriter) Close() error { + close(wr.attributes) + return nil +} + +type lineSeparatedAttributeWriter struct { + tmp []byte + attributes chan attributeTriple +} + +func (wr *lineSeparatedAttributeWriter) Write(p []byte) (n int, err error) { + l := len(p) + + nlIdx := bytes.IndexByte(p, '\n') + for nlIdx >= 0 { + wr.tmp = append(wr.tmp, p[:nlIdx]...) + + if len(wr.tmp) == 0 { + // This should not happen + if len(p) > nlIdx+1 { + wr.tmp = wr.tmp[:0] + p = p[nlIdx+1:] + nlIdx = bytes.IndexByte(p, '\n') + continue + } else { + return l, nil + } + } + + working := attributeTriple{} + if wr.tmp[0] == '"' { + sb := new(strings.Builder) + remaining := string(wr.tmp[1:]) + for len(remaining) > 0 { + rn, _, tail, err := strconv.UnquoteChar(remaining, '"') + if err != nil { + if len(remaining) > 2 && remaining[0] == '"' && remaining[1] == ':' && remaining[2] == ' ' { + working.Filename = sb.String() + wr.tmp = []byte(remaining[3:]) + break + } + return l, fmt.Errorf("unexpected tail %s", string(remaining)) + } + _, _ = sb.WriteRune(rn) + remaining = tail + } + } else { + idx := bytes.IndexByte(wr.tmp, ':') + if idx < 0 { + return l, fmt.Errorf("unexpected input %s", string(wr.tmp)) + } + working.Filename = string(wr.tmp[:idx]) + if len(wr.tmp) < idx+2 { + return l, fmt.Errorf("unexpected input %s", string(wr.tmp)) + } + wr.tmp = wr.tmp[idx+2:] + } + + idx := bytes.IndexByte(wr.tmp, ':') + if idx < 0 { + return l, fmt.Errorf("unexpected input %s", string(wr.tmp)) + } + + working.Attribute = string(wr.tmp[:idx]) + if len(wr.tmp) < idx+2 { + return l, fmt.Errorf("unexpected input %s", string(wr.tmp)) + } + + working.Value = string(wr.tmp[idx+2:]) + + wr.attributes <- working + wr.tmp = wr.tmp[:0] + if len(p) > nlIdx+1 { + p = p[nlIdx+1:] + nlIdx = bytes.IndexByte(p, '\n') + continue + } else { + return l, nil + } + } + + wr.tmp = append(wr.tmp, p...) + return l, nil +} + +func (wr *lineSeparatedAttributeWriter) ReadAttribute() <-chan attributeTriple { + return wr.attributes +} + +func (wr *lineSeparatedAttributeWriter) Close() error { + close(wr.attributes) + return nil +} diff --git a/modules/git/repo_attribute_test.go b/modules/git/repo_attribute_test.go new file mode 100644 index 0000000000..92d1a78fa4 --- /dev/null +++ b/modules/git/repo_attribute_test.go @@ -0,0 +1,159 @@ +// Copyright 2021 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package git + +import ( + "testing" + "time" + + "github.com/stretchr/testify/assert" +) + +func Test_nulSeparatedAttributeWriter_ReadAttribute(t *testing.T) { + wr := &nulSeparatedAttributeWriter{ + attributes: make(chan attributeTriple, 5), + } + + testStr := ".gitignore\"\n\x00linguist-vendored\x00unspecified\x00" + + n, err := wr.Write([]byte(testStr)) + + assert.Equal(t, n, len(testStr)) + assert.NoError(t, err) + select { + case attr := <-wr.ReadAttribute(): + assert.Equal(t, ".gitignore\"\n", attr.Filename) + assert.Equal(t, "linguist-vendored", attr.Attribute) + assert.Equal(t, "unspecified", attr.Value) + case <-time.After(100 * time.Millisecond): + assert.Fail(t, "took too long to read an attribute from the list") + } + // Write a second attribute again + n, err = wr.Write([]byte(testStr)) + + assert.Equal(t, n, len(testStr)) + assert.NoError(t, err) + + select { + case attr := <-wr.ReadAttribute(): + assert.Equal(t, ".gitignore\"\n", attr.Filename) + assert.Equal(t, "linguist-vendored", attr.Attribute) + assert.Equal(t, "unspecified", attr.Value) + case <-time.After(100 * time.Millisecond): + assert.Fail(t, "took too long to read an attribute from the list") + } + + //Write a partial attribute + _, err = wr.Write([]byte("incomplete-file")) + assert.NoError(t, err) + _, err = wr.Write([]byte("name\x00")) + assert.NoError(t, err) + + select { + case <-wr.ReadAttribute(): + assert.Fail(t, "There should not be an attribute ready to read") + case <-time.After(100 * time.Millisecond): + } + _, err = wr.Write([]byte("attribute\x00")) + assert.NoError(t, err) + select { + case <-wr.ReadAttribute(): + assert.Fail(t, "There should not be an attribute ready to read") + case <-time.After(100 * time.Millisecond): + } + + _, err = wr.Write([]byte("value\x00")) + assert.NoError(t, err) + + attr := <-wr.ReadAttribute() + assert.Equal(t, "incomplete-filename", attr.Filename) + assert.Equal(t, "attribute", attr.Attribute) + assert.Equal(t, "value", attr.Value) + + _, err = wr.Write([]byte("shouldbe.vendor\x00linguist-vendored\x00set\x00shouldbe.vendor\x00linguist-generated\x00unspecified\x00shouldbe.vendor\x00linguist-language\x00unspecified\x00")) + assert.NoError(t, err) + attr = <-wr.ReadAttribute() + assert.NoError(t, err) + assert.EqualValues(t, attributeTriple{ + Filename: "shouldbe.vendor", + Attribute: "linguist-vendored", + Value: "set", + }, attr) + attr = <-wr.ReadAttribute() + assert.NoError(t, err) + assert.EqualValues(t, attributeTriple{ + Filename: "shouldbe.vendor", + Attribute: "linguist-generated", + Value: "unspecified", + }, attr) + attr = <-wr.ReadAttribute() + assert.NoError(t, err) + assert.EqualValues(t, attributeTriple{ + Filename: "shouldbe.vendor", + Attribute: "linguist-language", + Value: "unspecified", + }, attr) +} + +func Test_lineSeparatedAttributeWriter_ReadAttribute(t *testing.T) { + wr := &lineSeparatedAttributeWriter{ + attributes: make(chan attributeTriple, 5), + } + + testStr := `".gitignore\"\n": linguist-vendored: unspecified +` + n, err := wr.Write([]byte(testStr)) + + assert.Equal(t, n, len(testStr)) + assert.NoError(t, err) + + select { + case attr := <-wr.ReadAttribute(): + assert.Equal(t, ".gitignore\"\n", attr.Filename) + assert.Equal(t, "linguist-vendored", attr.Attribute) + assert.Equal(t, "unspecified", attr.Value) + case <-time.After(100 * time.Millisecond): + assert.Fail(t, "took too long to read an attribute from the list") + } + + // Write a second attribute again + n, err = wr.Write([]byte(testStr)) + + assert.Equal(t, n, len(testStr)) + assert.NoError(t, err) + + select { + case attr := <-wr.ReadAttribute(): + assert.Equal(t, ".gitignore\"\n", attr.Filename) + assert.Equal(t, "linguist-vendored", attr.Attribute) + assert.Equal(t, "unspecified", attr.Value) + case <-time.After(100 * time.Millisecond): + assert.Fail(t, "took too long to read an attribute from the list") + } + + //Write a partial attribute + _, err = wr.Write([]byte("incomplete-file")) + assert.NoError(t, err) + _, err = wr.Write([]byte("name: ")) + assert.NoError(t, err) + select { + case <-wr.ReadAttribute(): + assert.Fail(t, "There should not be an attribute ready to read") + case <-time.After(100 * time.Millisecond): + } + _, err = wr.Write([]byte("attribute: ")) + assert.NoError(t, err) + select { + case <-wr.ReadAttribute(): + assert.Fail(t, "There should not be an attribute ready to read") + case <-time.After(100 * time.Millisecond): + } + _, err = wr.Write([]byte("value\n")) + assert.NoError(t, err) + attr := <-wr.ReadAttribute() + assert.Equal(t, "incomplete-filename", attr.Filename) + assert.Equal(t, "attribute", attr.Attribute) + assert.Equal(t, "value", attr.Value) +} diff --git a/modules/git/repo_index.go b/modules/git/repo_index.go index 2c351e209f..b301ff2437 100644 --- a/modules/git/repo_index.go +++ b/modules/git/repo_index.go @@ -6,11 +6,17 @@ package git import ( "bytes" + "context" + "io/ioutil" + "os" "strings" + + "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/util" ) // ReadTreeToIndex reads a treeish to the index -func (repo *Repository) ReadTreeToIndex(treeish string) error { +func (repo *Repository) ReadTreeToIndex(treeish string, indexFilename ...string) error { if len(treeish) != 40 { res, err := NewCommand("rev-parse", "--verify", treeish).RunInDir(repo.Path) if err != nil { @@ -24,17 +30,42 @@ func (repo *Repository) ReadTreeToIndex(treeish string) error { if err != nil { return err } - return repo.readTreeToIndex(id) + return repo.readTreeToIndex(id, indexFilename...) } -func (repo *Repository) readTreeToIndex(id SHA1) error { - _, err := NewCommand("read-tree", id.String()).RunInDir(repo.Path) +func (repo *Repository) readTreeToIndex(id SHA1, indexFilename ...string) error { + var env []string + if len(indexFilename) > 0 { + env = append(os.Environ(), "GIT_INDEX_FILE="+indexFilename[0]) + } + _, err := NewCommand("read-tree", id.String()).RunInDirWithEnv(repo.Path, env) if err != nil { return err } return nil } +// ReadTreeToTemporaryIndex reads a treeish to a temporary index file +func (repo *Repository) ReadTreeToTemporaryIndex(treeish string) (filename string, cancel context.CancelFunc, err error) { + tmpIndex, err := ioutil.TempFile("", "index") + if err != nil { + return + } + filename = tmpIndex.Name() + cancel = func() { + err := util.Remove(filename) + if err != nil { + log.Error("failed to remove tmp index file: %v", err) + } + } + err = repo.ReadTreeToIndex(treeish, filename) + if err != nil { + defer cancel() + return "", func() {}, err + } + return +} + // EmptyIndex empties the index func (repo *Repository) EmptyIndex() error { _, err := NewCommand("read-tree", "--empty").RunInDir(repo.Path) diff --git a/modules/git/repo_language_stats_gogit.go b/modules/git/repo_language_stats_gogit.go index 0a4cfbbc7b..3abce1f077 100644 --- a/modules/git/repo_language_stats_gogit.go +++ b/modules/git/repo_language_stats_gogit.go @@ -9,10 +9,12 @@ package git import ( "bytes" + "context" "io" "io/ioutil" "code.gitea.io/gitea/modules/analyze" + "code.gitea.io/gitea/modules/log" "github.com/go-enry/go-enry/v2" "github.com/go-git/go-git/v5" @@ -42,9 +44,73 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err return nil, err } + var checker *CheckAttributeReader + + if CheckGitVersionAtLeast("1.7.8") == nil { + indexFilename, deleteTemporaryFile, err := repo.ReadTreeToTemporaryIndex(commitID) + if err == nil { + defer deleteTemporaryFile() + + checker = &CheckAttributeReader{ + Attributes: []string{"linguist-vendored", "linguist-generated", "linguist-language"}, + Repo: repo, + IndexFile: indexFilename, + } + ctx, cancel := context.WithCancel(DefaultContext) + if err := checker.Init(ctx); err != nil { + log.Error("Unable to open checker for %s. Error: %v", commitID, err) + } else { + go func() { + err = checker.Run() + if err != nil { + log.Error("Unable to open checker for %s. Error: %v", commitID, err) + cancel() + } + }() + } + defer cancel() + } + } + sizes := make(map[string]int64) err = tree.Files().ForEach(func(f *object.File) error { - if f.Size == 0 || analyze.IsVendor(f.Name) || enry.IsDotFile(f.Name) || + if f.Size == 0 { + return nil + } + + notVendored := false + notGenerated := false + + if checker != nil { + attrs, err := checker.CheckPath(f.Name) + if err == nil { + if vendored, has := attrs["linguist-vendored"]; has { + if vendored == "set" || vendored == "true" { + return nil + } + notVendored = vendored == "false" + } + if generated, has := attrs["linguist-generated"]; has { + if generated == "set" || generated == "true" { + return nil + } + notGenerated = generated == "false" + } + if language, has := attrs["linguist-language"]; has && language != "unspecified" && language != "" { + // group languages, such as Pug -> HTML; SCSS -> CSS + group := enry.GetLanguageGroup(language) + if len(group) == 0 { + language = group + } + + sizes[language] += f.Size + + return nil + } + } + } + + if (!notVendored && analyze.IsVendor(f.Name)) || enry.IsDotFile(f.Name) || enry.IsDocumentation(f.Name) || enry.IsConfiguration(f.Name) { return nil } @@ -54,7 +120,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err if f.Size <= bigFileSize { content, _ = readFile(f, fileSizeLimit) } - if enry.IsGenerated(f.Name, content) { + if !notGenerated && enry.IsGenerated(f.Name, content) { return nil } diff --git a/modules/git/repo_language_stats_nogogit.go b/modules/git/repo_language_stats_nogogit.go index 7425e2dbb1..c3b96ea841 100644 --- a/modules/git/repo_language_stats_nogogit.go +++ b/modules/git/repo_language_stats_nogogit.go @@ -10,6 +10,7 @@ package git import ( "bufio" "bytes" + "context" "io" "math" @@ -62,13 +63,78 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err return nil, err } + var checker *CheckAttributeReader + + if CheckGitVersionAtLeast("1.7.8") == nil { + indexFilename, deleteTemporaryFile, err := repo.ReadTreeToTemporaryIndex(commitID) + if err == nil { + defer deleteTemporaryFile() + + checker = &CheckAttributeReader{ + Attributes: []string{"linguist-vendored", "linguist-generated", "linguist-language"}, + Repo: repo, + IndexFile: indexFilename, + } + ctx, cancel := context.WithCancel(DefaultContext) + if err := checker.Init(ctx); err != nil { + log.Error("Unable to open checker for %s. Error: %v", commitID, err) + } else { + go func() { + err = checker.Run() + if err != nil { + log.Error("Unable to open checker for %s. Error: %v", commitID, err) + cancel() + } + }() + } + defer cancel() + } + } + contentBuf := bytes.Buffer{} var content []byte sizes := make(map[string]int64) for _, f := range entries { contentBuf.Reset() content = contentBuf.Bytes() - if f.Size() == 0 || analyze.IsVendor(f.Name()) || enry.IsDotFile(f.Name()) || + + if f.Size() == 0 { + continue + } + + notVendored := false + notGenerated := false + + if checker != nil { + attrs, err := checker.CheckPath(f.Name()) + if err == nil { + if vendored, has := attrs["linguist-vendored"]; has { + if vendored == "set" || vendored == "true" { + continue + } + notVendored = vendored == "false" + } + if generated, has := attrs["linguist-generated"]; has { + if generated == "set" || generated == "true" { + continue + } + notGenerated = generated == "false" + } + if language, has := attrs["linguist-language"]; has && language != "unspecified" && language != "" { + // group languages, such as Pug -> HTML; SCSS -> CSS + group := enry.GetLanguageGroup(language) + if len(group) == 0 { + language = group + } + + sizes[language] += f.Size() + + continue + } + } + } + + if (!notVendored && analyze.IsVendor(f.Name())) || enry.IsDotFile(f.Name()) || enry.IsDocumentation(f.Name()) || enry.IsConfiguration(f.Name()) { continue } @@ -102,11 +168,10 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err return nil, err } } - if enry.IsGenerated(f.Name(), content) { + if !notGenerated && enry.IsGenerated(f.Name(), content) { continue } - // TODO: Use .gitattributes file for linguist overrides // FIXME: Why can't we split this and the IsGenerated tests to avoid reading the blob unless absolutely necessary? // - eg. do the all the detection tests using filename first before reading content. language := analyze.GetCodeLanguage(f.Name(), content) diff --git a/options/locale/locale_en-US.ini b/options/locale/locale_en-US.ini index 3462a15cca..17701094d7 100644 --- a/options/locale/locale_en-US.ini +++ b/options/locale/locale_en-US.ini @@ -2011,6 +2011,8 @@ diff.file_byte_size = Size diff.file_suppressed = File diff suppressed because it is too large diff.file_suppressed_line_too_long = File diff suppressed because one or more lines are too long diff.too_many_files = Some files were not shown because too many files changed in this diff +diff.generated = generated +diff.vendored = vendored diff.comment.placeholder = Leave a comment diff.comment.markdown_info = Styling with markdown is supported. diff.comment.add_single_comment = Add single comment diff --git a/services/gitdiff/gitdiff.go b/services/gitdiff/gitdiff.go index 4be115f030..65d0dab4ca 100644 --- a/services/gitdiff/gitdiff.go +++ b/services/gitdiff/gitdiff.go @@ -23,6 +23,7 @@ import ( "time" "code.gitea.io/gitea/models" + "code.gitea.io/gitea/modules/analyze" "code.gitea.io/gitea/modules/charset" "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/highlight" @@ -30,6 +31,7 @@ import ( "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/process" "code.gitea.io/gitea/modules/setting" + "code.gitea.io/gitea/modules/util" "github.com/sergi/go-diff/diffmatchpatch" stdcharset "golang.org/x/net/html/charset" @@ -593,6 +595,8 @@ type DiffFile struct { IsIncomplete bool IsIncompleteLineTooLong bool IsProtected bool + IsGenerated bool + IsVendored bool } // GetType returns type of diff file. @@ -1268,7 +1272,81 @@ func GetDiffRangeWithWhitespaceBehavior(gitRepo *git.Repository, beforeCommitID, if err != nil { return nil, fmt.Errorf("ParsePatch: %v", err) } + + var checker *git.CheckAttributeReader + + if git.CheckGitVersionAtLeast("1.7.8") == nil { + indexFilename, deleteTemporaryFile, err := gitRepo.ReadTreeToTemporaryIndex(afterCommitID) + if err == nil { + defer deleteTemporaryFile() + workdir, err := ioutil.TempDir("", "empty-work-dir") + if err != nil { + log.Error("Unable to create temporary directory: %v", err) + return nil, err + } + defer func() { + _ = util.RemoveAll(workdir) + }() + + checker = &git.CheckAttributeReader{ + Attributes: []string{"linguist-vendored", "linguist-generated"}, + Repo: gitRepo, + IndexFile: indexFilename, + WorkTree: workdir, + } + ctx, cancel := context.WithCancel(git.DefaultContext) + if err := checker.Init(ctx); err != nil { + log.Error("Unable to open checker for %s. Error: %v", afterCommitID, err) + } else { + go func() { + err = checker.Run() + if err != nil && err != ctx.Err() { + log.Error("Unable to open checker for %s. Error: %v", afterCommitID, err) + } + cancel() + }() + } + defer func() { + cancel() + }() + } + } + for _, diffFile := range diff.Files { + + gotVendor := false + gotGenerated := false + if checker != nil { + attrs, err := checker.CheckPath(diffFile.Name) + if err == nil { + if vendored, has := attrs["linguist-vendored"]; has { + if vendored == "set" || vendored == "true" { + diffFile.IsVendored = true + gotVendor = true + } else { + gotVendor = vendored == "false" + } + } + if generated, has := attrs["linguist-generated"]; has { + if generated == "set" || generated == "true" { + diffFile.IsGenerated = true + gotGenerated = true + } else { + gotGenerated = generated == "false" + } + } + } else { + log.Error("Unexpected error: %v", err) + } + } + + if !gotVendor { + diffFile.IsVendored = analyze.IsVendor(diffFile.Name) + } + if !gotGenerated { + diffFile.IsGenerated = analyze.IsGenerated(diffFile.Name) + } + tailSection := diffFile.GetTailSection(gitRepo, beforeCommitID, afterCommitID) if tailSection != nil { diffFile.Sections = append(diffFile.Sections, tailSection) diff --git a/templates/repo/diff/box.tmpl b/templates/repo/diff/box.tmpl index 4f8f726097..8c4b972bbc 100644 --- a/templates/repo/diff/box.tmpl +++ b/templates/repo/diff/box.tmpl @@ -49,11 +49,15 @@ {{$isImage := or (call $.IsBlobAnImage $blobBase) (call $.IsBlobAnImage $blobHead)}} {{$isCsv := (call $.IsCsvFile $file)}} {{$showFileViewToggle := or $isImage (and (not $file.IsIncomplete) $isCsv)}} -
+

-
- - {{svg "octicon-chevron-down" 18}} +
+ + {{if $file.IsGenerated}} + {{svg "octicon-chevron-right" 18}} + {{else}} + {{svg "octicon-chevron-down" 18}} + {{end}}
{{if $file.IsBin}} @@ -65,6 +69,12 @@ {{end}}
{{if $file.IsRenamed}}{{$file.OldName}} → {{end}}{{$file.Name}}{{if .IsLFSFile}} ({{$.i18n.Tr "repo.stored_lfs"}}){{end}} + {{if $file.IsGenerated}} + {{$.i18n.Tr "repo.diff.generated"}} + {{end}} + {{if $file.IsVendored}} + {{$.i18n.Tr "repo.diff.vendored"}} + {{end}}
{{if $showFileViewToggle}} diff --git a/web_src/js/index.js b/web_src/js/index.js index 5ea16f44f8..78caa51fd6 100644 --- a/web_src/js/index.js +++ b/web_src/js/index.js @@ -2349,8 +2349,9 @@ function initCodeView() { } $(document).on('click', '.fold-file', ({currentTarget}) => { const box = currentTarget.closest('.file-content'); + const chevron = currentTarget.querySelector('a.chevron'); const folded = box.dataset.folded !== 'true'; - currentTarget.innerHTML = svg(`octicon-chevron-${folded ? 'right' : 'down'}`, 18); + chevron.innerHTML = svg(`octicon-chevron-${folded ? 'right' : 'down'}`, 18); box.dataset.folded = String(folded); }); $(document).on('click', '.blob-excerpt', async ({currentTarget}) => {