0
0
Fork 0
mirror of https://github.com/go-gitea/gitea synced 2024-12-27 10:44:19 +01:00
gitea/modules/repofiles/temp_repo.go
Filip Navara 2af67f6044 Improve listing performance by using go-git (#6478)
* Use go-git for tree reading and commit info lookup.

Signed-off-by: Filip Navara <navara@emclient.com>

* Use TreeEntry.IsRegular() instead of ObjectType that was removed.

Signed-off-by: Filip Navara <navara@emclient.com>

* Use the treePath to optimize commit info search.

Signed-off-by: Filip Navara <navara@emclient.com>

* Extract the latest commit at treePath along with the other commits.

Signed-off-by: Filip Navara <navara@emclient.com>

* Fix listing commit info for a directory that was created in one commit and never modified after.

Signed-off-by: Filip Navara <navara@emclient.com>

* Avoid nearly all external 'git' invocations when doing directory listing (.editorconfig code path is still hit).

Signed-off-by: Filip Navara <navara@emclient.com>

* Use go-git for reading blobs.

Signed-off-by: Filip Navara <navara@emclient.com>

* Make SHA1 type alias for plumbing.Hash in go-git.

Signed-off-by: Filip Navara <navara@emclient.com>

* Make Signature type alias for object.Signature in go-git.

Signed-off-by: Filip Navara <navara@emclient.com>

* Fix GetCommitsInfo for repository with only one commit.

Signed-off-by: Filip Navara <navara@emclient.com>

* Fix PGP signature verification.

Signed-off-by: Filip Navara <navara@emclient.com>

* Fix issues with walking commit graph across merges.

Signed-off-by: Filip Navara <navara@emclient.com>

* Fix typo in condition.

Signed-off-by: Filip Navara <navara@emclient.com>

* Speed up loading branch list by keeping the repository reference (and thus all the loaded packfile indexes).

Signed-off-by: Filip Navara <navara@emclient.com>

* Fix listing submodules.

Signed-off-by: Filip Navara <navara@emclient.com>

* Fix build

Signed-off-by: Filip Navara <navara@emclient.com>

* Add back commit cache because of name-rev

Signed-off-by: Filip Navara <navara@emclient.com>

* Fix tests

Signed-off-by: Filip Navara <navara@emclient.com>

* Fix code style

* Fix spelling

* Address PR feedback

Signed-off-by: Filip Navara <navara@emclient.com>

* Update vendor module list

Signed-off-by: Filip Navara <navara@emclient.com>

* Fix getting trees by commit id

Signed-off-by: Filip Navara <navara@emclient.com>

* Fix remaining unit test failures

* Fix GetTreeBySHA

* Avoid running `git name-rev` if not necessary

Signed-off-by: Filip Navara <navara@emclient.com>

* Move Branch code to git module

* Clean up GPG signature verification and fix it for tagged commits

* Address PR feedback (import formatting, copyright headers)

* Make blob lookup by SHA working

* Update tests to use public API

* Allow getting content from any type of object through the blob interface

* Change test to actually expect the object content that is in the GIT repository

* Change one more test to actually expect the object content that is in the GIT repository

* Add comments
2019-04-19 20:17:27 +08:00

425 lines
13 KiB
Go

// Copyright 2019 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package repofiles
import (
"bytes"
"context"
"fmt"
"io"
"os"
"os/exec"
"path"
"regexp"
"strings"
"time"
"code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/process"
"code.gitea.io/gitea/modules/setting"
"github.com/Unknwon/com"
)
// TemporaryUploadRepository wraps a temporary bare, shared clone
// (git clone -s --bare) of a repository in which changes are staged
// before being pushed back to the original repository.
type TemporaryUploadRepository struct {
	// repo is the repository model that is cloned from and pushed to.
	repo *models.Repository
	// gitRepo is the opened temporary clone; it is nil until Clone succeeds.
	gitRepo *git.Repository
	// basePath is the filesystem path of the temporary clone.
	basePath string
}
// NewTemporaryUploadRepository prepares a TemporaryUploadRepository for repo.
//
// It only reserves a path below models.LocalCopyPath() and makes sure the
// parent directory exists; nothing is cloned until Clone is called.
//
// An error is returned when repo has no path on disk or when the parent
// directory cannot be created.
func NewTemporaryUploadRepository(repo *models.Repository) (*TemporaryUploadRepository, error) {
	// Validate the input before touching the filesystem.
	if repo.RepoPath() == "" {
		return nil, fmt.Errorf("no path to repository on system")
	}

	// Use the full nanosecond timestamp. Time.Nanosecond() is only the offset
	// within the current second and therefore repeats every second, which
	// makes collisions between temporary repositories far more likely.
	// NOTE(review): this is still not guaranteed to be unique across
	// concurrent callers; a dedicated temp-dir helper would be stronger.
	timeStr := com.ToStr(time.Now().UnixNano())
	basePath := path.Join(models.LocalCopyPath(), "upload-"+timeStr+".git")

	parentDir := path.Dir(basePath)
	if err := os.MkdirAll(parentDir, os.ModePerm); err != nil {
		// Report the directory we actually tried to create.
		return nil, fmt.Errorf("failed to create dir %s: %v", parentDir, err)
	}

	return &TemporaryUploadRepository{repo: repo, basePath: basePath}, nil
}
// Close deletes the temporary clone from disk. Nothing is done when os.Stat
// reports that the path does not exist; a failure to remove the directory is
// discarded because Close has no way to report it.
func (t *TemporaryUploadRepository) Close() {
	_, statErr := os.Stat(t.basePath)
	if os.IsNotExist(statErr) {
		return
	}
	_ = os.RemoveAll(t.basePath)
}
// Clone creates a bare, shared clone of the base repository at t.basePath with
// branch checked out as HEAD, then opens it and stores it in t.gitRepo.
//
// The git stderr output is inspected to translate two failures into typed
// errors: git.ErrBranchNotExist when the branch is missing upstream and
// models.ErrRepoNotExist when the source repository is missing. Any other
// failure is returned as a generic error carrying stderr.
func (t *TemporaryUploadRepository) Clone(branch string) error {
	description := fmt.Sprintf("Clone (git clone -s --bare): %s", t.basePath)
	_, stderr, err := process.GetManager().ExecTimeout(5*time.Minute, description,
		"git", "clone", "-s", "--bare", "-b", branch, t.repo.RepoPath(), t.basePath)
	if err != nil {
		if found, _ := regexp.MatchString(".*Remote branch .* not found in upstream origin.*", stderr); found {
			return git.ErrBranchNotExist{Name: branch}
		}
		if found, _ := regexp.MatchString(".* repository .* does not exist.*", stderr); found {
			return models.ErrRepoNotExist{
				ID:        t.repo.ID,
				UID:       t.repo.OwnerID,
				OwnerName: t.repo.OwnerName,
				Name:      t.repo.Name,
			}
		}
		return fmt.Errorf("Clone: %v %s", err, stderr)
	}

	opened, err := git.OpenRepository(t.basePath)
	if err != nil {
		return err
	}
	t.gitRepo = opened
	return nil
}
// SetDefaultIndex populates the index of the temporary clone from HEAD by
// running `git read-tree HEAD` with t.basePath as the working directory.
// On failure the error includes git's stderr output.
func (t *TemporaryUploadRepository) SetDefaultIndex() error {
	description := fmt.Sprintf("SetDefaultIndex (git read-tree HEAD): %s", t.basePath)
	_, stderr, err := process.GetManager().ExecDir(5*time.Minute, t.basePath, description,
		"git", "read-tree", "HEAD")
	if err == nil {
		return nil
	}
	return fmt.Errorf("SetDefaultIndex: %v %s", err, stderr)
}
// LsFiles runs `git ls-files -z -- <filenames...>` in the temporary clone and
// returns the index entries that git reports for the given filenames.
//
// Empty strings in filenames are not passed to git. The returned slice holds
// only the names printed by git: it contains no placeholder entries and no
// empty string for the NUL that terminates the last name.
//
// An error is returned when the command cannot be started or exits
// unsuccessfully; it carries the captured stdout and stderr.
func (t *TemporaryUploadRepository) LsFiles(filenames ...string) ([]string, error) {
	stdOut := new(bytes.Buffer)
	stdErr := new(bytes.Buffer)

	timeout := 5 * time.Minute
	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()

	cmdArgs := []string{"ls-files", "-z", "--"}
	for _, arg := range filenames {
		if arg != "" {
			cmdArgs = append(cmdArgs, arg)
		}
	}

	cmd := exec.CommandContext(ctx, "git", cmdArgs...)
	desc := fmt.Sprintf("lsFiles: (git ls-files) %v", cmdArgs)
	cmd.Dir = t.basePath
	cmd.Stdout = stdOut
	cmd.Stderr = stdErr

	if err := cmd.Start(); err != nil {
		return nil, fmt.Errorf("exec(%s) failed: %v(%v)", desc, err, ctx.Err())
	}

	// Register the process only for the time it is actually running.
	pid := process.GetManager().Add(desc, cmd)
	err := cmd.Wait()
	process.GetManager().Remove(pid)
	if err != nil {
		return nil, fmt.Errorf("exec(%d:%s) failed: %v(%v) stdout: %v stderr: %v", pid, desc, err, ctx.Err(), stdOut, stdErr)
	}

	// Start with length 0 (capacity is only a hint): allocating with a
	// non-zero length and then appending would leave empty strings at the
	// front of the result.
	filelist := make([]string, 0, len(filenames))
	for _, entry := range bytes.Split(stdOut.Bytes(), []byte{'\000'}) {
		// With -z every name is NUL-terminated, so splitting yields an empty
		// trailing element (and a single empty element for empty output).
		if len(entry) == 0 {
			continue
		}
		filelist = append(filelist, string(entry))
	}
	return filelist, nil
}
// RemoveFilesFromIndex drops the given files from the index of the temporary
// clone. For every non-empty filename a NUL-terminated record with mode 0 and
// an all-zero object id is fed on stdin to
// `git update-index --remove -z --index-info`.
//
// An error is returned when the command cannot be started or exits
// unsuccessfully; in the latter case it carries the captured stdout and stderr.
func (t *TemporaryUploadRepository) RemoveFilesFromIndex(filenames ...string) error {
	var (
		outBuf bytes.Buffer
		errBuf bytes.Buffer
		input  bytes.Buffer
	)
	for _, name := range filenames {
		if name == "" {
			continue
		}
		input.WriteString("0 0000000000000000000000000000000000000000\t")
		input.WriteString(name)
		input.WriteByte('\000')
	}

	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
	defer cancel()

	cmd := exec.CommandContext(ctx, "git", "update-index", "--remove", "-z", "--index-info")
	desc := fmt.Sprintf("removeFilesFromIndex: (git update-index) %v", filenames)
	cmd.Dir = t.basePath
	cmd.Stdout = &outBuf
	cmd.Stderr = &errBuf
	cmd.Stdin = bytes.NewReader(input.Bytes())

	if startErr := cmd.Start(); startErr != nil {
		return fmt.Errorf("exec(%s) failed: %v(%v)", desc, startErr, ctx.Err())
	}

	pid := process.GetManager().Add(desc, cmd)
	waitErr := cmd.Wait()
	process.GetManager().Remove(pid)
	if waitErr == nil {
		return nil
	}
	return fmt.Errorf("exec(%d:%s) failed: %v(%v) stdout: %v stderr: %v", pid, desc, waitErr, ctx.Err(), &outBuf, &errBuf)
}
// HashObject streams content into `git hash-object -w --stdin`, which stores
// it in the object database of the temporary clone, and returns the hash
// printed by git with surrounding whitespace trimmed.
//
// An error is returned when the command cannot be started or exits
// unsuccessfully; in the latter case it carries the captured stdout and stderr.
func (t *TemporaryUploadRepository) HashObject(content io.Reader) (string, error) {
	const desc = "hashObject: (git hash-object)"

	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
	defer cancel()

	var outBuf, errBuf bytes.Buffer
	cmd := exec.CommandContext(ctx, "git", "hash-object", "-w", "--stdin")
	cmd.Dir = t.basePath
	cmd.Stdin = content
	cmd.Stdout = &outBuf
	cmd.Stderr = &errBuf

	if startErr := cmd.Start(); startErr != nil {
		return "", fmt.Errorf("git hash-object: %s", startErr)
	}

	pid := process.GetManager().Add(desc, cmd)
	waitErr := cmd.Wait()
	process.GetManager().Remove(pid)
	if waitErr != nil {
		return "", fmt.Errorf("exec(%d:%s) failed: %v(%v) stdout: %v stderr: %v", pid, desc, waitErr, ctx.Err(), &outBuf, &errBuf)
	}

	return strings.TrimSpace(outBuf.String()), nil
}
// AddObjectToIndex records objectHash in the index under objectPath with the
// given mode, using `git update-index --add --replace --cacheinfo`.
//
// When git's stderr mentions an invalid path, models.ErrFilePathInvalid is
// returned; any other failure is returned as a generic error holding stderr.
func (t *TemporaryUploadRepository) AddObjectToIndex(mode, objectHash, objectPath string) error {
	description := fmt.Sprintf("addObjectToIndex (git update-index): %s", t.basePath)
	_, stderr, err := process.GetManager().ExecDir(5*time.Minute, t.basePath, description,
		"git", "update-index", "--add", "--replace", "--cacheinfo", mode, objectHash, objectPath)
	if err == nil {
		return nil
	}

	if invalid, _ := regexp.MatchString(".*Invalid path '.*", stderr); invalid {
		return models.ErrFilePathInvalid{
			Message: objectPath,
			Path:    objectPath,
		}
	}
	return fmt.Errorf("git update-index: %s", stderr)
}
// WriteTree runs `git write-tree` in the temporary clone to store the current
// index as a tree object and returns the tree hash with whitespace trimmed.
// On failure the error holds git's stderr output.
func (t *TemporaryUploadRepository) WriteTree() (string, error) {
	description := fmt.Sprintf("WriteTree (git write-tree): %s", t.basePath)
	stdout, stderr, err := process.GetManager().ExecDir(5*time.Minute, t.basePath, description,
		"git", "write-tree")
	if err == nil {
		return strings.TrimSpace(stdout), nil
	}
	return "", fmt.Errorf("git write-tree: %s", stderr)
}
// GetLastCommit returns the commit ID that HEAD resolves to in the temporary
// clone. It is shorthand for GetLastCommitByRef("HEAD").
func (t *TemporaryUploadRepository) GetLastCommit() (string, error) {
	const headRef = "HEAD"
	return t.GetLastCommitByRef(headRef)
}
// GetLastCommitByRef resolves ref with `git rev-parse` in the temporary clone
// and returns the printed ID with whitespace trimmed. An empty ref is treated
// as "HEAD". On failure the error holds git's stderr output.
func (t *TemporaryUploadRepository) GetLastCommitByRef(ref string) (string, error) {
	if len(ref) == 0 {
		ref = "HEAD"
	}

	description := fmt.Sprintf("GetLastCommit (git rev-parse %s): %s", ref, t.basePath)
	commitID, stderr, err := process.GetManager().ExecDir(5*time.Minute, t.basePath, description,
		"git", "rev-parse", ref)
	if err == nil {
		return strings.TrimSpace(commitID), nil
	}
	return "", fmt.Errorf("git rev-parse %s: %s", ref, stderr)
}
// CommitTree creates a commit object for treeHash with HEAD as its parent and
// message as the commit message, and returns the new commit hash with
// whitespace trimmed.
//
// Author and committer identity are taken from the git signatures of author
// and committer and handed to git through GIT_AUTHOR_* / GIT_COMMITTER_*
// environment variables; both dates are set to the same current time.
// On failure the error holds git's stderr output.
func (t *TemporaryUploadRepository) CommitTree(author, committer *models.User, treeHash string, message string) (string, error) {
	now := time.Now().Format(time.UnixDate)
	authorSig := author.NewGitSig()
	committerSig := committer.NewGitSig()

	// FIXME: Should we add SSH_ORIGINAL_COMMAND to this
	// Because this may call hooks we should pass in the environment
	identity := []string{
		"GIT_AUTHOR_NAME=" + authorSig.Name,
		"GIT_AUTHOR_EMAIL=" + authorSig.Email,
		"GIT_AUTHOR_DATE=" + now,
		"GIT_COMMITTER_NAME=" + committerSig.Name,
		"GIT_COMMITTER_EMAIL=" + committerSig.Email,
		"GIT_COMMITTER_DATE=" + now,
	}
	env := append(os.Environ(), identity...)

	description := fmt.Sprintf("commitTree (git commit-tree): %s", t.basePath)
	stdout, stderr, err := process.GetManager().ExecDirEnv(5*time.Minute, t.basePath, description, env,
		"git", "commit-tree", treeHash, "-p", "HEAD", "-m", message)
	if err == nil {
		return strings.TrimSpace(stdout), nil
	}
	return "", fmt.Errorf("git commit-tree: %s", stderr)
}
// Push pushes commitHash from the temporary clone to refs/heads/<branch> of
// the original repository on behalf of doer.
//
// The process environment is extended with doer's git identity and with the
// models.Env* / models.ProtectedBranchRepoID variables describing the
// repository and the pusher. The repository counts as a wiki when its name
// ends in ".wiki". On failure the error holds git's stderr output.
func (t *TemporaryUploadRepository) Push(doer *models.User, commitHash string, branch string) error {
	isWiki := fmt.Sprintf("%t", strings.HasSuffix(t.repo.Name, ".wiki"))
	sig := doer.NewGitSig()

	// FIXME: Should we add SSH_ORIGINAL_COMMAND to this
	// Because calls hooks we need to pass in the environment
	extra := []string{
		"GIT_AUTHOR_NAME=" + sig.Name,
		"GIT_AUTHOR_EMAIL=" + sig.Email,
		"GIT_COMMITTER_NAME=" + sig.Name,
		"GIT_COMMITTER_EMAIL=" + sig.Email,
		models.EnvRepoName + "=" + t.repo.Name,
		models.EnvRepoUsername + "=" + t.repo.OwnerName,
		models.EnvRepoIsWiki + "=" + isWiki,
		models.EnvPusherName + "=" + doer.Name,
		models.EnvPusherID + "=" + fmt.Sprintf("%d", doer.ID),
		models.ProtectedBranchRepoID + "=" + fmt.Sprintf("%d", t.repo.ID),
	}
	env := append(os.Environ(), extra...)

	description := fmt.Sprintf("actuallyPush (git push): %s", t.basePath)
	refspec := strings.TrimSpace(commitHash) + ":refs/heads/" + strings.TrimSpace(branch)
	_, stderr, err := process.GetManager().ExecDirEnv(5*time.Minute, t.basePath, description, env,
		"git", "push", t.repo.RepoPath(), refspec)
	if err == nil {
		return nil
	}
	return fmt.Errorf("git push: %s", stderr)
}
// DiffIndex returns the diff between the current index of the temporary clone
// and HEAD, obtained by parsing the output of `git diff-index --cached -p HEAD`
// with models.ParsePatch using the limits from setting.Git.
//
// An error is returned when the command cannot be set up or started, when the
// patch cannot be parsed, or when git exits unsuccessfully; in the last case
// the error includes git's stderr output.
func (t *TemporaryUploadRepository) DiffIndex() (diff *models.Diff, err error) {
	timeout := 5 * time.Minute
	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()

	stdErr := new(bytes.Buffer)

	cmd := exec.CommandContext(ctx, "git", "diff-index", "--cached", "-p", "HEAD")
	cmd.Dir = t.basePath
	cmd.Stderr = stdErr

	stdout, err := cmd.StdoutPipe()
	if err != nil {
		return nil, fmt.Errorf("StdoutPipe: %v stderr %s", err, stdErr.String())
	}

	if err = cmd.Start(); err != nil {
		return nil, fmt.Errorf("Start: %v stderr %s", err, stdErr.String())
	}

	pid := process.GetManager().Add(fmt.Sprintf("diffIndex [repo_path: %s]", t.repo.RepoPath()), cmd)
	defer process.GetManager().Remove(pid)

	diff, err = models.ParsePatch(setting.Git.MaxGitDiffLines, setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, stdout)
	if err != nil {
		// A started command must always be waited on, otherwise the child is
		// never reaped and its pipe is never released. Cancel the context
		// first so that a git process still writing to the unread pipe is
		// killed instead of blocking Wait.
		cancel()
		// The Wait error is intentionally ignored: the parse failure is the
		// error worth reporting, and Wait is expected to fail after the kill.
		_ = cmd.Wait()
		return nil, fmt.Errorf("ParsePatch: %v", err)
	}

	if err = cmd.Wait(); err != nil {
		return nil, fmt.Errorf("Wait: %v stderr %s", err, stdErr.String())
	}

	return diff, nil
}
// CheckAttribute queries git for attribute on the given paths by running
// `git check-attr -z <attribute> --cached -- <args...>` in the temporary
// clone. Empty strings in args are not passed to git.
//
// The NUL-separated output is read as (filename, attribute, info) triples and
// returned as a map from filename to a map from attribute name to its info
// value. An error is returned when the command cannot be started, exits
// unsuccessfully, or produces output that is not a whole number of triples.
func (t *TemporaryUploadRepository) CheckAttribute(attribute string, args ...string) (map[string]map[string]string, error) {
	var outBuf, errBuf bytes.Buffer

	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
	defer cancel()

	cmdArgs := []string{"check-attr", "-z", attribute, "--cached", "--"}
	for _, p := range args {
		if p == "" {
			continue
		}
		cmdArgs = append(cmdArgs, p)
	}

	cmd := exec.CommandContext(ctx, "git", cmdArgs...)
	desc := fmt.Sprintf("checkAttr: (git check-attr) %s %v", attribute, cmdArgs)
	cmd.Dir = t.basePath
	cmd.Stdout = &outBuf
	cmd.Stderr = &errBuf

	if startErr := cmd.Start(); startErr != nil {
		return nil, fmt.Errorf("exec(%s) failed: %v(%v)", desc, startErr, ctx.Err())
	}

	pid := process.GetManager().Add(desc, cmd)
	waitErr := cmd.Wait()
	process.GetManager().Remove(pid)
	if waitErr != nil {
		return nil, fmt.Errorf("exec(%d:%s) failed: %v(%v) stdout: %v stderr: %v", pid, desc, waitErr, ctx.Err(), &outBuf, &errBuf)
	}

	// Every field is NUL-terminated, so splitting leaves one extra element
	// after the last complete triple.
	fields := bytes.Split(outBuf.Bytes(), []byte{'\000'})
	if len(fields)%3 != 1 {
		return nil, fmt.Errorf("Wrong number of fields in return from check-attr")
	}

	result := make(map[string]map[string]string)
	for rest := fields; len(rest) >= 3; rest = rest[3:] {
		filename, attrName, info := string(rest[0]), string(rest[1]), string(rest[2])

		perFile, ok := result[filename]
		if !ok {
			perFile = make(map[string]string)
			result[filename] = perFile
		}
		perFile[attrName] = info
	}

	return result, nil
}
// GetBranchCommit looks up the commit object of branch in the temporary
// clone. It fails with an error when Clone has not yet set t.gitRepo.
func (t *TemporaryUploadRepository) GetBranchCommit(branch string) (*git.Commit, error) {
	if cloned := t.gitRepo; cloned != nil {
		return cloned.GetBranchCommit(branch)
	}
	return nil, fmt.Errorf("repository has not been cloned")
}
// GetCommit looks up the commit object with the given commitID in the
// temporary clone. It fails with an error when Clone has not yet set t.gitRepo.
func (t *TemporaryUploadRepository) GetCommit(commitID string) (*git.Commit, error) {
	if cloned := t.gitRepo; cloned != nil {
		return cloned.GetCommit(commitID)
	}
	return nil, fmt.Errorf("repository has not been cloned")
}