mirror of
https://codeberg.org/forgejo/forgejo.git
synced 2024-11-09 11:21:19 +01:00
f1e85622da
The current TestPatch conflict code uses a plain git apply which does not properly account for 3-way merging. However, we can improve things using `git read-tree -m` to do a three-way merge then follow the algorithm used in merge-one-file. We can also use `--patience` and/or `--histogram` to generate a nicer diff for applying patches too. Fix #13679 Fix #6417 Signed-off-by: Andrew Thornton <art27@cantab.net>
505 lines
16 KiB
Go
505 lines
16 KiB
Go
// Copyright 2019 The Gitea Authors.
|
|
// All rights reserved.
|
|
// Use of this source code is governed by a MIT-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package pull
|
|
|
|
import (
|
|
"bufio"
|
|
"context"
|
|
"fmt"
|
|
"io"
|
|
"os"
|
|
"path/filepath"
|
|
"strings"
|
|
|
|
"code.gitea.io/gitea/models"
|
|
"code.gitea.io/gitea/models/unit"
|
|
"code.gitea.io/gitea/modules/git"
|
|
"code.gitea.io/gitea/modules/graceful"
|
|
"code.gitea.io/gitea/modules/log"
|
|
"code.gitea.io/gitea/modules/process"
|
|
"code.gitea.io/gitea/modules/util"
|
|
|
|
"github.com/gobwas/glob"
|
|
)
|
|
|
|
// DownloadDiffOrPatch will write the patch for the pr to the writer
|
|
func DownloadDiffOrPatch(pr *models.PullRequest, w io.Writer, patch, binary bool) error {
|
|
if err := pr.LoadBaseRepo(); err != nil {
|
|
log.Error("Unable to load base repository ID %d for pr #%d [%d]", pr.BaseRepoID, pr.Index, pr.ID)
|
|
return err
|
|
}
|
|
|
|
gitRepo, err := git.OpenRepository(pr.BaseRepo.RepoPath())
|
|
if err != nil {
|
|
return fmt.Errorf("OpenRepository: %v", err)
|
|
}
|
|
defer gitRepo.Close()
|
|
if err := gitRepo.GetDiffOrPatch(pr.MergeBase, pr.GetGitRefName(), w, patch, binary); err != nil {
|
|
log.Error("Unable to get patch file from %s to %s in %s Error: %v", pr.MergeBase, pr.HeadBranch, pr.BaseRepo.FullName(), err)
|
|
return fmt.Errorf("Unable to get patch file from %s to %s in %s Error: %v", pr.MergeBase, pr.HeadBranch, pr.BaseRepo.FullName(), err)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// patchErrorSuffices lists the endings of `git apply` error lines that
// checkConflicts recognises. When a stderr line starting with "error: " ends
// with one of these, the text in between is treated as the path of a
// conflicted file.
var patchErrorSuffices = []string{
	// the file to be created is already present in the index
	": already exists in index",
	// the hunk could not be applied
	": patch does not apply",
	// the file to be created is already present in the working directory
	": already exists in working directory",
	// no path precedes this message, so no file is recorded for it
	"unrecognized input",
}
|
|
|
|
// TestPatch will test whether a simple patch will apply
|
|
func TestPatch(pr *models.PullRequest) error {
|
|
// Clone base repo.
|
|
tmpBasePath, err := createTemporaryRepo(pr)
|
|
if err != nil {
|
|
log.Error("CreateTemporaryPath: %v", err)
|
|
return err
|
|
}
|
|
defer func() {
|
|
if err := models.RemoveTemporaryPath(tmpBasePath); err != nil {
|
|
log.Error("Merge: RemoveTemporaryPath: %s", err)
|
|
}
|
|
}()
|
|
|
|
gitRepo, err := git.OpenRepository(tmpBasePath)
|
|
if err != nil {
|
|
return fmt.Errorf("OpenRepository: %v", err)
|
|
}
|
|
defer gitRepo.Close()
|
|
|
|
// 1. update merge base
|
|
pr.MergeBase, err = git.NewCommand("merge-base", "--", "base", "tracking").RunInDir(tmpBasePath)
|
|
if err != nil {
|
|
var err2 error
|
|
pr.MergeBase, err2 = gitRepo.GetRefCommitID(git.BranchPrefix + "base")
|
|
if err2 != nil {
|
|
return fmt.Errorf("GetMergeBase: %v and can't find commit ID for base: %v", err, err2)
|
|
}
|
|
}
|
|
pr.MergeBase = strings.TrimSpace(pr.MergeBase)
|
|
|
|
// 2. Check for conflicts
|
|
if conflicts, err := checkConflicts(pr, gitRepo, tmpBasePath); err != nil || conflicts || pr.Status == models.PullRequestStatusEmpty {
|
|
return err
|
|
}
|
|
|
|
// 3. Check for protected files changes
|
|
if err = checkPullFilesProtection(pr, gitRepo); err != nil {
|
|
return fmt.Errorf("pr.CheckPullFilesProtection(): %v", err)
|
|
}
|
|
|
|
if len(pr.ChangedProtectedFiles) > 0 {
|
|
log.Trace("Found %d protected files changed", len(pr.ChangedProtectedFiles))
|
|
}
|
|
|
|
pr.Status = models.PullRequestStatusMergeable
|
|
|
|
return nil
|
|
}
|
|
|
|
// errMergeConflict reports that a file could not be merged automatically.
type errMergeConflict struct {
	// filename is the path of the conflicted file.
	filename string
}

// Error implements the error interface and names the conflicted file.
func (e *errMergeConflict) Error() string {
	return "conflict detected at: " + e.filename
}
|
|
|
|
func attemptMerge(ctx context.Context, file *unmergedFile, tmpBasePath string, gitRepo *git.Repository) error {
|
|
switch {
|
|
case file.stage1 != nil && (file.stage2 == nil || file.stage3 == nil):
|
|
// 1. Deleted in one or both:
|
|
//
|
|
// Conflict <==> the stage1 !SameAs to the undeleted one
|
|
if (file.stage2 != nil && !file.stage1.SameAs(file.stage2)) || (file.stage3 != nil && !file.stage1.SameAs(file.stage3)) {
|
|
// Conflict!
|
|
return &errMergeConflict{file.stage1.path}
|
|
}
|
|
|
|
// Not a genuine conflict and we can simply remove the file from the index
|
|
return gitRepo.RemoveFilesFromIndex(file.stage1.path)
|
|
case file.stage1 == nil && file.stage2 != nil && (file.stage3 == nil || file.stage2.SameAs(file.stage3)):
|
|
// 2. Added in ours but not in theirs or identical in both
|
|
//
|
|
// Not a genuine conflict just add to the index
|
|
if err := gitRepo.AddObjectToIndex(file.stage2.mode, git.MustIDFromString(file.stage2.sha), file.stage2.path); err != nil {
|
|
return err
|
|
}
|
|
return nil
|
|
case file.stage1 == nil && file.stage2 != nil && file.stage3 != nil && file.stage2.sha == file.stage3.sha && file.stage2.mode != file.stage3.mode:
|
|
// 3. Added in both with the same sha but the modes are different
|
|
//
|
|
// Conflict! (Not sure that this can actually happen but we should handle)
|
|
return &errMergeConflict{file.stage2.path}
|
|
case file.stage1 == nil && file.stage2 == nil && file.stage3 != nil:
|
|
// 4. Added in theirs but not ours:
|
|
//
|
|
// Not a genuine conflict just add to the index
|
|
return gitRepo.AddObjectToIndex(file.stage3.mode, git.MustIDFromString(file.stage3.sha), file.stage3.path)
|
|
case file.stage1 == nil:
|
|
// 5. Created by new in both
|
|
//
|
|
// Conflict!
|
|
return &errMergeConflict{file.stage2.path}
|
|
case file.stage2 != nil && file.stage3 != nil:
|
|
// 5. Modified in both - we should try to merge in the changes but first:
|
|
//
|
|
if file.stage2.mode == "120000" || file.stage3.mode == "120000" {
|
|
// 5a. Conflicting symbolic link change
|
|
return &errMergeConflict{file.stage2.path}
|
|
}
|
|
if file.stage2.mode == "160000" || file.stage3.mode == "160000" {
|
|
// 5b. Conflicting submodule change
|
|
return &errMergeConflict{file.stage2.path}
|
|
}
|
|
if file.stage2.mode != file.stage3.mode {
|
|
// 5c. Conflicting mode change
|
|
return &errMergeConflict{file.stage2.path}
|
|
}
|
|
|
|
// Need to get the objects from the object db to attempt to merge
|
|
root, err := git.NewCommandContext(ctx, "unpack-file", file.stage1.sha).RunInDir(tmpBasePath)
|
|
if err != nil {
|
|
return fmt.Errorf("unable to get root object: %s at path: %s for merging. Error: %w", file.stage1.sha, file.stage1.path, err)
|
|
}
|
|
root = strings.TrimSpace(root)
|
|
defer func() {
|
|
_ = util.Remove(filepath.Join(tmpBasePath, root))
|
|
}()
|
|
|
|
base, err := git.NewCommandContext(ctx, "unpack-file", file.stage2.sha).RunInDir(tmpBasePath)
|
|
if err != nil {
|
|
return fmt.Errorf("unable to get base object: %s at path: %s for merging. Error: %w", file.stage2.sha, file.stage2.path, err)
|
|
}
|
|
base = strings.TrimSpace(filepath.Join(tmpBasePath, base))
|
|
defer func() {
|
|
_ = util.Remove(base)
|
|
}()
|
|
head, err := git.NewCommandContext(ctx, "unpack-file", file.stage3.sha).RunInDir(tmpBasePath)
|
|
if err != nil {
|
|
return fmt.Errorf("unable to get head object:%s at path: %s for merging. Error: %w", file.stage3.sha, file.stage3.path, err)
|
|
}
|
|
head = strings.TrimSpace(head)
|
|
defer func() {
|
|
_ = util.Remove(filepath.Join(tmpBasePath, head))
|
|
}()
|
|
|
|
// now git merge-file annoyingly takes a different order to the merge-tree ...
|
|
_, conflictErr := git.NewCommandContext(ctx, "merge-file", base, root, head).RunInDir(tmpBasePath)
|
|
if conflictErr != nil {
|
|
return &errMergeConflict{file.stage2.path}
|
|
}
|
|
|
|
// base now contains the merged data
|
|
hash, err := git.NewCommandContext(ctx, "hash-object", "-w", "--path", file.stage2.path, base).RunInDir(tmpBasePath)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
hash = strings.TrimSpace(hash)
|
|
return gitRepo.AddObjectToIndex(file.stage2.mode, git.MustIDFromString(hash), file.stage2.path)
|
|
default:
|
|
if file.stage1 != nil {
|
|
return &errMergeConflict{file.stage1.path}
|
|
} else if file.stage2 != nil {
|
|
return &errMergeConflict{file.stage2.path}
|
|
} else if file.stage3 != nil {
|
|
return &errMergeConflict{file.stage3.path}
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func checkConflicts(pr *models.PullRequest, gitRepo *git.Repository, tmpBasePath string) (bool, error) {
|
|
ctx, cancel, finished := process.GetManager().AddContext(graceful.GetManager().HammerContext(), fmt.Sprintf("checkConflicts: pr[%d] %s/%s#%d", pr.ID, pr.BaseRepo.OwnerName, pr.BaseRepo.Name, pr.Index))
|
|
defer finished()
|
|
|
|
// First we use read-tree to do a simple three-way merge
|
|
if _, err := git.NewCommandContext(ctx, "read-tree", "-m", pr.MergeBase, "base", "tracking").RunInDir(tmpBasePath); err != nil {
|
|
log.Error("Unable to run read-tree -m! Error: %v", err)
|
|
return false, fmt.Errorf("unable to run read-tree -m! Error: %v", err)
|
|
}
|
|
|
|
// Then we use git ls-files -u to list the unmerged files and collate the triples in unmergedfiles
|
|
unmerged := make(chan *unmergedFile)
|
|
go unmergedFiles(ctx, tmpBasePath, unmerged)
|
|
|
|
defer func() {
|
|
cancel()
|
|
for range unmerged {
|
|
// empty the unmerged channel
|
|
}
|
|
}()
|
|
|
|
numberOfConflicts := 0
|
|
conflict := false
|
|
|
|
for file := range unmerged {
|
|
if file == nil {
|
|
break
|
|
}
|
|
if file.err != nil {
|
|
cancel()
|
|
return false, file.err
|
|
}
|
|
|
|
// OK now we have the unmerged file triplet attempt to merge it
|
|
if err := attemptMerge(ctx, file, tmpBasePath, gitRepo); err != nil {
|
|
if conflictErr, ok := err.(*errMergeConflict); ok {
|
|
log.Trace("Conflict: %s in PR[%d] %s/%s#%d", conflictErr.filename, pr.ID, pr.BaseRepo.OwnerName, pr.BaseRepo.Name, pr.Index)
|
|
conflict = true
|
|
if numberOfConflicts < 10 {
|
|
pr.ConflictedFiles = append(pr.ConflictedFiles, conflictErr.filename)
|
|
}
|
|
numberOfConflicts++
|
|
continue
|
|
}
|
|
return false, err
|
|
}
|
|
}
|
|
|
|
if !conflict {
|
|
treeHash, err := git.NewCommandContext(ctx, "write-tree").RunInDir(tmpBasePath)
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
treeHash = strings.TrimSpace(treeHash)
|
|
baseTree, err := gitRepo.GetTree("base")
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
if treeHash == baseTree.ID.String() {
|
|
log.Debug("PullRequest[%d]: Patch is empty - ignoring", pr.ID)
|
|
pr.Status = models.PullRequestStatusEmpty
|
|
pr.ConflictedFiles = []string{}
|
|
pr.ChangedProtectedFiles = []string{}
|
|
}
|
|
|
|
return false, nil
|
|
}
|
|
|
|
// OK read-tree has failed so we need to try a different thing - this might actually succeed where the above fails due to whitespace handling.
|
|
|
|
// 1. Create a plain patch from head to base
|
|
tmpPatchFile, err := os.CreateTemp("", "patch")
|
|
if err != nil {
|
|
log.Error("Unable to create temporary patch file! Error: %v", err)
|
|
return false, fmt.Errorf("unable to create temporary patch file! Error: %v", err)
|
|
}
|
|
defer func() {
|
|
_ = util.Remove(tmpPatchFile.Name())
|
|
}()
|
|
|
|
if err := gitRepo.GetDiffBinary(pr.MergeBase, "tracking", tmpPatchFile); err != nil {
|
|
tmpPatchFile.Close()
|
|
log.Error("Unable to get patch file from %s to %s in %s Error: %v", pr.MergeBase, pr.HeadBranch, pr.BaseRepo.FullName(), err)
|
|
return false, fmt.Errorf("unable to get patch file from %s to %s in %s Error: %v", pr.MergeBase, pr.HeadBranch, pr.BaseRepo.FullName(), err)
|
|
}
|
|
stat, err := tmpPatchFile.Stat()
|
|
if err != nil {
|
|
tmpPatchFile.Close()
|
|
return false, fmt.Errorf("unable to stat patch file: %v", err)
|
|
}
|
|
patchPath := tmpPatchFile.Name()
|
|
tmpPatchFile.Close()
|
|
|
|
// 1a. if the size of that patch is 0 - there can be no conflicts!
|
|
if stat.Size() == 0 {
|
|
log.Debug("PullRequest[%d]: Patch is empty - ignoring", pr.ID)
|
|
pr.Status = models.PullRequestStatusEmpty
|
|
pr.ConflictedFiles = []string{}
|
|
pr.ChangedProtectedFiles = []string{}
|
|
return false, nil
|
|
}
|
|
|
|
log.Trace("PullRequest[%d].testPatch (patchPath): %s", pr.ID, patchPath)
|
|
|
|
// 2. preset the pr.Status as checking (this is not save at present)
|
|
pr.Status = models.PullRequestStatusChecking
|
|
|
|
// 3. Read the base branch in to the index of the temporary repository
|
|
_, err = git.NewCommand("read-tree", "base").RunInDir(tmpBasePath)
|
|
if err != nil {
|
|
return false, fmt.Errorf("git read-tree %s: %v", pr.BaseBranch, err)
|
|
}
|
|
|
|
// 4. Now get the pull request configuration to check if we need to ignore whitespace
|
|
prUnit, err := pr.BaseRepo.GetUnit(unit.TypePullRequests)
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
prConfig := prUnit.PullRequestsConfig()
|
|
|
|
// 5. Prepare the arguments to apply the patch against the index
|
|
args := []string{"apply", "--check", "--cached"}
|
|
if prConfig.IgnoreWhitespaceConflicts {
|
|
args = append(args, "--ignore-whitespace")
|
|
}
|
|
if git.CheckGitVersionAtLeast("2.32.0") == nil {
|
|
args = append(args, "--3way")
|
|
}
|
|
args = append(args, patchPath)
|
|
pr.ConflictedFiles = make([]string, 0, 5)
|
|
|
|
// 6. Prep the pipe:
|
|
// - Here we could do the equivalent of:
|
|
// `git apply --check --cached patch_file > conflicts`
|
|
// Then iterate through the conflicts. However, that means storing all the conflicts
|
|
// in memory - which is very wasteful.
|
|
// - alternatively we can do the equivalent of:
|
|
// `git apply --check ... | grep ...`
|
|
// meaning we don't store all of the conflicts unnecessarily.
|
|
stderrReader, stderrWriter, err := os.Pipe()
|
|
if err != nil {
|
|
log.Error("Unable to open stderr pipe: %v", err)
|
|
return false, fmt.Errorf("unable to open stderr pipe: %v", err)
|
|
}
|
|
defer func() {
|
|
_ = stderrReader.Close()
|
|
_ = stderrWriter.Close()
|
|
}()
|
|
|
|
// 7. Run the check command
|
|
conflict = false
|
|
err = git.NewCommand(args...).
|
|
RunInDirTimeoutEnvFullPipelineFunc(
|
|
nil, -1, tmpBasePath,
|
|
nil, stderrWriter, nil,
|
|
func(ctx context.Context, cancel context.CancelFunc) error {
|
|
// Close the writer end of the pipe to begin processing
|
|
_ = stderrWriter.Close()
|
|
defer func() {
|
|
// Close the reader on return to terminate the git command if necessary
|
|
_ = stderrReader.Close()
|
|
}()
|
|
|
|
const prefix = "error: patch failed:"
|
|
const errorPrefix = "error: "
|
|
|
|
conflictMap := map[string]bool{}
|
|
|
|
// Now scan the output from the command
|
|
scanner := bufio.NewScanner(stderrReader)
|
|
for scanner.Scan() {
|
|
line := scanner.Text()
|
|
if strings.HasPrefix(line, prefix) {
|
|
conflict = true
|
|
filepath := strings.TrimSpace(strings.Split(line[len(prefix):], ":")[0])
|
|
conflictMap[filepath] = true
|
|
} else if strings.HasPrefix(line, errorPrefix) {
|
|
conflict = true
|
|
for _, suffix := range patchErrorSuffices {
|
|
if strings.HasSuffix(line, suffix) {
|
|
filepath := strings.TrimSpace(strings.TrimSuffix(line[len(errorPrefix):], suffix))
|
|
if filepath != "" {
|
|
conflictMap[filepath] = true
|
|
}
|
|
break
|
|
}
|
|
}
|
|
}
|
|
// only list 10 conflicted files
|
|
if len(conflictMap) >= 10 {
|
|
break
|
|
}
|
|
}
|
|
|
|
if len(conflictMap) > 0 {
|
|
pr.ConflictedFiles = make([]string, 0, len(conflictMap))
|
|
for key := range conflictMap {
|
|
pr.ConflictedFiles = append(pr.ConflictedFiles, key)
|
|
}
|
|
}
|
|
|
|
return nil
|
|
})
|
|
|
|
// 8. If there is a conflict the `git apply` command will return a non-zero error code - so there will be a positive error.
|
|
if err != nil {
|
|
if conflict {
|
|
pr.Status = models.PullRequestStatusConflict
|
|
log.Trace("Found %d files conflicted: %v", len(pr.ConflictedFiles), pr.ConflictedFiles)
|
|
|
|
return true, nil
|
|
}
|
|
return false, fmt.Errorf("git apply --check: %v", err)
|
|
}
|
|
return false, nil
|
|
}
|
|
|
|
// CheckFileProtection check file Protection
|
|
func CheckFileProtection(oldCommitID, newCommitID string, patterns []glob.Glob, limit int, env []string, repo *git.Repository) ([]string, error) {
|
|
if len(patterns) == 0 {
|
|
return nil, nil
|
|
}
|
|
affectedFiles, err := git.GetAffectedFiles(oldCommitID, newCommitID, env, repo)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
changedProtectedFiles := make([]string, 0, limit)
|
|
for _, affectedFile := range affectedFiles {
|
|
lpath := strings.ToLower(affectedFile)
|
|
for _, pat := range patterns {
|
|
if pat.Match(lpath) {
|
|
changedProtectedFiles = append(changedProtectedFiles, lpath)
|
|
break
|
|
}
|
|
}
|
|
if len(changedProtectedFiles) >= limit {
|
|
break
|
|
}
|
|
}
|
|
if len(changedProtectedFiles) > 0 {
|
|
err = models.ErrFilePathProtected{
|
|
Path: changedProtectedFiles[0],
|
|
}
|
|
}
|
|
return changedProtectedFiles, err
|
|
}
|
|
|
|
// CheckUnprotectedFiles check if the commit only touches unprotected files
|
|
func CheckUnprotectedFiles(oldCommitID, newCommitID string, patterns []glob.Glob, env []string, repo *git.Repository) (bool, error) {
|
|
if len(patterns) == 0 {
|
|
return false, nil
|
|
}
|
|
affectedFiles, err := git.GetAffectedFiles(oldCommitID, newCommitID, env, repo)
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
for _, affectedFile := range affectedFiles {
|
|
lpath := strings.ToLower(affectedFile)
|
|
unprotected := false
|
|
for _, pat := range patterns {
|
|
if pat.Match(lpath) {
|
|
unprotected = true
|
|
break
|
|
}
|
|
}
|
|
if !unprotected {
|
|
return false, nil
|
|
}
|
|
}
|
|
return true, nil
|
|
}
|
|
|
|
// checkPullFilesProtection check if pr changed protected files and save results
|
|
func checkPullFilesProtection(pr *models.PullRequest, gitRepo *git.Repository) error {
|
|
if err := pr.LoadProtectedBranch(); err != nil {
|
|
return err
|
|
}
|
|
|
|
if pr.ProtectedBranch == nil {
|
|
pr.ChangedProtectedFiles = nil
|
|
return nil
|
|
}
|
|
|
|
var err error
|
|
pr.ChangedProtectedFiles, err = CheckFileProtection(pr.MergeBase, "tracking", pr.ProtectedBranch.GetProtectedFilePatterns(), 10, os.Environ(), gitRepo)
|
|
if err != nil && !models.IsErrFilePathProtected(err) {
|
|
return err
|
|
}
|
|
return nil
|
|
}
|