More efficiently parse shas for shaPostProcessor (#16101)

* More efficiently parse shas for shaPostProcessor

The shaPostProcessor currently repeatedly calls git rev-parse --verify on both backends
which is fine if there is only one thing that matches a sha - however if there are
multiple things then this becomes wildly inefficient.

This PR provides functions for both backends which are much faster to use.

Fix #16092

* Add ShaExistCache to RenderContext

Signed-off-by: Andrew Thornton <art27@cantab.net>
Co-authored-by: 6543 <6543@obermui.de>
This commit is contained in:
zeripath 2021-06-20 23:39:12 +01:00 committed by GitHub
parent 23358bc55d
commit 196593e2e9
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 122 additions and 10 deletions

View file

@ -13,6 +13,30 @@ import (
"github.com/go-git/go-git/v5/plumbing"
)
// IsObjectExist returns true if given reference exists in the repository.
func (repo *Repository) IsObjectExist(name string) bool {
if name == "" {
return false
}
_, err := repo.gogitRepo.ResolveRevision(plumbing.Revision(name))
return err == nil
}
// IsReferenceExist returns true if given reference exists in the repository.
func (repo *Repository) IsReferenceExist(name string) bool {
if name == "" {
return false
}
reference, err := repo.gogitRepo.Reference(plumbing.ReferenceName(name), true)
if err != nil {
return false
}
return reference.Type() != plumbing.InvalidReference
}
// IsBranchExist returns true if given branch exists in current repository.
func (repo *Repository) IsBranchExist(name string) bool {
if name == "" {

View file

@ -9,10 +9,28 @@ package git
import (
"bufio"
"bytes"
"io"
"strings"
)
// IsObjectExist returns true if given reference exists in the repository.
func (repo *Repository) IsObjectExist(name string) bool {
if name == "" {
return false
}
wr, rd, cancel := repo.CatFileBatchCheck()
defer cancel()
_, err := wr.Write([]byte(name + "\n"))
if err != nil {
log("Error writing to CatFileBatchCheck %v", err)
return false
}
sha, _, _, err := ReadBatchLine(rd)
return err == nil && bytes.HasPrefix(sha, []byte(strings.TrimSpace(name)))
}
// IsReferenceExist returns true if given reference exists in the repository.
func (repo *Repository) IsReferenceExist(name string) bool {
if name == "" {

View file

@ -286,6 +286,7 @@ var tagCleaner = regexp.MustCompile(`<((?:/?\w+/\w+)|(?:/[\w ]+/)|(/?[hH][tT][mM
var nulCleaner = strings.NewReplacer("\000", "")
func postProcess(ctx *RenderContext, procs []processor, input io.Reader, output io.Writer) error {
defer ctx.Cancel()
// FIXME: don't read all content to memory
rawHTML, err := ioutil.ReadAll(input)
if err != nil {
@ -996,6 +997,9 @@ func sha1CurrentPatternProcessor(ctx *RenderContext, node *html.Node) {
start := 0
next := node.NextSibling
if ctx.ShaExistCache == nil {
ctx.ShaExistCache = make(map[string]bool)
}
for node != nil && node != next && start < len(node.Data) {
m := sha1CurrentPattern.FindStringSubmatchIndex(node.Data[start:])
if m == nil {
@ -1013,10 +1017,28 @@ func sha1CurrentPatternProcessor(ctx *RenderContext, node *html.Node) {
// as used by git and github for linking and thus we have to do similar.
// Because of this, we check to make sure that a matched hash is actually
// a commit in the repository before making it a link.
if _, err := git.NewCommand("rev-parse", "--verify", hash).RunInDirBytes(ctx.Metas["repoPath"]); err != nil {
if !strings.Contains(err.Error(), "fatal: Needed a single revision") {
log.Debug("sha1CurrentPatternProcessor git rev-parse: %v", err)
// check cache first
exist, inCache := ctx.ShaExistCache[hash]
if !inCache {
if ctx.GitRepo == nil {
var err error
ctx.GitRepo, err = git.OpenRepository(ctx.Metas["repoPath"])
if err != nil {
log.Error("unable to open repository: %s Error: %v", ctx.Metas["repoPath"], err)
return
}
ctx.AddCancel(func() {
ctx.GitRepo.Close()
ctx.GitRepo = nil
})
}
exist = ctx.GitRepo.IsObjectExist(hash)
ctx.ShaExistCache[hash] = exist
}
if !exist {
start = m[3]
continue
}

View file

@ -13,6 +13,7 @@ import (
"strings"
"sync"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/setting"
)
@ -35,13 +36,44 @@ func Init() {
// RenderContext represents a render context
type RenderContext struct {
Ctx context.Context
Filename string
Type string
IsWiki bool
URLPrefix string
Metas map[string]string
DefaultLink string
Ctx context.Context
Filename string
Type string
IsWiki bool
URLPrefix string
Metas map[string]string
DefaultLink string
GitRepo *git.Repository
ShaExistCache map[string]bool
cancelFn func()
}
// Cancel runs any cleanup functions that have been registered for this Ctx
func (ctx *RenderContext) Cancel() {
if ctx == nil {
return
}
ctx.ShaExistCache = map[string]bool{}
if ctx.cancelFn == nil {
return
}
ctx.cancelFn()
}
// AddCancel adds the provided fn as a Cleanup for this Ctx
func (ctx *RenderContext) AddCancel(fn func()) {
if ctx == nil {
return
}
oldCancelFn := ctx.cancelFn
if oldCancelFn == nil {
ctx.cancelFn = fn
return
}
ctx.cancelFn = func() {
defer oldCancelFn()
fn()
}
}
// Renderer defines an interface for rendering markup file to HTML

View file

@ -41,6 +41,7 @@ func Home(ctx *context.Context) {
desc, err := markdown.RenderString(&markup.RenderContext{
URLPrefix: ctx.Repo.RepoLink,
Metas: map[string]string{"mode": "document"},
GitRepo: ctx.Repo.GitRepo,
}, org.Description)
if err != nil {
ctx.ServerError("RenderString", err)

View file

@ -1137,6 +1137,7 @@ func ViewIssue(ctx *context.Context) {
issue.RenderedContent, err = markdown.RenderString(&markup.RenderContext{
URLPrefix: ctx.Repo.RepoLink,
Metas: ctx.Repo.Repository.ComposeMetas(),
GitRepo: ctx.Repo.GitRepo,
}, issue.Content)
if err != nil {
ctx.ServerError("RenderString", err)
@ -1301,6 +1302,7 @@ func ViewIssue(ctx *context.Context) {
comment.RenderedContent, err = markdown.RenderString(&markup.RenderContext{
URLPrefix: ctx.Repo.RepoLink,
Metas: ctx.Repo.Repository.ComposeMetas(),
GitRepo: ctx.Repo.GitRepo,
}, comment.Content)
if err != nil {
ctx.ServerError("RenderString", err)
@ -1376,6 +1378,7 @@ func ViewIssue(ctx *context.Context) {
comment.RenderedContent, err = markdown.RenderString(&markup.RenderContext{
URLPrefix: ctx.Repo.RepoLink,
Metas: ctx.Repo.Repository.ComposeMetas(),
GitRepo: ctx.Repo.GitRepo,
}, comment.Content)
if err != nil {
ctx.ServerError("RenderString", err)
@ -1734,6 +1737,7 @@ func UpdateIssueContent(ctx *context.Context) {
content, err := markdown.RenderString(&markup.RenderContext{
URLPrefix: ctx.Query("context"),
Metas: ctx.Repo.Repository.ComposeMetas(),
GitRepo: ctx.Repo.GitRepo,
}, issue.Content)
if err != nil {
ctx.ServerError("RenderString", err)
@ -2161,6 +2165,7 @@ func UpdateCommentContent(ctx *context.Context) {
content, err := markdown.RenderString(&markup.RenderContext{
URLPrefix: ctx.Query("context"),
Metas: ctx.Repo.Repository.ComposeMetas(),
GitRepo: ctx.Repo.GitRepo,
}, comment.Content)
if err != nil {
ctx.ServerError("RenderString", err)

View file

@ -88,6 +88,7 @@ func Milestones(ctx *context.Context) {
m.RenderedContent, err = markdown.RenderString(&markup.RenderContext{
URLPrefix: ctx.Repo.RepoLink,
Metas: ctx.Repo.Repository.ComposeMetas(),
GitRepo: ctx.Repo.GitRepo,
}, m.Content)
if err != nil {
ctx.ServerError("RenderString", err)
@ -280,6 +281,7 @@ func MilestoneIssuesAndPulls(ctx *context.Context) {
milestone.RenderedContent, err = markdown.RenderString(&markup.RenderContext{
URLPrefix: ctx.Repo.RepoLink,
Metas: ctx.Repo.Repository.ComposeMetas(),
GitRepo: ctx.Repo.GitRepo,
}, milestone.Content)
if err != nil {
ctx.ServerError("RenderString", err)

View file

@ -81,6 +81,7 @@ func Projects(ctx *context.Context) {
projects[i].RenderedContent, err = markdown.RenderString(&markup.RenderContext{
URLPrefix: ctx.Repo.RepoLink,
Metas: ctx.Repo.Repository.ComposeMetas(),
GitRepo: ctx.Repo.GitRepo,
}, projects[i].Description)
if err != nil {
ctx.ServerError("RenderString", err)
@ -322,6 +323,7 @@ func ViewProject(ctx *context.Context) {
project.RenderedContent, err = markdown.RenderString(&markup.RenderContext{
URLPrefix: ctx.Repo.RepoLink,
Metas: ctx.Repo.Repository.ComposeMetas(),
GitRepo: ctx.Repo.GitRepo,
}, project.Description)
if err != nil {
ctx.ServerError("RenderString", err)

View file

@ -145,6 +145,7 @@ func releasesOrTags(ctx *context.Context, isTagList bool) {
r.Note, err = markdown.RenderString(&markup.RenderContext{
URLPrefix: ctx.Repo.RepoLink,
Metas: ctx.Repo.Repository.ComposeMetas(),
GitRepo: ctx.Repo.GitRepo,
}, r.Note)
if err != nil {
ctx.ServerError("RenderString", err)
@ -213,6 +214,7 @@ func SingleRelease(ctx *context.Context) {
release.Note, err = markdown.RenderString(&markup.RenderContext{
URLPrefix: ctx.Repo.RepoLink,
Metas: ctx.Repo.Repository.ComposeMetas(),
GitRepo: ctx.Repo.GitRepo,
}, release.Note)
if err != nil {
ctx.ServerError("RenderString", err)

View file

@ -338,6 +338,7 @@ func renderDirectory(ctx *context.Context, treeLink string) {
Filename: readmeFile.name,
URLPrefix: readmeTreelink,
Metas: ctx.Repo.Repository.ComposeDocumentMetas(),
GitRepo: ctx.Repo.GitRepo,
}, rd, &result)
if err != nil {
log.Error("Render failed: %v then fallback", err)
@ -512,6 +513,7 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry, treeLink, rawLink st
Filename: blob.Name(),
URLPrefix: path.Dir(treeLink),
Metas: ctx.Repo.Repository.ComposeDocumentMetas(),
GitRepo: ctx.Repo.GitRepo,
}, rd, &result)
if err != nil {
ctx.ServerError("Render", err)
@ -570,6 +572,7 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry, treeLink, rawLink st
Filename: blob.Name(),
URLPrefix: path.Dir(treeLink),
Metas: ctx.Repo.Repository.ComposeDocumentMetas(),
GitRepo: ctx.Repo.GitRepo,
}, rd, &result)
if err != nil {
ctx.ServerError("Render", err)

View file

@ -117,6 +117,7 @@ func Profile(ctx *context.Context) {
content, err := markdown.RenderString(&markup.RenderContext{
URLPrefix: ctx.Repo.RepoLink,
Metas: map[string]string{"mode": "document"},
GitRepo: ctx.Repo.GitRepo,
}, ctxUser.Description)
if err != nil {
ctx.ServerError("RenderString", err)