diff --git a/modules/html/html.go b/modules/html/html.go deleted file mode 100644 index b1ebd584c6..0000000000 --- a/modules/html/html.go +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright 2022 The Gitea Authors. All rights reserved. -// SPDX-License-Identifier: MIT - -package html - -// ParseSizeAndClass get size and class from string with default values -// If present, "others" expects the new size first and then the classes to use -func ParseSizeAndClass(defaultSize int, defaultClass string, others ...any) (int, string) { - size := defaultSize - if len(others) >= 1 { - if v, ok := others[0].(int); ok && v != 0 { - size = v - } - } - class := defaultClass - if len(others) >= 2 { - if v, ok := others[1].(string); ok && v != "" { - if class != "" { - class += " " - } - class += v - } - } - return size, class -} diff --git a/modules/htmlutil/html.go b/modules/htmlutil/html.go new file mode 100644 index 0000000000..9b5f5a92d8 --- /dev/null +++ b/modules/htmlutil/html.go @@ -0,0 +1,48 @@ +// Copyright 2022 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package htmlutil + +import ( + "fmt" + "html/template" + "slices" +) + +// ParseSizeAndClass get size and class from string with default values +// If present, "others" expects the new size first and then the classes to use +func ParseSizeAndClass(defaultSize int, defaultClass string, others ...any) (int, string) { + size := defaultSize + if len(others) >= 1 { + if v, ok := others[0].(int); ok && v != 0 { + size = v + } + } + class := defaultClass + if len(others) >= 2 { + if v, ok := others[1].(string); ok && v != "" { + if class != "" { + class += " " + } + class += v + } + } + return size, class +} + +func HTMLFormat(s string, rawArgs ...any) template.HTML { + args := slices.Clone(rawArgs) + for i, v := range args { + switch v := v.(type) { + case nil, bool, int, int8, int16, int32, int64, uint, uint8, uint16, uint32, uint64, float32, float64, template.HTML: + // for most basic types (including template.HTML which is safe), just do nothing and use it + case string: + args[i] = template.HTMLEscapeString(v) + case fmt.Stringer: + args[i] = template.HTMLEscapeString(v.String()) + default: + args[i] = template.HTMLEscapeString(fmt.Sprint(v)) + } + } + return template.HTML(fmt.Sprintf(s, args...)) +} diff --git a/modules/htmlutil/html_test.go b/modules/htmlutil/html_test.go new file mode 100644 index 0000000000..5ff05d75b3 --- /dev/null +++ b/modules/htmlutil/html_test.go @@ -0,0 +1,15 @@ +// Copyright 2024 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package htmlutil + +import ( + "html/template" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestHTMLFormat(t *testing.T) { + assert.Equal(t, template.HTML("< < 1"), HTMLFormat("%s %s %d", "<", template.HTML("<"), 1)) +} diff --git a/modules/markup/asciicast/asciicast.go b/modules/markup/asciicast/asciicast.go index 0678062340..e92b78a4bc 100644 --- a/modules/markup/asciicast/asciicast.go +++ b/modules/markup/asciicast/asciicast.go @@ -7,7 +7,6 @@ import ( "fmt" "io" "net/url" - "regexp" "code.gitea.io/gitea/modules/markup" "code.gitea.io/gitea/modules/setting" @@ -38,10 +37,7 @@ const ( // SanitizerRules implements markup.Renderer func (Renderer) SanitizerRules() []setting.MarkupSanitizerRule { - return []setting.MarkupSanitizerRule{ - {Element: "div", AllowAttr: "class", Regexp: regexp.MustCompile(playerClassName)}, - {Element: "div", AllowAttr: playerSrcAttr}, - } + return []setting.MarkupSanitizerRule{{Element: "div", AllowAttr: playerSrcAttr}} } // Render implements markup.Renderer @@ -53,12 +49,5 @@ func (Renderer) Render(ctx *markup.RenderContext, _ io.Reader, output io.Writer) ctx.Metas["BranchNameSubURL"], url.PathEscape(ctx.RelativePath), ) - - _, err := io.WriteString(output, fmt.Sprintf( - `
`, - playerClassName, - playerSrcAttr, - rawURL, - )) - return err + return ctx.RenderInternal.FormatWithSafeAttrs(output, ``, playerClassName, playerSrcAttr, rawURL) } diff --git a/modules/markup/common/html.go b/modules/markup/common/html.go deleted file mode 100644 index 5658839c6f..0000000000 --- a/modules/markup/common/html.go +++ /dev/null @@ -1,16 +0,0 @@ -// Copyright 2019 The Gitea Authors. All rights reserved. -// SPDX-License-Identifier: MIT - -package common - -import ( - "mvdan.cc/xurls/v2" -) - -// NOTE: All below regex matching do not perform any extra validation. -// Thus a link is produced even if the linked entity does not exist. -// While fast, this is also incorrect and lead to false positives. -// TODO: fix invalid linking issue - -// LinkRegex is a regexp matching a valid link -var LinkRegex, _ = xurls.StrictMatchingScheme("https?://") diff --git a/modules/markup/common/linkify.go b/modules/markup/common/linkify.go index f84680205e..be6ab22b55 100644 --- a/modules/markup/common/linkify.go +++ b/modules/markup/common/linkify.go @@ -9,15 +9,27 @@ package common import ( "bytes" "regexp" + "sync" "github.com/yuin/goldmark" "github.com/yuin/goldmark/ast" "github.com/yuin/goldmark/parser" "github.com/yuin/goldmark/text" "github.com/yuin/goldmark/util" + "mvdan.cc/xurls/v2" ) -var wwwURLRegxp = regexp.MustCompile(`^www\.[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}((?:/|[#?])[-a-zA-Z0-9@:%_\+.~#!?&//=\(\);,'">\^{}\[\]` + "`" + `]*)?`) +type GlobalVarsType struct { + wwwURLRegxp *regexp.Regexp + LinkRegex *regexp.Regexp // fast matching a URL link, no any extra validation. +} + +var GlobalVars = sync.OnceValue[*GlobalVarsType](func() *GlobalVarsType { + v := &GlobalVarsType{} + v.wwwURLRegxp = regexp.MustCompile(`^www\.[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}((?:/|[#?])[-a-zA-Z0-9@:%_\+.~#!?&//=\(\);,'">\^{}\[\]` + "`" + `]*)?`) + v.LinkRegex, _ = xurls.StrictMatchingScheme("https?://") + return v +}) type linkifyParser struct{} @@ -60,10 +72,10 @@ func (s *linkifyParser) Parse(parent ast.Node, block text.Reader, pc parser.Cont var protocol []byte typ := ast.AutoLinkURL if bytes.HasPrefix(line, protoHTTP) || bytes.HasPrefix(line, protoHTTPS) || bytes.HasPrefix(line, protoFTP) { - m = LinkRegex.FindSubmatchIndex(line) + m = GlobalVars().LinkRegex.FindSubmatchIndex(line) } if m == nil && bytes.HasPrefix(line, domainWWW) { - m = wwwURLRegxp.FindSubmatchIndex(line) + m = GlobalVars().wwwURLRegxp.FindSubmatchIndex(line) protocol = []byte("http") } if m != nil { diff --git a/modules/markup/console/console.go b/modules/markup/console/console.go index d991527b80..06f3acfa68 100644 --- a/modules/markup/console/console.go +++ b/modules/markup/console/console.go @@ -6,8 +6,7 @@ package console import ( "bytes" "io" - "path/filepath" - "regexp" + "path" "code.gitea.io/gitea/modules/markup" "code.gitea.io/gitea/modules/setting" @@ -36,7 +35,7 @@ func (Renderer) Extensions() []string { // SanitizerRules implements markup.Renderer func (Renderer) SanitizerRules() []setting.MarkupSanitizerRule { return []setting.MarkupSanitizerRule{ - {Element: "span", AllowAttr: "class", Regexp: regexp.MustCompile(`^term-((fg[ix]?|bg)\d+|container)$`)}, + {Element: "span", AllowAttr: "class", Regexp: `^term-((fg[ix]?|bg)\d+|container)$`}, } } @@ -46,7 +45,7 @@ func (Renderer) CanRender(filename string, input io.Reader) bool { if err != nil { return false } - if enry.GetLanguage(filepath.Base(filename), buf) != enry.OtherLanguage { + if enry.GetLanguage(path.Base(filename), buf) != enry.OtherLanguage { return false } return bytes.ContainsRune(buf, '\x1b') diff --git a/modules/markup/csv/csv.go b/modules/markup/csv/csv.go index 3d952b0de4..a3e6bbaac6 100644 --- a/modules/markup/csv/csv.go +++ b/modules/markup/csv/csv.go @@ -7,7 +7,6 @@ import ( "bufio" "html" "io" - "regexp" "strconv" "code.gitea.io/gitea/modules/csv" @@ -37,9 +36,9 @@ func (Renderer) Extensions() []string { // SanitizerRules implements markup.Renderer func (Renderer) SanitizerRules() []setting.MarkupSanitizerRule { return []setting.MarkupSanitizerRule{ - {Element: "table", AllowAttr: "class", Regexp: regexp.MustCompile(`data-table`)}, - {Element: "th", AllowAttr: "class", Regexp: regexp.MustCompile(`line-num`)}, - {Element: "td", AllowAttr: "class", Regexp: regexp.MustCompile(`line-num`)}, + {Element: "table", AllowAttr: "class", Regexp: `^data-table$`}, + {Element: "th", AllowAttr: "class", Regexp: `^line-num$`}, + {Element: "td", AllowAttr: "class", Regexp: `^line-num$`}, } } @@ -51,13 +50,13 @@ func writeField(w io.Writer, element, class, field string) error { return err } if len(class) > 0 { - if _, err := io.WriteString(w, " class=\""); err != nil { + if _, err := io.WriteString(w, ` class="`); err != nil { return err } if _, err := io.WriteString(w, class); err != nil { return err } - if _, err := io.WriteString(w, "\""); err != nil { + if _, err := io.WriteString(w, `"`); err != nil { return err } } diff --git a/modules/markup/external/external.go b/modules/markup/external/external.go index 122517ed11..d28dc9fa5d 100644 --- a/modules/markup/external/external.go +++ b/modules/markup/external/external.go @@ -102,7 +102,7 @@ func (p *Renderer) Render(ctx *markup.RenderContext, input io.Reader, output io. _, err = io.Copy(f, input) if err != nil { - f.Close() + _ = f.Close() return fmt.Errorf("%s write data to temp file when rendering %s failed: %w", p.Name(), p.Command, err) } @@ -113,10 +113,9 @@ func (p *Renderer) Render(ctx *markup.RenderContext, input io.Reader, output io. args = append(args, f.Name()) } - if ctx == nil || ctx.Ctx == nil { - if ctx == nil { - log.Warn("RenderContext not provided defaulting to empty ctx") - ctx = &markup.RenderContext{} + if ctx.Ctx == nil { + if !setting.IsProd || setting.IsInTesting { + panic("RenderContext did not provide context") } log.Warn("RenderContext did not provide context, defaulting to Shutdown context") ctx.Ctx = graceful.GetManager().ShutdownContext() diff --git a/modules/markup/html.go b/modules/markup/html.go index 16ccd4b406..e8799c401c 100644 --- a/modules/markup/html.go +++ b/modules/markup/html.go @@ -25,9 +25,6 @@ const ( IssueNameStyleRegexp = "regexp" ) -// CSS class for action keywords (e.g. "closes: #1") -const keywordClass = "issue-keyword" - type globalVarsType struct { hashCurrentPattern *regexp.Regexp shortLinkPattern *regexp.Regexp @@ -39,6 +36,7 @@ type globalVarsType struct { emojiShortCodeRegex *regexp.Regexp issueFullPattern *regexp.Regexp filesChangedFullPattern *regexp.Regexp + codePreviewPattern *regexp.Regexp tagCleaner *regexp.Regexp nulCleaner *strings.Replacer @@ -88,6 +86,9 @@ var globalVars = sync.OnceValue[*globalVarsType](func() *globalVarsType { // example: https://domain/org/repo/pulls/27/files#hash v.filesChangedFullPattern = regexp.MustCompile(`https?://(?:\S+/)[\w_.-]+/[\w_.-]+/pulls/((?:\w{1,10}-)?[1-9][0-9]*)/files([\?|#](\S+)?)?\b`) + // codePreviewPattern matches "http://domain/.../{owner}/{repo}/src/commit/{commit}/{filepath}#L10-L20" + v.codePreviewPattern = regexp.MustCompile(`https?://\S+/([^\s/]+)/([^\s/]+)/src/commit/([0-9a-f]{7,64})(/\S+)#(L\d+(-L\d+)?)`) + v.tagCleaner = regexp.MustCompile(`<((?:/?\w+/\w+)|(?:/[\w ]+/)|(/?[hH][tT][mM][lL]\b)|(/?[hH][eE][aA][dD]\b))`) v.nulCleaner = strings.NewReplacer("\000", "") return v @@ -129,7 +130,7 @@ func CustomLinkURLSchemes(schemes []string) { } withAuth = append(withAuth, s) } - common.LinkRegex, _ = xurls.StrictMatchingScheme(strings.Join(withAuth, "|")) + common.GlobalVars().LinkRegex, _ = xurls.StrictMatchingScheme(strings.Join(withAuth, "|")) } type postProcessError struct { @@ -164,11 +165,7 @@ var defaultProcessors = []processor{ // emails with HTML links, parsing shortlinks in the format of [[Link]], like // MediaWiki, linking issues in the format #ID, and mentions in the format // @user, and others. -func PostProcess( - ctx *RenderContext, - input io.Reader, - output io.Writer, -) error { +func PostProcess(ctx *RenderContext, input io.Reader, output io.Writer) error { return postProcess(ctx, defaultProcessors, input, output) } @@ -189,10 +186,7 @@ var commitMessageProcessors = []processor{ // RenderCommitMessage will use the same logic as PostProcess, but will disable // the shortLinkProcessor and will add a defaultLinkProcessor if defaultLink is // set, which changes every text node into a link to the passed default link. -func RenderCommitMessage( - ctx *RenderContext, - content string, -) (string, error) { +func RenderCommitMessage(ctx *RenderContext, content string) (string, error) { procs := commitMessageProcessors return renderProcessString(ctx, procs, content) } @@ -219,10 +213,7 @@ var emojiProcessors = []processor{ // RenderCommitMessage, but will disable the shortLinkProcessor and // emailAddressProcessor, will add a defaultLinkProcessor if defaultLink is set, // which changes every text node into a link to the passed default link. -func RenderCommitMessageSubject( - ctx *RenderContext, - defaultLink, content string, -) (string, error) { +func RenderCommitMessageSubject(ctx *RenderContext, defaultLink, content string) (string, error) { procs := slices.Clone(commitMessageSubjectProcessors) procs = append(procs, func(ctx *RenderContext, node *html.Node) { ch := &html.Node{Parent: node, Type: html.TextNode, Data: node.Data} @@ -236,10 +227,7 @@ func RenderCommitMessageSubject( } // RenderIssueTitle to process title on individual issue/pull page -func RenderIssueTitle( - ctx *RenderContext, - title string, -) (string, error) { +func RenderIssueTitle(ctx *RenderContext, title string) (string, error) { // do not render other issue/commit links in an issue's title - which in most cases is already a link. return renderProcessString(ctx, []processor{ emojiShortCodeProcessor, @@ -257,10 +245,7 @@ func renderProcessString(ctx *RenderContext, procs []processor, content string) // RenderDescriptionHTML will use similar logic as PostProcess, but will // use a single special linkProcessor. -func RenderDescriptionHTML( - ctx *RenderContext, - content string, -) (string, error) { +func RenderDescriptionHTML(ctx *RenderContext, content string) (string, error) { return renderProcessString(ctx, []processor{ descriptionLinkProcessor, emojiShortCodeProcessor, @@ -270,10 +255,7 @@ func RenderDescriptionHTML( // RenderEmoji for when we want to just process emoji and shortcodes // in various places it isn't already run through the normal markdown processor -func RenderEmoji( - ctx *RenderContext, - content string, -) (string, error) { +func RenderEmoji(ctx *RenderContext, content string) (string, error) { return renderProcessString(ctx, emojiProcessors, content) } @@ -333,6 +315,17 @@ func postProcess(ctx *RenderContext, procs []processor, input io.Reader, output return nil } +func isEmojiNode(node *html.Node) bool { + if node.Type == html.ElementNode && node.Data == atom.Span.String() { + for _, attr := range node.Attr { + if (attr.Key == "class" || attr.Key == "data-attr-class") && strings.Contains(attr.Val, "emoji") { + return true + } + } + } + return false +} + func visitNode(ctx *RenderContext, procs []processor, node *html.Node) *html.Node { // Add user-content- to IDs and "#" links if they don't already have them for idx, attr := range node.Attr { @@ -346,47 +339,27 @@ func visitNode(ctx *RenderContext, procs []processor, node *html.Node) *html.Nod if attr.Key == "href" && strings.HasPrefix(attr.Val, "#") && notHasPrefix { node.Attr[idx].Val = "#user-content-" + val } - - if attr.Key == "class" && attr.Val == "emoji" { - procs = nil - } } switch node.Type { case html.TextNode: - processTextNodes(ctx, procs, node) + for _, proc := range procs { + proc(ctx, node) // it might add siblings + } + case html.ElementNode: - if node.Data == "code" || node.Data == "pre" { - // ignore code and pre nodes + if isEmojiNode(node) { + // TextNode emoji will be converted to ``, then the next iteration will visit the "span" + // if we don't stop it, it will go into the TextNode again and create an infinite recursion return node.NextSibling + } else if node.Data == "code" || node.Data == "pre" { + return node.NextSibling // ignore code and pre nodes } else if node.Data == "img" { return visitNodeImg(ctx, node) } else if node.Data == "video" { return visitNodeVideo(ctx, node) } else if node.Data == "a" { - // Restrict text in links to emojis - procs = emojiProcessors - } else if node.Data == "i" { - for _, attr := range node.Attr { - if attr.Key != "class" { - continue - } - classes := strings.Split(attr.Val, " ") - for i, class := range classes { - if class == "icon" { - classes[0], classes[i] = classes[i], classes[0] - attr.Val = strings.Join(classes, " ") - - // Remove all children of icons - child := node.FirstChild - for child != nil { - node.RemoveChild(child) - child = node.FirstChild - } - break - } - } - } + procs = emojiProcessors // Restrict text in links to emojis } for n := node.FirstChild; n != nil; { n = visitNode(ctx, procs, n) @@ -396,22 +369,17 @@ func visitNode(ctx *RenderContext, procs []processor, node *html.Node) *html.Nod return node.NextSibling } -// processTextNodes runs the passed node through various processors, in order to handle -// all kinds of special links handled by the post-processing. -func processTextNodes(ctx *RenderContext, procs []processor, node *html.Node) { - for _, p := range procs { - p(ctx, node) - } -} - // createKeyword() renders a highlighted version of an action keyword -func createKeyword(content string) *html.Node { +func createKeyword(ctx *RenderContext, content string) *html.Node { + // CSS class for action keywords (e.g. "closes: #1") + const keywordClass = "issue-keyword" + span := &html.Node{ Type: html.ElementNode, Data: atom.Span.String(), Attr: []html.Attribute{}, } - span.Attr = append(span.Attr, html.Attribute{Key: "class", Val: keywordClass}) + span.Attr = append(span.Attr, ctx.RenderInternal.NodeSafeAttr("class", keywordClass)) text := &html.Node{ Type: html.TextNode, @@ -422,7 +390,7 @@ func createKeyword(content string) *html.Node { return span } -func createLink(href, content, class string) *html.Node { +func createLink(ctx *RenderContext, href, content, class string) *html.Node { a := &html.Node{ Type: html.ElementNode, Data: atom.A.String(), @@ -432,7 +400,7 @@ func createLink(href, content, class string) *html.Node { a.Attr = append(a.Attr, html.Attribute{Key: "data-markdown-generated-content"}) } if class != "" { - a.Attr = append(a.Attr, html.Attribute{Key: "class", Val: class}) + a.Attr = append(a.Attr, ctx.RenderInternal.NodeSafeAttr("class", class)) } text := &html.Node{ diff --git a/modules/markup/html_codepreview.go b/modules/markup/html_codepreview.go index 5ab9290b3e..5c88481d76 100644 --- a/modules/markup/html_codepreview.go +++ b/modules/markup/html_codepreview.go @@ -6,7 +6,6 @@ package markup import ( "html/template" "net/url" - "regexp" "strconv" "strings" @@ -16,9 +15,6 @@ import ( "golang.org/x/net/html" ) -// codePreviewPattern matches "http://domain/.../{owner}/{repo}/src/commit/{commit}/{filepath}#L10-L20" -var codePreviewPattern = regexp.MustCompile(`https?://\S+/([^\s/]+)/([^\s/]+)/src/commit/([0-9a-f]{7,64})(/\S+)#(L\d+(-L\d+)?)`) - type RenderCodePreviewOptions struct { FullURL string OwnerName string @@ -30,7 +26,7 @@ type RenderCodePreviewOptions struct { } func renderCodeBlock(ctx *RenderContext, node *html.Node) (urlPosStart, urlPosStop int, htm template.HTML, err error) { - m := codePreviewPattern.FindStringSubmatchIndex(node.Data) + m := globalVars().codePreviewPattern.FindStringSubmatchIndex(node.Data) if m == nil { return 0, 0, "", nil } @@ -66,8 +62,8 @@ func codePreviewPatternProcessor(ctx *RenderContext, node *html.Node) { node = node.NextSibling continue } - urlPosStart, urlPosEnd, h, err := renderCodeBlock(ctx, node) - if err != nil || h == "" { + urlPosStart, urlPosEnd, renderedCodeBlock, err := renderCodeBlock(ctx, node) + if err != nil || renderedCodeBlock == "" { if err != nil { log.Error("Unable to render code preview: %v", err) } @@ -84,7 +80,8 @@ func codePreviewPatternProcessor(ctx *RenderContext, node *html.Node) { // then it is resolved as: "{TextBefore}
{TextAfter}
", // so unless it could correctly replace the parent "p/li" node, it is very difficult to eliminate the "TextBefore" empty node. node.Data = textBefore - node.Parent.InsertBefore(&html.Node{Type: html.RawNode, Data: string(h)}, next) + renderedCodeNode := &html.Node{Type: html.RawNode, Data: string(ctx.RenderInternal.ProtectSafeAttrs(renderedCodeBlock))} + node.Parent.InsertBefore(renderedCodeNode, next) if textAfter != "" { node.Parent.InsertBefore(&html.Node{Type: html.TextNode, Data: textAfter}, next) } diff --git a/modules/markup/html_email.go b/modules/markup/html_email.go index 32d0285eb4..cbfae8b829 100644 --- a/modules/markup/html_email.go +++ b/modules/markup/html_email.go @@ -15,7 +15,7 @@ func emailAddressProcessor(ctx *RenderContext, node *html.Node) { } mail := node.Data[m[2]:m[3]] - replaceContent(node, m[2], m[3], createLink("mailto:"+mail, mail, "mailto")) + replaceContent(node, m[2], m[3], createLink(ctx, "mailto:"+mail, mail, "" /*mailto*/)) node = node.NextSibling.NextSibling } } diff --git a/modules/markup/html_emoji.go b/modules/markup/html_emoji.go index 6eacb2067f..c638065425 100644 --- a/modules/markup/html_emoji.go +++ b/modules/markup/html_emoji.go @@ -13,15 +13,13 @@ import ( "golang.org/x/net/html/atom" ) -func createEmoji(content, class, name string) *html.Node { +func createEmoji(ctx *RenderContext, content, name string) *html.Node { span := &html.Node{ Type: html.ElementNode, Data: atom.Span.String(), Attr: []html.Attribute{}, } - if class != "" { - span.Attr = append(span.Attr, html.Attribute{Key: "class", Val: class}) - } + span.Attr = append(span.Attr, ctx.RenderInternal.NodeSafeAttr("class", "emoji")) if name != "" { span.Attr = append(span.Attr, html.Attribute{Key: "aria-label", Val: name}) } @@ -35,13 +33,13 @@ func createEmoji(content, class, name string) *html.Node { return span } -func createCustomEmoji(alias string) *html.Node { +func createCustomEmoji(ctx *RenderContext, alias string) *html.Node { span := &html.Node{ Type: html.ElementNode, Data: atom.Span.String(), Attr: []html.Attribute{}, } - span.Attr = append(span.Attr, html.Attribute{Key: "class", Val: "emoji"}) + span.Attr = append(span.Attr, ctx.RenderInternal.NodeSafeAttr("class", "emoji")) span.Attr = append(span.Attr, html.Attribute{Key: "aria-label", Val: alias}) img := &html.Node{ @@ -77,7 +75,7 @@ func emojiShortCodeProcessor(ctx *RenderContext, node *html.Node) { if converted == nil { // check if this is a custom reaction if _, exist := setting.UI.CustomEmojisMap[alias]; exist { - replaceContent(node, m[0], m[1], createCustomEmoji(alias)) + replaceContent(node, m[0], m[1], createCustomEmoji(ctx, alias)) node = node.NextSibling.NextSibling start = 0 continue @@ -85,7 +83,7 @@ func emojiShortCodeProcessor(ctx *RenderContext, node *html.Node) { continue } - replaceContent(node, m[0], m[1], createEmoji(converted.Emoji, "emoji", converted.Description)) + replaceContent(node, m[0], m[1], createEmoji(ctx, converted.Emoji, converted.Description)) node = node.NextSibling.NextSibling start = 0 } @@ -107,7 +105,7 @@ func emojiProcessor(ctx *RenderContext, node *html.Node) { start = m[1] val := emoji.FromCode(codepoint) if val != nil { - replaceContent(node, m[0], m[1], createEmoji(codepoint, "emoji", val.Description)) + replaceContent(node, m[0], m[1], createEmoji(ctx, codepoint, val.Description)) node = node.NextSibling.NextSibling start = 0 } diff --git a/modules/markup/html_issue.go b/modules/markup/html_issue.go index 2acf154ad2..7341af7eb6 100644 --- a/modules/markup/html_issue.go +++ b/modules/markup/html_issue.go @@ -57,10 +57,10 @@ func fullIssuePatternProcessor(ctx *RenderContext, node *html.Node) { matchRepo := linkParts[len(linkParts)-3] if matchOrg == ctx.Metas["user"] && matchRepo == ctx.Metas["repo"] { - replaceContent(node, m[0], m[1], createLink(link, text, "ref-issue")) + replaceContent(node, m[0], m[1], createLink(ctx, link, text, "ref-issue")) } else { text = matchOrg + "/" + matchRepo + text - replaceContent(node, m[0], m[1], createLink(link, text, "ref-issue")) + replaceContent(node, m[0], m[1], createLink(ctx, link, text, "ref-issue")) } node = node.NextSibling.NextSibling } @@ -129,16 +129,16 @@ func issueIndexPatternProcessor(ctx *RenderContext, node *html.Node) { log.Error("unable to expand template vars for ref %s, err: %v", ref.Issue, err) } - link = createLink(res, reftext, "ref-issue ref-external-issue") + link = createLink(ctx, res, reftext, "ref-issue ref-external-issue") } else { // Path determines the type of link that will be rendered. It's unknown at this point whether // the linked item is actually a PR or an issue. Luckily it's of no real consequence because // Gitea will redirect on click as appropriate. issuePath := util.Iif(ref.IsPull, "pulls", "issues") if ref.Owner == "" { - link = createLink(util.URLJoin(ctx.Links.Prefix(), ctx.Metas["user"], ctx.Metas["repo"], issuePath, ref.Issue), reftext, "ref-issue") + link = createLink(ctx, util.URLJoin(ctx.Links.Prefix(), ctx.Metas["user"], ctx.Metas["repo"], issuePath, ref.Issue), reftext, "ref-issue") } else { - link = createLink(util.URLJoin(ctx.Links.Prefix(), ref.Owner, ref.Name, issuePath, ref.Issue), reftext, "ref-issue") + link = createLink(ctx, util.URLJoin(ctx.Links.Prefix(), ref.Owner, ref.Name, issuePath, ref.Issue), reftext, "ref-issue") } } @@ -151,7 +151,7 @@ func issueIndexPatternProcessor(ctx *RenderContext, node *html.Node) { // Decorate action keywords if actionable var keyword *html.Node if references.IsXrefActionable(ref, hasExtTrackFormat) { - keyword = createKeyword(node.Data[ref.ActionLocation.Start:ref.ActionLocation.End]) + keyword = createKeyword(ctx, node.Data[ref.ActionLocation.Start:ref.ActionLocation.End]) } else { keyword = &html.Node{ Type: html.TextNode, @@ -177,7 +177,7 @@ func commitCrossReferencePatternProcessor(ctx *RenderContext, node *html.Node) { } reftext := ref.Owner + "/" + ref.Name + "@" + base.ShortSha(ref.CommitSha) - link := createLink(util.URLJoin(ctx.Links.Prefix(), ref.Owner, ref.Name, "commit", ref.CommitSha), reftext, "commit") + link := createLink(ctx, util.URLJoin(ctx.Links.Prefix(), ref.Owner, ref.Name, "commit", ref.CommitSha), reftext, "commit") replaceContent(node, ref.RefLocation.Start, ref.RefLocation.End, link) node = node.NextSibling.NextSibling diff --git a/modules/markup/html_link.go b/modules/markup/html_link.go index b7562d0aa6..32aa7dc614 100644 --- a/modules/markup/html_link.go +++ b/modules/markup/html_link.go @@ -189,13 +189,13 @@ func shortLinkProcessor(ctx *RenderContext, node *html.Node) { func linkProcessor(ctx *RenderContext, node *html.Node) { next := node.NextSibling for node != nil && node != next { - m := common.LinkRegex.FindStringIndex(node.Data) + m := common.GlobalVars().LinkRegex.FindStringIndex(node.Data) if m == nil { return } uri := node.Data[m[0]:m[1]] - replaceContent(node, m[0], m[1], createLink(uri, uri, "link")) + replaceContent(node, m[0], m[1], createLink(ctx, uri, uri, "" /*link*/)) node = node.NextSibling.NextSibling } } @@ -204,7 +204,7 @@ func linkProcessor(ctx *RenderContext, node *html.Node) { func descriptionLinkProcessor(ctx *RenderContext, node *html.Node) { next := node.NextSibling for node != nil && node != next { - m := common.LinkRegex.FindStringIndex(node.Data) + m := common.GlobalVars().LinkRegex.FindStringIndex(node.Data) if m == nil { return } diff --git a/modules/markup/html_mention.go b/modules/markup/html_mention.go index 3f0692e05f..f7e2ad50f1 100644 --- a/modules/markup/html_mention.go +++ b/modules/markup/html_mention.go @@ -33,7 +33,7 @@ func mentionProcessor(ctx *RenderContext, node *html.Node) { if ok && strings.Contains(mention, "/") { mentionOrgAndTeam := strings.Split(mention, "/") if mentionOrgAndTeam[0][1:] == ctx.Metas["org"] && strings.Contains(teams, ","+strings.ToLower(mentionOrgAndTeam[1])+",") { - replaceContent(node, loc.Start, loc.End, createLink(util.URLJoin(ctx.Links.Prefix(), "org", ctx.Metas["org"], "teams", mentionOrgAndTeam[1]), mention, "mention")) + replaceContent(node, loc.Start, loc.End, createLink(ctx, util.URLJoin(ctx.Links.Prefix(), "org", ctx.Metas["org"], "teams", mentionOrgAndTeam[1]), mention, "" /*mention*/)) node = node.NextSibling.NextSibling start = 0 continue @@ -44,7 +44,7 @@ func mentionProcessor(ctx *RenderContext, node *html.Node) { mentionedUsername := mention[1:] if DefaultProcessorHelper.IsUsernameMentionable != nil && DefaultProcessorHelper.IsUsernameMentionable(ctx.Ctx, mentionedUsername) { - replaceContent(node, loc.Start, loc.End, createLink(util.URLJoin(ctx.Links.Prefix(), mentionedUsername), mention, "mention")) + replaceContent(node, loc.Start, loc.End, createLink(ctx, util.URLJoin(ctx.Links.Prefix(), mentionedUsername), mention, "" /*mention*/)) node = node.NextSibling.NextSibling start = 0 } else { diff --git a/modules/markup/internal/finalprocessor.go b/modules/markup/internal/finalprocessor.go new file mode 100644 index 0000000000..14d46a161f --- /dev/null +++ b/modules/markup/internal/finalprocessor.go @@ -0,0 +1,30 @@ +// Copyright 2024 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package internal + +import ( + "bytes" + "io" +) + +type finalProcessor struct { + renderInternal *RenderInternal + + output io.Writer + buf bytes.Buffer +} + +func (p *finalProcessor) Write(data []byte) (int, error) { + p.buf.Write(data) + return len(data), nil +} + +func (p *finalProcessor) Close() error { + // TODO: reading the whole markdown isn't a problem at the moment, + // because "postProcess" already does so. In the future we could optimize the code to process data on the fly. + buf := p.buf.Bytes() + buf = bytes.ReplaceAll(buf, []byte(` data-attr-class="`+p.renderInternal.secureIDPrefix), []byte(` class="`)) + _, err := p.output.Write(buf) + return err +} diff --git a/modules/markup/internal/internal_test.go b/modules/markup/internal/internal_test.go new file mode 100644 index 0000000000..98ff3bc079 --- /dev/null +++ b/modules/markup/internal/internal_test.go @@ -0,0 +1,61 @@ +// Copyright 2024 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package internal + +import ( + "bytes" + "html/template" + "io" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestRenderInternal(t *testing.T) { + cases := []struct { + input, protected, recovered string + }{ + { + input: ``)
- if err != nil {
- return
- }
-
- // include language-x class as part of commonmark spec
- // the "display" class is used by "js/markup/math.js" to render the code element as a block
- _, err = w.WriteString(``)
- if err != nil {
- return
- }
- } else {
- _, err := w.WriteString("
")
- if err != nil {
- return
- }
- }
- }),
- ),
- math.NewExtension(
- math.Enabled(setting.Markdown.EnableMath),
- ),
- meta.Meta,
- ),
- goldmark.WithParserOptions(
- parser.WithAttribute(),
- parser.WithAutoHeadingID(),
- parser.WithASTTransformers(
- util.Prioritized(NewASTTransformer(), 10000),
- ),
- ),
- goldmark.WithRendererOptions(
- html.WithUnsafe(),
- ),
- )
-
- // Override the original Tasklist renderer!
- specMarkdown.Renderer().AddOptions(
- renderer.WithNodeRenderers(
- util.Prioritized(NewHTMLRenderer(), 10),
- ),
- )
- })
- return specMarkdown
+ goldmarkMarkdown goldmark.Markdown
}
-// actualRender renders Markdown to HTML without handling special links.
-func actualRender(ctx *markup.RenderContext, input io.Reader, output io.Writer) error {
- converter := SpecializedMarkdown()
+func (r *GlodmarkRender) Convert(source []byte, writer io.Writer, opts ...parser.ParseOption) error {
+ return r.goldmarkMarkdown.Convert(source, writer, opts...)
+}
+
+func (r *GlodmarkRender) Renderer() renderer.Renderer {
+ return r.goldmarkMarkdown.Renderer()
+}
+
+func (r *GlodmarkRender) highlightingRenderer(w util.BufWriter, c highlighting.CodeBlockContext, entering bool) {
+ if entering {
+ language, _ := c.Language()
+ if language == nil {
+ language = []byte("text")
+ }
+
+ languageStr := string(language)
+
+ preClasses := []string{"code-block"}
+ if languageStr == "mermaid" || languageStr == "math" {
+ preClasses = append(preClasses, "is-loading")
+ }
+
+ err := r.ctx.RenderInternal.FormatWithSafeAttrs(w, ``, strings.Join(preClasses, " "))
+ if err != nil {
+ return
+ }
+
+ // include language-x class as part of commonmark spec
+ // the "display" class is used by "js/markup/math.js" to render the code element as a block
+ err = r.ctx.RenderInternal.FormatWithSafeAttrs(w, ``, string(language))
+ if err != nil {
+ return
+ }
+ } else {
+ _, err := w.WriteString("
")
+ if err != nil {
+ return
+ }
+ }
+}
+
+// SpecializedMarkdown sets up the Gitea specific markdown extensions
+func SpecializedMarkdown(ctx *markup.RenderContext) *GlodmarkRender {
+ // TODO: it could use a pool to cache the renderers to reuse them with different contexts
+ // at the moment it is fast enough (see the benchmarks)
+ r := &GlodmarkRender{ctx: ctx}
+ r.goldmarkMarkdown = goldmark.New(
+ goldmark.WithExtensions(
+ extension.NewTable(extension.WithTableCellAlignMethod(extension.TableCellAlignAttribute)),
+ extension.Strikethrough,
+ extension.TaskList,
+ extension.DefinitionList,
+ common.FootnoteExtension,
+ highlighting.NewHighlighting(
+ highlighting.WithFormatOptions(
+ chromahtml.WithClasses(true),
+ chromahtml.PreventSurroundingPre(true),
+ ),
+ highlighting.WithWrapperRenderer(r.highlightingRenderer),
+ ),
+ math.NewExtension(&ctx.RenderInternal, math.Enabled(setting.Markdown.EnableMath)),
+ meta.Meta,
+ ),
+ goldmark.WithParserOptions(
+ parser.WithAttribute(),
+ parser.WithAutoHeadingID(),
+ parser.WithASTTransformers(util.Prioritized(NewASTTransformer(&ctx.RenderInternal), 10000)),
+ ),
+ goldmark.WithRendererOptions(html.WithUnsafe()),
+ )
+
+ // Override the original Tasklist renderer!
+ r.goldmarkMarkdown.Renderer().AddOptions(
+ renderer.WithNodeRenderers(util.Prioritized(NewHTMLRenderer(&ctx.RenderInternal), 10)),
+ )
+
+ return r
+}
+
+// render calls goldmark render to convert Markdown to HTML
+// NOTE: The output of this method MUST get sanitized separately!!!
+func render(ctx *markup.RenderContext, input io.Reader, output io.Writer) error {
+ converter := SpecializedMarkdown(ctx)
lw := &limitWriter{
w: output,
limit: setting.UI.MaxDisplayFileSize * 3,
@@ -160,8 +164,8 @@ func actualRender(ctx *markup.RenderContext, input io.Reader, output io.Writer)
}
log.Warn("Unable to render markdown due to panic in goldmark: %v", err)
- if log.IsDebug() {
- log.Debug("Panic in markdown: %v\n%s", err, log.Stack(2))
+ if (!setting.IsProd && !setting.IsInTesting) || log.IsDebug() {
+ log.Error("Panic in markdown: %v\n%s", err, log.Stack(2))
}
}()
@@ -200,26 +204,6 @@ func actualRender(ctx *markup.RenderContext, input io.Reader, output io.Writer)
return nil
}
-// Note: The output of this method must get sanitized.
-func render(ctx *markup.RenderContext, input io.Reader, output io.Writer) error {
- defer func() {
- err := recover()
- if err == nil {
- return
- }
-
- log.Warn("Unable to render markdown due to panic in goldmark - will return raw bytes")
- if log.IsDebug() {
- log.Debug("Panic in markdown: %v\n%s", err, log.Stack(2))
- }
- _, err = io.Copy(output, input)
- if err != nil {
- log.Error("io.Copy failed: %v", err)
- }
- }()
- return actualRender(ctx, input, output)
-}
-
// MarkupName describes markup's name
var MarkupName = "markdown"
diff --git a/modules/markup/markdown/markdown_test.go b/modules/markup/markdown/markdown_test.go
index 780df8727f..e4889a75e5 100644
--- a/modules/markup/markdown/markdown_test.go
+++ b/modules/markup/markdown/markdown_test.go
@@ -1051,3 +1051,17 @@ func TestAttention(t *testing.T) {
// legacy GitHub style
test(`> **warning**`, renderAttention("warning", "octicon-alert")+"\n")
}
+
+func BenchmarkSpecializedMarkdown(b *testing.B) {
+ // 240856 4719 ns/op
+ for i := 0; i < b.N; i++ {
+ markdown.SpecializedMarkdown(&markup.RenderContext{})
+ }
+}
+
+func BenchmarkMarkdownRender(b *testing.B) {
+ // 23202 50840 ns/op
+ for i := 0; i < b.N; i++ {
+ _, _ = markdown.RenderString(&markup.RenderContext{Ctx: context.Background()}, "https://example.com\n- a\n- b\n")
+ }
+}
diff --git a/modules/markup/markdown/math/block_renderer.go b/modules/markup/markdown/math/block_renderer.go
index 84817ef1e4..0d2a966102 100644
--- a/modules/markup/markdown/math/block_renderer.go
+++ b/modules/markup/markdown/math/block_renderer.go
@@ -4,17 +4,21 @@
package math
import (
+ "code.gitea.io/gitea/modules/markup/internal"
+
gast "github.com/yuin/goldmark/ast"
"github.com/yuin/goldmark/renderer"
"github.com/yuin/goldmark/util"
)
// BlockRenderer represents a renderer for math Blocks
-type BlockRenderer struct{}
+type BlockRenderer struct {
+ renderInternal *internal.RenderInternal
+}
// NewBlockRenderer creates a new renderer for math Blocks
-func NewBlockRenderer() renderer.NodeRenderer {
- return &BlockRenderer{}
+func NewBlockRenderer(renderInternal *internal.RenderInternal) renderer.NodeRenderer {
+ return &BlockRenderer{renderInternal: renderInternal}
}
// RegisterFuncs registers the renderer for math Blocks
@@ -33,7 +37,7 @@ func (r *BlockRenderer) writeLines(w util.BufWriter, source []byte, n gast.Node)
func (r *BlockRenderer) renderBlock(w util.BufWriter, source []byte, node gast.Node, entering bool) (gast.WalkStatus, error) {
n := node.(*Block)
if entering {
- _, _ = w.WriteString(``)
+ _ = r.renderInternal.FormatWithSafeAttrs(w, ``)
r.writeLines(w, source, n)
} else {
_, _ = w.WriteString(`
` + "\n")
diff --git a/modules/markup/markdown/math/inline_renderer.go b/modules/markup/markdown/math/inline_renderer.go
index 96848099cc..0cff4f1e74 100644
--- a/modules/markup/markdown/math/inline_renderer.go
+++ b/modules/markup/markdown/math/inline_renderer.go
@@ -6,17 +6,21 @@ package math
import (
"bytes"
+ "code.gitea.io/gitea/modules/markup/internal"
+
"github.com/yuin/goldmark/ast"
"github.com/yuin/goldmark/renderer"
"github.com/yuin/goldmark/util"
)
// InlineRenderer is an inline renderer
-type InlineRenderer struct{}
+type InlineRenderer struct {
+ renderInternal *internal.RenderInternal
+}
// NewInlineRenderer returns a new renderer for inline math
-func NewInlineRenderer() renderer.NodeRenderer {
- return &InlineRenderer{}
+func NewInlineRenderer(renderInternal *internal.RenderInternal) renderer.NodeRenderer {
+ return &InlineRenderer{renderInternal: renderInternal}
}
func (r *InlineRenderer) renderInline(w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) {
@@ -25,7 +29,7 @@ func (r *InlineRenderer) renderInline(w util.BufWriter, source []byte, n ast.Nod
if _, ok := n.(*InlineBlock); ok {
extraClass = "display "
}
- _, _ = w.WriteString(``)
+ _ = r.renderInternal.FormatWithSafeAttrs(w, ``, extraClass)
for c := n.FirstChild(); c != nil; c = c.NextSibling() {
segment := c.(*ast.Text).Segment
value := util.EscapeHTML(segment.Value(source))
diff --git a/modules/markup/markdown/math/math.go b/modules/markup/markdown/math/math.go
index 3d9f376bc6..7e8defcd4a 100644
--- a/modules/markup/markdown/math/math.go
+++ b/modules/markup/markdown/math/math.go
@@ -4,6 +4,8 @@
package math
import (
+ "code.gitea.io/gitea/modules/markup/internal"
+
"github.com/yuin/goldmark"
"github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/renderer"
@@ -12,6 +14,7 @@ import (
// Extension is a math extension
type Extension struct {
+ renderInternal *internal.RenderInternal
enabled bool
parseDollarInline bool
parseDollarBlock bool
@@ -39,38 +42,10 @@ func Enabled(enable ...bool) Option {
})
}
-// WithInlineDollarParser enables or disables the parsing of $...$
-func WithInlineDollarParser(enable ...bool) Option {
- value := true
- if len(enable) > 0 {
- value = enable[0]
- }
- return extensionFunc(func(e *Extension) {
- e.parseDollarInline = value
- })
-}
-
-// WithBlockDollarParser enables or disables the parsing of $$...$$
-func WithBlockDollarParser(enable ...bool) Option {
- value := true
- if len(enable) > 0 {
- value = enable[0]
- }
- return extensionFunc(func(e *Extension) {
- e.parseDollarBlock = value
- })
-}
-
-// Math represents a math extension with default rendered delimiters
-var Math = &Extension{
- enabled: true,
- parseDollarBlock: true,
- parseDollarInline: true,
-}
-
// NewExtension creates a new math extension with the provided options
-func NewExtension(opts ...Option) *Extension {
+func NewExtension(renderInternal *internal.RenderInternal, opts ...Option) *Extension {
r := &Extension{
+ renderInternal: renderInternal,
enabled: true,
parseDollarBlock: true,
parseDollarInline: true,
@@ -102,7 +77,7 @@ func (e *Extension) Extend(m goldmark.Markdown) {
m.Parser().AddOptions(parser.WithInlineParsers(inlines...))
m.Renderer().AddOptions(renderer.WithNodeRenderers(
- util.Prioritized(NewBlockRenderer(), 501),
- util.Prioritized(NewInlineRenderer(), 502),
+ util.Prioritized(NewBlockRenderer(e.renderInternal), 501),
+ util.Prioritized(NewInlineRenderer(e.renderInternal), 502),
))
}
diff --git a/modules/markup/markdown/meta_test.go b/modules/markup/markdown/meta_test.go
index 6949966328..278c33f1d2 100644
--- a/modules/markup/markdown/meta_test.go
+++ b/modules/markup/markdown/meta_test.go
@@ -11,10 +11,8 @@ import (
"github.com/stretchr/testify/assert"
)
-/*
-IssueTemplate is a legacy to keep the unit tests working.
-Copied from structs.IssueTemplate, the original type has been changed a lot to support yaml template.
-*/
+// IssueTemplate is a legacy to keep the unit tests working.
+// Copied from structs.IssueTemplate, the original type has been changed a lot to support yaml template.
type IssueTemplate struct {
Name string `json:"name" yaml:"name"`
Title string `json:"title" yaml:"title"`
diff --git a/modules/markup/markdown/transform_blockquote.go b/modules/markup/markdown/transform_blockquote.go
index 92dc500e69..2651d44a69 100644
--- a/modules/markup/markdown/transform_blockquote.go
+++ b/modules/markup/markdown/transform_blockquote.go
@@ -32,7 +32,8 @@ func (r *HTMLRenderer) renderAttention(w util.BufWriter, source []byte, node ast
default: // including "note"
octiconName = "info"
}
- _, _ = w.WriteString(string(svg.RenderHTML("octicon-"+octiconName, 16, "attention-icon attention-"+n.AttentionType)))
+ svgHTML := svg.RenderHTML("octicon-"+octiconName, 16, "attention-icon attention-"+n.AttentionType)
+ _, _ = w.WriteString(string(r.renderInternal.ProtectSafeAttrs(svgHTML)))
}
return ast.WalkContinue, nil
}
@@ -128,13 +129,13 @@ func (g *ASTTransformer) transformBlockquote(v *ast.Blockquote, reader text.Read
}
// color the blockquote
- v.SetAttributeString("class", []byte("attention-header attention-"+attentionType))
+ v.SetAttributeString(g.renderInternal.SafeAttr("class"), []byte(g.renderInternal.SafeValue("attention-header attention-"+attentionType)))
// create an emphasis to make it bold
attentionParagraph := ast.NewParagraph()
g.applyElementDir(attentionParagraph)
emphasis := ast.NewEmphasis(2)
- emphasis.SetAttributeString("class", []byte("attention-"+attentionType))
+ emphasis.SetAttributeString(g.renderInternal.SafeAttr("class"), []byte(g.renderInternal.SafeValue("attention-"+attentionType)))
attentionAstString := ast.NewString([]byte(cases.Title(language.English).String(attentionType)))
diff --git a/modules/markup/markdown/transform_codespan.go b/modules/markup/markdown/transform_codespan.go
index ff7d24eec9..bccc43aad2 100644
--- a/modules/markup/markdown/transform_codespan.go
+++ b/modules/markup/markdown/transform_codespan.go
@@ -5,7 +5,6 @@ package markdown
import (
"bytes"
- "fmt"
"strings"
"code.gitea.io/gitea/modules/markup"
@@ -40,7 +39,7 @@ func (r *HTMLRenderer) renderCodeSpan(w util.BufWriter, source []byte, n ast.Nod
r.Writer.RawWrite(w, value)
}
case *ColorPreview:
- _, _ = w.WriteString(fmt.Sprintf(``, string(v.Color)))
+ _ = r.renderInternal.FormatWithSafeAttrs(w, ``, string(v.Color))
}
}
return ast.WalkSkipChildren, nil
diff --git a/modules/markup/markdown/transform_list.go b/modules/markup/markdown/transform_list.go
index b982fd4a83..c89ad2f2cf 100644
--- a/modules/markup/markdown/transform_list.go
+++ b/modules/markup/markdown/transform_list.go
@@ -72,7 +72,7 @@ func (g *ASTTransformer) transformList(_ *markup.RenderContext, v *ast.List, rc
}
newChild := NewTaskCheckBoxListItem(listItem)
newChild.IsChecked = taskCheckBox.IsChecked
- newChild.SetAttributeString("class", []byte("task-list-item"))
+ newChild.SetAttributeString(g.renderInternal.SafeAttr("class"), []byte(g.renderInternal.SafeValue("task-list-item")))
segments := newChild.FirstChild().Lines()
if segments.Len() > 0 {
segment := segments.At(0)
diff --git a/modules/markup/render.go b/modules/markup/render.go
index 1977dc73f5..f05cb62626 100644
--- a/modules/markup/render.go
+++ b/modules/markup/render.go
@@ -9,14 +9,15 @@ import (
"io"
"net/url"
"strings"
- "sync"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/gitrepo"
+ "code.gitea.io/gitea/modules/markup/internal"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/util"
"github.com/yuin/goldmark/ast"
+ "golang.org/x/sync/errgroup"
)
type RenderMetaMode string
@@ -65,6 +66,8 @@ type RenderContext struct {
SidebarTocNode ast.Node
RenderMetaAs RenderMetaMode
InStandalonePage bool // used by external render. the router "/org/repo/render/..." will output the rendered content in a standalone page
+
+ RenderInternal internal.RenderInternal
}
// Cancel runs any cleanup functions that have been registered for this Ctx
@@ -156,59 +159,53 @@ sandbox="allow-scripts"
return err
}
-func render(ctx *RenderContext, renderer Renderer, input io.Reader, output io.Writer) error {
- var wg sync.WaitGroup
- var err error
+func pipes() (io.ReadCloser, io.WriteCloser, func()) {
pr, pw := io.Pipe()
- defer func() {
+ return pr, pw, func() {
_ = pr.Close()
_ = pw.Close()
- }()
+ }
+}
- var pr2 io.ReadCloser
- var pw2 io.WriteCloser
+func render(ctx *RenderContext, renderer Renderer, input io.Reader, output io.Writer) error {
+ finalProcessor := ctx.RenderInternal.Init(output)
+ defer finalProcessor.Close()
- var sanitizerDisabled bool
- if r, ok := renderer.(ExternalRenderer); ok {
- sanitizerDisabled = r.SanitizerDisabled()
+ // input -> (pw1=pr1) -> renderer -> (pw2=pr2) -> SanitizeReader -> finalProcessor -> output
+ // no sanitizer: input -> (pw1=pr1) -> renderer -> pw2(finalProcessor) -> output
+ pr1, pw1, close1 := pipes()
+ defer close1()
+
+ eg, _ := errgroup.WithContext(ctx.Ctx)
+ var pw2 io.WriteCloser = util.NopCloser{Writer: finalProcessor}
+
+ if r, ok := renderer.(ExternalRenderer); !ok || !r.SanitizerDisabled() {
+ var pr2 io.ReadCloser
+ var close2 func()
+ pr2, pw2, close2 = pipes()
+ defer close2()
+ eg.Go(func() error {
+ defer pr2.Close()
+ return SanitizeReader(pr2, renderer.Name(), finalProcessor)
+ })
}
- if !sanitizerDisabled {
- pr2, pw2 = io.Pipe()
- defer func() {
- _ = pr2.Close()
- _ = pw2.Close()
- }()
-
- wg.Add(1)
- go func() {
- err = SanitizeReader(pr2, renderer.Name(), output)
- _ = pr2.Close()
- wg.Done()
- }()
- } else {
- pw2 = util.NopCloser{Writer: output}
- }
-
- wg.Add(1)
- go func() {
+ eg.Go(func() (err error) {
if r, ok := renderer.(PostProcessRenderer); ok && r.NeedPostProcess() {
- err = PostProcess(ctx, pr, pw2)
+ err = PostProcess(ctx, pr1, pw2)
} else {
- _, err = io.Copy(pw2, pr)
+ _, err = io.Copy(pw2, pr1)
}
- _ = pr.Close()
- _ = pw2.Close()
- wg.Done()
- }()
+ _, _ = pr1.Close(), pw2.Close()
+ return err
+ })
- if err1 := renderer.Render(ctx, input, pw); err1 != nil {
- return err1
+ if err := renderer.Render(ctx, input, pw1); err != nil {
+ return err
}
- _ = pw.Close()
+ _ = pw1.Close()
- wg.Wait()
- return err
+ return eg.Wait()
}
// Init initializes the render global variables
diff --git a/modules/markup/sanitizer_custom.go b/modules/markup/sanitizer_custom.go
index 7978973166..7f96556fd7 100644
--- a/modules/markup/sanitizer_custom.go
+++ b/modules/markup/sanitizer_custom.go
@@ -4,6 +4,9 @@
package markup
import (
+ "regexp"
+ "strings"
+
"code.gitea.io/gitea/modules/setting"
"github.com/microcosm-cc/bluemonday"
@@ -15,8 +18,11 @@ func (st *Sanitizer) addSanitizerRules(policy *bluemonday.Policy, rules []settin
policy.AllowDataURIImages()
}
if rule.Element != "" {
- if rule.Regexp != nil {
- policy.AllowAttrs(rule.AllowAttr).Matching(rule.Regexp).OnElements(rule.Element)
+ if rule.Regexp != "" {
+ if !strings.HasPrefix(rule.Regexp, "^") || !strings.HasSuffix(rule.Regexp, "$") {
+ panic("Markup sanitizer rule regexp must start with ^ and end with $ to be strict")
+ }
+ policy.AllowAttrs(rule.AllowAttr).Matching(regexp.MustCompile(rule.Regexp)).OnElements(rule.Element)
} else {
policy.AllowAttrs(rule.AllowAttr).OnElements(rule.Element)
}
diff --git a/modules/markup/sanitizer_default.go b/modules/markup/sanitizer_default.go
index 476ae5e26f..0fa54efd45 100644
--- a/modules/markup/sanitizer_default.go
+++ b/modules/markup/sanitizer_default.go
@@ -16,37 +16,12 @@ import (
func (st *Sanitizer) createDefaultPolicy() *bluemonday.Policy {
policy := bluemonday.UGCPolicy()
- // For JS code copy and Mermaid loading state
- policy.AllowAttrs("class").Matching(regexp.MustCompile(`^code-block( is-loading)?$`)).OnElements("pre")
+ // NOTICE: DO NOT add special "class" regexp rules here anymore, use RenderInternal.SafeAttr instead
- // For code preview
- policy.AllowAttrs("class").Matching(regexp.MustCompile(`^code-preview-[-\w]+( file-content)?$`)).Globally()
- policy.AllowAttrs("class").Matching(regexp.MustCompile(`^lines-num$`)).OnElements("td")
- policy.AllowAttrs("data-line-number").OnElements("span")
- policy.AllowAttrs("class").Matching(regexp.MustCompile(`^lines-code chroma$`)).OnElements("td")
- policy.AllowAttrs("class").Matching(regexp.MustCompile(`^code-inner$`)).OnElements("div")
-
- // For code preview (unicode escape)
- policy.AllowAttrs("class").Matching(regexp.MustCompile(`^file-view( unicode-escaped)?$`)).OnElements("table")
- policy.AllowAttrs("class").Matching(regexp.MustCompile(`^lines-escape$`)).OnElements("td")
- policy.AllowAttrs("class").Matching(regexp.MustCompile(`^toggle-escape-button btn interact-bg$`)).OnElements("a") // don't use button, button might submit a form
- policy.AllowAttrs("class").Matching(regexp.MustCompile(`^(ambiguous-code-point|escaped-code-point|broken-code-point)$`)).OnElements("span")
- policy.AllowAttrs("class").Matching(regexp.MustCompile(`^char$`)).OnElements("span")
- policy.AllowAttrs("data-tooltip-content", "data-escaped").OnElements("span")
-
- // For color preview
- policy.AllowAttrs("class").Matching(regexp.MustCompile(`^color-preview$`)).OnElements("span")
-
- // For attention
- policy.AllowAttrs("class").Matching(regexp.MustCompile(`^attention-header attention-\w+$`)).OnElements("blockquote")
- policy.AllowAttrs("class").Matching(regexp.MustCompile(`^attention-\w+$`)).OnElements("strong")
- policy.AllowAttrs("class").Matching(regexp.MustCompile(`^attention-icon attention-\w+ svg octicon-[\w-]+$`)).OnElements("svg")
- policy.AllowAttrs("viewBox", "width", "height", "aria-hidden").OnElements("svg")
+ // General safe SVG attributes
+ policy.AllowAttrs("viewBox", "width", "height", "aria-hidden", "data-attr-class").OnElements("svg")
policy.AllowAttrs("fill-rule", "d").OnElements("path")
- // For Chroma markdown plugin
- policy.AllowAttrs("class").Matching(regexp.MustCompile(`^(chroma )?language-[\w-]+( display)?( is-loading)?$`)).OnElements("code")
-
// Checkboxes
policy.AllowAttrs("type").Matching(regexp.MustCompile(`^checkbox$`)).OnElements("input")
policy.AllowAttrs("checked", "disabled", "data-source-position").OnElements("input")
@@ -66,28 +41,15 @@ func (st *Sanitizer) createDefaultPolicy() *bluemonday.Policy {
policy.AllowURLSchemeWithCustomPolicy("data", disallowScheme)
}
- // Allow classes for anchors
- policy.AllowAttrs("class").Matching(regexp.MustCompile(`ref-issue( ref-external-issue)?`)).OnElements("a")
-
- // Allow classes for task lists
- policy.AllowAttrs("class").Matching(regexp.MustCompile(`task-list-item`)).OnElements("li")
-
// Allow classes for org mode list item status.
policy.AllowAttrs("class").Matching(regexp.MustCompile(`^(unchecked|checked|indeterminate)$`)).OnElements("li")
- // Allow icons
- policy.AllowAttrs("class").Matching(regexp.MustCompile(`^icon(\s+[\p{L}\p{N}_-]+)+$`)).OnElements("i")
-
- // Allow classes for emojis
- policy.AllowAttrs("class").Matching(regexp.MustCompile(`emoji`)).OnElements("img")
-
- // Allow icons, emojis, chroma syntax and keyword markup on span
- policy.AllowAttrs("class").Matching(regexp.MustCompile(`^((icon(\s+[\p{L}\p{N}_-]+)+)|(emoji)|(language-math display)|(language-math inline))$|^([a-z][a-z0-9]{0,2})$|^` + keywordClass + `$`)).OnElements("span")
-
// Allow 'color' and 'background-color' properties for the style attribute on text elements.
policy.AllowStyles("color", "background-color").OnElements("span", "p")
- // Allow generally safe attributes
+ policy.AllowAttrs("src", "autoplay", "controls").OnElements("video")
+
+ // Allow generally safe attributes (reference: https://github.com/jch/html-pipeline)
generalSafeAttrs := []string{
"abbr", "accept", "accept-charset",
"accesskey", "action", "align", "alt",
@@ -106,10 +68,9 @@ func (st *Sanitizer) createDefaultPolicy() *bluemonday.Policy {
"selected", "shape", "size", "span",
"start", "summary", "tabindex", "target",
"title", "type", "usemap", "valign", "value",
- "vspace", "width", "itemprop",
- "data-markdown-generated-content",
+ "vspace", "width", "itemprop", "itemscope", "itemtype",
+ "data-markdown-generated-content", "data-attr-class",
}
-
generalSafeElements := []string{
"h1", "h2", "h3", "h4", "h5", "h6", "h7", "h8", "br", "b", "i", "strong", "em", "a", "pre", "code", "img", "tt",
"div", "ins", "del", "sup", "sub", "p", "ol", "ul", "table", "thead", "tbody", "tfoot", "blockquote", "label",
@@ -117,14 +78,8 @@ func (st *Sanitizer) createDefaultPolicy() *bluemonday.Policy {
"details", "caption", "figure", "figcaption",
"abbr", "bdo", "cite", "dfn", "mark", "small", "span", "time", "video", "wbr",
}
-
- policy.AllowAttrs(generalSafeAttrs...).OnElements(generalSafeElements...)
-
- policy.AllowAttrs("src", "autoplay", "controls").OnElements("video")
-
- policy.AllowAttrs("itemscope", "itemtype").OnElements("div")
-
// FIXME: Need to handle longdesc in img but there is no easy way to do it
+ policy.AllowAttrs(generalSafeAttrs...).OnElements(generalSafeElements...)
// Custom keyword markup
defaultSanitizer.addSanitizerRules(policy, setting.ExternalSanitizerRules)
diff --git a/modules/markup/sanitizer_default_test.go b/modules/markup/sanitizer_default_test.go
index 20370509c1..c5c43695ea 100644
--- a/modules/markup/sanitizer_default_test.go
+++ b/modules/markup/sanitizer_default_test.go
@@ -19,7 +19,6 @@ func TestSanitizer(t *testing.T) {
// Code highlighting class
`
`, `
`,
`
`, `
`,
- `
`, `
`,
// Input checkbox
``, ``,
@@ -38,10 +37,8 @@ func TestSanitizer(t *testing.T) {
// tags
`Ctrl + C`, `Ctrl + C`,
`NAUGHTY`, `NAUGHTY`,
- ``, ``,
`unchecked`, `unchecked`,
`NAUGHTY`, `NAUGHTY`,
- `contents`, `contents`,
// Color property
`Hello World`, `Hello World`,
diff --git a/modules/setting/markup.go b/modules/setting/markup.go
index 6c2246342b..dfce8afa77 100644
--- a/modules/setting/markup.go
+++ b/modules/setting/markup.go
@@ -54,7 +54,7 @@ type MarkupRenderer struct {
type MarkupSanitizerRule struct {
Element string
AllowAttr string
- Regexp *regexp.Regexp
+ Regexp string
AllowDataURIImages bool
}
@@ -117,15 +117,24 @@ func createMarkupSanitizerRule(name string, sec ConfigSection) (MarkupSanitizerR
regexpStr := sec.Key("REGEXP").Value()
if regexpStr != "" {
- // Validate when parsing the config that this is a valid regular
- // expression. Then we can use regexp.MustCompile(...) later.
- compiled, err := regexp.Compile(regexpStr)
+ hasPrefix := strings.HasPrefix(regexpStr, "^")
+ hasSuffix := strings.HasSuffix(regexpStr, "$")
+ if !hasPrefix || !hasSuffix {
+ log.Error("In markup.%s: REGEXP must start with ^ and end with $ to be strict", name)
+ // to avoid breaking existing user configurations and satisfy the strict requirement in addSanitizerRules
+ if !hasPrefix {
+ regexpStr = "^.*" + regexpStr
+ }
+ if !hasSuffix {
+ regexpStr += ".*$"
+ }
+ }
+ _, err := regexp.Compile(regexpStr)
if err != nil {
log.Error("In markup.%s: REGEXP (%s) failed to compile: %v", name, regexpStr, err)
return rule, false
}
-
- rule.Regexp = compiled
+ rule.Regexp = regexpStr
}
ok = true
diff --git a/modules/svg/svg.go b/modules/svg/svg.go
index 8132978cac..fded9d0873 100644
--- a/modules/svg/svg.go
+++ b/modules/svg/svg.go
@@ -9,7 +9,7 @@ import (
"path"
"strings"
- gitea_html "code.gitea.io/gitea/modules/html"
+ gitea_html "code.gitea.io/gitea/modules/htmlutil"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/public"
)
diff --git a/modules/templates/helper.go b/modules/templates/helper.go
index 3ef11772dc..d5b32358da 100644
--- a/modules/templates/helper.go
+++ b/modules/templates/helper.go
@@ -10,12 +10,12 @@ import (
"html/template"
"net/url"
"reflect"
- "slices"
"strings"
"time"
user_model "code.gitea.io/gitea/models/user"
"code.gitea.io/gitea/modules/base"
+ "code.gitea.io/gitea/modules/htmlutil"
"code.gitea.io/gitea/modules/markup"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/svg"
@@ -39,7 +39,7 @@ func NewFuncMap() template.FuncMap {
"Iif": iif,
"Eval": evalTokens,
"SafeHTML": safeHTML,
- "HTMLFormat": HTMLFormat,
+ "HTMLFormat": htmlutil.HTMLFormat,
"HTMLEscape": htmlEscape,
"QueryEscape": queryEscape,
"JSEscape": jsEscapeSafe,
@@ -184,23 +184,6 @@ func NewFuncMap() template.FuncMap {
}
}
-func HTMLFormat(s string, rawArgs ...any) template.HTML {
- args := slices.Clone(rawArgs)
- for i, v := range args {
- switch v := v.(type) {
- case nil, bool, int, int8, int16, int32, int64, uint, uint8, uint16, uint32, uint64, float32, float64, template.HTML:
- // for most basic types (including template.HTML which is safe), just do nothing and use it
- case string:
- args[i] = template.HTMLEscapeString(v)
- case fmt.Stringer:
- args[i] = template.HTMLEscapeString(v.String())
- default:
- args[i] = template.HTMLEscapeString(fmt.Sprint(v))
- }
- }
- return template.HTML(fmt.Sprintf(s, args...))
-}
-
// safeHTML render raw as HTML
func safeHTML(s any) template.HTML {
switch v := s.(type) {
diff --git a/modules/templates/helper_test.go b/modules/templates/helper_test.go
index b9fabb7016..3e17e86c66 100644
--- a/modules/templates/helper_test.go
+++ b/modules/templates/helper_test.go
@@ -61,10 +61,6 @@ func TestJSEscapeSafe(t *testing.T) {
assert.EqualValues(t, `\u0026\u003C\u003E\'\"`, jsEscapeSafe(`&<>'"`))
}
-func TestHTMLFormat(t *testing.T) {
- assert.Equal(t, template.HTML("< < 1"), HTMLFormat("%s %s %d", "<", template.HTML("<"), 1))
-}
-
func TestSanitizeHTML(t *testing.T) {
assert.Equal(t, template.HTML(`link xss inline`), SanitizeHTML(`link xss inline`))
}
diff --git a/modules/templates/util_avatar.go b/modules/templates/util_avatar.go
index afc1091516..f7dd408ee2 100644
--- a/modules/templates/util_avatar.go
+++ b/modules/templates/util_avatar.go
@@ -14,7 +14,7 @@ import (
"code.gitea.io/gitea/models/organization"
repo_model "code.gitea.io/gitea/models/repo"
user_model "code.gitea.io/gitea/models/user"
- gitea_html "code.gitea.io/gitea/modules/html"
+ gitea_html "code.gitea.io/gitea/modules/htmlutil"
"code.gitea.io/gitea/modules/setting"
)
diff --git a/modules/templates/util_render.go b/modules/templates/util_render.go
index 8e443446bd..5776eefced 100644
--- a/modules/templates/util_render.go
+++ b/modules/templates/util_render.go
@@ -16,6 +16,7 @@ import (
issues_model "code.gitea.io/gitea/models/issues"
"code.gitea.io/gitea/modules/emoji"
+ "code.gitea.io/gitea/modules/htmlutil"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/markup"
"code.gitea.io/gitea/modules/markup/markdown"
@@ -140,7 +141,7 @@ func (ut *RenderUtils) RenderLabel(label *issues_model.Label) template.HTML {
if labelScope == "" {
// Regular label
- return HTMLFormat(`%s`,
+ return htmlutil.HTMLFormat(`%s`,
extraCSSClasses, textColor, label.Color, descriptionText, ut.RenderEmoji(label.Name))
}
@@ -174,7 +175,7 @@ func (ut *RenderUtils) RenderLabel(label *issues_model.Label) template.HTML {
itemColor := "#" + hex.EncodeToString(itemBytes)
scopeColor := "#" + hex.EncodeToString(scopeBytes)
- return HTMLFormat(``+
+ return htmlutil.HTMLFormat(``+
`%s`+
`%s`+
``,
diff --git a/modules/templates/util_render_test.go b/modules/templates/util_render_test.go
index 529507e7ea..cf6d839cbf 100644
--- a/modules/templates/util_render_test.go
+++ b/modules/templates/util_render_test.go
@@ -113,34 +113,34 @@ func TestRenderCommitBody(t *testing.T) {
}
expected := `/just/a/path.bin
-https://example.com/file.bin
+https://example.com/file.bin
[local link](file.bin)
-[remote link](https://example.com)
+[remote link](https://example.com)
[[local link|file.bin]]
-[[remote link|https://example.com]]
+[[remote link|https://example.com]]
![local image](image.jpg)
-![remote image](https://example.com/image.jpg)
+![remote image](https://example.com/image.jpg)
[[local image|image.jpg]]
-[[remote link|https://example.com/image.jpg]]
+[[remote link|https://example.com/image.jpg]]
88fc37a3c0...12fc37a3c0 (hash)
com 88fc37a3c0a4dda553bdcfc80c178a58247f42fb...12fc37a3c0a4dda553bdcfc80c178a58247f42fb pare
88fc37a3c0
com 88fc37a3c0a4dda553bdcfc80c178a58247f42fb mit
👍
-mail@domain.com
-@mention-user test
+mail@domain.com
+@mention-user test
#123
space`
assert.EqualValues(t, expected, string(newTestRenderUtils().RenderCommitBody(testInput(), testMetas)))
}
func TestRenderCommitMessage(t *testing.T) {
- expected := `space @mention-user `
+ expected := `space @mention-user `
assert.EqualValues(t, expected, newTestRenderUtils().RenderCommitMessage(testInput(), testMetas))
}
func TestRenderCommitMessageLinkSubject(t *testing.T) {
- expected := `space @mention-user`
+ expected := `space @mention-user`
assert.EqualValues(t, expected, newTestRenderUtils().RenderCommitMessageLinkSubject(testInput(), "https://example.com/link", testMetas))
}
diff --git a/routers/web/repo/wiki.go b/routers/web/repo/wiki.go
index 13f6b69493..2732a67e71 100644
--- a/routers/web/repo/wiki.go
+++ b/routers/web/repo/wiki.go
@@ -326,7 +326,7 @@ func renderViewPage(ctx *context.Context) (*git.Repository, *git.TreeEntry) {
if rctx.SidebarTocNode != nil {
sb := &strings.Builder{}
- err = markdown.SpecializedMarkdown().Renderer().Render(sb, nil, rctx.SidebarTocNode)
+ err = markdown.SpecializedMarkdown(rctx).Renderer().Render(sb, nil, rctx.SidebarTocNode)
if err != nil {
log.Error("Failed to render wiki sidebar TOC: %v", err)
} else {