mirror of https://codeberg.org/forgejo/forgejo.git (synced 2024-12-23 01:14:00 +01:00)
Fix several render issues (#14986)
* Fix an issue with panics related to attributes
* Wrap goldmark render in a recovery function
* Reduce memory use in render emoji
* Use a pipe for rendering goldmark - still needs more work and a limiter

Signed-off-by: Andrew Thornton <art27@cantab.net>
Co-authored-by: Lauris BH <lauris@nix.lv>
This commit is contained in:
parent
044cd4d016
commit
ed31ddc29a
6 changed files with 211 additions and 61 deletions
modules/emoji/emoji.go

@@ -30,6 +30,9 @@ var (
 	// aliasMap provides a map of the alias to its emoji data.
 	aliasMap map[string]int
 
+	// emptyReplacer is the string replacer for emoji codes.
+	emptyReplacer *strings.Replacer
+
 	// codeReplacer is the string replacer for emoji codes.
 	codeReplacer *strings.Replacer
 
@@ -49,6 +52,7 @@ func loadMap() {
 
 		// process emoji codes and aliases
 		codePairs := make([]string, 0)
+		emptyPairs := make([]string, 0)
 		aliasPairs := make([]string, 0)
 
 		// sort from largest to small so we match combined emoji first
@@ -64,6 +68,7 @@ func loadMap() {
 			// setup codes
 			codeMap[e.Emoji] = i
 			codePairs = append(codePairs, e.Emoji, ":"+e.Aliases[0]+":")
+			emptyPairs = append(emptyPairs, e.Emoji, e.Emoji)
 
 			// setup aliases
 			for _, a := range e.Aliases {
@@ -77,6 +82,7 @@ func loadMap() {
 		}
 
 		// create replacers
+		emptyReplacer = strings.NewReplacer(emptyPairs...)
 		codeReplacer = strings.NewReplacer(codePairs...)
 		aliasReplacer = strings.NewReplacer(aliasPairs...)
 	})
@@ -127,38 +133,53 @@ func ReplaceAliases(s string) string {
 	return aliasReplacer.Replace(s)
 }
 
+type rememberSecondWriteWriter struct {
+	pos        int
+	idx        int
+	end        int
+	writecount int
+}
+
+func (n *rememberSecondWriteWriter) Write(p []byte) (int, error) {
+	n.writecount++
+	if n.writecount == 2 {
+		n.idx = n.pos
+		n.end = n.pos + len(p)
+	}
+	n.pos += len(p)
+	return len(p), nil
+}
+
+func (n *rememberSecondWriteWriter) WriteString(s string) (int, error) {
+	n.writecount++
+	if n.writecount == 2 {
+		n.idx = n.pos
+		n.end = n.pos + len(s)
+	}
+	n.pos += len(s)
+	return len(s), nil
+}
+
 // FindEmojiSubmatchIndex returns index pair of longest emoji in a string
 func FindEmojiSubmatchIndex(s string) []int {
 	loadMap()
-	found := make(map[int]int)
-	keys := make([]int, 0)
+	secondWriteWriter := rememberSecondWriteWriter{}
 
-	//see if there are any emoji in string before looking for position of specific ones
-	//no performance difference when there is a match but 10x faster when there are not
-	if s == ReplaceCodes(s) {
+	// A faster and clean implementation would copy the trie tree formation in strings.NewReplacer but
+	// we can be lazy here.
+	//
+	// The implementation of strings.Replacer.WriteString is such that the first index of the emoji
+	// submatch is simply the second thing that is written to WriteString in the writer.
+	//
+	// Therefore we can simply take the index of the second write as our first emoji
+	//
+	// FIXME: just copy the trie implementation from strings.NewReplacer
+	_, _ = emptyReplacer.WriteString(&secondWriteWriter, s)
+
+	// if we wrote less than twice then we never "replaced"
+	if secondWriteWriter.writecount < 2 {
 		return nil
 	}
 
-	// get index of first emoji occurrence while also checking for longest combination
-	for j := range GemojiData {
-		i := strings.Index(s, GemojiData[j].Emoji)
-		if i != -1 {
-			if _, ok := found[i]; !ok {
-				if len(keys) == 0 || i < keys[0] {
-					found[i] = j
-					keys = []int{i}
-				}
-				if i == 0 {
-					break
-				}
-			}
-		}
-	}
-
-	if len(keys) > 0 {
-		index := keys[0]
-		return []int{index, index + len(GemojiData[found[index]].Emoji)}
-	}
-
-	return nil
+	return []int{secondWriteWriter.idx, secondWriteWriter.end}
 }
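The "second write" trick above is worth unpacking: strings.Replacer.WriteString flushes the (possibly empty) unmatched prefix to the destination writer before it writes each replacement, so the second write always corresponds to the first match; and because emptyReplacer maps every emoji to itself, output offsets equal input offsets. A minimal sketch of that behaviour (traceWriter is a name invented for this example):

package main

import (
	"fmt"
	"strings"
)

// traceWriter records each Write so the call sequence produced by
// strings.Replacer.WriteString can be observed.
type traceWriter struct{ writes []string }

func (t *traceWriter) Write(p []byte) (int, error) {
	t.writes = append(t.writes, string(p))
	return len(p), nil
}

func main() {
	// Identity mapping in the style of emptyReplacer: every pattern
	// replaces itself, so the output equals the input byte-for-byte.
	r := strings.NewReplacer("\U0001f44d", "\U0001f44d", "\U0001f37a", "\U0001f37a")

	w := &traceWriter{}
	_, _ = r.WriteString(w, "ab\U0001f44dcd")

	// Write #1 is the prefix "ab"; write #2 is the first match, whose
	// start offset (2) and end offset (2 + 4) are exactly the pair that
	// FindEmojiSubmatchIndex returns.
	fmt.Printf("%q\n", w.writes)
}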
modules/emoji/emoji_test.go

@@ -8,6 +8,8 @@ package emoji
 import (
 	"reflect"
 	"testing"
+
+	"github.com/stretchr/testify/assert"
 )
 
 func TestDumpInfo(t *testing.T) {
@@ -65,3 +67,34 @@ func TestReplacers(t *testing.T) {
 		}
 	}
 }
+
+func TestFindEmojiSubmatchIndex(t *testing.T) {
+	type testcase struct {
+		teststring string
+		expected   []int
+	}
+
+	testcases := []testcase{
+		{
+			"\U0001f44d",
+			[]int{0, len("\U0001f44d")},
+		},
+		{
+			"\U0001f44d +1 \U0001f44d \U0001f37a",
+			[]int{0, 4},
+		},
+		{
+			" \U0001f44d",
+			[]int{1, 1 + len("\U0001f44d")},
+		},
+		{
+			string([]byte{'\u0001'}) + "\U0001f44d",
+			[]int{1, 1 + len("\U0001f44d")},
+		},
+	}
+
+	for _, kase := range testcases {
+		actual := FindEmojiSubmatchIndex(kase.teststring)
+		assert.Equal(t, kase.expected, actual)
+	}
+}
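One detail of the expected values: the index pairs are byte offsets, not rune offsets. U+1F44D encodes to four bytes in UTF-8, which is why the second testcase expects []int{0, 4}. A quick check:

package main

import "fmt"

func main() {
	// len on a string counts bytes; the thumbs-up emoji is a 4-byte
	// UTF-8 sequence, so a match at the start spans [0, 4).
	fmt.Println(len("\U0001f44d")) // 4
}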
modules/markup/html.go

@@ -313,41 +313,27 @@ func RenderEmoji(
 	return ctx.postProcess(rawHTML)
 }
 
+var tagCleaner = regexp.MustCompile(`<((?:/?\w+/\w+)|(?:/[\w ]+/)|(/?[hH][tT][mM][lL][ />]))`)
+var nulCleaner = strings.NewReplacer("\000", "")
+
 func (ctx *postProcessCtx) postProcess(rawHTML []byte) ([]byte, error) {
 	if ctx.procs == nil {
 		ctx.procs = defaultProcessors
 	}
 
 	// give a generous extra 50 bytes
-	res := make([]byte, 0, len(rawHTML)+50)
+	res := bytes.NewBuffer(make([]byte, 0, len(rawHTML)+50))
 
 	// prepend "<html><body>"
-	res = append(res, "<html><body>"...)
+	_, _ = res.WriteString("<html><body>")
 
 	// Strip out nuls - they're always invalid
-	start := bytes.IndexByte(rawHTML, '\000')
-	if start >= 0 {
-		res = append(res, rawHTML[:start]...)
-		start++
-		for start < len(rawHTML) {
-			end := bytes.IndexByte(rawHTML[start:], '\000')
-			if end < 0 {
-				res = append(res, rawHTML[start:]...)
-				break
-			} else if end > 0 {
-				res = append(res, rawHTML[start:start+end]...)
-			}
-			start += end + 1
-		}
-	} else {
-		res = append(res, rawHTML...)
-	}
+	_, _ = nulCleaner.WriteString(res, string(tagCleaner.ReplaceAll(rawHTML, []byte("&lt;$1"))))
 
 	// close the tags
-	res = append(res, "</body></html>"...)
+	_, _ = res.WriteString("</body></html>")
 
 	// parse the HTML
-	nodes, err := html.ParseFragment(bytes.NewReader(res), nil)
+	nodes, err := html.ParseFragment(res, nil)
 	if err != nil {
 		return nil, &postProcessError{"invalid HTML", err}
 	}
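To see what the new one-liner does in place of the old NUL-scanning loop: tagCleaner defuses tag-like sequences (e.g. <foo/bar> or a stray </html>) by escaping the leading "<" to "&lt;", and nulCleaner drops NUL bytes while streaming straight into the buffer. A standalone sketch using the same two expressions (the sample input is invented for illustration):

package main

import (
	"bytes"
	"fmt"
	"regexp"
	"strings"
)

// Mirrors the cleaners from the hunk above.
var tagCleaner = regexp.MustCompile(`<((?:/?\w+/\w+)|(?:/[\w ]+/)|(/?[hH][tT][mM][lL][ />]))`)
var nulCleaner = strings.NewReplacer("\000", "")

func main() {
	rawHTML := []byte("a\000b <foo/bar> </html>")
	res := bytes.NewBuffer(make([]byte, 0, len(rawHTML)+50))

	// Escape pseudo-tags first, then strip NULs while writing into res.
	_, _ = nulCleaner.WriteString(res, string(tagCleaner.ReplaceAll(rawHTML, []byte("&lt;$1"))))

	fmt.Println(res.String()) // ab &lt;foo/bar> &lt;/html>
}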
@@ -384,17 +370,17 @@ func (ctx *postProcessCtx) postProcess(rawHTML []byte) ([]byte, error) {
 	// Create buffer in which the data will be placed again. We know that the
 	// length will be at least that of res; to spare a few alloc+copy, we
 	// reuse res, resetting its length to 0.
-	buf := bytes.NewBuffer(res[:0])
+	res.Reset()
 	// Render everything to buf.
 	for _, node := range nodes {
-		err = html.Render(buf, node)
+		err = html.Render(res, node)
 		if err != nil {
 			return nil, &postProcessError{"error rendering processed HTML", err}
 		}
 	}
 
 	// Everything done successfully, return parsed data.
-	return buf.Bytes(), nil
+	return res.Bytes(), nil
 }
 
 func (ctx *postProcessCtx) visitNode(node *html.Node, visitText bool) {
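Reusing res via Reset keeps the buffer's backing allocation for the re-render pass, replacing the old res[:0] aliasing trick now that res is a *bytes.Buffer. A tiny illustration:

package main

import (
	"bytes"
	"fmt"
)

func main() {
	buf := bytes.NewBuffer(make([]byte, 0, 64))
	buf.WriteString("first pass")
	buf.Reset() // length back to 0; capacity (and allocation) retained
	buf.WriteString("second pass")
	fmt.Println(buf.String(), buf.Cap()) // second pass 64
}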
modules/markup/markdown/goldmark.go

@@ -77,6 +77,12 @@ func (g *ASTTransformer) Transform(node *ast.Document, reader text.Reader, pc parser.Context) {
 				header.ID = util.BytesToReadOnlyString(id.([]byte))
 			}
 			toc = append(toc, header)
+		} else {
+			for _, attr := range v.Attributes() {
+				if _, ok := attr.Value.([]byte); !ok {
+					v.SetAttribute(attr.Name, []byte(fmt.Sprintf("%v", attr.Value)))
+				}
+			}
 		}
 	case *ast.Image:
 		// Images need two things:
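This else branch addresses the attribute panics named in the commit message: markdown such as {class=5} can leave a heading attribute whose value is not a []byte, which later stages cannot handle, so the fix stringifies any such value up front. A minimal sketch of the coercion, using a hypothetical attribute type (the real one lives in goldmark's ast package):

package main

import "fmt"

// attribute is a stand-in for goldmark's parsed attribute entries.
type attribute struct {
	Name  []byte
	Value interface{}
}

// coerce rewrites any non-[]byte value through fmt.Sprintf, mirroring
// the SetAttribute call in the hunk above.
func coerce(attrs []attribute) {
	for i, attr := range attrs {
		if _, ok := attr.Value.([]byte); !ok {
			attrs[i].Value = []byte(fmt.Sprintf("%v", attr.Value))
		}
	}
}

func main() {
	// From markdown like {class=5}, the value parses as a number, not bytes.
	attrs := []attribute{{Name: []byte("class"), Value: 5}}
	coerce(attrs)
	fmt.Printf("%s=%s\n", attrs[0].Name, attrs[0].Value) // class=5
}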
modules/markup/markdown/markdown.go

@@ -6,7 +6,8 @@
 package markdown
 
 import (
-	"bytes"
+	"fmt"
+	"io"
 	"strings"
 	"sync"
@@ -18,7 +19,7 @@ import (
 
 	chromahtml "github.com/alecthomas/chroma/formatters/html"
 	"github.com/yuin/goldmark"
-	"github.com/yuin/goldmark-highlighting"
+	highlighting "github.com/yuin/goldmark-highlighting"
 	meta "github.com/yuin/goldmark-meta"
 	"github.com/yuin/goldmark/extension"
 	"github.com/yuin/goldmark/parser"
@@ -34,6 +35,44 @@ var urlPrefixKey = parser.NewContextKey()
 var isWikiKey = parser.NewContextKey()
 var renderMetasKey = parser.NewContextKey()
 
+type closesWithError interface {
+	io.WriteCloser
+	CloseWithError(err error) error
+}
+
+type limitWriter struct {
+	w     closesWithError
+	sum   int64
+	limit int64
+}
+
+// Write implements the standard Write interface:
+func (l *limitWriter) Write(data []byte) (int, error) {
+	leftToWrite := l.limit - l.sum
+	if leftToWrite < int64(len(data)) {
+		n, err := l.w.Write(data[:leftToWrite])
+		l.sum += int64(n)
+		if err != nil {
+			return n, err
+		}
+		_ = l.w.Close()
+		return n, fmt.Errorf("Rendered content too large - truncating render")
+	}
+	n, err := l.w.Write(data)
+	l.sum += int64(n)
+	return n, err
+}
+
+// Close closes the writer
+func (l *limitWriter) Close() error {
+	return l.w.Close()
+}
+
+// CloseWithError closes the writer
+func (l *limitWriter) CloseWithError(err error) error {
+	return l.w.CloseWithError(err)
+}
+
 // NewGiteaParseContext creates a parser.Context with the gitea context set
 func NewGiteaParseContext(urlPrefix string, metas map[string]string, isWiki bool) parser.Context {
 	pc := parser.NewContext(parser.WithIDs(newPrefixedIDs()))
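limitWriter caps how many bytes the goldmark goroutine may push into the pipe: once the budget is spent it forwards only the remaining prefix, closes the underlying writer, and returns an error, aborting the render rather than buffering unbounded output. A usage sketch, assuming the limitWriter and closesWithError definitions from the hunk above are in scope (the 10-byte limit is arbitrary; *io.PipeWriter satisfies closesWithError):

package main

import (
	"fmt"
	"io"
	"strings"
)

func main() {
	rd, wr := io.Pipe()
	lw := &limitWriter{w: wr, limit: 10}

	done := make(chan struct{})
	go func() {
		defer close(done)
		// A single 25-byte write blows the 10-byte budget: limitWriter
		// forwards the first 10 bytes, closes the pipe, and errors out.
		if _, err := io.Copy(lw, strings.NewReader(strings.Repeat("x", 25))); err != nil {
			fmt.Println("writer:", err)
		}
	}()

	out, _ := io.ReadAll(rd)
	<-done
	fmt.Println(len(out)) // 10
}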
@@ -43,8 +82,8 @@ func NewGiteaParseContext(urlPrefix string, metas map[string]string, isWiki bool) parser.Context {
 	return pc
 }
 
-// render renders Markdown to HTML without handling special links.
-func render(body []byte, urlPrefix string, metas map[string]string, wikiMarkdown bool) []byte {
+// actualRender renders Markdown to HTML without handling special links.
+func actualRender(body []byte, urlPrefix string, metas map[string]string, wikiMarkdown bool) []byte {
 	once.Do(func() {
 		converter = goldmark.New(
 			goldmark.WithExtensions(extension.Table,
@@ -119,12 +158,57 @@ func render(body []byte, urlPrefix string, metas map[string]string, wikiMarkdown bool) []byte {
 	})
 
-	pc := NewGiteaParseContext(urlPrefix, metas, wikiMarkdown)
-	var buf bytes.Buffer
-	if err := converter.Convert(giteautil.NormalizeEOL(body), &buf, parser.WithContext(pc)); err != nil {
-		log.Error("Unable to render: %v", err)
+	rd, wr := io.Pipe()
+	defer func() {
+		_ = rd.Close()
+		_ = wr.Close()
+	}()
+
+	lw := &limitWriter{
+		w:     wr,
+		limit: setting.UI.MaxDisplayFileSize * 3,
 	}
-	return markup.SanitizeReader(&buf).Bytes()
+
+	// FIXME: should we include a timeout that closes the pipe to abort the parser and sanitizer if it takes too long?
+	go func() {
+		defer func() {
+			err := recover()
+			if err == nil {
+				return
+			}
+
+			log.Warn("Unable to render markdown due to panic in goldmark: %v", err)
+			if log.IsDebug() {
+				log.Debug("Panic in markdown: %v\n%s", err, string(log.Stack(2)))
+			}
+			_ = lw.CloseWithError(fmt.Errorf("%v", err))
+		}()
+
+		pc := NewGiteaParseContext(urlPrefix, metas, wikiMarkdown)
+		if err := converter.Convert(giteautil.NormalizeEOL(body), lw, parser.WithContext(pc)); err != nil {
+			log.Error("Unable to render: %v", err)
+			_ = lw.CloseWithError(err)
+			return
+		}
+		_ = lw.Close()
+	}()
+
+	return markup.SanitizeReader(rd).Bytes()
+}
+
+func render(body []byte, urlPrefix string, metas map[string]string, wikiMarkdown bool) (ret []byte) {
+	defer func() {
+		err := recover()
+		if err == nil {
+			return
+		}
+
+		log.Warn("Unable to render markdown due to panic in goldmark - will return sanitized raw bytes")
+		if log.IsDebug() {
+			log.Debug("Panic in markdown: %v\n%s", err, string(log.Stack(2)))
+		}
+		ret = markup.SanitizeBytes(body)
+	}()
+	return actualRender(body, urlPrefix, metas, wikiMarkdown)
 }
 
 var (
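The shape of the new render path: a producer goroutine (goldmark) writes into a pipe, the consumer (the sanitizer) reads from it, and a deferred recover converts any producer panic into a pipe error instead of a process crash; the outer render wrapper adds a second recover that falls back to sanitized raw bytes. A self-contained sketch of that shape, with a hypothetical toyConvert standing in for converter.Convert and io.ReadAll standing in for markup.SanitizeReader:

package main

import (
	"fmt"
	"io"
)

// toyConvert simulates a renderer that may panic on hostile input.
func toyConvert(w io.Writer, body []byte) error {
	if len(body) == 0 {
		panic("converter blew up") // simulates a goldmark panic
	}
	_, err := w.Write(body)
	return err
}

func renderSafely(body []byte) ([]byte, error) {
	rd, wr := io.Pipe()
	go func() {
		defer func() {
			if err := recover(); err != nil {
				// Surface the panic to the reader side of the pipe.
				_ = wr.CloseWithError(fmt.Errorf("%v", err))
			}
		}()
		if err := toyConvert(wr, body); err != nil {
			_ = wr.CloseWithError(err)
			return
		}
		_ = wr.Close()
	}()
	return io.ReadAll(rd) // stands in for markup.SanitizeReader(rd)
}

func main() {
	out, err := renderSafely([]byte("hello"))
	fmt.Println(string(out), err) // hello <nil>
	_, err = renderSafely(nil)
	fmt.Println(err) // converter blew up
}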
modules/markup/markdown/markdown_test.go

@@ -309,6 +309,25 @@ func TestRender_RenderParagraphs(t *testing.T) {
 	test(t, "A\n\n\nB\nC\n", 2)
 }
 
+func TestMarkdownRenderRaw(t *testing.T) {
+	testcases := [][]byte{
+		{ // clusterfuzz_testcase_minimized_fuzz_markdown_render_raw_6267570554535936
+			0x2a, 0x20, 0x2d, 0x0a, 0x09, 0x20, 0x60, 0x5b, 0x0a, 0x09, 0x20, 0x60,
+			0x5b,
+		},
+		{ // clusterfuzz_testcase_minimized_fuzz_markdown_render_raw_6278827345051648
+			0x2d, 0x20, 0x2d, 0x0d, 0x09, 0x60, 0x0d, 0x09, 0x60,
+		},
+		{ // clusterfuzz_testcase_minimized_fuzz_markdown_render_raw_6016973788020736[] = {
+			0x7b, 0x63, 0x6c, 0x61, 0x73, 0x73, 0x3d, 0x35, 0x7d, 0x0a, 0x3d,
+		},
+	}
+
+	for _, testcase := range testcases {
+		_ = RenderRaw(testcase, "", false)
+	}
+}
+
 func TestRenderSiblingImages_Issue12925(t *testing.T) {
 	testcase := `![image1](/image1)
 ![image2](/image2)
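Decoded, the fuzz inputs are short markdown fragments; notably the third is "{class=5}\n=", which looks like exactly the heading-with-a-numeric-attribute case that the goldmark.go fix above now coerces. A quick decode (byte values copied from the test table):

package main

import "fmt"

func main() {
	cases := [][]byte{
		{0x2a, 0x20, 0x2d, 0x0a, 0x09, 0x20, 0x60, 0x5b, 0x0a, 0x09, 0x20, 0x60, 0x5b},
		{0x2d, 0x20, 0x2d, 0x0d, 0x09, 0x60, 0x0d, 0x09, 0x60},
		{0x7b, 0x63, 0x6c, 0x61, 0x73, 0x73, 0x3d, 0x35, 0x7d, 0x0a, 0x3d},
	}
	for _, c := range cases {
		fmt.Printf("%q\n", string(c)) // e.g. "{class=5}\n=" for the third case
	}
}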
@@ -318,4 +337,5 @@ func TestRenderSiblingImages_Issue12925(t *testing.T) {
 `
 	res := string(RenderRaw([]byte(testcase), "", false))
 	assert.Equal(t, expected, res)
+
 }