From 21cde5c439676d4aaa15dfc79505f364cc849ec0 Mon Sep 17 00:00:00 2001
From: KN4CK3R <admin@oldschoolhack.me>
Date: Mon, 7 Jun 2021 18:55:26 +0200
Subject: [PATCH] Fix data URI scramble (#16098)

* Removed unused method.

* Do not prepend the URL prefix to data URIs.

* Added test to prevent regressions.
---
 modules/markup/html.go           | 11 +++--------
 modules/markup/html_test.go      | 20 ++++++++++++++++++++
 modules/markup/sanitizer.go      | 10 ----------
 modules/markup/sanitizer_test.go |  1 -
 4 files changed, 23 insertions(+), 19 deletions(-)

diff --git a/modules/markup/html.go b/modules/markup/html.go
index 4d1b49e241..e5e622068d 100644
--- a/modules/markup/html.go
+++ b/modules/markup/html.go
@@ -364,24 +364,19 @@ func visitNode(ctx *RenderContext, procs []processor, node *html.Node, visitText
 		}
 	case html.ElementNode:
 		if node.Data == "img" {
-			attrs := node.Attr
-			for idx, attr := range attrs {
+			for idx, attr := range node.Attr {
 				if attr.Key != "src" {
 					continue
 				}
-				link := []byte(attr.Val)
-				if len(link) > 0 && !IsLink(link) {
+				if len(attr.Val) > 0 && !isLinkStr(attr.Val) && !strings.HasPrefix(attr.Val, "data:image/") {
 					prefix := ctx.URLPrefix
 					if ctx.IsWiki {
 						prefix = util.URLJoin(prefix, "wiki", "raw")
 					}
 					prefix = strings.Replace(prefix, "/src/", "/media/", 1)
 
-					lnk := string(link)
-					lnk = util.URLJoin(prefix, lnk)
-					link = []byte(lnk)
+					node.Attr[idx].Val = util.URLJoin(prefix, attr.Val) // attr is a per-iteration copy; write through the slice
 				}
-				node.Attr[idx].Val = string(link)
 			}
 		} else if node.Data == "a" {
 			visitText = false
diff --git a/modules/markup/html_test.go b/modules/markup/html_test.go
index fa8c848601..4c3c2399f5 100644
--- a/modules/markup/html_test.go
+++ b/modules/markup/html_test.go
@@ -444,3 +444,23 @@ func Test_ParseClusterFuzz(t *testing.T) {
 	assert.NoError(t, err)
 	assert.NotContains(t, res.String(), "<html")
 }
+
+func TestIssue16020(t *testing.T) { // regression test: data URI image sources must pass through PostProcess untouched
+	setting.AppURL = AppURL
+	setting.AppSubURL = AppSubURL
+
+	var localMetas = map[string]string{
+		"user": "go-gitea",
+		"repo": "gitea",
+	}
+
+	data := `<img src="data:image/png;base64,i//V"/>` // src is a data URI, not a relative link
+
+	var res strings.Builder
+	err := PostProcess(&RenderContext{
+		URLPrefix: "https://example.com", // must not end up prepended to the data URI
+		Metas:     localMetas,
+	}, strings.NewReader(data), &res)
+	assert.NoError(t, err)
+	assert.Equal(t, data, res.String()) // output must equal the input byte for byte
+}
diff --git a/modules/markup/sanitizer.go b/modules/markup/sanitizer.go
index 0e05ddb085..8d2bf5d688 100644
--- a/modules/markup/sanitizer.go
+++ b/modules/markup/sanitizer.go
@@ -131,13 +131,3 @@ func SanitizeReader(r io.Reader) *bytes.Buffer {
 	NewSanitizer()
 	return sanitizer.policy.SanitizeReader(r)
 }
-
-// SanitizeBytes takes a []byte slice that contains a HTML fragment or document and applies policy whitelist.
-func SanitizeBytes(b []byte) []byte {
-	if len(b) == 0 {
-		// nothing to sanitize
-		return b
-	}
-	NewSanitizer()
-	return sanitizer.policy.SanitizeBytes(b)
-}
diff --git a/modules/markup/sanitizer_test.go b/modules/markup/sanitizer_test.go
index 9e173015d6..64189e1435 100644
--- a/modules/markup/sanitizer_test.go
+++ b/modules/markup/sanitizer_test.go
@@ -49,7 +49,6 @@ func Test_Sanitizer(t *testing.T) {
 
 	for i := 0; i < len(testCases); i += 2 {
 		assert.Equal(t, testCases[i+1], Sanitize(testCases[i]))
-		assert.Equal(t, testCases[i+1], string(SanitizeBytes([]byte(testCases[i]))))
 	}
 }