From b6a21f85a74c01c6a297e9595900a25d29fd5dfb Mon Sep 17 00:00:00 2001
From: bruvzg <7645683+bruvzg@users.noreply.github.com>
Date: Fri, 30 Apr 2021 21:22:39 +0300
Subject: [PATCH] Fix `uri_decode` with mixed percent-encoding/Unicode strings.
 Treat Unix drive names as UTF-8 encoded.

---
 core/string/ustring.cpp          | 19 ++++++++++---------
 drivers/unix/dir_access_unix.cpp | 12 +++++++-----
 tests/test_string.h              | 11 +++++++++++
 3 files changed, 28 insertions(+), 14 deletions(-)
diff --git a/core/string/ustring.cpp b/core/string/ustring.cpp
index c8d71c3236..a3bbb5ac18 100644
--- a/core/string/ustring.cpp
+++ b/core/string/ustring.cpp
@@ -3784,27 +3784,28 @@ String String::uri_encode() const {
 }
 
 String String::uri_decode() const {
-	String res;
-	for (int i = 0; i < length(); ++i) {
-		if (unicode_at(i) == '%' && i + 2 < length()) {
-			char32_t ord1 = unicode_at(i + 1);
+	CharString src = utf8();
+	CharString res;
+	for (int i = 0; i < src.length(); ++i) {
+		if (src[i] == '%' && i + 2 < src.length()) {
+			char ord1 = src[i + 1];
 			if ((ord1 >= '0' && ord1 <= '9') || (ord1 >= 'A' && ord1 <= 'Z')) {
-				char32_t ord2 = unicode_at(i + 2);
+				char ord2 = src[i + 2];
 				if ((ord2 >= '0' && ord2 <= '9') || (ord2 >= 'A' && ord2 <= 'Z')) {
 					char bytes[3] = { (char)ord1, (char)ord2, 0 };
 					res += (char)strtol(bytes, nullptr, 16);
 					i += 2;
 				}
 			} else {
-				res += unicode_at(i);
+				res += src[i];
 			}
-		} else if (unicode_at(i) == '+') {
+		} else if (src[i] == '+') {
 			res += ' ';
 		} else {
-			res += unicode_at(i);
+			res += src[i];
 		}
 	}
-	return String::utf8(res.ascii());
+	return String::utf8(res);
 }
 
 String String::c_unescape() const {
diff --git a/drivers/unix/dir_access_unix.cpp b/drivers/unix/dir_access_unix.cpp
index 34ef6f3ce6..22151b60c1 100644
--- a/drivers/unix/dir_access_unix.cpp
+++ b/drivers/unix/dir_access_unix.cpp
@@ -226,8 +226,9 @@ static void _get_drives(List<String> *list) {
 		while (getmntent_r(mtab, &mnt, strings, sizeof(strings))) {
 			if (mnt.mnt_dir != nullptr && _filter_drive(&mnt)) {
 				// Avoid duplicates
-				if (!list->find(mnt.mnt_dir)) {
-					list->push_back(mnt.mnt_dir);
+				String name = String::utf8(mnt.mnt_dir);
+				if (!list->find(name)) {
+					list->push_back(name);
 				}
 			}
 		}
@@ -240,8 +241,9 @@ static void _get_drives(List<String> *list) {
 	const char *home = getenv("HOME");
 	if (home) {
 		// Only add if it's not a duplicate
-		if (!list->find(home)) {
-			list->push_back(home);
+		String home_name = String::utf8(home);
+		if (!list->find(home_name)) {
+			list->push_back(home_name);
 		}
 
 		// Check $HOME/.config/gtk-3.0/bookmarks
@@ -254,7 +256,7 @@ static void _get_drives(List<String> *list) {
 				// Parse only file:// links
 				if (strncmp(string, "file://", 7) == 0) {
 					// Strip any unwanted edges on the strings and push_back if it's not a duplicate
-					String fpath = String(string + 7).strip_edges().split_spaces()[0].uri_decode();
+					String fpath = String::utf8(string + 7).strip_edges().split_spaces()[0].uri_decode();
 					if (!list->find(fpath)) {
 						list->push_back(fpath);
 					}
diff --git a/tests/test_string.h b/tests/test_string.h
index 02147edc9b..94d14517ae 100644
--- a/tests/test_string.h
+++ b/tests/test_string.h
@@ -1156,6 +1156,17 @@ TEST_CASE("[String] uri_encode/unescape") {
 	String s = "Godot Engine:'docs'";
 	String t = "Godot%20Engine%3A%27docs%27";
 
+	String x1 = "T%C4%93%C5%A1t";
+	static const uint8_t u8str[] = { 0x54, 0xC4, 0x93, 0xC5, 0xA1, 0x74, 0x00 };
+	String x2 = String::utf8((const char *)u8str);
+	String x3 = U"Tēšt";
+
+	CHECK(x1.uri_decode() == x2);
+	CHECK(x1.uri_decode() == x3);
+	CHECK((x1 + x3).uri_decode() == (x2 + x3)); // Mixed unicode and URL encoded string, e.g. GTK+ bookmark.
+	CHECK(x2.uri_encode() == x1);
+	CHECK(x3.uri_encode() == x1);
+
 	CHECK(s.uri_encode() == t);
 	CHECK(t.uri_decode() == s);
 }