From b6a21f85a74c01c6a297e9595900a25d29fd5dfb Mon Sep 17 00:00:00 2001
From: bruvzg <7645683+bruvzg@users.noreply.github.com>
Date: Fri, 30 Apr 2021 21:22:39 +0300
Subject: [PATCH] Fix `uri_decode` with mixed percent-encoding/Unicode strings.
 Treat Unix drive names as UTF-8 encoded.

Percent escapes are now decoded on the UTF-8 byte representation of the
string, so escaped bytes and literal non-ASCII characters can be mixed.
Escape digits are validated as real hex digits (0-9, A-F, a-f): lowercase
escapes are decoded, and malformed escapes are kept verbatim instead of
being fed to strtol() or losing their '%'.
---
Review notes (this section is ignored by `git am`):
- The subject referred to `url_decode`; the function changed is `uri_decode`.
- The line structure of this patch was restored from a whitespace-collapsed
  copy. Context-line indentation is reconstructed (tabs); apply with
  `git apply --ignore-whitespace` if a context line does not match.
- The `uri_decode` and test hunks were amended during review, so the
  post-image blob ids on the `index` lines of ustring.cpp and test_string.h
  are stale. Pre-image ids are unchanged.

 core/string/ustring.cpp          | 35 ++++++++++++++++++++---------------
 drivers/unix/dir_access_unix.cpp | 12 +++++++-----
 tests/test_string.h              | 16 ++++++++++++++++
 3 files changed, 43 insertions(+), 20 deletions(-)

diff --git a/core/string/ustring.cpp b/core/string/ustring.cpp
index c8d71c3236..a3bbb5ac18 100644
--- a/core/string/ustring.cpp
+++ b/core/string/ustring.cpp
@@ -3784,27 +3784,32 @@ String String::uri_encode() const {
 }
 
 String String::uri_decode() const {
-	String res;
-	for (int i = 0; i < length(); ++i) {
-		if (unicode_at(i) == '%' && i + 2 < length()) {
-			char32_t ord1 = unicode_at(i + 1);
-			if ((ord1 >= '0' && ord1 <= '9') || (ord1 >= 'A' && ord1 <= 'Z')) {
-				char32_t ord2 = unicode_at(i + 2);
-				if ((ord2 >= '0' && ord2 <= '9') || (ord2 >= 'A' && ord2 <= 'Z')) {
-					char bytes[3] = { (char)ord1, (char)ord2, 0 };
-					res += (char)strtol(bytes, nullptr, 16);
-					i += 2;
-				}
+	// Work on the UTF-8 bytes so that percent-encoded bytes and literal non-ASCII
+	// characters are merged into a single byte stream before the final decode.
+	CharString src = utf8();
+	CharString res;
+	for (int i = 0; i < src.length(); ++i) {
+		if (src[i] == '%' && i + 2 < src.length()) {
+			char ord1 = src[i + 1];
+			char ord2 = src[i + 2];
+			// RFC 3986 allows both upper and lower case hex digits in an escape.
+			bool hex1 = (ord1 >= '0' && ord1 <= '9') || (ord1 >= 'A' && ord1 <= 'F') || (ord1 >= 'a' && ord1 <= 'f');
+			bool hex2 = (ord2 >= '0' && ord2 <= '9') || (ord2 >= 'A' && ord2 <= 'F') || (ord2 >= 'a' && ord2 <= 'f');
+			if (hex1 && hex2) {
+				char bytes[3] = { ord1, ord2, 0 };
+				res += (char)strtol(bytes, nullptr, 16);
+				i += 2;
 			} else {
-				res += unicode_at(i);
+				// Not a valid escape: keep the '%' and let the following characters be copied as is.
+				res += src[i];
 			}
-		} else if (unicode_at(i) == '+') {
+		} else if (src[i] == '+') {
 			res += ' ';
 		} else {
-			res += unicode_at(i);
+			res += src[i];
 		}
 	}
-	return String::utf8(res.ascii());
+	return String::utf8(res);
 }
 
 String String::c_unescape() const {
diff --git a/drivers/unix/dir_access_unix.cpp b/drivers/unix/dir_access_unix.cpp
index 34ef6f3ce6..22151b60c1 100644
--- a/drivers/unix/dir_access_unix.cpp
+++ b/drivers/unix/dir_access_unix.cpp
@@ -226,8 +226,9 @@ static void _get_drives(List *list) {
 		while (getmntent_r(mtab, &mnt, strings, sizeof(strings))) {
 			if (mnt.mnt_dir != nullptr && _filter_drive(&mnt)) {
 				// Avoid duplicates
-				if (!list->find(mnt.mnt_dir)) {
-					list->push_back(mnt.mnt_dir);
+				String name = String::utf8(mnt.mnt_dir);
+				if (!list->find(name)) {
+					list->push_back(name);
 				}
 			}
 		}
@@ -240,8 +241,9 @@ static void _get_drives(List *list) {
 	const char *home = getenv("HOME");
 	if (home) {
 		// Only add if it's not a duplicate
-		if (!list->find(home)) {
-			list->push_back(home);
+		String home_name = String::utf8(home);
+		if (!list->find(home_name)) {
+			list->push_back(home_name);
 		}
 
 		// Check $HOME/.config/gtk-3.0/bookmarks
@@ -254,7 +256,7 @@ static void _get_drives(List *list) {
 				// Parse only file:// links
 				if (strncmp(string, "file://", 7) == 0) {
 					// Strip any unwanted edges on the strings and push_back if it's not a duplicate
-					String fpath = String(string + 7).strip_edges().split_spaces()[0].uri_decode();
+					String fpath = String::utf8(string + 7).strip_edges().split_spaces()[0].uri_decode();
 					if (!list->find(fpath)) {
 						list->push_back(fpath);
 					}
diff --git a/tests/test_string.h b/tests/test_string.h
index 02147edc9b..94d14517ae 100644
--- a/tests/test_string.h
+++ b/tests/test_string.h
@@ -1156,6 +1156,22 @@ TEST_CASE("[String] uri_encode/unescape") {
 	String s = "Godot Engine:'docs'";
 	String t = "Godot%20Engine%3A%27docs%27";
 
+	String x1 = "T%C4%93%C5%A1t";
+	static const uint8_t u8str[] = { 0x54, 0xC4, 0x93, 0xC5, 0xA1, 0x74, 0x00 };
+	String x2 = String::utf8((const char *)u8str);
+	String x3 = U"Tēšt";
+
+	CHECK(x1.uri_decode() == x2);
+	CHECK(x1.uri_decode() == x3);
+	CHECK((x1 + x3).uri_decode() == (x2 + x3)); // Mixed unicode and URL encoded string, e.g. GTK+ bookmark.
+	CHECK(x2.uri_encode() == x1);
+	CHECK(x3.uri_encode() == x1);
+
+	// Hex digits in escapes are case-insensitive; malformed escapes are kept verbatim.
+	CHECK(String("T%c4%93%c5%a1t").uri_decode() == x3);
+	CHECK(String("100%").uri_decode() == String("100%"));
+	CHECK(String("%GZ%4-").uri_decode() == String("%GZ%4-"));
+
 	CHECK(s.uri_encode() == t);
 	CHECK(t.uri_decode() == s);
 }