Fix garbling when copying multibyte text via OSC 52 (#7870)
This commit adds the missing UTF-8 to UTF-16 conversion when decoding base64, so that multibyte text copied via OSC 52 is handled correctly.

## Validation Steps Performed

* automatically
  * Tests w/ multibyte characters
* manually
  * case1
    * Executed `printf "\x1b]52;;%s\x1b\\" "$(printf '👍👍🏻👍🏼👍🏽👍🏾👍🏿' | base64)"`
    * Verified `👍👍🏻👍🏼👍🏽👍🏾👍🏿` in my clipboard
  * case2
    * Copied `👍👍🏻👍🏼👍🏽👍🏾👍🏿` by tmux 2.6 default copy function (OSC 52)
    * Verified `👍👍🏻👍🏼👍🏽👍🏾👍🏿` in my clipboard

Closes #7819
This commit is contained in:
parent
4a4a41eadf
commit
743283e434
|
@ -49,6 +49,7 @@ GETDESKWALLPAPER
|
|||
UPDATEINIFILE
|
||||
spsc
|
||||
STDCPP
|
||||
strchr
|
||||
syscall
|
||||
tmp
|
||||
tx
|
||||
|
|
|
@ -19,5 +19,5 @@ TestUtils::VerifyExpectedString\(tb, L"[^"]+"
|
|||
0x[0-9A-Za-z]+
|
||||
Base64::s_(?:En|De)code\(L"[^"]+"
|
||||
VERIFY_ARE_EQUAL\(L"[^"]+"
|
||||
L"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789\+/"
|
||||
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789\+/"
|
||||
std::memory_order_[\w]+
|
||||
|
|
|
@ -6,8 +6,8 @@
|
|||
|
||||
using namespace Microsoft::Console::VirtualTerminal;
|
||||
|
||||
static const wchar_t base64Chars[] = L"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
|
||||
static const wchar_t padChar = L'=';
|
||||
static const char base64Chars[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
|
||||
static const char padChar = '=';
|
||||
|
||||
#pragma warning(disable : 26446 26447 26482 26485 26493 26494)
|
||||
|
||||
|
@ -75,15 +75,16 @@ std::wstring Base64::s_Encode(const std::wstring_view src) noexcept
|
|||
// - true if decoding successfully, otherwise false.
|
||||
bool Base64::s_Decode(const std::wstring_view src, std::wstring& dst) noexcept
|
||||
{
|
||||
std::string mbStr;
|
||||
int state = 0;
|
||||
wchar_t tmp;
|
||||
char tmp;
|
||||
|
||||
const auto len = src.size() / 4 * 3;
|
||||
if (len == 0)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
dst.reserve(len);
|
||||
mbStr.reserve(len);
|
||||
|
||||
auto iter = src.cbegin();
|
||||
while (iter < src.cend())
|
||||
|
@ -99,7 +100,7 @@ bool Base64::s_Decode(const std::wstring_view src, std::wstring& dst) noexcept
|
|||
break;
|
||||
}
|
||||
|
||||
auto pos = wcschr(base64Chars, *iter);
|
||||
auto pos = strchr(base64Chars, *iter);
|
||||
if (!pos) // A non-base64 character found.
|
||||
{
|
||||
return false;
|
||||
|
@ -108,24 +109,24 @@ bool Base64::s_Decode(const std::wstring_view src, std::wstring& dst) noexcept
|
|||
switch (state)
|
||||
{
|
||||
case 0:
|
||||
tmp = (wchar_t)(pos - base64Chars) << 2;
|
||||
tmp = (char)(pos - base64Chars) << 2;
|
||||
state = 1;
|
||||
break;
|
||||
case 1:
|
||||
tmp |= (pos - base64Chars) >> 4;
|
||||
dst.push_back(tmp);
|
||||
tmp = (wchar_t)((pos - base64Chars) & 0x0f) << 4;
|
||||
tmp |= (char)(pos - base64Chars) >> 4;
|
||||
mbStr += tmp;
|
||||
tmp = (char)((pos - base64Chars) & 0x0f) << 4;
|
||||
state = 2;
|
||||
break;
|
||||
case 2:
|
||||
tmp |= (pos - base64Chars) >> 2;
|
||||
dst.push_back(tmp);
|
||||
tmp = (wchar_t)((pos - base64Chars) & 0x03) << 6;
|
||||
tmp |= (char)(pos - base64Chars) >> 2;
|
||||
mbStr += tmp;
|
||||
tmp = (char)((pos - base64Chars) & 0x03) << 6;
|
||||
state = 3;
|
||||
break;
|
||||
case 3:
|
||||
tmp |= pos - base64Chars;
|
||||
dst.push_back(tmp);
|
||||
mbStr += tmp;
|
||||
state = 0;
|
||||
break;
|
||||
default:
|
||||
|
@ -176,7 +177,7 @@ bool Base64::s_Decode(const std::wstring_view src, std::wstring& dst) noexcept
|
|||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
return SUCCEEDED(til::u8u16(mbStr, dst));
|
||||
}
|
||||
|
||||
// Routine Description:
|
||||
|
|
|
@ -84,5 +84,18 @@ class Microsoft::Console::VirtualTerminal::Base64Test
|
|||
|
||||
success = Base64::s_Decode(L"Zm9vYg=", result);
|
||||
VERIFY_ARE_EQUAL(false, success);
|
||||
|
||||
// U+306b U+307b U+3093 U+3054 U+6c49 U+8bed U+d55c U+ad6d
|
||||
result = L"";
|
||||
success = Base64::s_Decode(L"44Gr44G744KT44GU5rGJ6K+t7ZWc6rWt", result);
|
||||
VERIFY_ARE_EQUAL(true, success);
|
||||
VERIFY_ARE_EQUAL(L"にほんご汉语한국", result);
|
||||
|
||||
// U+d83d U+dc4d U+d83d U+dc4d U+d83c U+dffb U+d83d U+dc4d U+d83c U+dffc U+d83d
|
||||
// U+dc4d U+d83c U+dffd U+d83d U+dc4d U+d83c U+dffe U+d83d U+dc4d U+d83c U+dfff
|
||||
result = L"";
|
||||
success = Base64::s_Decode(L"8J+RjfCfkY3wn4+78J+RjfCfj7zwn5GN8J+PvfCfkY3wn4++8J+RjfCfj78=", result);
|
||||
VERIFY_ARE_EQUAL(true, success);
|
||||
VERIFY_ARE_EQUAL(L"👍👍🏻👍🏼👍🏽👍🏾👍🏿", result);
|
||||
}
|
||||
};
|
||||
|
|
|
@ -3139,6 +3139,21 @@ class StateMachineExternalTest final
|
|||
|
||||
pDispatch->ClearState();
|
||||
|
||||
// Passing an empty `Pc` param and a base64-encoded multibyte text `Pd` works.
|
||||
// U+306b U+307b U+3093 U+3054 U+6c49 U+8bed U+d55c U+ad6d
|
||||
mach.ProcessString(L"\x1b]52;;44Gr44G744KT44GU5rGJ6K+t7ZWc6rWt\x07");
|
||||
VERIFY_ARE_EQUAL(L"にほんご汉语한국", pDispatch->_copyContent);
|
||||
|
||||
pDispatch->ClearState();
|
||||
|
||||
// Passing an empty `Pc` param and a base64-encoded multibyte text w/ emoji sequences `Pd` works.
|
||||
// U+d83d U+dc4d U+d83d U+dc4d U+d83c U+dffb U+d83d U+dc4d U+d83c U+dffc U+d83d
|
||||
// U+dc4d U+d83c U+dffd U+d83d U+dc4d U+d83c U+dffe U+d83d U+dc4d U+d83c U+dfff
|
||||
mach.ProcessString(L"\x1b]52;;8J+RjfCfkY3wn4+78J+RjfCfj7zwn5GN8J+PvfCfkY3wn4++8J+RjfCfj78=\x07");
|
||||
VERIFY_ARE_EQUAL(L"👍👍🏻👍🏼👍🏽👍🏾👍🏿", pDispatch->_copyContent);
|
||||
|
||||
pDispatch->ClearState();
|
||||
|
||||
// Passing a non-empty `Pc` param (`s0` is ignored) and a valid `Pd` param works.
|
||||
mach.ProcessString(L"\x1b]52;s0;Zm9v\x07");
|
||||
VERIFY_ARE_EQUAL(L"foo", pDispatch->_copyContent);
|
||||
|
|
Loading…
Reference in a new issue