Fix garbling when copying multibyte text via OSC 52 (#7870)
This commit adds the missing UTF-8 to UTF-16 conversion when decoding base64, so that multibyte text copied via OSC 52 is no longer garbled.

## Validation Steps Performed

* Automated
  * Added tests with multibyte characters
* Manual
  * Case 1
    * Executed `printf "\x1b]52;;%s\x1b\\" "$(printf '👍👍🏻👍🏼👍🏽👍🏾👍🏿' | base64)"`
    * Verified that `👍👍🏻👍🏼👍🏽👍🏾👍🏿` was in my clipboard
  * Case 2
    * Copied `👍👍🏻👍🏼👍🏽👍🏾👍🏿` using the default copy function of tmux 2.6 (OSC 52)
    * Verified that `👍👍🏻👍🏼👍🏽👍🏾👍🏿` was in my clipboard

Closes #7819
This commit is contained in:
parent
4a4a41eadf
commit
743283e434
|
@ -49,6 +49,7 @@ GETDESKWALLPAPER
|
||||||
UPDATEINIFILE
|
UPDATEINIFILE
|
||||||
spsc
|
spsc
|
||||||
STDCPP
|
STDCPP
|
||||||
|
strchr
|
||||||
syscall
|
syscall
|
||||||
tmp
|
tmp
|
||||||
tx
|
tx
|
||||||
|
|
|
@ -19,5 +19,5 @@ TestUtils::VerifyExpectedString\(tb, L"[^"]+"
|
||||||
0x[0-9A-Za-z]+
|
0x[0-9A-Za-z]+
|
||||||
Base64::s_(?:En|De)code\(L"[^"]+"
|
Base64::s_(?:En|De)code\(L"[^"]+"
|
||||||
VERIFY_ARE_EQUAL\(L"[^"]+"
|
VERIFY_ARE_EQUAL\(L"[^"]+"
|
||||||
L"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789\+/"
|
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789\+/"
|
||||||
std::memory_order_[\w]+
|
std::memory_order_[\w]+
|
||||||
|
|
|
@ -6,8 +6,8 @@
|
||||||
|
|
||||||
using namespace Microsoft::Console::VirtualTerminal;
|
using namespace Microsoft::Console::VirtualTerminal;
|
||||||
|
|
||||||
static const wchar_t base64Chars[] = L"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
|
static const char base64Chars[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
|
||||||
static const wchar_t padChar = L'=';
|
static const char padChar = '=';
|
||||||
|
|
||||||
#pragma warning(disable : 26446 26447 26482 26485 26493 26494)
|
#pragma warning(disable : 26446 26447 26482 26485 26493 26494)
|
||||||
|
|
||||||
|
@ -75,15 +75,16 @@ std::wstring Base64::s_Encode(const std::wstring_view src) noexcept
|
||||||
// - true if decoding successfully, otherwise false.
|
// - true if decoding successfully, otherwise false.
|
||||||
bool Base64::s_Decode(const std::wstring_view src, std::wstring& dst) noexcept
|
bool Base64::s_Decode(const std::wstring_view src, std::wstring& dst) noexcept
|
||||||
{
|
{
|
||||||
|
std::string mbStr;
|
||||||
int state = 0;
|
int state = 0;
|
||||||
wchar_t tmp;
|
char tmp;
|
||||||
|
|
||||||
const auto len = src.size() / 4 * 3;
|
const auto len = src.size() / 4 * 3;
|
||||||
if (len == 0)
|
if (len == 0)
|
||||||
{
|
{
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
dst.reserve(len);
|
mbStr.reserve(len);
|
||||||
|
|
||||||
auto iter = src.cbegin();
|
auto iter = src.cbegin();
|
||||||
while (iter < src.cend())
|
while (iter < src.cend())
|
||||||
|
@ -99,7 +100,7 @@ bool Base64::s_Decode(const std::wstring_view src, std::wstring& dst) noexcept
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto pos = wcschr(base64Chars, *iter);
|
auto pos = strchr(base64Chars, *iter);
|
||||||
if (!pos) // A non-base64 character found.
|
if (!pos) // A non-base64 character found.
|
||||||
{
|
{
|
||||||
return false;
|
return false;
|
||||||
|
@ -108,24 +109,24 @@ bool Base64::s_Decode(const std::wstring_view src, std::wstring& dst) noexcept
|
||||||
switch (state)
|
switch (state)
|
||||||
{
|
{
|
||||||
case 0:
|
case 0:
|
||||||
tmp = (wchar_t)(pos - base64Chars) << 2;
|
tmp = (char)(pos - base64Chars) << 2;
|
||||||
state = 1;
|
state = 1;
|
||||||
break;
|
break;
|
||||||
case 1:
|
case 1:
|
||||||
tmp |= (pos - base64Chars) >> 4;
|
tmp |= (char)(pos - base64Chars) >> 4;
|
||||||
dst.push_back(tmp);
|
mbStr += tmp;
|
||||||
tmp = (wchar_t)((pos - base64Chars) & 0x0f) << 4;
|
tmp = (char)((pos - base64Chars) & 0x0f) << 4;
|
||||||
state = 2;
|
state = 2;
|
||||||
break;
|
break;
|
||||||
case 2:
|
case 2:
|
||||||
tmp |= (pos - base64Chars) >> 2;
|
tmp |= (char)(pos - base64Chars) >> 2;
|
||||||
dst.push_back(tmp);
|
mbStr += tmp;
|
||||||
tmp = (wchar_t)((pos - base64Chars) & 0x03) << 6;
|
tmp = (char)((pos - base64Chars) & 0x03) << 6;
|
||||||
state = 3;
|
state = 3;
|
||||||
break;
|
break;
|
||||||
case 3:
|
case 3:
|
||||||
tmp |= pos - base64Chars;
|
tmp |= pos - base64Chars;
|
||||||
dst.push_back(tmp);
|
mbStr += tmp;
|
||||||
state = 0;
|
state = 0;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
|
@ -176,7 +177,7 @@ bool Base64::s_Decode(const std::wstring_view src, std::wstring& dst) noexcept
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return SUCCEEDED(til::u8u16(mbStr, dst));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Routine Description:
|
// Routine Description:
|
||||||
|
|
|
@ -84,5 +84,18 @@ class Microsoft::Console::VirtualTerminal::Base64Test
|
||||||
|
|
||||||
success = Base64::s_Decode(L"Zm9vYg=", result);
|
success = Base64::s_Decode(L"Zm9vYg=", result);
|
||||||
VERIFY_ARE_EQUAL(false, success);
|
VERIFY_ARE_EQUAL(false, success);
|
||||||
|
|
||||||
|
// U+306b U+307b U+3093 U+3054 U+6c49 U+8bed U+d55c U+ad6d
|
||||||
|
result = L"";
|
||||||
|
success = Base64::s_Decode(L"44Gr44G744KT44GU5rGJ6K+t7ZWc6rWt", result);
|
||||||
|
VERIFY_ARE_EQUAL(true, success);
|
||||||
|
VERIFY_ARE_EQUAL(L"にほんご汉语한국", result);
|
||||||
|
|
||||||
|
// U+d83d U+dc4d U+d83d U+dc4d U+d83c U+dffb U+d83d U+dc4d U+d83c U+dffc U+d83d
|
||||||
|
// U+dc4d U+d83c U+dffd U+d83d U+dc4d U+d83c U+dffe U+d83d U+dc4d U+d83c U+dfff
|
||||||
|
result = L"";
|
||||||
|
success = Base64::s_Decode(L"8J+RjfCfkY3wn4+78J+RjfCfj7zwn5GN8J+PvfCfkY3wn4++8J+RjfCfj78=", result);
|
||||||
|
VERIFY_ARE_EQUAL(true, success);
|
||||||
|
VERIFY_ARE_EQUAL(L"👍👍🏻👍🏼👍🏽👍🏾👍🏿", result);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
|
@ -3139,6 +3139,21 @@ class StateMachineExternalTest final
|
||||||
|
|
||||||
pDispatch->ClearState();
|
pDispatch->ClearState();
|
||||||
|
|
||||||
|
// Passing an empty `Pc` param and a base64-encoded multibyte text `Pd` works.
|
||||||
|
// U+306b U+307b U+3093 U+3054 U+6c49 U+8bed U+d55c U+ad6d
|
||||||
|
mach.ProcessString(L"\x1b]52;;44Gr44G744KT44GU5rGJ6K+t7ZWc6rWt\x07");
|
||||||
|
VERIFY_ARE_EQUAL(L"にほんご汉语한국", pDispatch->_copyContent);
|
||||||
|
|
||||||
|
pDispatch->ClearState();
|
||||||
|
|
||||||
|
// Passing an empty `Pc` param and a base64-encoded multibyte text w/ emoji sequences `Pd` works.
|
||||||
|
// U+d83d U+dc4d U+d83d U+dc4d U+d83c U+dffb U+d83d U+dc4d U+d83c U+dffc U+d83d
|
||||||
|
// U+dc4d U+d83c U+dffd U+d83d U+dc4d U+d83c U+dffe U+d83d U+dc4d U+d83c U+dfff
|
||||||
|
mach.ProcessString(L"\x1b]52;;8J+RjfCfkY3wn4+78J+RjfCfj7zwn5GN8J+PvfCfkY3wn4++8J+RjfCfj78=\x07");
|
||||||
|
VERIFY_ARE_EQUAL(L"👍👍🏻👍🏼👍🏽👍🏾👍🏿", pDispatch->_copyContent);
|
||||||
|
|
||||||
|
pDispatch->ClearState();
|
||||||
|
|
||||||
// Passing a non-empty `Pc` param (`s0` is ignored) and a valid `Pd` param works.
|
// Passing a non-empty `Pc` param (`s0` is ignored) and a valid `Pd` param works.
|
||||||
mach.ProcessString(L"\x1b]52;s0;Zm9v\x07");
|
mach.ProcessString(L"\x1b]52;s0;Zm9v\x07");
|
||||||
VERIFY_ARE_EQUAL(L"foo", pDispatch->_copyContent);
|
VERIFY_ARE_EQUAL(L"foo", pDispatch->_copyContent);
|
||||||
|
|
Loading…
Reference in a new issue