Fix garbling when copying multibyte text via OSC 52 (#7870)

This commit adds the missing UTF-8 to UTF-16 conversion when decoding
base64, so that multibyte text copied via OSC 52 is no longer garbled.

## Validation Steps Performed
* automatically
    * Tests w/ multibyte characters
* manually
    * case1
        * Executed `printf "\x1b]52;;%s\x1b\\" "$(printf '👍👍🏻👍🏼👍🏽👍🏾👍🏿' | base64)"`
        * Verified `👍👍🏻👍🏼👍🏽👍🏾👍🏿` in my clipboard
    * case2
        * Copied `👍👍🏻👍🏼👍🏽👍🏾👍🏿` using the default copy function of tmux 2.6 (OSC 52)
        * Verified `👍👍🏻👍🏼👍🏽👍🏾👍🏿` in my clipboard

Closes #7819
This commit is contained in:
Ryuichi Ito 2020-10-16 11:02:59 +09:00 committed by GitHub
parent 4a4a41eadf
commit 743283e434
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 45 additions and 15 deletions

View file

@ -49,6 +49,7 @@ GETDESKWALLPAPER
UPDATEINIFILE
spsc
STDCPP
strchr
syscall
tmp
tx

View file

@ -19,5 +19,5 @@ TestUtils::VerifyExpectedString\(tb, L"[^"]+"
0x[0-9A-Za-z]+
Base64::s_(?:En|De)code\(L"[^"]+"
VERIFY_ARE_EQUAL\(L"[^"]+"
L"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789\+/"
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789\+/"
std::memory_order_[\w]+

View file

@ -6,8 +6,8 @@
using namespace Microsoft::Console::VirtualTerminal;
static const wchar_t base64Chars[] = L"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
static const wchar_t padChar = L'=';
static const char base64Chars[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
static const char padChar = '=';
#pragma warning(disable : 26446 26447 26482 26485 26493 26494)
@ -75,15 +75,16 @@ std::wstring Base64::s_Encode(const std::wstring_view src) noexcept
// - true if decoding successfully, otherwise false.
bool Base64::s_Decode(const std::wstring_view src, std::wstring& dst) noexcept
{
std::string mbStr;
int state = 0;
wchar_t tmp;
char tmp;
const auto len = src.size() / 4 * 3;
if (len == 0)
{
return false;
}
dst.reserve(len);
mbStr.reserve(len);
auto iter = src.cbegin();
while (iter < src.cend())
@ -99,7 +100,7 @@ bool Base64::s_Decode(const std::wstring_view src, std::wstring& dst) noexcept
break;
}
auto pos = wcschr(base64Chars, *iter);
auto pos = strchr(base64Chars, *iter);
if (!pos) // A non-base64 character found.
{
return false;
@ -108,24 +109,24 @@ bool Base64::s_Decode(const std::wstring_view src, std::wstring& dst) noexcept
switch (state)
{
case 0:
tmp = (wchar_t)(pos - base64Chars) << 2;
tmp = (char)(pos - base64Chars) << 2;
state = 1;
break;
case 1:
tmp |= (pos - base64Chars) >> 4;
dst.push_back(tmp);
tmp = (wchar_t)((pos - base64Chars) & 0x0f) << 4;
tmp |= (char)(pos - base64Chars) >> 4;
mbStr += tmp;
tmp = (char)((pos - base64Chars) & 0x0f) << 4;
state = 2;
break;
case 2:
tmp |= (pos - base64Chars) >> 2;
dst.push_back(tmp);
tmp = (wchar_t)((pos - base64Chars) & 0x03) << 6;
tmp |= (char)(pos - base64Chars) >> 2;
mbStr += tmp;
tmp = (char)((pos - base64Chars) & 0x03) << 6;
state = 3;
break;
case 3:
tmp |= pos - base64Chars;
dst.push_back(tmp);
mbStr += tmp;
state = 0;
break;
default:
@ -176,7 +177,7 @@ bool Base64::s_Decode(const std::wstring_view src, std::wstring& dst) noexcept
return false;
}
return true;
return SUCCEEDED(til::u8u16(mbStr, dst));
}
// Routine Description:

View file

@ -84,5 +84,18 @@ class Microsoft::Console::VirtualTerminal::Base64Test
success = Base64::s_Decode(L"Zm9vYg=", result);
VERIFY_ARE_EQUAL(false, success);
// U+306b U+307b U+3093 U+3054 U+6c49 U+8bed U+d55c U+ad6d
result = L"";
success = Base64::s_Decode(L"44Gr44G744KT44GU5rGJ6K+t7ZWc6rWt", result);
VERIFY_ARE_EQUAL(true, success);
VERIFY_ARE_EQUAL(L"にほんご汉语한국", result);
// U+d83d U+dc4d U+d83d U+dc4d U+d83c U+dffb U+d83d U+dc4d U+d83c U+dffc U+d83d
// U+dc4d U+d83c U+dffd U+d83d U+dc4d U+d83c U+dffe U+d83d U+dc4d U+d83c U+dfff
result = L"";
success = Base64::s_Decode(L"8J+RjfCfkY3wn4+78J+RjfCfj7zwn5GN8J+PvfCfkY3wn4++8J+RjfCfj78=", result);
VERIFY_ARE_EQUAL(true, success);
VERIFY_ARE_EQUAL(L"👍👍🏻👍🏼👍🏽👍🏾👍🏿", result);
}
};

View file

@ -3139,6 +3139,21 @@ class StateMachineExternalTest final
pDispatch->ClearState();
// Passing an empty `Pc` param and a base64-encoded multibyte text `Pd` works.
// U+306b U+307b U+3093 U+3054 U+6c49 U+8bed U+d55c U+ad6d
mach.ProcessString(L"\x1b]52;;44Gr44G744KT44GU5rGJ6K+t7ZWc6rWt\x07");
VERIFY_ARE_EQUAL(L"にほんご汉语한국", pDispatch->_copyContent);
pDispatch->ClearState();
// Passing an empty `Pc` param and a base64-encoded multibyte text w/ emoji sequences `Pd` works.
// U+d83d U+dc4d U+d83d U+dc4d U+d83c U+dffb U+d83d U+dc4d U+d83c U+dffc U+d83d
// U+dc4d U+d83c U+dffd U+d83d U+dc4d U+d83c U+dffe U+d83d U+dc4d U+d83c U+dfff
mach.ProcessString(L"\x1b]52;;8J+RjfCfkY3wn4+78J+RjfCfj7zwn5GN8J+PvfCfkY3wn4++8J+RjfCfj78=\x07");
VERIFY_ARE_EQUAL(L"👍👍🏻👍🏼👍🏽👍🏾👍🏿", pDispatch->_copyContent);
pDispatch->ClearState();
// Passing a non-empty `Pc` param (`s0` is ignored) and a valid `Pd` param works.
mach.ProcessString(L"\x1b]52;s0;Zm9v\x07");
VERIFY_ARE_EQUAL(L"foo", pDispatch->_copyContent);