Fix garbling when copying multibyte text via OSC 52 (#7870)

This commit adds the missing UTF-8 to UTF-16 conversion when decoding
base64, so that multibyte text copied via OSC 52 is no longer garbled.

## Validation Steps Performed
* Automated
    * Unit tests with multibyte characters
* Manual
    * case1
        * Executed `printf "\x1b]52;;%s\x1b\\" "$(printf '👍👍🏻👍🏼👍🏽👍🏾👍🏿' | base64)"`
        * Verified `👍👍🏻👍🏼👍🏽👍🏾👍🏿` in my clipboard
    * case2
        * Copied `👍👍🏻👍🏼👍🏽👍🏾👍🏿` using the default copy function of tmux 2.6 (OSC 52)
        * Verified `👍👍🏻👍🏼👍🏽👍🏾👍🏿` in my clipboard

Closes #7819
This commit is contained in:
Ryuichi Ito 2020-10-16 11:02:59 +09:00 committed by GitHub
parent 4a4a41eadf
commit 743283e434
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 45 additions and 15 deletions

View file

@ -49,6 +49,7 @@ GETDESKWALLPAPER
UPDATEINIFILE UPDATEINIFILE
spsc spsc
STDCPP STDCPP
strchr
syscall syscall
tmp tmp
tx tx

View file

@ -19,5 +19,5 @@ TestUtils::VerifyExpectedString\(tb, L"[^"]+"
0x[0-9A-Za-z]+ 0x[0-9A-Za-z]+
Base64::s_(?:En|De)code\(L"[^"]+" Base64::s_(?:En|De)code\(L"[^"]+"
VERIFY_ARE_EQUAL\(L"[^"]+" VERIFY_ARE_EQUAL\(L"[^"]+"
L"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789\+/" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789\+/"
std::memory_order_[\w]+ std::memory_order_[\w]+

View file

@ -6,8 +6,8 @@
using namespace Microsoft::Console::VirtualTerminal; using namespace Microsoft::Console::VirtualTerminal;
static const wchar_t base64Chars[] = L"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; static const char base64Chars[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
static const wchar_t padChar = L'='; static const char padChar = '=';
#pragma warning(disable : 26446 26447 26482 26485 26493 26494) #pragma warning(disable : 26446 26447 26482 26485 26493 26494)
@ -75,15 +75,16 @@ std::wstring Base64::s_Encode(const std::wstring_view src) noexcept
// - true if decoding successfully, otherwise false. // - true if decoding successfully, otherwise false.
bool Base64::s_Decode(const std::wstring_view src, std::wstring& dst) noexcept bool Base64::s_Decode(const std::wstring_view src, std::wstring& dst) noexcept
{ {
std::string mbStr;
int state = 0; int state = 0;
wchar_t tmp; char tmp;
const auto len = src.size() / 4 * 3; const auto len = src.size() / 4 * 3;
if (len == 0) if (len == 0)
{ {
return false; return false;
} }
dst.reserve(len); mbStr.reserve(len);
auto iter = src.cbegin(); auto iter = src.cbegin();
while (iter < src.cend()) while (iter < src.cend())
@ -99,7 +100,7 @@ bool Base64::s_Decode(const std::wstring_view src, std::wstring& dst) noexcept
break; break;
} }
auto pos = wcschr(base64Chars, *iter); auto pos = strchr(base64Chars, *iter);
if (!pos) // A non-base64 character found. if (!pos) // A non-base64 character found.
{ {
return false; return false;
@ -108,24 +109,24 @@ bool Base64::s_Decode(const std::wstring_view src, std::wstring& dst) noexcept
switch (state) switch (state)
{ {
case 0: case 0:
tmp = (wchar_t)(pos - base64Chars) << 2; tmp = (char)(pos - base64Chars) << 2;
state = 1; state = 1;
break; break;
case 1: case 1:
tmp |= (pos - base64Chars) >> 4; tmp |= (char)(pos - base64Chars) >> 4;
dst.push_back(tmp); mbStr += tmp;
tmp = (wchar_t)((pos - base64Chars) & 0x0f) << 4; tmp = (char)((pos - base64Chars) & 0x0f) << 4;
state = 2; state = 2;
break; break;
case 2: case 2:
tmp |= (pos - base64Chars) >> 2; tmp |= (char)(pos - base64Chars) >> 2;
dst.push_back(tmp); mbStr += tmp;
tmp = (wchar_t)((pos - base64Chars) & 0x03) << 6; tmp = (char)((pos - base64Chars) & 0x03) << 6;
state = 3; state = 3;
break; break;
case 3: case 3:
tmp |= pos - base64Chars; tmp |= pos - base64Chars;
dst.push_back(tmp); mbStr += tmp;
state = 0; state = 0;
break; break;
default: default:
@ -176,7 +177,7 @@ bool Base64::s_Decode(const std::wstring_view src, std::wstring& dst) noexcept
return false; return false;
} }
return true; return SUCCEEDED(til::u8u16(mbStr, dst));
} }
// Routine Description: // Routine Description:

View file

@ -84,5 +84,18 @@ class Microsoft::Console::VirtualTerminal::Base64Test
success = Base64::s_Decode(L"Zm9vYg=", result); success = Base64::s_Decode(L"Zm9vYg=", result);
VERIFY_ARE_EQUAL(false, success); VERIFY_ARE_EQUAL(false, success);
// U+306b U+307b U+3093 U+3054 U+6c49 U+8bed U+d55c U+ad6d
result = L"";
success = Base64::s_Decode(L"44Gr44G744KT44GU5rGJ6K+t7ZWc6rWt", result);
VERIFY_ARE_EQUAL(true, success);
VERIFY_ARE_EQUAL(L"にほんご汉语한국", result);
// U+d83d U+dc4d U+d83d U+dc4d U+d83c U+dffb U+d83d U+dc4d U+d83c U+dffc U+d83d
// U+dc4d U+d83c U+dffd U+d83d U+dc4d U+d83c U+dffe U+d83d U+dc4d U+d83c U+dfff
result = L"";
success = Base64::s_Decode(L"8J+RjfCfkY3wn4+78J+RjfCfj7zwn5GN8J+PvfCfkY3wn4++8J+RjfCfj78=", result);
VERIFY_ARE_EQUAL(true, success);
VERIFY_ARE_EQUAL(L"👍👍🏻👍🏼👍🏽👍🏾👍🏿", result);
} }
}; };

View file

@ -3139,6 +3139,21 @@ class StateMachineExternalTest final
pDispatch->ClearState(); pDispatch->ClearState();
// Passing an empty `Pc` param and a base64-encoded multibyte text `Pd` works.
// U+306b U+307b U+3093 U+3054 U+6c49 U+8bed U+d55c U+ad6d
mach.ProcessString(L"\x1b]52;;44Gr44G744KT44GU5rGJ6K+t7ZWc6rWt\x07");
VERIFY_ARE_EQUAL(L"にほんご汉语한국", pDispatch->_copyContent);
pDispatch->ClearState();
// Passing an empty `Pc` param and a base64-encoded multibyte text w/ emoji sequences `Pd` works.
// U+d83d U+dc4d U+d83d U+dc4d U+d83c U+dffb U+d83d U+dc4d U+d83c U+dffc U+d83d
// U+dc4d U+d83c U+dffd U+d83d U+dc4d U+d83c U+dffe U+d83d U+dc4d U+d83c U+dfff
mach.ProcessString(L"\x1b]52;;8J+RjfCfkY3wn4+78J+RjfCfj7zwn5GN8J+PvfCfkY3wn4++8J+RjfCfj78=\x07");
VERIFY_ARE_EQUAL(L"👍👍🏻👍🏼👍🏽👍🏾👍🏿", pDispatch->_copyContent);
pDispatch->ClearState();
// Passing a non-empty `Pc` param (`s0` is ignored) and a valid `Pd` param works. // Passing a non-empty `Pc` param (`s0` is ignored) and a valid `Pd` param works.
mach.ProcessString(L"\x1b]52;s0;Zm9v\x07"); mach.ProcessString(L"\x1b]52;s0;Zm9v\x07");
VERIFY_ARE_EQUAL(L"foo", pDispatch->_copyContent); VERIFY_ARE_EQUAL(L"foo", pDispatch->_copyContent);