Make sure caching of partials still works if the string consists of only a single lead byte (GH#4673) (#4685)
## Summary of the Pull Request

Fixes a flaw that occurred when `til::u8u16` received a string consisting of a single lead byte.

## PR Checklist

* [x] Closes #4673
* [x] Tests added/passed

## Detailed Description of the Pull Request / Additional comments

The loop for caching partials didn't run, and thus the lead byte was converted to U+FFFD. That's because the loop starts with `sequenceLen` initialized to 1. If the string has a length of 1, the initial condition is `1<1`, which evaluates to `false`, so the body of the loop was never executed.

## Validation Steps Performed

1) updated the code of the state class and tested manually that `printf "\xE2"; printf "\x98\xBA\n"` prints a U+263A character
2) updated the unit tests to make sure that up to 3 partials are still cached
3) updated the unit tests to make sure caching also works if the string consists of a lead byte only
4) tested manually that #4086 is still resolved
This commit is contained in:
parent
671110c88a
commit
b8e33560f9
|
@ -84,8 +84,8 @@ namespace til // Terminal Implementation Library. Also: "Today I Learned"
|
|||
if ((*backIter & _Utf8BitMasks::MaskAsciiByte) > _Utf8BitMasks::IsAsciiByte)
|
||||
{
|
||||
// Check only the last 3 bytes at most. If no Lead Byte is found among them, then the byte before must be the Lead Byte and there are no partials in the string.
|
||||
const size_t stopLen{ std::min(in.length(), gsl::narrow_cast<size_t>(4u)) };
|
||||
for (size_t sequenceLen{ 1u }; sequenceLen < stopLen; ++sequenceLen, --backIter)
|
||||
const size_t stopLen{ std::min(in.length(), gsl::narrow_cast<size_t>(3u)) };
|
||||
for (size_t sequenceLen{ 1u }; sequenceLen <= stopLen; ++sequenceLen, --backIter)
|
||||
{
|
||||
// If Lead Byte found
|
||||
if ((*backIter & _Utf8BitMasks::MaskContinuationByte) > _Utf8BitMasks::IsContinuationByte)
|
||||
|
|
|
@ -83,31 +83,55 @@ void Utf8Utf16ConvertTests::TestU8ToU16Partials()
|
|||
'\xA4',
|
||||
'\xBD',
|
||||
'\x9C',
|
||||
'\xF0' // CJK UNIFIED IDEOGRAPH-24F5C (lead byte only)
|
||||
'\xF0', // CJK UNIFIED IDEOGRAPH-24F5C (lead byte + 2 complementary bytes)
|
||||
'\xA4',
|
||||
'\xBD'
|
||||
};
|
||||
|
||||
const std::string u8String2{
|
||||
'\xA4', // CJK UNIFIED IDEOGRAPH-24F5C (complementary bytes)
|
||||
'\xBD',
|
||||
'\x9C'
|
||||
'\x9C' // CJK UNIFIED IDEOGRAPH-24F5C (last complementary byte)
|
||||
};
|
||||
|
||||
const std::wstring u16StringComp{
|
||||
const std::wstring u16StringComp1{
|
||||
gsl::narrow_cast<wchar_t>(0xD853), // CJK UNIFIED IDEOGRAPH-24F5C (surrogate pair)
|
||||
gsl::narrow_cast<wchar_t>(0xDF5C)
|
||||
};
|
||||
|
||||
// GH#4673
|
||||
const std::string u8String3{
|
||||
'\xE2' // WHITE SMILING FACE (lead byte)
|
||||
};
|
||||
|
||||
const std::string u8String4{
|
||||
'\x98', // WHITE SMILING FACE (complementary bytes)
|
||||
'\xBA'
|
||||
};
|
||||
|
||||
const std::wstring u16StringComp2{
|
||||
gsl::narrow_cast<wchar_t>(0x263A) // WHITE SMILING FACE
|
||||
};
|
||||
|
||||
til::u8state state{};
|
||||
|
||||
std::wstring u16Out1{};
|
||||
const HRESULT hRes1{ til::u8u16(u8String1, u16Out1, state) };
|
||||
VERIFY_ARE_EQUAL(S_OK, hRes1);
|
||||
VERIFY_ARE_EQUAL(u16StringComp, u16Out1);
|
||||
VERIFY_ARE_EQUAL(u16StringComp1, u16Out1);
|
||||
|
||||
std::wstring u16Out2{};
|
||||
const HRESULT hRes2{ til::u8u16(u8String2, u16Out2, state) };
|
||||
VERIFY_ARE_EQUAL(S_OK, hRes2);
|
||||
VERIFY_ARE_EQUAL(u16StringComp, u16Out2);
|
||||
VERIFY_ARE_EQUAL(u16StringComp1, u16Out2);
|
||||
|
||||
std::wstring u16Out3{};
|
||||
const HRESULT hRes3{ til::u8u16(u8String3, u16Out3, state) };
|
||||
VERIFY_ARE_EQUAL(S_OK, hRes3);
|
||||
VERIFY_ARE_EQUAL(std::wstring{}, u16Out3);
|
||||
|
||||
std::wstring u16Out4{};
|
||||
const HRESULT hRes4{ til::u8u16(u8String4, u16Out4, state) };
|
||||
VERIFY_ARE_EQUAL(S_OK, hRes4);
|
||||
VERIFY_ARE_EQUAL(u16StringComp2, u16Out4);
|
||||
}
|
||||
|
||||
void Utf8Utf16ConvertTests::TestU16ToU8Partials()
|
||||
|
|
Loading…
Reference in a new issue