2019-05-03 00:29:04 +02:00
|
|
|
// Copyright (c) Microsoft Corporation.
|
|
|
|
// Licensed under the MIT license.
|
|
|
|
|
|
|
|
#include "precomp.h"
|
|
|
|
#include "inc/convert.hpp"
|
|
|
|
|
|
|
|
#include "../inc/unicode.hpp"
|
|
|
|
|
|
|
|
#ifdef BUILD_ONECORE_INTERACTIVITY
|
|
|
|
#include "../../interactivity/inc/VtApiRedirection.hpp"
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#pragma hdrstop
|
|
|
|
|
|
|
|
// TODO: MSFT 14150722 - can these const values be generated at
|
|
|
|
// runtime without breaking compatibility?
|
|
|
|
static const WORD altScanCode = 0x38;
|
|
|
|
static const WORD leftShiftScanCode = 0x2A;
|
|
|
|
|
|
|
|
// Routine Description:
|
|
|
|
// - Takes a multibyte string, allocates the appropriate amount of memory for the conversion, performs the conversion,
|
|
|
|
// and returns the Unicode UTF-16 result in the smart pointer (and the length).
|
|
|
|
// Arguments:
|
|
|
|
// - codepage - Windows Code Page representing the multibyte source text
|
|
|
|
// - source - View of multibyte characters of source text
|
|
|
|
// Return Value:
|
|
|
|
// - The UTF-16 wide string.
|
|
|
|
// - NOTE: Throws suitable HRESULT errors from memory allocation, safe math, or MultiByteToWideChar failures.
|
2019-06-11 22:27:09 +02:00
|
|
|
[[nodiscard]] std::wstring ConvertToW(const UINT codePage, const std::string_view source)
|
2019-05-03 00:29:04 +02:00
|
|
|
{
|
|
|
|
// If there's nothing to convert, bail early.
|
|
|
|
if (source.empty())
|
|
|
|
{
|
|
|
|
return {};
|
|
|
|
}
|
|
|
|
|
|
|
|
int iSource; // convert to int because Mb2Wc requires it.
|
|
|
|
THROW_IF_FAILED(SizeTToInt(source.size(), &iSource));
|
|
|
|
|
|
|
|
// Ask how much space we will need.
|
2020-08-25 19:17:21 +02:00
|
|
|
// In certain codepages, Mb2Wc will "successfully" produce zero characters (like in CP50220, where a SHIFT-IN character
|
|
|
|
// is consumed but not transformed into anything) without explicitly failing. When it does this, GetLastError will return
|
|
|
|
// the last error encountered by the last function that actually did have an error.
|
|
|
|
// This is arguably correct (as the documentation says "The function returns 0 if it does not succeed"). There is a
|
|
|
|
// difference that we **don't actually care about** between failing and successfully producing zero characters.,
|
|
|
|
// Anyway: we need to clear the last error so that we can fail out and IGNORE_BAD_GLE after it inevitably succeed-fails.
|
|
|
|
SetLastError(0);
|
2019-05-03 00:29:04 +02:00
|
|
|
int const iTarget = MultiByteToWideChar(codePage, 0, source.data(), iSource, nullptr, 0);
|
2020-02-10 23:09:08 +01:00
|
|
|
THROW_LAST_ERROR_IF_AND_IGNORE_BAD_GLE(0 == iTarget);
|
2019-05-03 00:29:04 +02:00
|
|
|
|
|
|
|
size_t cchNeeded;
|
|
|
|
THROW_IF_FAILED(IntToSizeT(iTarget, &cchNeeded));
|
|
|
|
|
2019-09-10 01:01:28 +02:00
|
|
|
// Allocate ourselves some space
|
2019-09-03 20:27:43 +02:00
|
|
|
std::wstring out;
|
|
|
|
out.resize(cchNeeded);
|
2019-05-03 00:29:04 +02:00
|
|
|
|
|
|
|
// Attempt conversion for real.
|
2020-02-10 23:09:08 +01:00
|
|
|
THROW_LAST_ERROR_IF_AND_IGNORE_BAD_GLE(0 == MultiByteToWideChar(codePage, 0, source.data(), iSource, out.data(), iTarget));
|
2019-05-03 00:29:04 +02:00
|
|
|
|
|
|
|
// Return as a string
|
2019-09-03 20:27:43 +02:00
|
|
|
return out;
|
2019-05-03 00:29:04 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
// Routine Description:
|
|
|
|
// - Takes a wide string, allocates the appropriate amount of memory for the conversion, performs the conversion,
|
|
|
|
// and returns the Multibyte result
|
|
|
|
// Arguments:
|
|
|
|
// - codepage - Windows Code Page representing the multibyte destination text
|
|
|
|
// - source - Unicode (UTF-16) characters of source text
|
|
|
|
// Return Value:
|
|
|
|
// - The multibyte string encoded in the given codepage
|
|
|
|
// - NOTE: Throws suitable HRESULT errors from memory allocation, safe math, or MultiByteToWideChar failures.
|
2019-06-11 22:27:09 +02:00
|
|
|
[[nodiscard]] std::string ConvertToA(const UINT codepage, const std::wstring_view source)
|
2019-05-03 00:29:04 +02:00
|
|
|
{
|
|
|
|
// If there's nothing to convert, bail early.
|
|
|
|
if (source.empty())
|
|
|
|
{
|
|
|
|
return {};
|
|
|
|
}
|
2019-06-11 22:27:09 +02:00
|
|
|
|
2019-05-03 00:29:04 +02:00
|
|
|
int iSource; // convert to int because Wc2Mb requires it.
|
|
|
|
THROW_IF_FAILED(SizeTToInt(source.size(), &iSource));
|
|
|
|
|
|
|
|
// Ask how much space we will need.
|
2019-06-11 22:27:09 +02:00
|
|
|
// clang-format off
|
|
|
|
#pragma prefast(suppress: __WARNING_W2A_BEST_FIT, "WC_NO_BEST_FIT_CHARS doesn't work in many codepages. Retain old behavior.")
|
|
|
|
// clang-format on
|
2019-05-03 00:29:04 +02:00
|
|
|
int const iTarget = WideCharToMultiByte(codepage, 0, source.data(), iSource, nullptr, 0, nullptr, nullptr);
|
|
|
|
THROW_LAST_ERROR_IF(0 == iTarget);
|
|
|
|
|
|
|
|
size_t cchNeeded;
|
|
|
|
THROW_IF_FAILED(IntToSizeT(iTarget, &cchNeeded));
|
|
|
|
|
2019-09-10 01:01:28 +02:00
|
|
|
// Allocate ourselves some space
|
2019-09-03 20:27:43 +02:00
|
|
|
std::string out;
|
|
|
|
out.resize(cchNeeded);
|
2019-05-03 00:29:04 +02:00
|
|
|
|
|
|
|
// Attempt conversion for real.
|
2019-06-11 22:27:09 +02:00
|
|
|
// clang-format off
|
|
|
|
#pragma prefast(suppress: __WARNING_W2A_BEST_FIT, "WC_NO_BEST_FIT_CHARS doesn't work in many codepages. Retain old behavior.")
|
|
|
|
// clang-format on
|
2019-09-03 20:27:43 +02:00
|
|
|
THROW_LAST_ERROR_IF(0 == WideCharToMultiByte(codepage, 0, source.data(), iSource, out.data(), iTarget, nullptr, nullptr));
|
2019-05-03 00:29:04 +02:00
|
|
|
|
|
|
|
// Return as a string
|
2019-09-03 20:27:43 +02:00
|
|
|
return out;
|
2019-05-03 00:29:04 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
// Routine Description:
|
|
|
|
// - Takes a wide string, and determines how many bytes it would take to store it with the given Multibyte codepage.
|
|
|
|
// Arguments:
|
|
|
|
// - codepage - Windows Code Page representing the multibyte destination text
|
|
|
|
// - source - Array of Unicode characters of source text
|
|
|
|
// Return Value:
|
|
|
|
// - Length in characters of multibyte buffer that would be required to hold this text after conversion
|
|
|
|
// - NOTE: Throws suitable HRESULT errors from memory allocation, safe math, or WideCharToMultiByte failures.
|
2019-06-11 22:27:09 +02:00
|
|
|
[[nodiscard]] size_t GetALengthFromW(const UINT codepage, const std::wstring_view source)
|
2019-05-03 00:29:04 +02:00
|
|
|
{
|
|
|
|
// If there's no bytes, bail early.
|
|
|
|
if (source.empty())
|
|
|
|
{
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
int iSource; // convert to int because Wc2Mb requires it
|
|
|
|
THROW_IF_FAILED(SizeTToInt(source.size(), &iSource));
|
|
|
|
|
|
|
|
// Ask how many bytes this string consumes in the other codepage
|
2019-06-11 22:27:09 +02:00
|
|
|
// clang-format off
|
|
|
|
#pragma prefast(suppress: __WARNING_W2A_BEST_FIT, "WC_NO_BEST_FIT_CHARS doesn't work in many codepages. Retain old behavior.")
|
|
|
|
// clang-format on
|
2019-05-03 00:29:04 +02:00
|
|
|
int const iTarget = WideCharToMultiByte(codepage, 0, source.data(), iSource, nullptr, 0, nullptr, nullptr);
|
|
|
|
THROW_LAST_ERROR_IF(0 == iTarget);
|
|
|
|
|
|
|
|
// Convert types safely.
|
|
|
|
size_t cchTarget;
|
|
|
|
THROW_IF_FAILED(IntToSizeT(iTarget, &cchTarget));
|
|
|
|
|
|
|
|
return cchTarget;
|
|
|
|
}
|
|
|
|
|
|
|
|
std::deque<std::unique_ptr<KeyEvent>> CharToKeyEvents(const wchar_t wch,
|
|
|
|
const unsigned int codepage)
|
|
|
|
{
|
|
|
|
const short invalidKey = -1;
|
|
|
|
short keyState = VkKeyScanW(wch);
|
|
|
|
|
|
|
|
if (keyState == invalidKey)
|
|
|
|
{
|
|
|
|
// Determine DBCS character because these character does not know by VkKeyScan.
|
|
|
|
// GetStringTypeW(CT_CTYPE3) & C3_ALPHA can determine all linguistic characters. However, this is
|
|
|
|
// not include symbolic character for DBCS.
|
|
|
|
WORD CharType = 0;
|
|
|
|
GetStringTypeW(CT_CTYPE3, &wch, 1, &CharType);
|
|
|
|
|
Merged PR 3215853: Fix spacing/layout for block characters and many retroactively-recategorized emoji (and more!)
This encompasses a handful of problems with column counting.
The Terminal project didn't set a fallback column counter. Oops. I've fixed this to use the `DxEngine` as the fallback.
The `DxEngine` didn't implement its fallback method. Oops. I've fixed this to use the `CustomTextLayout` to figure out the advances based on the same font and fallback pattern as the real final layout, just without "rounding" it into cells yet.
- `CustomTextLayout` has been updated to move the advance-correction into a separate phase from glyph shaping. Previously, we corrected the advances to nice round cell counts during shaping, which is fine for drawing, but hard for column count analysis.
- Now that there are separate phases, an `Analyze` method was added to the `CustomTextLayout` which just performs the text analysis steps and the glyph shaping, but no advance correction to column boundaries nor actual drawing.
I've taken the caching code that I was working on to improve chafa, and I've brought it into this. Now that we're doing a lot of fallback and heavy lifting in terms of analysis via the layout, we should cache the results until the font changes.
I've adjusted how column counting is done overall. It's always been in these phases:
1. We used a quick-lookup of ranges of characters we knew to rapidly decide `Narrow`, `Wide` or `Invalid` (a.k.a. "I dunno")
2. If it was `Invalid`, we consulted a table based off of the Unicode standard that has either `Narrow`, `Wide`, or `Ambiguous` as a result.
3. If it's still `Ambiguous`, we consult a render engine fallback (usually GDI or now DX) to see how many columns it would take.
4. If we still don't know, then it's `Wide` to be safe.
- I've added an additional flow here. The quick-lookup can now return `Ambiguous` off the bat for some glyph characters in the x2000-x3000 range that used to just be simple shapes but have been retroactively recategorized as emoji and are frequently now using full width color glyphs.
- This new state causes the lookup to go immediately to the render engine if it is available instead of consulting the Unicode standard table first because the half/fullwidth table doesn't appear to have been updated for this nuance to reclass these characters as ambiguous, but we'd like to keep that table as a "generated from the spec" sort of table and keep our exceptions in the "quick lookup" function.
I have confirmed the following things "just work" now:
- The windows logo flag from the demo. (⚫⚪💖✅🌌😊)
- The dotted chart on the side of crossterm demo (•)
- The powerline characters that make arrows with the Consolas patched font (██)
- An accented é
- The warning and checkmark symbols appearing same size as the X. (✔⚠🔥)
Related work items: #21167256, #21237515, #21243859, #21274645, #21296827
2019-05-02 01:13:53 +02:00
|
|
|
if (WI_IsFlagSet(CharType, C3_ALPHA) || GetQuickCharWidth(wch) == CodepointWidth::Wide)
|
2019-05-03 00:29:04 +02:00
|
|
|
{
|
|
|
|
keyState = 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
std::deque<std::unique_ptr<KeyEvent>> convertedEvents;
|
|
|
|
if (keyState == invalidKey)
|
|
|
|
{
|
|
|
|
// if VkKeyScanW fails (char is not in kbd layout), we must
|
|
|
|
// emulate the key being input through the numpad
|
|
|
|
convertedEvents = SynthesizeNumpadEvents(wch, codepage);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
convertedEvents = SynthesizeKeyboardEvents(wch, keyState);
|
|
|
|
}
|
|
|
|
|
|
|
|
return convertedEvents;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Routine Description:
|
|
|
|
// - converts a wchar_t into a series of KeyEvents as if it was typed
|
|
|
|
// using the keyboard
|
|
|
|
// Arguments:
|
|
|
|
// - wch - the wchar_t to convert
|
|
|
|
// Return Value:
|
|
|
|
// - deque of KeyEvents that represent the wchar_t being typed
|
|
|
|
// Note:
|
|
|
|
// - will throw exception on error
|
|
|
|
std::deque<std::unique_ptr<KeyEvent>> SynthesizeKeyboardEvents(const wchar_t wch, const short keyState)
|
|
|
|
{
|
|
|
|
const byte modifierState = HIBYTE(keyState);
|
|
|
|
|
|
|
|
bool altGrSet = false;
|
|
|
|
bool shiftSet = false;
|
|
|
|
std::deque<std::unique_ptr<KeyEvent>> keyEvents;
|
|
|
|
|
|
|
|
// add modifier key event if necessary
|
|
|
|
if (WI_AreAllFlagsSet(modifierState, VkKeyScanModState::CtrlAndAltPressed))
|
|
|
|
{
|
|
|
|
altGrSet = true;
|
|
|
|
keyEvents.push_back(std::make_unique<KeyEvent>(true,
|
|
|
|
1ui16,
|
|
|
|
static_cast<WORD>(VK_MENU),
|
|
|
|
altScanCode,
|
|
|
|
UNICODE_NULL,
|
|
|
|
(ENHANCED_KEY | LEFT_CTRL_PRESSED | RIGHT_ALT_PRESSED)));
|
|
|
|
}
|
|
|
|
else if (WI_IsFlagSet(modifierState, VkKeyScanModState::ShiftPressed))
|
|
|
|
{
|
|
|
|
shiftSet = true;
|
|
|
|
keyEvents.push_back(std::make_unique<KeyEvent>(true,
|
|
|
|
1ui16,
|
|
|
|
static_cast<WORD>(VK_SHIFT),
|
|
|
|
leftShiftScanCode,
|
|
|
|
UNICODE_NULL,
|
|
|
|
SHIFT_PRESSED));
|
|
|
|
}
|
|
|
|
|
2019-10-17 23:58:41 +02:00
|
|
|
const auto vk = LOBYTE(keyState);
|
|
|
|
const WORD virtualScanCode = gsl::narrow<WORD>(MapVirtualKeyW(vk, MAPVK_VK_TO_VSC));
|
2019-05-03 00:29:04 +02:00
|
|
|
KeyEvent keyEvent{ true, 1, LOBYTE(keyState), virtualScanCode, wch, 0 };
|
|
|
|
|
|
|
|
// add modifier flags if necessary
|
|
|
|
if (WI_IsFlagSet(modifierState, VkKeyScanModState::ShiftPressed))
|
|
|
|
{
|
|
|
|
keyEvent.ActivateModifierKey(ModifierKeyState::Shift);
|
|
|
|
}
|
|
|
|
if (WI_IsFlagSet(modifierState, VkKeyScanModState::CtrlPressed))
|
|
|
|
{
|
|
|
|
keyEvent.ActivateModifierKey(ModifierKeyState::LeftCtrl);
|
|
|
|
}
|
|
|
|
if (WI_AreAllFlagsSet(modifierState, VkKeyScanModState::CtrlAndAltPressed))
|
|
|
|
{
|
|
|
|
keyEvent.ActivateModifierKey(ModifierKeyState::RightAlt);
|
|
|
|
}
|
|
|
|
|
|
|
|
// add key event down and up
|
|
|
|
keyEvents.push_back(std::make_unique<KeyEvent>(keyEvent));
|
|
|
|
keyEvent.SetKeyDown(false);
|
|
|
|
keyEvents.push_back(std::make_unique<KeyEvent>(keyEvent));
|
|
|
|
|
|
|
|
// add modifier key up event
|
|
|
|
if (altGrSet)
|
|
|
|
{
|
|
|
|
keyEvents.push_back(std::make_unique<KeyEvent>(false,
|
|
|
|
1ui16,
|
|
|
|
static_cast<WORD>(VK_MENU),
|
|
|
|
altScanCode,
|
|
|
|
UNICODE_NULL,
|
|
|
|
ENHANCED_KEY));
|
|
|
|
}
|
|
|
|
else if (shiftSet)
|
|
|
|
{
|
|
|
|
keyEvents.push_back(std::make_unique<KeyEvent>(false,
|
|
|
|
1ui16,
|
|
|
|
static_cast<WORD>(VK_SHIFT),
|
|
|
|
leftShiftScanCode,
|
|
|
|
UNICODE_NULL,
|
|
|
|
0));
|
|
|
|
}
|
|
|
|
|
|
|
|
return keyEvents;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Routine Description:
|
|
|
|
// - converts a wchar_t into a series of KeyEvents as if it was typed
|
|
|
|
// using Alt + numpad
|
|
|
|
// Arguments:
|
|
|
|
// - wch - the wchar_t to convert
|
|
|
|
// Return Value:
|
|
|
|
// - deque of KeyEvents that represent the wchar_t being typed using
|
|
|
|
// alt + numpad
|
|
|
|
// Note:
|
|
|
|
// - will throw exception on error
|
|
|
|
std::deque<std::unique_ptr<KeyEvent>> SynthesizeNumpadEvents(const wchar_t wch, const unsigned int codepage)
|
|
|
|
{
|
|
|
|
std::deque<std::unique_ptr<KeyEvent>> keyEvents;
|
|
|
|
|
|
|
|
//alt keydown
|
|
|
|
keyEvents.push_back(std::make_unique<KeyEvent>(true,
|
|
|
|
1ui16,
|
|
|
|
static_cast<WORD>(VK_MENU),
|
|
|
|
altScanCode,
|
|
|
|
UNICODE_NULL,
|
|
|
|
LEFT_ALT_PRESSED));
|
|
|
|
|
|
|
|
const int radix = 10;
|
|
|
|
std::wstring wstr{ wch };
|
|
|
|
const auto convertedChars = ConvertToA(codepage, wstr);
|
|
|
|
if (convertedChars.size() == 1)
|
|
|
|
{
|
|
|
|
// It is OK if the char is "signed -1", we want to interpret that as "unsigned 255" for the
|
|
|
|
// "integer to character" conversion below with ::to_string, thus the static_cast.
|
|
|
|
// Prime example is nonbreaking space U+00A0 will convert to OEM by codepage 437 to 0xFF which is -1 signed.
|
|
|
|
// But it is absolutely valid as 0xFF or 255 unsigned as the correct CP437 character.
|
|
|
|
// We need to treat it as unsigned because we're going to pretend it was a keypad entry
|
|
|
|
// and you don't enter negative numbers on the keypad.
|
2019-08-29 20:05:32 +02:00
|
|
|
unsigned char const uch = static_cast<unsigned char>(convertedChars.at(0));
|
2019-05-03 00:29:04 +02:00
|
|
|
|
|
|
|
// unsigned char values are in the range [0, 255] so we need to be
|
|
|
|
// able to store up to 4 chars from the conversion (including the end of string char)
|
|
|
|
auto charString = std::to_string(uch);
|
|
|
|
|
|
|
|
for (auto& ch : std::string_view(charString))
|
|
|
|
{
|
|
|
|
if (ch == 0)
|
|
|
|
{
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
const WORD virtualKey = ch - '0' + VK_NUMPAD0;
|
|
|
|
const WORD virtualScanCode = gsl::narrow<WORD>(MapVirtualKeyW(virtualKey, MAPVK_VK_TO_VSC));
|
|
|
|
|
|
|
|
keyEvents.push_back(std::make_unique<KeyEvent>(true,
|
|
|
|
1ui16,
|
|
|
|
virtualKey,
|
|
|
|
virtualScanCode,
|
|
|
|
UNICODE_NULL,
|
|
|
|
LEFT_ALT_PRESSED));
|
|
|
|
keyEvents.push_back(std::make_unique<KeyEvent>(false,
|
|
|
|
1ui16,
|
|
|
|
virtualKey,
|
|
|
|
virtualScanCode,
|
|
|
|
UNICODE_NULL,
|
|
|
|
LEFT_ALT_PRESSED));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// alt keyup
|
|
|
|
keyEvents.push_back(std::make_unique<KeyEvent>(false,
|
|
|
|
1ui16,
|
|
|
|
static_cast<WORD>(VK_MENU),
|
|
|
|
altScanCode,
|
|
|
|
wch,
|
|
|
|
0));
|
|
|
|
return keyEvents;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Routine Description:
|
|
|
|
// - naively determines the width of a UCS2 encoded wchar
|
|
|
|
// Arguments:
|
|
|
|
// - wch - the wchar_t to measure
|
|
|
|
// Return Value:
|
|
|
|
// - CodepointWidth indicating width of wch
|
|
|
|
// Notes:
|
|
|
|
// 04-08-92 ShunK Created.
|
|
|
|
// Jul-27-1992 KazuM Added Screen Information and Code Page Information.
|
|
|
|
// Jan-29-1992 V-Hirots Substruct Screen Information.
|
|
|
|
// Oct-06-1996 KazuM Not use RtlUnicodeToMultiByteSize and WideCharToMultiByte
|
|
|
|
// Because 950 (Chinese Traditional) only defined 13500 chars,
|
|
|
|
// and unicode defined almost 18000 chars.
|
|
|
|
// So there are almost 4000 chars can not be mapped to big5 code.
|
|
|
|
// Apr-30-2015 MiNiksa Corrected unknown character code assumption. Max Width in Text Metric
|
|
|
|
// is not reliable for calculating half/full width. Must use current
|
|
|
|
// display font data (cached) instead.
|
|
|
|
// May-23-2017 migrie Forced Box-Drawing Characters (x2500-x257F) to narrow.
|
2020-02-10 21:40:01 +01:00
|
|
|
// Jan-16-2018 migrie Separated core lookup from asking the renderer the width
|
Merged PR 3215853: Fix spacing/layout for block characters and many retroactively-recategorized emoji (and more!)
This encompasses a handful of problems with column counting.
The Terminal project didn't set a fallback column counter. Oops. I've fixed this to use the `DxEngine` as the fallback.
The `DxEngine` didn't implement its fallback method. Oops. I've fixed this to use the `CustomTextLayout` to figure out the advances based on the same font and fallback pattern as the real final layout, just without "rounding" it into cells yet.
- `CustomTextLayout` has been updated to move the advance-correction into a separate phase from glyph shaping. Previously, we corrected the advances to nice round cell counts during shaping, which is fine for drawing, but hard for column count analysis.
- Now that there are separate phases, an `Analyze` method was added to the `CustomTextLayout` which just performs the text analysis steps and the glyph shaping, but no advance correction to column boundaries nor actual drawing.
I've taken the caching code that I was working on to improve chafa, and I've brought it into this. Now that we're doing a lot of fallback and heavy lifting in terms of analysis via the layout, we should cache the results until the font changes.
I've adjusted how column counting is done overall. It's always been in these phases:
1. We used a quick-lookup of ranges of characters we knew to rapidly decide `Narrow`, `Wide` or `Invalid` (a.k.a. "I dunno")
2. If it was `Invalid`, we consulted a table based off of the Unicode standard that has either `Narrow`, `Wide`, or `Ambiguous` as a result.
3. If it's still `Ambiguous`, we consult a render engine fallback (usually GDI or now DX) to see how many columns it would take.
4. If we still don't know, then it's `Wide` to be safe.
- I've added an additional flow here. The quick-lookup can now return `Ambiguous` off the bat for some glyph characters in the x2000-x3000 range that used to just be simple shapes but have been retroactively recategorized as emoji and are frequently now using full width color glyphs.
- This new state causes the lookup to go immediately to the render engine if it is available instead of consulting the Unicode standard table first because the half/fullwidth table doesn't appear to have been updated for this nuance to reclass these characters as ambiguous, but we'd like to keep that table as a "generated from the spec" sort of table and keep our exceptions in the "quick lookup" function.
I have confirmed the following things "just work" now:
- The windows logo flag from the demo. (⚫⚪💖✅🌌😊)
- The dotted chart on the side of crossterm demo (•)
- The powerline characters that make arrows with the Consolas patched font (██)
- An accented é
- The warning and checkmark symbols appearing same size as the X. (✔⚠🔥)
Related work items: #21167256, #21237515, #21243859, #21274645, #21296827
2019-05-02 01:13:53 +02:00
|
|
|
// May-01-2019 MiNiksa Forced lookup-via-renderer for retroactively recategorized emoji
|
|
|
|
// that used to be narrow but now might be wide. (approx x2194-x2b55, not inclusive)
|
|
|
|
// Also forced block characters segment (x2580-x259F) to narrow
|
2020-10-27 18:36:28 +01:00
|
|
|
// Oct-25-2020 DuHowett Replaced the entire table with a set of overrides that get built into
|
|
|
|
// CodepointWidthDetector (unicode_width_overrides.xml)
|
Merged PR 3215853: Fix spacing/layout for block characters and many retroactively-recategorized emoji (and more!)
This encompasses a handful of problems with column counting.
The Terminal project didn't set a fallback column counter. Oops. I've fixed this to use the `DxEngine` as the fallback.
The `DxEngine` didn't implement its fallback method. Oops. I've fixed this to use the `CustomTextLayout` to figure out the advances based on the same font and fallback pattern as the real final layout, just without "rounding" it into cells yet.
- `CustomTextLayout` has been updated to move the advance-correction into a separate phase from glyph shaping. Previously, we corrected the advances to nice round cell counts during shaping, which is fine for drawing, but hard for column count analysis.
- Now that there are separate phases, an `Analyze` method was added to the `CustomTextLayout` which just performs the text analysis steps and the glyph shaping, but no advance correction to column boundaries nor actual drawing.
I've taken the caching code that I was working on to improve chafa, and I've brought it into this. Now that we're doing a lot of fallback and heavy lifting in terms of analysis via the layout, we should cache the results until the font changes.
I've adjusted how column counting is done overall. It's always been in these phases:
1. We used a quick-lookup of ranges of characters we knew to rapidly decide `Narrow`, `Wide` or `Invalid` (a.k.a. "I dunno")
2. If it was `Invalid`, we consulted a table based off of the Unicode standard that has either `Narrow`, `Wide`, or `Ambiguous` as a result.
3. If it's still `Ambiguous`, we consult a render engine fallback (usually GDI or now DX) to see how many columns it would take.
4. If we still don't know, then it's `Wide` to be safe.
- I've added an additional flow here. The quick-lookup can now return `Ambiguous` off the bat for some glyph characters in the x2000-x3000 range that used to just be simple shapes but have been retroactively recategorized as emoji and are frequently now using full width color glyphs.
- This new state causes the lookup to go immediately to the render engine if it is available instead of consulting the Unicode standard table first because the half/fullwidth table doesn't appear to have been updated for this nuance to reclass these characters as ambiguous, but we'd like to keep that table as a "generated from the spec" sort of table and keep our exceptions in the "quick lookup" function.
I have confirmed the following things "just work" now:
- The windows logo flag from the demo. (⚫⚪💖✅🌌😊)
- The dotted chart on the side of crossterm demo (•)
- The powerline characters that make arrows with the Consolas patched font (██)
- An accented é
- The warning and checkmark symbols appearing same size as the X. (✔⚠🔥)
Related work items: #21167256, #21237515, #21243859, #21274645, #21296827
2019-05-02 01:13:53 +02:00
|
|
|
CodepointWidth GetQuickCharWidth(const wchar_t wch) noexcept
|
2019-05-03 00:29:04 +02:00
|
|
|
{
|
|
|
|
if (0x20 <= wch && wch <= 0x7e)
|
|
|
|
{
|
|
|
|
/* ASCII */
|
|
|
|
return CodepointWidth::Narrow;
|
|
|
|
}
|
2020-10-27 18:36:28 +01:00
|
|
|
return CodepointWidth::Invalid;
|
2019-05-03 00:29:04 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
wchar_t Utf16ToUcs2(const std::wstring_view charData)
|
|
|
|
{
|
|
|
|
THROW_HR_IF(E_INVALIDARG, charData.empty());
|
|
|
|
if (charData.size() > 1)
|
|
|
|
{
|
|
|
|
return UNICODE_REPLACEMENT;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
return charData.front();
|
|
|
|
}
|
|
|
|
}
|