terminal/src/types/CodepointWidthDetector.cpp
Leonard Hecker ebbc14c113 wip
2021-10-11 17:10:31 +02:00

464 lines
18 KiB
C++

// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.
#include "precomp.h"
#include "inc/CodepointWidthDetector.hpp"
#pragma warning(disable : 4463)
// One entry of tableBasic: a range of code points that each fit into a single
// UTF-16 code unit, stored in a packed form (16 bit upper bound + 15 bit width + 1 bit flag).
struct UnicodeRangeBasic
{
// The highest code point covered by this range.
// This is the field operator< compares search terms against.
uint16_t upperBound;
// lowerBound = it->upperBound - it->boundWidth
// A boundWidth of 0 therefore describes a range consisting of a single code point.
// Code points in this table are below 2^16, however no range in our table spans 2^15 or more.
// This is why we store the "width" between the lower and upper boundary instead
// of the lower boundary itself: it fits into 15 bits and leaves 1 bit for the flag below.
uint16_t boundWidth : 15;
// Set to 1 for ranges of ambiguous width.
// NOTE(review): presumably entries with isAmbiguous == 0 are treated as wide by
// _getCodepointWidth, which is not part of this file - confirm there.
uint16_t isAmbiguous : 1;
};
// One entry of tableSurrogates: the 32 bit counterpart of UnicodeRangeBasic, used for
// code points that GetWidth assembles from two UTF-16 code units.
struct UnicodeRangeSurrogate
{
// The highest code point covered by this range.
// This is the field operator< compares search terms against.
uint32_t upperBound;
// This field is identical to the one in UnicodeRangeBasic:
// the distance between the lower and the upper boundary of the range.
// Technically we could store the absolute code point value here,
// but it doesn't make any practical performance difference.
uint32_t boundWidth : 31;
// Set to 1 for ranges of ambiguous width.
// NOTE(review): presumably entries with isAmbiguous == 0 are treated as wide by
// _getCodepointWidth, which is not part of this file - confirm there.
uint32_t isAmbiguous : 1;
};
// Compares a tableBasic entry against a search term.
// An entry sorts before the search term if the entire range lies below it,
// which is the case exactly when the search term exceeds the range's upper bound.
constexpr bool operator<(const UnicodeRangeBasic& range, const uint16_t searchTerm) noexcept
{
    return searchTerm > range.upperBound;
}
// Compares a tableSurrogates entry against a search term.
// An entry sorts before the search term if the entire range lies below it,
// which is the case exactly when the search term exceeds the range's upper bound.
constexpr bool operator<(const UnicodeRangeSurrogate& range, const uint32_t searchTerm) noexcept
{
    return searchTerm > range.upperBound;
}
// The following two tables are generated by:
// * Downloading ucd.nounihan.flat.xml from https://www.unicode.org/Public/UCD/latest/ucdxml
// * Running .\tools\Generate-CodepointWidthsFromUCD.ps1 -Pack -OverridePath .\src\types\unicode_width_overrides.xml ucd.nounihan.flat.xml
// Generated by Generate-CodepointWidthsFromUCD.ps1 -Pack:$True -Full:$False -NoOverrides:$False
// on 10.10.2021 22:40:05 (UTC) from Unicode 14.0.0.
// 321259 (0x4E6EB) codepoints covered.
// 240 (0xF0) codepoints overridden.
// Override path: .\src\types\unicode_width_overrides.xml
// Width ranges for code points that fit into a single UTF-16 code unit.
// GetWidth hands this table to _getCodepointWidth for glyphs of length 1.
// * Each entry is { upperBound, boundWidth, isAmbiguous }, see UnicodeRangeBasic.
// * Entries are sorted by ascending upperBound. Keep it that way when regenerating.
// * Code points below 0x80 are answered by GetWidth directly and don't appear in here.
static constexpr std::array<UnicodeRangeBasic, 226> tableBasic{ {
UnicodeRangeBasic{ 0xa1, 0x0, 1 },
UnicodeRangeBasic{ 0xa4, 0x0, 1 },
UnicodeRangeBasic{ 0xa8, 0x1, 1 },
UnicodeRangeBasic{ 0xaa, 0x0, 1 },
UnicodeRangeBasic{ 0xae, 0x1, 1 },
UnicodeRangeBasic{ 0xb4, 0x4, 1 },
UnicodeRangeBasic{ 0xba, 0x4, 1 },
UnicodeRangeBasic{ 0xbf, 0x3, 1 },
UnicodeRangeBasic{ 0xc6, 0x0, 1 },
UnicodeRangeBasic{ 0xd0, 0x0, 1 },
UnicodeRangeBasic{ 0xd8, 0x1, 1 },
UnicodeRangeBasic{ 0xe1, 0x3, 1 },
UnicodeRangeBasic{ 0xe6, 0x0, 1 },
UnicodeRangeBasic{ 0xea, 0x2, 1 },
UnicodeRangeBasic{ 0xed, 0x1, 1 },
UnicodeRangeBasic{ 0xf0, 0x0, 1 },
UnicodeRangeBasic{ 0xf3, 0x1, 1 },
UnicodeRangeBasic{ 0xfa, 0x3, 1 },
UnicodeRangeBasic{ 0xfc, 0x0, 1 },
UnicodeRangeBasic{ 0xfe, 0x0, 1 },
UnicodeRangeBasic{ 0x101, 0x0, 1 },
UnicodeRangeBasic{ 0x111, 0x0, 1 },
UnicodeRangeBasic{ 0x113, 0x0, 1 },
UnicodeRangeBasic{ 0x11b, 0x0, 1 },
UnicodeRangeBasic{ 0x127, 0x1, 1 },
UnicodeRangeBasic{ 0x12b, 0x0, 1 },
UnicodeRangeBasic{ 0x133, 0x2, 1 },
UnicodeRangeBasic{ 0x138, 0x0, 1 },
UnicodeRangeBasic{ 0x142, 0x3, 1 },
UnicodeRangeBasic{ 0x144, 0x0, 1 },
UnicodeRangeBasic{ 0x14b, 0x3, 1 },
UnicodeRangeBasic{ 0x14d, 0x0, 1 },
UnicodeRangeBasic{ 0x153, 0x1, 1 },
UnicodeRangeBasic{ 0x167, 0x1, 1 },
UnicodeRangeBasic{ 0x16b, 0x0, 1 },
UnicodeRangeBasic{ 0x1ce, 0x0, 1 },
UnicodeRangeBasic{ 0x1d0, 0x0, 1 },
UnicodeRangeBasic{ 0x1d2, 0x0, 1 },
UnicodeRangeBasic{ 0x1d4, 0x0, 1 },
UnicodeRangeBasic{ 0x1d6, 0x0, 1 },
UnicodeRangeBasic{ 0x1d8, 0x0, 1 },
UnicodeRangeBasic{ 0x1da, 0x0, 1 },
UnicodeRangeBasic{ 0x1dc, 0x0, 1 },
UnicodeRangeBasic{ 0x251, 0x0, 1 },
UnicodeRangeBasic{ 0x261, 0x0, 1 },
UnicodeRangeBasic{ 0x2c4, 0x0, 1 },
UnicodeRangeBasic{ 0x2c7, 0x0, 1 },
UnicodeRangeBasic{ 0x2cb, 0x2, 1 },
UnicodeRangeBasic{ 0x2cd, 0x0, 1 },
UnicodeRangeBasic{ 0x2d0, 0x0, 1 },
UnicodeRangeBasic{ 0x2db, 0x3, 1 },
UnicodeRangeBasic{ 0x2dd, 0x0, 1 },
UnicodeRangeBasic{ 0x2df, 0x0, 1 },
UnicodeRangeBasic{ 0x36f, 0x6f, 1 },
UnicodeRangeBasic{ 0x3a1, 0x10, 1 },
UnicodeRangeBasic{ 0x3a9, 0x6, 1 },
UnicodeRangeBasic{ 0x3c1, 0x10, 1 },
UnicodeRangeBasic{ 0x3c9, 0x6, 1 },
UnicodeRangeBasic{ 0x401, 0x0, 1 },
UnicodeRangeBasic{ 0x44f, 0x3f, 1 },
UnicodeRangeBasic{ 0x451, 0x0, 1 },
UnicodeRangeBasic{ 0x115f, 0x5f, 0 },
UnicodeRangeBasic{ 0x2010, 0x0, 1 },
UnicodeRangeBasic{ 0x2016, 0x3, 1 },
UnicodeRangeBasic{ 0x2019, 0x1, 1 },
UnicodeRangeBasic{ 0x201d, 0x1, 1 },
UnicodeRangeBasic{ 0x2022, 0x2, 1 },
UnicodeRangeBasic{ 0x2027, 0x3, 1 },
UnicodeRangeBasic{ 0x2030, 0x0, 1 },
UnicodeRangeBasic{ 0x2033, 0x1, 1 },
UnicodeRangeBasic{ 0x2035, 0x0, 1 },
UnicodeRangeBasic{ 0x203b, 0x0, 1 },
UnicodeRangeBasic{ 0x203e, 0x0, 1 },
UnicodeRangeBasic{ 0x2074, 0x0, 1 },
UnicodeRangeBasic{ 0x207f, 0x0, 1 },
UnicodeRangeBasic{ 0x2084, 0x3, 1 },
UnicodeRangeBasic{ 0x20ac, 0x0, 1 },
UnicodeRangeBasic{ 0x2103, 0x0, 1 },
UnicodeRangeBasic{ 0x2105, 0x0, 1 },
UnicodeRangeBasic{ 0x2109, 0x0, 1 },
UnicodeRangeBasic{ 0x2113, 0x0, 1 },
UnicodeRangeBasic{ 0x2116, 0x0, 1 },
UnicodeRangeBasic{ 0x2122, 0x1, 1 },
UnicodeRangeBasic{ 0x2126, 0x0, 1 },
UnicodeRangeBasic{ 0x212b, 0x0, 1 },
UnicodeRangeBasic{ 0x2154, 0x1, 1 },
UnicodeRangeBasic{ 0x215e, 0x3, 1 },
UnicodeRangeBasic{ 0x216b, 0xb, 1 },
UnicodeRangeBasic{ 0x2179, 0x9, 1 },
UnicodeRangeBasic{ 0x2189, 0x0, 1 },
UnicodeRangeBasic{ 0x2199, 0x9, 1 },
UnicodeRangeBasic{ 0x21b9, 0x1, 1 },
UnicodeRangeBasic{ 0x21d2, 0x0, 1 },
UnicodeRangeBasic{ 0x21d4, 0x0, 1 },
UnicodeRangeBasic{ 0x21e7, 0x0, 1 },
UnicodeRangeBasic{ 0x2200, 0x0, 1 },
UnicodeRangeBasic{ 0x2203, 0x1, 1 },
UnicodeRangeBasic{ 0x2208, 0x1, 1 },
UnicodeRangeBasic{ 0x220b, 0x0, 1 },
UnicodeRangeBasic{ 0x220f, 0x0, 1 },
UnicodeRangeBasic{ 0x2211, 0x0, 1 },
UnicodeRangeBasic{ 0x2215, 0x0, 1 },
UnicodeRangeBasic{ 0x221a, 0x0, 1 },
UnicodeRangeBasic{ 0x2220, 0x3, 1 },
UnicodeRangeBasic{ 0x2223, 0x0, 1 },
UnicodeRangeBasic{ 0x2225, 0x0, 1 },
UnicodeRangeBasic{ 0x222c, 0x5, 1 },
UnicodeRangeBasic{ 0x222e, 0x0, 1 },
UnicodeRangeBasic{ 0x2237, 0x3, 1 },
UnicodeRangeBasic{ 0x223d, 0x1, 1 },
UnicodeRangeBasic{ 0x2248, 0x0, 1 },
UnicodeRangeBasic{ 0x224c, 0x0, 1 },
UnicodeRangeBasic{ 0x2252, 0x0, 1 },
UnicodeRangeBasic{ 0x2261, 0x1, 1 },
UnicodeRangeBasic{ 0x2267, 0x3, 1 },
UnicodeRangeBasic{ 0x226b, 0x1, 1 },
UnicodeRangeBasic{ 0x226f, 0x1, 1 },
UnicodeRangeBasic{ 0x2283, 0x1, 1 },
UnicodeRangeBasic{ 0x2287, 0x1, 1 },
UnicodeRangeBasic{ 0x2295, 0x0, 1 },
UnicodeRangeBasic{ 0x2299, 0x0, 1 },
UnicodeRangeBasic{ 0x22a5, 0x0, 1 },
UnicodeRangeBasic{ 0x22bf, 0x0, 1 },
UnicodeRangeBasic{ 0x2312, 0x0, 1 },
UnicodeRangeBasic{ 0x231b, 0x1, 0 },
UnicodeRangeBasic{ 0x232a, 0x1, 0 },
UnicodeRangeBasic{ 0x23ec, 0x3, 0 },
UnicodeRangeBasic{ 0x23f0, 0x0, 0 },
UnicodeRangeBasic{ 0x23f3, 0x0, 0 },
UnicodeRangeBasic{ 0x24e9, 0x89, 1 },
UnicodeRangeBasic{ 0x24ff, 0x14, 1 },
UnicodeRangeBasic{ 0x25a1, 0x1, 1 },
UnicodeRangeBasic{ 0x25a9, 0x6, 1 },
UnicodeRangeBasic{ 0x25b3, 0x1, 1 },
UnicodeRangeBasic{ 0x25b7, 0x1, 1 },
UnicodeRangeBasic{ 0x25bd, 0x1, 1 },
UnicodeRangeBasic{ 0x25c1, 0x1, 1 },
UnicodeRangeBasic{ 0x25c8, 0x2, 1 },
UnicodeRangeBasic{ 0x25cb, 0x0, 1 },
UnicodeRangeBasic{ 0x25d1, 0x3, 1 },
UnicodeRangeBasic{ 0x25e5, 0x3, 1 },
UnicodeRangeBasic{ 0x25ef, 0x0, 1 },
UnicodeRangeBasic{ 0x25fe, 0x1, 0 },
UnicodeRangeBasic{ 0x2606, 0x1, 1 },
UnicodeRangeBasic{ 0x2609, 0x0, 1 },
UnicodeRangeBasic{ 0x260f, 0x1, 1 },
UnicodeRangeBasic{ 0x2615, 0x1, 0 },
UnicodeRangeBasic{ 0x261c, 0x0, 1 },
UnicodeRangeBasic{ 0x261e, 0x0, 1 },
UnicodeRangeBasic{ 0x2640, 0x0, 1 },
UnicodeRangeBasic{ 0x2642, 0x0, 1 },
UnicodeRangeBasic{ 0x2653, 0xb, 0 },
UnicodeRangeBasic{ 0x2661, 0x1, 1 },
UnicodeRangeBasic{ 0x2665, 0x2, 1 },
UnicodeRangeBasic{ 0x266a, 0x3, 1 },
UnicodeRangeBasic{ 0x266d, 0x1, 1 },
UnicodeRangeBasic{ 0x266f, 0x0, 1 },
UnicodeRangeBasic{ 0x267f, 0x0, 0 },
UnicodeRangeBasic{ 0x2693, 0x0, 0 },
UnicodeRangeBasic{ 0x269f, 0x1, 1 },
UnicodeRangeBasic{ 0x26a1, 0x0, 0 },
UnicodeRangeBasic{ 0x26ab, 0x1, 0 },
UnicodeRangeBasic{ 0x26be, 0x1, 0 },
UnicodeRangeBasic{ 0x26bf, 0x0, 1 },
UnicodeRangeBasic{ 0x26c5, 0x1, 0 },
UnicodeRangeBasic{ 0x26cd, 0x7, 1 },
UnicodeRangeBasic{ 0x26ce, 0x0, 0 },
UnicodeRangeBasic{ 0x26d3, 0x4, 1 },
UnicodeRangeBasic{ 0x26d4, 0x0, 0 },
UnicodeRangeBasic{ 0x26e1, 0xc, 1 },
UnicodeRangeBasic{ 0x26e3, 0x0, 1 },
UnicodeRangeBasic{ 0x26e9, 0x1, 1 },
UnicodeRangeBasic{ 0x26ea, 0x0, 0 },
UnicodeRangeBasic{ 0x26f1, 0x6, 1 },
UnicodeRangeBasic{ 0x26f3, 0x1, 0 },
UnicodeRangeBasic{ 0x26f4, 0x0, 1 },
UnicodeRangeBasic{ 0x26f5, 0x0, 0 },
UnicodeRangeBasic{ 0x26f9, 0x3, 1 },
UnicodeRangeBasic{ 0x26fa, 0x0, 0 },
UnicodeRangeBasic{ 0x26fc, 0x1, 1 },
UnicodeRangeBasic{ 0x26fd, 0x0, 0 },
UnicodeRangeBasic{ 0x26ff, 0x1, 1 },
UnicodeRangeBasic{ 0x2705, 0x0, 0 },
UnicodeRangeBasic{ 0x270b, 0x1, 0 },
UnicodeRangeBasic{ 0x2728, 0x0, 0 },
UnicodeRangeBasic{ 0x273d, 0x0, 1 },
UnicodeRangeBasic{ 0x274c, 0x0, 0 },
UnicodeRangeBasic{ 0x274e, 0x0, 0 },
UnicodeRangeBasic{ 0x2755, 0x2, 0 },
UnicodeRangeBasic{ 0x2757, 0x0, 0 },
UnicodeRangeBasic{ 0x277f, 0x9, 1 },
UnicodeRangeBasic{ 0x2797, 0x2, 0 },
UnicodeRangeBasic{ 0x27b0, 0x0, 0 },
UnicodeRangeBasic{ 0x27bf, 0x0, 0 },
UnicodeRangeBasic{ 0x2b1c, 0x1, 0 },
UnicodeRangeBasic{ 0x2b50, 0x0, 0 },
UnicodeRangeBasic{ 0x2b55, 0x0, 0 },
UnicodeRangeBasic{ 0x2b59, 0x3, 1 },
UnicodeRangeBasic{ 0x2e99, 0x19, 0 },
UnicodeRangeBasic{ 0x2ef3, 0x58, 0 },
UnicodeRangeBasic{ 0x2fd5, 0xd5, 0 },
UnicodeRangeBasic{ 0x2ffb, 0xb, 0 },
UnicodeRangeBasic{ 0x303e, 0x3e, 0 },
UnicodeRangeBasic{ 0x3096, 0x55, 0 },
UnicodeRangeBasic{ 0x30ff, 0x66, 0 },
UnicodeRangeBasic{ 0x312f, 0x2a, 0 },
UnicodeRangeBasic{ 0x318e, 0x5d, 0 },
UnicodeRangeBasic{ 0x31e3, 0x53, 0 },
UnicodeRangeBasic{ 0x321e, 0x2e, 0 },
UnicodeRangeBasic{ 0x3247, 0x27, 0 },
UnicodeRangeBasic{ 0x324f, 0x7, 1 },
UnicodeRangeBasic{ 0x4dbf, 0x1b6f, 0 },
UnicodeRangeBasic{ 0xa48c, 0x568c, 0 },
UnicodeRangeBasic{ 0xa4c6, 0x36, 0 },
UnicodeRangeBasic{ 0xa97c, 0x1c, 0 },
UnicodeRangeBasic{ 0xd7a3, 0x2ba3, 0 },
UnicodeRangeBasic{ 0xf8ff, 0x18ff, 1 },
UnicodeRangeBasic{ 0xfaff, 0x1ff, 0 },
UnicodeRangeBasic{ 0xfe0f, 0xf, 1 },
UnicodeRangeBasic{ 0xfe19, 0x9, 0 },
UnicodeRangeBasic{ 0xfe52, 0x22, 0 },
UnicodeRangeBasic{ 0xfe66, 0x12, 0 },
UnicodeRangeBasic{ 0xfe6b, 0x3, 0 },
UnicodeRangeBasic{ 0xff60, 0x5f, 0 },
UnicodeRangeBasic{ 0xffe6, 0x6, 0 },
UnicodeRangeBasic{ 0xfffd, 0x0, 1 },
} };
// Width ranges for code points at or above 0x10000.
// GetWidth hands this table to _getCodepointWidth for glyphs of length 2,
// after combining the two UTF-16 code units into a single 32 bit code point.
// * Each entry is { upperBound, boundWidth, isAmbiguous }, see UnicodeRangeSurrogate.
// * Entries are sorted by ascending upperBound. Keep it that way when regenerating.
static constexpr std::array<UnicodeRangeSurrogate, 71> tableSurrogates{ {
UnicodeRangeSurrogate{ 0x16fe4, 0x4, 0 },
UnicodeRangeSurrogate{ 0x16ff1, 0x1, 0 },
UnicodeRangeSurrogate{ 0x187f7, 0x17f7, 0 },
UnicodeRangeSurrogate{ 0x18cd5, 0x4d5, 0 },
UnicodeRangeSurrogate{ 0x18d08, 0x8, 0 },
UnicodeRangeSurrogate{ 0x1aff3, 0x3, 0 },
UnicodeRangeSurrogate{ 0x1affb, 0x6, 0 },
UnicodeRangeSurrogate{ 0x1affe, 0x1, 0 },
UnicodeRangeSurrogate{ 0x1b122, 0x122, 0 },
UnicodeRangeSurrogate{ 0x1b152, 0x2, 0 },
UnicodeRangeSurrogate{ 0x1b167, 0x3, 0 },
UnicodeRangeSurrogate{ 0x1b2fb, 0x18b, 0 },
UnicodeRangeSurrogate{ 0x1f004, 0x0, 0 },
UnicodeRangeSurrogate{ 0x1f0cf, 0x0, 0 },
UnicodeRangeSurrogate{ 0x1f10a, 0xa, 1 },
UnicodeRangeSurrogate{ 0x1f12d, 0x1d, 1 },
UnicodeRangeSurrogate{ 0x1f169, 0x39, 1 },
UnicodeRangeSurrogate{ 0x1f18d, 0x1d, 1 },
UnicodeRangeSurrogate{ 0x1f18e, 0x0, 0 },
UnicodeRangeSurrogate{ 0x1f190, 0x1, 1 },
UnicodeRangeSurrogate{ 0x1f19a, 0x9, 0 },
UnicodeRangeSurrogate{ 0x1f1ac, 0x11, 1 },
UnicodeRangeSurrogate{ 0x1f202, 0x1c, 0 },
UnicodeRangeSurrogate{ 0x1f23b, 0x2b, 0 },
UnicodeRangeSurrogate{ 0x1f248, 0x8, 0 },
UnicodeRangeSurrogate{ 0x1f251, 0x1, 0 },
UnicodeRangeSurrogate{ 0x1f265, 0x5, 0 },
UnicodeRangeSurrogate{ 0x1f320, 0x20, 0 },
UnicodeRangeSurrogate{ 0x1f335, 0x8, 0 },
UnicodeRangeSurrogate{ 0x1f37c, 0x45, 0 },
UnicodeRangeSurrogate{ 0x1f393, 0x15, 0 },
UnicodeRangeSurrogate{ 0x1f3ca, 0x2a, 0 },
UnicodeRangeSurrogate{ 0x1f3d3, 0x4, 0 },
UnicodeRangeSurrogate{ 0x1f3f0, 0x10, 0 },
UnicodeRangeSurrogate{ 0x1f3f4, 0x0, 0 },
UnicodeRangeSurrogate{ 0x1f43e, 0x46, 0 },
UnicodeRangeSurrogate{ 0x1f440, 0x0, 0 },
UnicodeRangeSurrogate{ 0x1f4fc, 0xba, 0 },
UnicodeRangeSurrogate{ 0x1f53d, 0x3e, 0 },
UnicodeRangeSurrogate{ 0x1f54e, 0x3, 0 },
UnicodeRangeSurrogate{ 0x1f567, 0x17, 0 },
UnicodeRangeSurrogate{ 0x1f57a, 0x0, 0 },
UnicodeRangeSurrogate{ 0x1f596, 0x1, 0 },
UnicodeRangeSurrogate{ 0x1f5a4, 0x0, 0 },
UnicodeRangeSurrogate{ 0x1f64f, 0x54, 0 },
UnicodeRangeSurrogate{ 0x1f6c5, 0x45, 0 },
UnicodeRangeSurrogate{ 0x1f6cc, 0x0, 0 },
UnicodeRangeSurrogate{ 0x1f6d2, 0x2, 0 },
UnicodeRangeSurrogate{ 0x1f6d7, 0x2, 0 },
UnicodeRangeSurrogate{ 0x1f6df, 0x2, 0 },
UnicodeRangeSurrogate{ 0x1f6ec, 0x1, 0 },
UnicodeRangeSurrogate{ 0x1f6fc, 0x8, 0 },
UnicodeRangeSurrogate{ 0x1f7eb, 0xb, 0 },
UnicodeRangeSurrogate{ 0x1f7f0, 0x0, 0 },
UnicodeRangeSurrogate{ 0x1f93a, 0x2e, 0 },
UnicodeRangeSurrogate{ 0x1f945, 0x9, 0 },
UnicodeRangeSurrogate{ 0x1f9ff, 0xb8, 0 },
UnicodeRangeSurrogate{ 0x1fa74, 0x4, 0 },
UnicodeRangeSurrogate{ 0x1fa7c, 0x4, 0 },
UnicodeRangeSurrogate{ 0x1fa86, 0x6, 0 },
UnicodeRangeSurrogate{ 0x1faac, 0x1c, 0 },
UnicodeRangeSurrogate{ 0x1faba, 0xa, 0 },
UnicodeRangeSurrogate{ 0x1fac5, 0x5, 0 },
UnicodeRangeSurrogate{ 0x1fad9, 0x9, 0 },
UnicodeRangeSurrogate{ 0x1fae7, 0x7, 0 },
UnicodeRangeSurrogate{ 0x1faf6, 0x6, 0 },
UnicodeRangeSurrogate{ 0x2fffd, 0xfffd, 0 },
UnicodeRangeSurrogate{ 0x3fffd, 0xfffd, 0 },
UnicodeRangeSurrogate{ 0xe01ef, 0xef, 1 },
UnicodeRangeSurrogate{ 0xffffd, 0xfffd, 1 },
UnicodeRangeSurrogate{ 0x10fffd, 0xfffd, 1 },
} };
// Routine Description:
// - Determines the width type of a glyph. ASCII is answered on the spot, all other
//   glyphs of one or two UTF-16 code units are resolved via _getCodepointWidth
//   using the lookup table that matches the glyph's length.
// Arguments:
// - glyph - the utf16 encoded codepoint to search for
// Return Value:
// - Ambiguous for an empty glyph,
//   Narrow for a single code unit below 0x80,
//   Wide for anything longer than two code units,
//   and the result of _getCodepointWidth otherwise.
CodepointWidth CodepointWidthDetector::GetWidth(const std::wstring_view& glyph) const noexcept
{
    const auto length = glyph.size();

    if (length == 0)
    {
        return CodepointWidth::Ambiguous;
    }

    if (length == 1)
    {
        const uint16_t codeUnit = til::at(glyph, 0);

        // ASCII code points below 0x80 are fairly common and aren't part of
        // tableBasic in the first place, so there's no point in searching for them.
        if (codeUnit < 0x80)
        {
            return CodepointWidth::Narrow;
        }

        return _getCodepointWidth(tableBasic, codeUnit, glyph);
    }

    if (length == 2)
    {
        // Surrogate pair decoding: The lower 10 bits of the first code unit form the upper
        // half and the lower 10 bits of the second code unit form the lower half of a 20 bit
        // value, which is then offset by 0x10000.
        // The two code units aren't checked for actually being a high/low surrogate.
        uint32_t scalar = 0x10000;
        scalar += ((til::at(glyph, 0) & 0x3FF) << 10) | (til::at(glyph, 1) & 0x3FF);
        return _getCodepointWidth(tableSurrogates, scalar, glyph);
    }

    // Anything made up of more than two code units is reported as wide.
    return CodepointWidth::Wide;
}
// Routine Description:
// - checks if wch is wide. will attempt to fallback as much possible until an answer is determined
// Arguments:
// - wch - the wchar to check width of
// Return Value:
// - true if wch is wide
bool CodepointWidthDetector::IsWide(const wchar_t wch) const noexcept
{
return wch < 0x80 ? false : IsWide({ &wch, 1 });
}
// Routine Description:
// - Checks whether GetWidth classifies the given glyph as wide.
// Arguments:
// - glyph - the utf16 encoded codepoint to check width of
// Return Value:
// - true if GetWidth returns CodepointWidth::Wide, false for any other width type
bool CodepointWidthDetector::IsWide(const std::wstring_view& glyph) const noexcept
{
    const auto width = GetWidth(glyph);
    return width == CodepointWidth::Wide;
}
// Routine Description:
// - Asks the fallback function for the width of a glyph and remembers the answer,
//   because the fallback function is usually very expensive and will return
//   the same results for the same inputs.
// Arguments:
// - codepoint - the decoded codepoint; used as the key into the cache
// - glyph - the utf16 encoded codepoint; passed to the fallback function on a cache miss
// Return Value:
// - The cached width if there is one, otherwise Wide or Narrow depending on whether
//   the fallback function returned true or false.
//   Narrow if no fallback function is set or if an exception was thrown (which gets logged).
CodepointWidth CodepointWidthDetector::_checkFallbackViaCache(uint32_t codepoint, const std::wstring_view& glyph) const noexcept
{
    try
    {
        if (_pfnFallbackMethod)
        {
            // TODO: Cache needs to be emptied when font changes.
            if (const auto cached = _fallbackCache.find(codepoint); cached != _fallbackCache.end())
            {
                return cached->second;
            }

            const auto isWide = _pfnFallbackMethod(glyph);
            const auto width = isWide ? CodepointWidth::Wide : CodepointWidth::Narrow;
            _fallbackCache.insert_or_assign(codepoint, width);
            return width;
        }
    }
    catch (...)
    {
        LOG_CAUGHT_EXCEPTION();
    }

    // Reached if there's no fallback function or if something above threw.
    return CodepointWidth::Narrow;
}
// Method Description:
// - Installs the function that is consulted as the fallback mechanism for
//   determining a particular glyph's width, should the glyph be an ambiguous
//   width.
//   A Terminal could hook in a Renderer's IsGlyphWideByFont method as the
//   fallback to ask the renderer for the glyph's width (for example).
//   Widths that were already cached by _checkFallbackViaCache are left untouched.
// Arguments:
// - pfnFallback - the function to use as the fallback method. If it is empty,
//   _checkFallbackViaCache reports Narrow for every glyph.
// Return Value:
// - <none>
void CodepointWidthDetector::SetFallbackMethod(std::function<bool(const std::wstring_view&)> pfnFallback) noexcept
{
    // Trade places with the by-value argument: We end up with the new function, while
    // the previously installed one is destroyed together with the parameter.
    _pfnFallbackMethod.swap(pfnFallback);
}
// Method Description:
// - Empties the cache of fallback results (_fallbackCache), since the widths will
//   be different when the font changes and we should re-query the new font
//   for that information. After this call _checkFallbackViaCache has to ask
//   the fallback function again for every codepoint.
// - NOTE(review): This method is const, yet it modifies _fallbackCache.
//   Presumably the member is declared mutable - confirm in the header.
// Arguments:
// - <none>
// Return Value:
// - <none>
void CodepointWidthDetector::NotifyFontChanged() const noexcept
{
#pragma warning(suppress : 26447) // The function is declared 'noexcept' but calls function 'clear()' which may throw exceptions (f.6).
_fallbackCache.clear();
}