2019-05-03 00:29:04 +02:00
|
|
|
// Copyright (c) Microsoft Corporation.
|
|
|
|
// Licensed under the MIT license.
|
|
|
|
|
|
|
|
#include "precomp.h"
|
|
|
|
#include "inc/CodepointWidthDetector.hpp"
|
|
|
|
|
2019-08-16 19:54:17 +02:00
|
|
|
namespace
|
|
|
|
{
|
|
|
|
// used to store range data in CodepointWidthDetector's internal map
|
|
|
|
struct UnicodeRange final
|
|
|
|
{
|
|
|
|
unsigned int lowerBound;
|
|
|
|
unsigned int upperBound;
|
|
|
|
CodepointWidth width;
|
|
|
|
};
|
|
|
|
|
2019-08-30 00:23:07 +02:00
|
|
|
static bool operator<(const UnicodeRange& range, const unsigned int searchTerm) noexcept
|
2019-08-16 19:54:17 +02:00
|
|
|
{
|
|
|
|
return range.upperBound < searchTerm;
|
|
|
|
}
|
|
|
|
|
2020-10-27 18:36:28 +01:00
|
|
|
// Generated by Generate-CodepointWidthsFromUCD.ps1 -Pack:True -Full:False -NoOverrides:False
|
|
|
|
// on 10/25/2020 7:32:04 AM (UTC) from Unicode 13.0.0.
|
|
|
|
// 321205 (0x4E6B5) codepoints covered.
|
|
|
|
// 240 (0xF0) codepoints overridden.
|
|
|
|
// Override path: .\src\types\unicode_width_overrides.xml
|
|
|
|
static constexpr std::array<UnicodeRange, 295> s_wideAndAmbiguousTable{
|
2019-08-16 19:54:17 +02:00
|
|
|
UnicodeRange{ 0xa1, 0xa1, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0xa4, 0xa4, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0xa7, 0xa8, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0xaa, 0xaa, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0xad, 0xae, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0xb0, 0xb4, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0xb6, 0xba, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0xbc, 0xbf, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0xc6, 0xc6, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0xd0, 0xd0, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0xd7, 0xd8, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0xde, 0xe1, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0xe6, 0xe6, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0xe8, 0xea, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0xec, 0xed, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0xf0, 0xf0, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0xf2, 0xf3, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0xf7, 0xfa, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0xfc, 0xfc, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0xfe, 0xfe, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x101, 0x101, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x111, 0x111, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x113, 0x113, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x11b, 0x11b, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x126, 0x127, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x12b, 0x12b, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x131, 0x133, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x138, 0x138, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x13f, 0x142, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x144, 0x144, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x148, 0x14b, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x14d, 0x14d, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x152, 0x153, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x166, 0x167, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x16b, 0x16b, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x1ce, 0x1ce, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x1d0, 0x1d0, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x1d2, 0x1d2, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x1d4, 0x1d4, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x1d6, 0x1d6, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x1d8, 0x1d8, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x1da, 0x1da, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x1dc, 0x1dc, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x251, 0x251, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x261, 0x261, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x2c4, 0x2c4, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x2c7, 0x2c7, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x2c9, 0x2cb, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x2cd, 0x2cd, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x2d0, 0x2d0, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x2d8, 0x2db, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x2dd, 0x2dd, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x2df, 0x2df, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x300, 0x36f, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x391, 0x3a1, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x3a3, 0x3a9, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x3b1, 0x3c1, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x3c3, 0x3c9, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x401, 0x401, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x410, 0x44f, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x451, 0x451, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x1100, 0x115f, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0x2010, 0x2010, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x2013, 0x2016, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x2018, 0x2019, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x201c, 0x201d, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x2020, 0x2022, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x2024, 0x2027, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x2030, 0x2030, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x2032, 0x2033, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x2035, 0x2035, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x203b, 0x203b, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x203e, 0x203e, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x2074, 0x2074, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x207f, 0x207f, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x2081, 0x2084, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x20ac, 0x20ac, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x2103, 0x2103, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x2105, 0x2105, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x2109, 0x2109, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x2113, 0x2113, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x2116, 0x2116, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x2121, 0x2122, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x2126, 0x2126, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x212b, 0x212b, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x2153, 0x2154, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x215b, 0x215e, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x2160, 0x216b, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x2170, 0x2179, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x2189, 0x2189, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x2190, 0x2199, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x21b8, 0x21b9, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x21d2, 0x21d2, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x21d4, 0x21d4, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x21e7, 0x21e7, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x2200, 0x2200, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x2202, 0x2203, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x2207, 0x2208, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x220b, 0x220b, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x220f, 0x220f, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x2211, 0x2211, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x2215, 0x2215, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x221a, 0x221a, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x221d, 0x2220, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x2223, 0x2223, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x2225, 0x2225, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x2227, 0x222c, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x222e, 0x222e, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x2234, 0x2237, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x223c, 0x223d, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x2248, 0x2248, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x224c, 0x224c, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x2252, 0x2252, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x2260, 0x2261, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x2264, 0x2267, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x226a, 0x226b, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x226e, 0x226f, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x2282, 0x2283, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x2286, 0x2287, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x2295, 0x2295, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x2299, 0x2299, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x22a5, 0x22a5, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x22bf, 0x22bf, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x2312, 0x2312, CodepointWidth::Ambiguous },
|
2020-10-27 18:36:28 +01:00
|
|
|
UnicodeRange{ 0x231a, 0x231b, CodepointWidth::Wide },
|
2020-05-17 22:32:43 +02:00
|
|
|
UnicodeRange{ 0x2329, 0x232a, CodepointWidth::Wide },
|
2020-10-27 18:36:28 +01:00
|
|
|
UnicodeRange{ 0x23e9, 0x23ec, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0x23f0, 0x23f0, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0x23f3, 0x23f3, CodepointWidth::Wide },
|
2020-05-17 22:32:43 +02:00
|
|
|
UnicodeRange{ 0x2460, 0x24e9, CodepointWidth::Ambiguous },
|
2020-10-27 18:36:28 +01:00
|
|
|
UnicodeRange{ 0x24eb, 0x24ff, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x2500, 0x259f, CodepointWidth::Narrow }, // box-drawing and block elements require 1-cell alignment
|
2019-08-16 19:54:17 +02:00
|
|
|
UnicodeRange{ 0x25a0, 0x25a1, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x25a3, 0x25a9, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x25b2, 0x25b3, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x25b6, 0x25b7, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x25bc, 0x25bd, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x25c0, 0x25c1, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x25c6, 0x25c8, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x25cb, 0x25cb, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x25ce, 0x25d1, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x25e2, 0x25e5, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x25ef, 0x25ef, CodepointWidth::Ambiguous },
|
2020-10-27 18:36:28 +01:00
|
|
|
UnicodeRange{ 0x25fd, 0x25fe, CodepointWidth::Wide },
|
2019-08-16 19:54:17 +02:00
|
|
|
UnicodeRange{ 0x2605, 0x2606, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x2609, 0x2609, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x260e, 0x260f, CodepointWidth::Ambiguous },
|
2020-10-27 18:36:28 +01:00
|
|
|
UnicodeRange{ 0x2614, 0x2615, CodepointWidth::Wide },
|
2020-05-17 22:32:43 +02:00
|
|
|
UnicodeRange{ 0x261c, 0x261c, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x261e, 0x261e, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x2640, 0x2640, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x2642, 0x2642, CodepointWidth::Ambiguous },
|
2020-10-27 18:36:28 +01:00
|
|
|
UnicodeRange{ 0x2648, 0x2653, CodepointWidth::Wide },
|
2020-05-17 22:32:43 +02:00
|
|
|
UnicodeRange{ 0x2660, 0x2661, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x2663, 0x2665, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x2667, 0x266a, CodepointWidth::Ambiguous },
|
2019-08-16 19:54:17 +02:00
|
|
|
UnicodeRange{ 0x266c, 0x266d, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x266f, 0x266f, CodepointWidth::Ambiguous },
|
2020-10-27 18:36:28 +01:00
|
|
|
UnicodeRange{ 0x267f, 0x267f, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0x2693, 0x2693, CodepointWidth::Wide },
|
2019-08-16 19:54:17 +02:00
|
|
|
UnicodeRange{ 0x269e, 0x269f, CodepointWidth::Ambiguous },
|
2020-10-27 18:36:28 +01:00
|
|
|
UnicodeRange{ 0x26a1, 0x26a1, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0x26aa, 0x26ab, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0x26bd, 0x26be, CodepointWidth::Wide },
|
2019-08-16 19:54:17 +02:00
|
|
|
UnicodeRange{ 0x26bf, 0x26bf, CodepointWidth::Ambiguous },
|
2020-10-27 18:36:28 +01:00
|
|
|
UnicodeRange{ 0x26c4, 0x26c5, CodepointWidth::Wide },
|
2020-05-17 22:32:43 +02:00
|
|
|
UnicodeRange{ 0x26c6, 0x26cd, CodepointWidth::Ambiguous },
|
2020-10-27 18:36:28 +01:00
|
|
|
UnicodeRange{ 0x26ce, 0x26ce, CodepointWidth::Wide },
|
2020-05-17 22:32:43 +02:00
|
|
|
UnicodeRange{ 0x26cf, 0x26d3, CodepointWidth::Ambiguous },
|
2020-10-27 18:36:28 +01:00
|
|
|
UnicodeRange{ 0x26d4, 0x26d4, CodepointWidth::Wide },
|
2019-08-16 19:54:17 +02:00
|
|
|
UnicodeRange{ 0x26d5, 0x26e1, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x26e3, 0x26e3, CodepointWidth::Ambiguous },
|
2020-05-17 22:32:43 +02:00
|
|
|
UnicodeRange{ 0x26e8, 0x26e9, CodepointWidth::Ambiguous },
|
2020-10-27 18:36:28 +01:00
|
|
|
UnicodeRange{ 0x26ea, 0x26ea, CodepointWidth::Wide },
|
2020-05-17 22:32:43 +02:00
|
|
|
UnicodeRange{ 0x26eb, 0x26f1, CodepointWidth::Ambiguous },
|
2020-10-27 18:36:28 +01:00
|
|
|
UnicodeRange{ 0x26f2, 0x26f3, CodepointWidth::Wide },
|
2020-05-17 22:32:43 +02:00
|
|
|
UnicodeRange{ 0x26f4, 0x26f4, CodepointWidth::Ambiguous },
|
2020-10-27 18:36:28 +01:00
|
|
|
UnicodeRange{ 0x26f5, 0x26f5, CodepointWidth::Wide },
|
2020-05-17 22:32:43 +02:00
|
|
|
UnicodeRange{ 0x26f6, 0x26f9, CodepointWidth::Ambiguous },
|
2020-10-27 18:36:28 +01:00
|
|
|
UnicodeRange{ 0x26fa, 0x26fa, CodepointWidth::Wide },
|
2019-08-16 19:54:17 +02:00
|
|
|
UnicodeRange{ 0x26fb, 0x26fc, CodepointWidth::Ambiguous },
|
2020-10-27 18:36:28 +01:00
|
|
|
UnicodeRange{ 0x26fd, 0x26fd, CodepointWidth::Wide },
|
2019-08-16 19:54:17 +02:00
|
|
|
UnicodeRange{ 0x26fe, 0x26ff, CodepointWidth::Ambiguous },
|
2020-10-27 18:36:28 +01:00
|
|
|
UnicodeRange{ 0x2705, 0x2705, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0x270a, 0x270b, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0x2728, 0x2728, CodepointWidth::Wide },
|
2019-08-16 19:54:17 +02:00
|
|
|
UnicodeRange{ 0x273d, 0x273d, CodepointWidth::Ambiguous },
|
2020-10-27 18:36:28 +01:00
|
|
|
UnicodeRange{ 0x274c, 0x274c, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0x274e, 0x274e, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0x2753, 0x2755, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0x2757, 0x2757, CodepointWidth::Wide },
|
2019-08-16 19:54:17 +02:00
|
|
|
UnicodeRange{ 0x2776, 0x277f, CodepointWidth::Ambiguous },
|
2020-10-27 18:36:28 +01:00
|
|
|
UnicodeRange{ 0x2795, 0x2797, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0x27b0, 0x27b0, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0x27bf, 0x27bf, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0x2b1b, 0x2b1c, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0x2b50, 0x2b50, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0x2b55, 0x2b55, CodepointWidth::Wide },
|
2019-08-16 19:54:17 +02:00
|
|
|
UnicodeRange{ 0x2b56, 0x2b59, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x2e80, 0x2e99, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0x2e9b, 0x2ef3, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0x2f00, 0x2fd5, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0x2ff0, 0x2ffb, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0x3000, 0x303e, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0x3041, 0x3096, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0x3099, 0x30ff, CodepointWidth::Wide },
|
2020-10-27 18:36:28 +01:00
|
|
|
UnicodeRange{ 0x3105, 0x312f, CodepointWidth::Wide },
|
2019-08-16 19:54:17 +02:00
|
|
|
UnicodeRange{ 0x3131, 0x318e, CodepointWidth::Wide },
|
2020-10-27 18:36:28 +01:00
|
|
|
UnicodeRange{ 0x3190, 0x31e3, CodepointWidth::Wide },
|
2019-08-16 19:54:17 +02:00
|
|
|
UnicodeRange{ 0x31f0, 0x321e, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0x3220, 0x3247, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0x3248, 0x324f, CodepointWidth::Ambiguous },
|
2020-10-27 18:36:28 +01:00
|
|
|
UnicodeRange{ 0x3250, 0x4dbf, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0x4dc0, 0x4dff, CodepointWidth::Narrow }, // hexagrams are historically narrow
|
2019-08-16 19:54:17 +02:00
|
|
|
UnicodeRange{ 0x4e00, 0xa48c, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0xa490, 0xa4c6, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0xa960, 0xa97c, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0xac00, 0xd7a3, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0xe000, 0xf8ff, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0xf900, 0xfaff, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0xfe00, 0xfe0f, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0xfe10, 0xfe19, CodepointWidth::Wide },
|
2020-10-27 18:36:28 +01:00
|
|
|
UnicodeRange{ 0xfe20, 0xfe2f, CodepointWidth::Narrow }, // narrow combining ligatures (split into left/right halves, which take 2 columns together)
|
2019-08-16 19:54:17 +02:00
|
|
|
UnicodeRange{ 0xfe30, 0xfe52, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0xfe54, 0xfe66, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0xfe68, 0xfe6b, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0xff01, 0xff60, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0xffe0, 0xffe6, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0xfffd, 0xfffd, CodepointWidth::Ambiguous },
|
2020-10-27 18:36:28 +01:00
|
|
|
UnicodeRange{ 0x16fe0, 0x16fe4, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0x16ff0, 0x16ff1, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0x17000, 0x187f7, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0x18800, 0x18cd5, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0x18d00, 0x18d08, CodepointWidth::Wide },
|
2019-08-16 19:54:17 +02:00
|
|
|
UnicodeRange{ 0x1b000, 0x1b11e, CodepointWidth::Wide },
|
2020-10-27 18:36:28 +01:00
|
|
|
UnicodeRange{ 0x1b150, 0x1b152, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0x1b164, 0x1b167, CodepointWidth::Wide },
|
2019-08-16 19:54:17 +02:00
|
|
|
UnicodeRange{ 0x1b170, 0x1b2fb, CodepointWidth::Wide },
|
2020-10-27 18:36:28 +01:00
|
|
|
UnicodeRange{ 0x1f004, 0x1f004, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0x1f0cf, 0x1f0cf, CodepointWidth::Wide },
|
2019-08-16 19:54:17 +02:00
|
|
|
UnicodeRange{ 0x1f100, 0x1f10a, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x1f110, 0x1f12d, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0x1f130, 0x1f169, CodepointWidth::Ambiguous },
|
2020-05-17 22:32:43 +02:00
|
|
|
UnicodeRange{ 0x1f170, 0x1f18d, CodepointWidth::Ambiguous },
|
2020-10-27 18:36:28 +01:00
|
|
|
UnicodeRange{ 0x1f18e, 0x1f18e, CodepointWidth::Wide },
|
2019-08-16 19:54:17 +02:00
|
|
|
UnicodeRange{ 0x1f18f, 0x1f190, CodepointWidth::Ambiguous },
|
2020-10-27 18:36:28 +01:00
|
|
|
UnicodeRange{ 0x1f191, 0x1f19a, CodepointWidth::Wide },
|
2019-08-16 19:54:17 +02:00
|
|
|
UnicodeRange{ 0x1f19b, 0x1f1ac, CodepointWidth::Ambiguous },
|
2020-10-27 18:36:28 +01:00
|
|
|
UnicodeRange{ 0x1f1e6, 0x1f202, CodepointWidth::Wide },
|
2019-08-16 19:54:17 +02:00
|
|
|
UnicodeRange{ 0x1f210, 0x1f23b, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0x1f240, 0x1f248, CodepointWidth::Wide },
|
2020-10-27 18:36:28 +01:00
|
|
|
UnicodeRange{ 0x1f250, 0x1f251, CodepointWidth::Wide },
|
2019-08-16 19:54:17 +02:00
|
|
|
UnicodeRange{ 0x1f260, 0x1f265, CodepointWidth::Wide },
|
2020-10-27 18:36:28 +01:00
|
|
|
UnicodeRange{ 0x1f300, 0x1f320, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0x1f32d, 0x1f335, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0x1f337, 0x1f37c, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0x1f37e, 0x1f393, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0x1f3a0, 0x1f3ca, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0x1f3cf, 0x1f3d3, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0x1f3e0, 0x1f3f0, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0x1f3f4, 0x1f3f4, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0x1f3f8, 0x1f43e, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0x1f440, 0x1f440, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0x1f442, 0x1f4fc, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0x1f4ff, 0x1f53d, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0x1f54b, 0x1f54e, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0x1f550, 0x1f567, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0x1f57a, 0x1f57a, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0x1f595, 0x1f596, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0x1f5a4, 0x1f5a4, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0x1f5fb, 0x1f64f, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0x1f680, 0x1f6c5, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0x1f6cc, 0x1f6cc, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0x1f6d0, 0x1f6d2, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0x1f6d5, 0x1f6d7, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0x1f6eb, 0x1f6ec, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0x1f6f4, 0x1f6fc, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0x1f7e0, 0x1f7eb, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0x1f90c, 0x1f93a, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0x1f93c, 0x1f945, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0x1f947, 0x1f978, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0x1f97a, 0x1f9cb, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0x1f9cd, 0x1f9ff, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0x1fa70, 0x1fa74, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0x1fa78, 0x1fa7a, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0x1fa80, 0x1fa86, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0x1fa90, 0x1faa8, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0x1fab0, 0x1fab6, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0x1fac0, 0x1fac2, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0x1fad0, 0x1fad6, CodepointWidth::Wide },
|
2019-08-16 19:54:17 +02:00
|
|
|
UnicodeRange{ 0x20000, 0x2fffd, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0x30000, 0x3fffd, CodepointWidth::Wide },
|
|
|
|
UnicodeRange{ 0xe0100, 0xe01ef, CodepointWidth::Ambiguous },
|
|
|
|
UnicodeRange{ 0xf0000, 0xffffd, CodepointWidth::Ambiguous },
|
2020-10-27 18:36:28 +01:00
|
|
|
UnicodeRange{ 0x100000, 0x10fffd, CodepointWidth::Ambiguous },
|
2019-08-16 19:54:17 +02:00
|
|
|
};
|
|
|
|
}
|
|
|
|
|
2019-09-04 00:03:54 +02:00
|
|
|
// Routine Description:
|
|
|
|
// - Constructs an instance of the CodepointWidthDetector class
|
|
|
|
CodepointWidthDetector::CodepointWidthDetector() noexcept :
|
|
|
|
_fallbackCache{},
|
|
|
|
_pfnFallbackMethod{}
|
|
|
|
{
|
|
|
|
}
|
|
|
|
|
2019-05-03 00:29:04 +02:00
|
|
|
// Routine Description:
|
2020-04-04 02:56:22 +02:00
|
|
|
// - returns the width type of codepoint as fast as we can by using quick lookup table and fallback cache.
|
2019-05-03 00:29:04 +02:00
|
|
|
// Arguments:
|
|
|
|
// - glyph - the utf16 encoded codepoint to search for
|
|
|
|
// Return Value:
|
|
|
|
// - the width type of the codepoint
|
2019-08-30 00:23:07 +02:00
|
|
|
CodepointWidth CodepointWidthDetector::GetWidth(const std::wstring_view glyph) const
|
2019-05-03 00:29:04 +02:00
|
|
|
{
|
2020-04-04 02:56:22 +02:00
|
|
|
THROW_HR_IF(E_INVALIDARG, glyph.empty());
|
|
|
|
if (glyph.size() == 1)
|
2019-05-03 00:29:04 +02:00
|
|
|
{
|
2020-04-04 02:56:22 +02:00
|
|
|
// We first attempt to look at our custom quick lookup table of char width preferences.
|
|
|
|
const auto width = GetQuickCharWidth(glyph.front());
|
2019-08-16 19:54:17 +02:00
|
|
|
|
2020-04-04 02:56:22 +02:00
|
|
|
// If it's invalid, the quick width had no opinion, so go to the lookup table.
|
|
|
|
if (width == CodepointWidth::Invalid)
|
|
|
|
{
|
|
|
|
return _lookupGlyphWidthWithCache(glyph);
|
|
|
|
}
|
|
|
|
// If it's ambiguous, the quick width wanted us to ask the font directly, try that if we can.
|
|
|
|
// If not, go to the lookup table.
|
|
|
|
else if (width == CodepointWidth::Ambiguous)
|
|
|
|
{
|
|
|
|
if (_pfnFallbackMethod)
|
|
|
|
{
|
|
|
|
return _checkFallbackViaCache(glyph) ? CodepointWidth::Wide : CodepointWidth::Ambiguous;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
return _lookupGlyphWidthWithCache(glyph);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// Otherwise, return Width as it is.
|
|
|
|
else
|
|
|
|
{
|
|
|
|
return width;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
2019-05-03 00:29:04 +02:00
|
|
|
{
|
2020-04-04 02:56:22 +02:00
|
|
|
return _lookupGlyphWidthWithCache(glyph);
|
2019-05-03 00:29:04 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Routine Description:
|
|
|
|
// - checks if wch is wide. will attempt to fallback as much possible until an answer is determined
|
|
|
|
// Arguments:
|
|
|
|
// - wch - the wchar to check width of
|
|
|
|
// Return Value:
|
|
|
|
// - true if wch is wide
|
|
|
|
bool CodepointWidthDetector::IsWide(const wchar_t wch) const noexcept
|
|
|
|
{
|
Merged PR 3215853: Fix spacing/layout for block characters and many retroactively-recategorized emoji (and more!)
This encompasses a handful of problems with column counting.
The Terminal project didn't set a fallback column counter. Oops. I've fixed this to use the `DxEngine` as the fallback.
The `DxEngine` didn't implement its fallback method. Oops. I've fixed this to use the `CustomTextLayout` to figure out the advances based on the same font and fallback pattern as the real final layout, just without "rounding" it into cells yet.
- `CustomTextLayout` has been updated to move the advance-correction into a separate phase from glyph shaping. Previously, we corrected the advances to nice round cell counts during shaping, which is fine for drawing, but hard for column count analysis.
- Now that there are separate phases, an `Analyze` method was added to the `CustomTextLayout` which just performs the text analysis steps and the glyph shaping, but no advance correction to column boundaries nor actual drawing.
I've taken the caching code that I was working on to improve chafa, and I've brought it into this. Now that we're doing a lot of fallback and heavy lifting in terms of analysis via the layout, we should cache the results until the font changes.
I've adjusted how column counting is done overall. It's always been in these phases:
1. We used a quick-lookup of ranges of characters we knew to rapidly decide `Narrow`, `Wide` or `Invalid` (a.k.a. "I dunno")
2. If it was `Invalid`, we consulted a table based off of the Unicode standard that has either `Narrow`, `Wide`, or `Ambiguous` as a result.
3. If it's still `Ambiguous`, we consult a render engine fallback (usually GDI or now DX) to see how many columns it would take.
4. If we still don't know, then it's `Wide` to be safe.
- I've added an additional flow here. The quick-lookup can now return `Ambiguous` off the bat for some glyph characters in the x2000-x3000 range that used to just be simple shapes but have been retroactively recategorized as emoji and are frequently now using full width color glyphs.
- This new state causes the lookup to go immediately to the render engine if it is available instead of consulting the Unicode standard table first because the half/fullwidth table doesn't appear to have been updated for this nuance to reclass these characters as ambiguous, but we'd like to keep that table as a "generated from the spec" sort of table and keep our exceptions in the "quick lookup" function.
I have confirmed the following things "just work" now:
- The windows logo flag from the demo. (⚫⚪💖✅🌌😊)
- The dotted chart on the side of crossterm demo (•)
- The powerline characters that make arrows with the Consolas patched font (██)
- An accented é
- The warning and checkmark symbols appearing same size as the X. (✔⚠🔥)
Related work items: #21167256, #21237515, #21243859, #21274645, #21296827
2019-05-02 01:13:53 +02:00
|
|
|
try
|
2019-05-03 00:29:04 +02:00
|
|
|
{
|
Merged PR 3215853: Fix spacing/layout for block characters and many retroactively-recategorized emoji (and more!)
This encompasses a handful of problems with column counting.
The Terminal project didn't set a fallback column counter. Oops. I've fixed this to use the `DxEngine` as the fallback.
The `DxEngine` didn't implement its fallback method. Oops. I've fixed this to use the `CustomTextLayout` to figure out the advances based on the same font and fallback pattern as the real final layout, just without "rounding" it into cells yet.
- `CustomTextLayout` has been updated to move the advance-correction into a separate phase from glyph shaping. Previously, we corrected the advances to nice round cell counts during shaping, which is fine for drawing, but hard for column count analysis.
- Now that there are separate phases, an `Analyze` method was added to the `CustomTextLayout` which just performs the text analysis steps and the glyph shaping, but no advance correction to column boundaries nor actual drawing.
I've taken the caching code that I was working on to improve chafa, and I've brought it into this. Now that we're doing a lot of fallback and heavy lifting in terms of analysis via the layout, we should cache the results until the font changes.
I've adjusted how column counting is done overall. It's always been in these phases:
1. We used a quick-lookup of ranges of characters we knew to rapidly decide `Narrow`, `Wide` or `Invalid` (a.k.a. "I dunno")
2. If it was `Invalid`, we consulted a table based off of the Unicode standard that has either `Narrow`, `Wide`, or `Ambiguous` as a result.
3. If it's still `Ambiguous`, we consult a render engine fallback (usually GDI or now DX) to see how many columns it would take.
4. If we still don't know, then it's `Wide` to be safe.
- I've added an additional flow here. The quick-lookup can now return `Ambiguous` off the bat for some glyph characters in the x2000-x3000 range that used to just be simple shapes but have been retroactively recategorized as emoji and are frequently now using full width color glyphs.
- This new state causes the lookup to go immediately to the render engine if it is available instead of consulting the Unicode standard table first because the half/fullwidth table doesn't appear to have been updated for this nuance to reclass these characters as ambiguous, but we'd like to keep that table as a "generated from the spec" sort of table and keep our exceptions in the "quick lookup" function.
I have confirmed the following things "just work" now:
- The windows logo flag from the demo. (⚫⚪💖✅🌌😊)
- The dotted chart on the side of crossterm demo (•)
- The powerline characters that make arrows with the Consolas patched font (██)
- An accented é
- The warning and checkmark symbols appearing same size as the X. (✔⚠🔥)
Related work items: #21167256, #21237515, #21243859, #21274645, #21296827
2019-05-02 01:13:53 +02:00
|
|
|
return IsWide({ &wch, 1 });
|
2019-05-03 00:29:04 +02:00
|
|
|
}
|
Merged PR 3215853: Fix spacing/layout for block characters and many retroactively-recategorized emoji (and more!)
This encompasses a handful of problems with column counting.
The Terminal project didn't set a fallback column counter. Oops. I've fixed this to use the `DxEngine` as the fallback.
The `DxEngine` didn't implement its fallback method. Oops. I've fixed this to use the `CustomTextLayout` to figure out the advances based on the same font and fallback pattern as the real final layout, just without "rounding" it into cells yet.
- `CustomTextLayout` has been updated to move the advance-correction into a separate phase from glyph shaping. Previously, we corrected the advances to nice round cell counts during shaping, which is fine for drawing, but hard for column count analysis.
- Now that there are separate phases, an `Analyze` method was added to the `CustomTextLayout` which just performs the text analysis steps and the glyph shaping, but no advance correction to column boundaries nor actual drawing.
I've taken the caching code that I was working on to improve chafa, and I've brought it into this. Now that we're doing a lot of fallback and heavy lifting in terms of analysis via the layout, we should cache the results until the font changes.
I've adjusted how column counting is done overall. It's always been in these phases:
1. We used a quick-lookup of ranges of characters we knew to rapidly decide `Narrow`, `Wide` or `Invalid` (a.k.a. "I dunno")
2. If it was `Invalid`, we consulted a table based off of the Unicode standard that has either `Narrow`, `Wide`, or `Ambiguous` as a result.
3. If it's still `Ambiguous`, we consult a render engine fallback (usually GDI or now DX) to see how many columns it would take.
4. If we still don't know, then it's `Wide` to be safe.
- I've added an additional flow here. The quick-lookup can now return `Ambiguous` off the bat for some glyph characters in the x2000-x3000 range that used to just be simple shapes but have been retroactively recategorized as emoji and are frequently now using full width color glyphs.
- This new state causes the lookup to go immediately to the render engine if it is available instead of consulting the Unicode standard table first because the half/fullwidth table doesn't appear to have been updated for this nuance to reclass these characters as ambiguous, but we'd like to keep that table as a "generated from the spec" sort of table and keep our exceptions in the "quick lookup" function.
I have confirmed the following things "just work" now:
- The windows logo flag from the demo. (⚫⚪💖✅🌌😊)
- The dotted chart on the side of crossterm demo (•)
- The powerline characters that make arrows with the Consolas patched font (██)
- An accented é
- The warning and checkmark symbols appearing same size as the X. (✔⚠🔥)
Related work items: #21167256, #21237515, #21243859, #21274645, #21296827
2019-05-02 01:13:53 +02:00
|
|
|
CATCH_LOG();
|
|
|
|
|
|
|
|
return true;
|
2019-05-03 00:29:04 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
// Routine Description:
|
|
|
|
// - checks if codepoint is wide. will attempt to fallback as much possible until an answer is determined
|
|
|
|
// Arguments:
|
|
|
|
// - glyph - the utf16 encoded codepoint to check width of
|
|
|
|
// Return Value:
|
|
|
|
// - true if codepoint is wide
|
|
|
|
bool CodepointWidthDetector::IsWide(const std::wstring_view glyph) const
|
|
|
|
{
|
2020-04-04 02:56:22 +02:00
|
|
|
return GetWidth(glyph) == CodepointWidth::Wide;
|
|
|
|
}
|
Merged PR 3215853: Fix spacing/layout for block characters and many retroactively-recategorized emoji (and more!)
This encompasses a handful of problems with column counting.
The Terminal project didn't set a fallback column counter. Oops. I've fixed this to use the `DxEngine` as the fallback.
The `DxEngine` didn't implement its fallback method. Oops. I've fixed this to use the `CustomTextLayout` to figure out the advances based on the same font and fallback pattern as the real final layout, just without "rounding" it into cells yet.
- `CustomTextLayout` has been updated to move the advance-correction into a separate phase from glyph shaping. Previously, we corrected the advances to nice round cell counts during shaping, which is fine for drawing, but hard for column count analysis.
- Now that there are separate phases, an `Analyze` method was added to the `CustomTextLayout` which just performs the text analysis steps and the glyph shaping, but no advance correction to column boundaries nor actual drawing.
I've taken the caching code that I was working on to improve chafa, and I've brought it into this. Now that we're doing a lot of fallback and heavy lifting in terms of analysis via the layout, we should cache the results until the font changes.
I've adjusted how column counting is done overall. It's always been in these phases:
1. We used a quick-lookup of ranges of characters we knew to rapidly decide `Narrow`, `Wide` or `Invalid` (a.k.a. "I dunno")
2. If it was `Invalid`, we consulted a table based off of the Unicode standard that has either `Narrow`, `Wide`, or `Ambiguous` as a result.
3. If it's still `Ambiguous`, we consult a render engine fallback (usually GDI or now DX) to see how many columns it would take.
4. If we still don't know, then it's `Wide` to be safe.
- I've added an additional flow here. The quick-lookup can now return `Ambiguous` off the bat for some glyph characters in the x2000-x3000 range that used to just be simple shapes but have been retroactively recategorized as emoji and are frequently now using full width color glyphs.
- This new state causes the lookup to go immediately to the render engine if it is available instead of consulting the Unicode standard table first because the half/fullwidth table doesn't appear to have been updated for this nuance to reclass these characters as ambiguous, but we'd like to keep that table as a "generated from the spec" sort of table and keep our exceptions in the "quick lookup" function.
I have confirmed the following things "just work" now:
- The windows logo flag from the demo. (⚫⚪💖✅🌌😊)
- The dotted chart on the side of crossterm demo (•)
- The powerline characters that make arrows with the Consolas patched font (██)
- An accented é
- The warning and checkmark symbols appearing same size as the X. (✔⚠🔥)
Related work items: #21167256, #21237515, #21243859, #21274645, #21296827
2019-05-02 01:13:53 +02:00
|
|
|
|
2020-04-04 02:56:22 +02:00
|
|
|
// Routine Description:
|
|
|
|
// - returns the width type of codepoint by searching the map generated from the unicode spec
|
|
|
|
// Arguments:
|
|
|
|
// - glyph - the utf16 encoded codepoint to search for
|
|
|
|
// Return Value:
|
|
|
|
// - the width type of the codepoint
|
|
|
|
CodepointWidth CodepointWidthDetector::_lookupGlyphWidth(const std::wstring_view glyph) const
|
|
|
|
{
|
|
|
|
if (glyph.empty())
|
|
|
|
{
|
|
|
|
return CodepointWidth::Invalid;
|
2019-05-03 00:29:04 +02:00
|
|
|
}
|
2020-04-04 02:56:22 +02:00
|
|
|
|
|
|
|
const auto codepoint = _extractCodepoint(glyph);
|
|
|
|
const auto it = std::lower_bound(s_wideAndAmbiguousTable.begin(), s_wideAndAmbiguousTable.end(), codepoint);
|
|
|
|
|
|
|
|
// For characters that are not _in_ the table, lower_bound will return the nearest item that is.
|
|
|
|
// We must check its bounds to make sure that our hit was a true hit.
|
|
|
|
if (it != s_wideAndAmbiguousTable.end() && codepoint >= it->lowerBound && codepoint <= it->upperBound)
|
2019-05-03 00:29:04 +02:00
|
|
|
{
|
2020-04-04 02:56:22 +02:00
|
|
|
return it->width;
|
2019-05-03 00:29:04 +02:00
|
|
|
}
|
2020-04-04 02:56:22 +02:00
|
|
|
|
|
|
|
return CodepointWidth::Narrow;
|
2019-05-03 00:29:04 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
// Routine Description:
|
2020-04-04 02:56:22 +02:00
|
|
|
// - returns the width type of codepoint using fallback methods.
|
2019-05-03 00:29:04 +02:00
|
|
|
// Arguments:
|
|
|
|
// - glyph - the utf16 encoded codepoint to check width of
|
|
|
|
// Return Value:
|
2020-04-04 02:56:22 +02:00
|
|
|
// - the width type of the codepoint
|
|
|
|
CodepointWidth CodepointWidthDetector::_lookupGlyphWidthWithCache(const std::wstring_view glyph) const noexcept
|
2019-05-03 00:29:04 +02:00
|
|
|
{
|
Merged PR 3215853: Fix spacing/layout for block characters and many retroactively-recategorized emoji (and more!)
This encompasses a handful of problems with column counting.
The Terminal project didn't set a fallback column counter. Oops. I've fixed this to use the `DxEngine` as the fallback.
The `DxEngine` didn't implement its fallback method. Oops. I've fixed this to use the `CustomTextLayout` to figure out the advances based on the same font and fallback pattern as the real final layout, just without "rounding" it into cells yet.
- `CustomTextLayout` has been updated to move the advance-correction into a separate phase from glyph shaping. Previously, we corrected the advances to nice round cell counts during shaping, which is fine for drawing, but hard for column count analysis.
- Now that there are separate phases, an `Analyze` method was added to the `CustomTextLayout` which just performs the text analysis steps and the glyph shaping, but no advance correction to column boundaries nor actual drawing.
I've taken the caching code that I was working on to improve chafa, and I've brought it into this. Now that we're doing a lot of fallback and heavy lifting in terms of analysis via the layout, we should cache the results until the font changes.
I've adjusted how column counting is done overall. It's always been in these phases:
1. We used a quick-lookup of ranges of characters we knew to rapidly decide `Narrow`, `Wide` or `Invalid` (a.k.a. "I dunno")
2. If it was `Invalid`, we consulted a table based off of the Unicode standard that has either `Narrow`, `Wide`, or `Ambiguous` as a result.
3. If it's still `Ambiguous`, we consult a render engine fallback (usually GDI or now DX) to see how many columns it would take.
4. If we still don't know, then it's `Wide` to be safe.
- I've added an additional flow here. The quick-lookup can now return `Ambiguous` off the bat for some glyph characters in the x2000-x3000 range that used to just be simple shapes but have been retroactively recategorized as emoji and are frequently now using full width color glyphs.
- This new state causes the lookup to go immediately to the render engine if it is available instead of consulting the Unicode standard table first because the half/fullwidth table doesn't appear to have been updated for this nuance to reclass these characters as ambiguous, but we'd like to keep that table as a "generated from the spec" sort of table and keep our exceptions in the "quick lookup" function.
I have confirmed the following things "just work" now:
- The windows logo flag from the demo. (⚫⚪💖✅🌌😊)
- The dotted chart on the side of crossterm demo (•)
- The powerline characters that make arrows with the Consolas patched font (██)
- An accented é
- The warning and checkmark symbols appearing same size as the X. (✔⚠🔥)
Related work items: #21167256, #21237515, #21243859, #21274645, #21296827
2019-05-02 01:13:53 +02:00
|
|
|
try
|
2019-05-03 00:29:04 +02:00
|
|
|
{
|
Merged PR 3215853: Fix spacing/layout for block characters and many retroactively-recategorized emoji (and more!)
This encompasses a handful of problems with column counting.
The Terminal project didn't set a fallback column counter. Oops. I've fixed this to use the `DxEngine` as the fallback.
The `DxEngine` didn't implement its fallback method. Oops. I've fixed this to use the `CustomTextLayout` to figure out the advances based on the same font and fallback pattern as the real final layout, just without "rounding" it into cells yet.
- `CustomTextLayout` has been updated to move the advance-correction into a separate phase from glyph shaping. Previously, we corrected the advances to nice round cell counts during shaping, which is fine for drawing, but hard for column count analysis.
- Now that there are separate phases, an `Analyze` method was added to the `CustomTextLayout` which just performs the text analysis steps and the glyph shaping, but no advance correction to column boundaries nor actual drawing.
I've taken the caching code that I was working on to improve chafa, and I've brought it into this. Now that we're doing a lot of fallback and heavy lifting in terms of analysis via the layout, we should cache the results until the font changes.
I've adjusted how column counting is done overall. It's always been in these phases:
1. We used a quick-lookup of ranges of characters we knew to rapidly decide `Narrow`, `Wide` or `Invalid` (a.k.a. "I dunno")
2. If it was `Invalid`, we consulted a table based off of the Unicode standard that has either `Narrow`, `Wide`, or `Ambiguous` as a result.
3. If it's still `Ambiguous`, we consult a render engine fallback (usually GDI or now DX) to see how many columns it would take.
4. If we still don't know, then it's `Wide` to be safe.
- I've added an additional flow here. The quick-lookup can now return `Ambiguous` off the bat for some glyph characters in the x2000-x3000 range that used to just be simple shapes but have been retroactively recategorized as emoji and are frequently now using full width color glyphs.
- This new state causes the lookup to go immediately to the render engine if it is available instead of consulting the Unicode standard table first because the half/fullwidth table doesn't appear to have been updated for this nuance to reclass these characters as ambiguous, but we'd like to keep that table as a "generated from the spec" sort of table and keep our exceptions in the "quick lookup" function.
I have confirmed the following things "just work" now:
- The windows logo flag from the demo. (⚫⚪💖✅🌌😊)
- The dotted chart on the side of crossterm demo (•)
- The powerline characters that make arrows with the Consolas patched font (██)
- An accented é
- The warning and checkmark symbols appearing same size as the X. (✔⚠🔥)
Related work items: #21167256, #21237515, #21243859, #21274645, #21296827
2019-05-02 01:13:53 +02:00
|
|
|
// Use our generated table to try to lookup the width based on the Unicode standard.
|
2020-04-04 02:56:22 +02:00
|
|
|
const CodepointWidth width = _lookupGlyphWidth(glyph);
|
Merged PR 3215853: Fix spacing/layout for block characters and many retroactively-recategorized emoji (and more!)
This encompasses a handful of problems with column counting.
The Terminal project didn't set a fallback column counter. Oops. I've fixed this to use the `DxEngine` as the fallback.
The `DxEngine` didn't implement its fallback method. Oops. I've fixed this to use the `CustomTextLayout` to figure out the advances based on the same font and fallback pattern as the real final layout, just without "rounding" it into cells yet.
- `CustomTextLayout` has been updated to move the advance-correction into a separate phase from glyph shaping. Previously, we corrected the advances to nice round cell counts during shaping, which is fine for drawing, but hard for column count analysis.
- Now that there are separate phases, an `Analyze` method was added to the `CustomTextLayout` which just performs the text analysis steps and the glyph shaping, but no advance correction to column boundaries nor actual drawing.
I've taken the caching code that I was working on to improve chafa, and I've brought it into this. Now that we're doing a lot of fallback and heavy lifting in terms of analysis via the layout, we should cache the results until the font changes.
I've adjusted how column counting is done overall. It's always been in these phases:
1. We used a quick-lookup of ranges of characters we knew to rapidly decide `Narrow`, `Wide` or `Invalid` (a.k.a. "I dunno")
2. If it was `Invalid`, we consulted a table based off of the Unicode standard that has either `Narrow`, `Wide`, or `Ambiguous` as a result.
3. If it's still `Ambiguous`, we consult a render engine fallback (usually GDI or now DX) to see how many columns it would take.
4. If we still don't know, then it's `Wide` to be safe.
- I've added an additional flow here. The quick-lookup can now return `Ambiguous` off the bat for some glyph characters in the x2000-x3000 range that used to just be simple shapes but have been retroactively recategorized as emoji and are frequently now using full width color glyphs.
- This new state causes the lookup to go immediately to the render engine if it is available instead of consulting the Unicode standard table first because the half/fullwidth table doesn't appear to have been updated for this nuance to reclass these characters as ambiguous, but we'd like to keep that table as a "generated from the spec" sort of table and keep our exceptions in the "quick lookup" function.
I have confirmed the following things "just work" now:
- The windows logo flag from the demo. (⚫⚪💖✅🌌😊)
- The dotted chart on the side of crossterm demo (•)
- The powerline characters that make arrows with the Consolas patched font (██)
- An accented é
- The warning and checkmark symbols appearing same size as the X. (✔⚠🔥)
Related work items: #21167256, #21237515, #21243859, #21274645, #21296827
2019-05-02 01:13:53 +02:00
|
|
|
|
|
|
|
// If it's ambiguous, then ask the font if we can.
|
|
|
|
if (width == CodepointWidth::Ambiguous)
|
2019-05-03 00:29:04 +02:00
|
|
|
{
|
2019-08-16 19:54:17 +02:00
|
|
|
if (_pfnFallbackMethod)
|
Merged PR 3215853: Fix spacing/layout for block characters and many retroactively-recategorized emoji (and more!)
This encompasses a handful of problems with column counting.
The Terminal project didn't set a fallback column counter. Oops. I've fixed this to use the `DxEngine` as the fallback.
The `DxEngine` didn't implement its fallback method. Oops. I've fixed this to use the `CustomTextLayout` to figure out the advances based on the same font and fallback pattern as the real final layout, just without "rounding" it into cells yet.
- `CustomTextLayout` has been updated to move the advance-correction into a separate phase from glyph shaping. Previously, we corrected the advances to nice round cell counts during shaping, which is fine for drawing, but hard for column count analysis.
- Now that there are separate phases, an `Analyze` method was added to the `CustomTextLayout` which just performs the text analysis steps and the glyph shaping, but no advance correction to column boundaries nor actual drawing.
I've taken the caching code that I was working on to improve chafa, and I've brought it into this. Now that we're doing a lot of fallback and heavy lifting in terms of analysis via the layout, we should cache the results until the font changes.
I've adjusted how column counting is done overall. It's always been in these phases:
1. We used a quick-lookup of ranges of characters we knew to rapidly decide `Narrow`, `Wide` or `Invalid` (a.k.a. "I dunno")
2. If it was `Invalid`, we consulted a table based off of the Unicode standard that has either `Narrow`, `Wide`, or `Ambiguous` as a result.
3. If it's still `Ambiguous`, we consult a render engine fallback (usually GDI or now DX) to see how many columns it would take.
4. If we still don't know, then it's `Wide` to be safe.
- I've added an additional flow here. The quick-lookup can now return `Ambiguous` off the bat for some glyph characters in the x2000-x3000 range that used to just be simple shapes but have been retroactively recategorized as emoji and are frequently now using full width color glyphs.
- This new state causes the lookup to go immediately to the render engine if it is available instead of consulting the Unicode standard table first because the half/fullwidth table doesn't appear to have been updated for this nuance to reclass these characters as ambiguous, but we'd like to keep that table as a "generated from the spec" sort of table and keep our exceptions in the "quick lookup" function.
I have confirmed the following things "just work" now:
- The windows logo flag from the demo. (⚫⚪💖✅🌌😊)
- The dotted chart on the side of crossterm demo (•)
- The powerline characters that make arrows with the Consolas patched font (██)
- An accented é
- The warning and checkmark symbols appearing same size as the X. (✔⚠🔥)
Related work items: #21167256, #21237515, #21243859, #21274645, #21296827
2019-05-02 01:13:53 +02:00
|
|
|
{
|
2020-04-04 02:56:22 +02:00
|
|
|
return _checkFallbackViaCache(glyph) ? CodepointWidth::Wide : CodepointWidth::Ambiguous;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
return CodepointWidth::Ambiguous;
|
Merged PR 3215853: Fix spacing/layout for block characters and many retroactively-recategorized emoji (and more!)
This encompasses a handful of problems with column counting.
The Terminal project didn't set a fallback column counter. Oops. I've fixed this to use the `DxEngine` as the fallback.
The `DxEngine` didn't implement its fallback method. Oops. I've fixed this to use the `CustomTextLayout` to figure out the advances based on the same font and fallback pattern as the real final layout, just without "rounding" it into cells yet.
- `CustomTextLayout` has been updated to move the advance-correction into a separate phase from glyph shaping. Previously, we corrected the advances to nice round cell counts during shaping, which is fine for drawing, but hard for column count analysis.
- Now that there are separate phases, an `Analyze` method was added to the `CustomTextLayout` which just performs the text analysis steps and the glyph shaping, but no advance correction to column boundaries nor actual drawing.
I've taken the caching code that I was working on to improve chafa, and I've brought it into this. Now that we're doing a lot of fallback and heavy lifting in terms of analysis via the layout, we should cache the results until the font changes.
I've adjusted how column counting is done overall. It's always been in these phases:
1. We used a quick-lookup of ranges of characters we knew to rapidly decide `Narrow`, `Wide` or `Invalid` (a.k.a. "I dunno")
2. If it was `Invalid`, we consulted a table based off of the Unicode standard that has either `Narrow`, `Wide`, or `Ambiguous` as a result.
3. If it's still `Ambiguous`, we consult a render engine fallback (usually GDI or now DX) to see how many columns it would take.
4. If we still don't know, then it's `Wide` to be safe.
- I've added an additional flow here. The quick-lookup can now return `Ambiguous` off the bat for some glyph characters in the x2000-x3000 range that used to just be simple shapes but have been retroactively recategorized as emoji and are frequently now using full width color glyphs.
- This new state causes the lookup to go immediately to the render engine if it is available instead of consulting the Unicode standard table first because the half/fullwidth table doesn't appear to have been updated for this nuance to reclass these characters as ambiguous, but we'd like to keep that table as a "generated from the spec" sort of table and keep our exceptions in the "quick lookup" function.
I have confirmed the following things "just work" now:
- The windows logo flag from the demo. (⚫⚪💖✅🌌😊)
- The dotted chart on the side of crossterm demo (•)
- The powerline characters that make arrows with the Consolas patched font (██)
- An accented é
- The warning and checkmark symbols appearing same size as the X. (✔⚠🔥)
Related work items: #21167256, #21237515, #21243859, #21274645, #21296827
2019-05-02 01:13:53 +02:00
|
|
|
}
|
2019-05-03 00:29:04 +02:00
|
|
|
}
|
2020-04-04 02:56:22 +02:00
|
|
|
// If it's not ambiguous, it should say wide or narrow.
|
Merged PR 3215853: Fix spacing/layout for block characters and many retroactively-recategorized emoji (and more!)
This encompasses a handful of problems with column counting.
The Terminal project didn't set a fallback column counter. Oops. I've fixed this to use the `DxEngine` as the fallback.
The `DxEngine` didn't implement its fallback method. Oops. I've fixed this to use the `CustomTextLayout` to figure out the advances based on the same font and fallback pattern as the real final layout, just without "rounding" it into cells yet.
- `CustomTextLayout` has been updated to move the advance-correction into a separate phase from glyph shaping. Previously, we corrected the advances to nice round cell counts during shaping, which is fine for drawing, but hard for column count analysis.
- Now that there are separate phases, an `Analyze` method was added to the `CustomTextLayout` which just performs the text analysis steps and the glyph shaping, but no advance correction to column boundaries nor actual drawing.
I've taken the caching code that I was working on to improve chafa, and I've brought it into this. Now that we're doing a lot of fallback and heavy lifting in terms of analysis via the layout, we should cache the results until the font changes.
I've adjusted how column counting is done overall. It's always been in these phases:
1. We used a quick-lookup of ranges of characters we knew to rapidly decide `Narrow`, `Wide` or `Invalid` (a.k.a. "I dunno")
2. If it was `Invalid`, we consulted a table based off of the Unicode standard that has either `Narrow`, `Wide`, or `Ambiguous` as a result.
3. If it's still `Ambiguous`, we consult a render engine fallback (usually GDI or now DX) to see how many columns it would take.
4. If we still don't know, then it's `Wide` to be safe.
- I've added an additional flow here. The quick-lookup can now return `Ambiguous` off the bat for some glyph characters in the x2000-x3000 range that used to just be simple shapes but have been retroactively recategorized as emoji and are frequently now using full width color glyphs.
- This new state causes the lookup to go immediately to the render engine if it is available instead of consulting the Unicode standard table first because the half/fullwidth table doesn't appear to have been updated for this nuance to reclass these characters as ambiguous, but we'd like to keep that table as a "generated from the spec" sort of table and keep our exceptions in the "quick lookup" function.
I have confirmed the following things "just work" now:
- The windows logo flag from the demo. (⚫⚪💖✅🌌😊)
- The dotted chart on the side of crossterm demo (•)
- The powerline characters that make arrows with the Consolas patched font (██)
- An accented é
- The warning and checkmark symbols appearing same size as the X. (✔⚠🔥)
Related work items: #21167256, #21237515, #21243859, #21274645, #21296827
2019-05-02 01:13:53 +02:00
|
|
|
else
|
|
|
|
{
|
2020-04-04 02:56:22 +02:00
|
|
|
return width;
|
Merged PR 3215853: Fix spacing/layout for block characters and many retroactively-recategorized emoji (and more!)
This encompasses a handful of problems with column counting.
The Terminal project didn't set a fallback column counter. Oops. I've fixed this to use the `DxEngine` as the fallback.
The `DxEngine` didn't implement its fallback method. Oops. I've fixed this to use the `CustomTextLayout` to figure out the advances based on the same font and fallback pattern as the real final layout, just without "rounding" it into cells yet.
- `CustomTextLayout` has been updated to move the advance-correction into a separate phase from glyph shaping. Previously, we corrected the advances to nice round cell counts during shaping, which is fine for drawing, but hard for column count analysis.
- Now that there are separate phases, an `Analyze` method was added to the `CustomTextLayout` which just performs the text analysis steps and the glyph shaping, but no advance correction to column boundaries nor actual drawing.
I've taken the caching code that I was working on to improve chafa, and I've brought it into this. Now that we're doing a lot of fallback and heavy lifting in terms of analysis via the layout, we should cache the results until the font changes.
I've adjusted how column counting is done overall. It's always been in these phases:
1. We used a quick-lookup of ranges of characters we knew to rapidly decide `Narrow`, `Wide` or `Invalid` (a.k.a. "I dunno")
2. If it was `Invalid`, we consulted a table based off of the Unicode standard that has either `Narrow`, `Wide`, or `Ambiguous` as a result.
3. If it's still `Ambiguous`, we consult a render engine fallback (usually GDI or now DX) to see how many columns it would take.
4. If we still don't know, then it's `Wide` to be safe.
- I've added an additional flow here. The quick-lookup can now return `Ambiguous` off the bat for some glyph characters in the x2000-x3000 range that used to just be simple shapes but have been retroactively recategorized as emoji and are frequently now using full width color glyphs.
- This new state causes the lookup to go immediately to the render engine if it is available instead of consulting the Unicode standard table first because the half/fullwidth table doesn't appear to have been updated for this nuance to reclass these characters as ambiguous, but we'd like to keep that table as a "generated from the spec" sort of table and keep our exceptions in the "quick lookup" function.
I have confirmed the following things "just work" now:
- The windows logo flag from the demo. (⚫⚪💖✅🌌😊)
- The dotted chart on the side of crossterm demo (•)
- The powerline characters that make arrows with the Consolas patched font (██)
- An accented é
- The warning and checkmark symbols appearing same size as the X. (✔⚠🔥)
Related work items: #21167256, #21237515, #21243859, #21274645, #21296827
2019-05-02 01:13:53 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
CATCH_LOG();
|
|
|
|
|
|
|
|
// If we got this far, we couldn't figure it out.
|
|
|
|
// It's better to be too wide than too narrow.
|
2020-04-04 02:56:22 +02:00
|
|
|
return CodepointWidth::Wide;
|
Merged PR 3215853: Fix spacing/layout for block characters and many retroactively-recategorized emoji (and more!)
This encompasses a handful of problems with column counting.
The Terminal project didn't set a fallback column counter. Oops. I've fixed this to use the `DxEngine` as the fallback.
The `DxEngine` didn't implement its fallback method. Oops. I've fixed this to use the `CustomTextLayout` to figure out the advances based on the same font and fallback pattern as the real final layout, just without "rounding" it into cells yet.
- `CustomTextLayout` has been updated to move the advance-correction into a separate phase from glyph shaping. Previously, we corrected the advances to nice round cell counts during shaping, which is fine for drawing, but hard for column count analysis.
- Now that there are separate phases, an `Analyze` method was added to the `CustomTextLayout` which just performs the text analysis steps and the glyph shaping, but no advance correction to column boundaries nor actual drawing.
I've taken the caching code that I was working on to improve chafa, and I've brought it into this. Now that we're doing a lot of fallback and heavy lifting in terms of analysis via the layout, we should cache the results until the font changes.
I've adjusted how column counting is done overall. It's always been in these phases:
1. We used a quick-lookup of ranges of characters we knew to rapidly decide `Narrow`, `Wide` or `Invalid` (a.k.a. "I dunno")
2. If it was `Invalid`, we consulted a table based off of the Unicode standard that has either `Narrow`, `Wide`, or `Ambiguous` as a result.
3. If it's still `Ambiguous`, we consult a render engine fallback (usually GDI or now DX) to see how many columns it would take.
4. If we still don't know, then it's `Wide` to be safe.
- I've added an additional flow here. The quick-lookup can now return `Ambiguous` off the bat for some glyph characters in the x2000-x3000 range that used to just be simple shapes but have been retroactively recategorized as emoji and are frequently now using full width color glyphs.
- This new state causes the lookup to go immediately to the render engine if it is available instead of consulting the Unicode standard table first because the half/fullwidth table doesn't appear to have been updated for this nuance to reclass these characters as ambiguous, but we'd like to keep that table as a "generated from the spec" sort of table and keep our exceptions in the "quick lookup" function.
I have confirmed the following things "just work" now:
- The windows logo flag from the demo. (⚫⚪💖✅🌌😊)
- The dotted chart on the side of crossterm demo (•)
- The powerline characters that make arrows with the Consolas patched font (██)
- An accented é
- The warning and checkmark symbols appearing same size as the X. (✔⚠🔥)
Related work items: #21167256, #21237515, #21243859, #21274645, #21296827
2019-05-02 01:13:53 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
// Routine Description:
|
|
|
|
// - Checks the fallback function but caches the results until the font changes
|
|
|
|
// because the lookup function is usually very expensive and will return the same results
|
|
|
|
// for the same inputs.
|
|
|
|
// Arguments:
|
|
|
|
// - glyph - the utf16 encoded codepoint to check width of
|
|
|
|
// - true if codepoint is wide or false if it is narrow
|
|
|
|
bool CodepointWidthDetector::_checkFallbackViaCache(const std::wstring_view glyph) const
|
|
|
|
{
|
|
|
|
const std::wstring findMe{ glyph };
|
|
|
|
|
|
|
|
// TODO: Cache needs to be emptied when font changes.
|
2019-08-29 20:27:39 +02:00
|
|
|
const auto it = _fallbackCache.find(findMe);
|
Merged PR 3215853: Fix spacing/layout for block characters and many retroactively-recategorized emoji (and more!)
This encompasses a handful of problems with column counting.
The Terminal project didn't set a fallback column counter. Oops. I've fixed this to use the `DxEngine` as the fallback.
The `DxEngine` didn't implement its fallback method. Oops. I've fixed this to use the `CustomTextLayout` to figure out the advances based on the same font and fallback pattern as the real final layout, just without "rounding" it into cells yet.
- `CustomTextLayout` has been updated to move the advance-correction into a separate phase from glyph shaping. Previously, we corrected the advances to nice round cell counts during shaping, which is fine for drawing, but hard for column count analysis.
- Now that there are separate phases, an `Analyze` method was added to the `CustomTextLayout` which just performs the text analysis steps and the glyph shaping, but no advance correction to column boundaries nor actual drawing.
I've taken the caching code that I was working on to improve chafa, and I've brought it into this. Now that we're doing a lot of fallback and heavy lifting in terms of analysis via the layout, we should cache the results until the font changes.
I've adjusted how column counting is done overall. It's always been in these phases:
1. We used a quick-lookup of ranges of characters we knew to rapidly decide `Narrow`, `Wide` or `Invalid` (a.k.a. "I dunno")
2. If it was `Invalid`, we consulted a table based off of the Unicode standard that has either `Narrow`, `Wide`, or `Ambiguous` as a result.
3. If it's still `Ambiguous`, we consult a render engine fallback (usually GDI or now DX) to see how many columns it would take.
4. If we still don't know, then it's `Wide` to be safe.
- I've added an additional flow here. The quick-lookup can now return `Ambiguous` off the bat for some glyph characters in the x2000-x3000 range that used to just be simple shapes but have been retroactively recategorized as emoji and are frequently now using full width color glyphs.
- This new state causes the lookup to go immediately to the render engine if it is available instead of consulting the Unicode standard table first because the half/fullwidth table doesn't appear to have been updated for this nuance to reclass these characters as ambiguous, but we'd like to keep that table as a "generated from the spec" sort of table and keep our exceptions in the "quick lookup" function.
I have confirmed the following things "just work" now:
- The windows logo flag from the demo. (⚫⚪💖✅🌌😊)
- The dotted chart on the side of crossterm demo (•)
- The powerline characters that make arrows with the Consolas patched font (██)
- An accented é
- The warning and checkmark symbols appearing same size as the X. (✔⚠🔥)
Related work items: #21167256, #21237515, #21243859, #21274645, #21296827
2019-05-02 01:13:53 +02:00
|
|
|
if (it == _fallbackCache.end())
|
|
|
|
{
|
|
|
|
auto result = _pfnFallbackMethod(glyph);
|
|
|
|
_fallbackCache.insert_or_assign(findMe, result);
|
|
|
|
return result;
|
2019-05-03 00:29:04 +02:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
Merged PR 3215853: Fix spacing/layout for block characters and many retroactively-recategorized emoji (and more!)
This encompasses a handful of problems with column counting.
The Terminal project didn't set a fallback column counter. Oops. I've fixed this to use the `DxEngine` as the fallback.
The `DxEngine` didn't implement its fallback method. Oops. I've fixed this to use the `CustomTextLayout` to figure out the advances based on the same font and fallback pattern as the real final layout, just without "rounding" it into cells yet.
- `CustomTextLayout` has been updated to move the advance-correction into a separate phase from glyph shaping. Previously, we corrected the advances to nice round cell counts during shaping, which is fine for drawing, but hard for column count analysis.
- Now that there are separate phases, an `Analyze` method was added to the `CustomTextLayout` which just performs the text analysis steps and the glyph shaping, but no advance correction to column boundaries nor actual drawing.
I've taken the caching code that I was working on to improve chafa, and I've brought it into this. Now that we're doing a lot of fallback and heavy lifting in terms of analysis via the layout, we should cache the results until the font changes.
I've adjusted how column counting is done overall. It's always been in these phases:
1. We used a quick-lookup of ranges of characters we knew to rapidly decide `Narrow`, `Wide` or `Invalid` (a.k.a. "I dunno")
2. If it was `Invalid`, we consulted a table based off of the Unicode standard that has either `Narrow`, `Wide`, or `Ambiguous` as a result.
3. If it's still `Ambiguous`, we consult a render engine fallback (usually GDI or now DX) to see how many columns it would take.
4. If we still don't know, then it's `Wide` to be safe.
- I've added an additional flow here. The quick-lookup can now return `Ambiguous` off the bat for some glyph characters in the x2000-x3000 range that used to just be simple shapes but have been retroactively recategorized as emoji and are frequently now using full width color glyphs.
- This new state causes the lookup to go immediately to the render engine if it is available instead of consulting the Unicode standard table first because the half/fullwidth table doesn't appear to have been updated for this nuance to reclass these characters as ambiguous, but we'd like to keep that table as a "generated from the spec" sort of table and keep our exceptions in the "quick lookup" function.
I have confirmed the following things "just work" now:
- The windows logo flag from the demo. (⚫⚪💖✅🌌😊)
- The dotted chart on the side of crossterm demo (•)
- The powerline characters that make arrows with the Consolas patched font (██)
- An accented é
- The warning and checkmark symbols appearing same size as the X. (✔⚠🔥)
Related work items: #21167256, #21237515, #21243859, #21274645, #21296827
2019-05-02 01:13:53 +02:00
|
|
|
return it->second;
|
2019-05-03 00:29:04 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Routine Description:
|
|
|
|
// - extract unicode codepoint from utf16 encoding
|
|
|
|
// Arguments:
|
|
|
|
// - glyph - the utf16 encoded codepoint convert
|
|
|
|
// Return Value:
|
|
|
|
// - the codepoint being stored
|
2019-08-16 19:54:17 +02:00
|
|
|
unsigned int CodepointWidthDetector::_extractCodepoint(const std::wstring_view glyph) noexcept
|
2019-05-03 00:29:04 +02:00
|
|
|
{
|
|
|
|
if (glyph.size() == 1)
|
|
|
|
{
|
|
|
|
return static_cast<unsigned int>(glyph.front());
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
const unsigned int mask = 0x3FF;
|
|
|
|
// leading bits, shifted over to make space for trailing bits
|
|
|
|
unsigned int codepoint = (glyph.at(0) & mask) << 10;
|
|
|
|
// trailing bits
|
|
|
|
codepoint |= (glyph.at(1) & mask);
|
|
|
|
// 0x10000 is subtracted from the codepoint to encode a surrogate pair, add it back
|
|
|
|
codepoint += 0x10000;
|
|
|
|
return codepoint;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Method Description:
|
|
|
|
// - Sets a function that should be used as the fallback mechanism for
|
|
|
|
// determining a particular glyph's width, should the glyph be an ambiguous
|
|
|
|
// width.
|
|
|
|
// A Terminal could hook in a Renderer's IsGlyphWideByFont method as the
|
|
|
|
// fallback to ask the renderer for the glyph's width (for example).
|
|
|
|
// Arguments:
|
|
|
|
// - pfnFallback - the function to use as the fallback method.
|
|
|
|
// Return Value:
|
|
|
|
// - <none>
|
|
|
|
void CodepointWidthDetector::SetFallbackMethod(std::function<bool(const std::wstring_view)> pfnFallback)
|
|
|
|
{
|
|
|
|
_pfnFallbackMethod = pfnFallback;
|
|
|
|
}
|
|
|
|
|
Merged PR 3215853: Fix spacing/layout for block characters and many retroactively-recategorized emoji (and more!)
This encompasses a handful of problems with column counting.
The Terminal project didn't set a fallback column counter. Oops. I've fixed this to use the `DxEngine` as the fallback.
The `DxEngine` didn't implement its fallback method. Oops. I've fixed this to use the `CustomTextLayout` to figure out the advances based on the same font and fallback pattern as the real final layout, just without "rounding" it into cells yet.
- `CustomTextLayout` has been updated to move the advance-correction into a separate phase from glyph shaping. Previously, we corrected the advances to nice round cell counts during shaping, which is fine for drawing, but hard for column count analysis.
- Now that there are separate phases, an `Analyze` method was added to the `CustomTextLayout` which just performs the text analysis steps and the glyph shaping, but no advance correction to column boundaries nor actual drawing.
I've taken the caching code that I was working on to improve chafa, and I've brought it into this. Now that we're doing a lot of fallback and heavy lifting in terms of analysis via the layout, we should cache the results until the font changes.
I've adjusted how column counting is done overall. It's always been in these phases:
1. We used a quick-lookup of ranges of characters we knew to rapidly decide `Narrow`, `Wide` or `Invalid` (a.k.a. "I dunno")
2. If it was `Invalid`, we consulted a table based off of the Unicode standard that has either `Narrow`, `Wide`, or `Ambiguous` as a result.
3. If it's still `Ambiguous`, we consult a render engine fallback (usually GDI or now DX) to see how many columns it would take.
4. If we still don't know, then it's `Wide` to be safe.
- I've added an additional flow here. The quick-lookup can now return `Ambiguous` off the bat for some glyph characters in the x2000-x3000 range that used to just be simple shapes but have been retroactively recategorized as emoji and are frequently now using full width color glyphs.
- This new state causes the lookup to go immediately to the render engine if it is available instead of consulting the Unicode standard table first because the half/fullwidth table doesn't appear to have been updated for this nuance to reclass these characters as ambiguous, but we'd like to keep that table as a "generated from the spec" sort of table and keep our exceptions in the "quick lookup" function.
I have confirmed the following things "just work" now:
- The windows logo flag from the demo. (⚫⚪💖✅🌌😊)
- The dotted chart on the side of crossterm demo (•)
- The powerline characters that make arrows with the Consolas patched font (██)
- An accented é
- The warning and checkmark symbols appearing same size as the X. (✔⚠🔥)
Related work items: #21167256, #21237515, #21243859, #21274645, #21296827
2019-05-02 01:13:53 +02:00
|
|
|
// Method Description:
|
|
|
|
// - Resets the internal ambiguous character width cache mechanism
|
|
|
|
// since it will be different when the font changes and we should
|
|
|
|
// re-query the new font for that information.
|
|
|
|
// Arguments:
|
|
|
|
// - <none>
|
|
|
|
// Return Value:
|
|
|
|
// - <none>
|
|
|
|
void CodepointWidthDetector::NotifyFontChanged() const noexcept
|
|
|
|
{
|
|
|
|
_fallbackCache.clear();
|
|
|
|
}
|