diff --git a/.github/actions/spell-check/expect/expect.txt b/.github/actions/spell-check/expect/expect.txt index bee163b1b..a3f0ed7ba 100644 --- a/.github/actions/spell-check/expect/expect.txt +++ b/.github/actions/spell-check/expect/expect.txt @@ -601,6 +601,7 @@ devops Dext df DFactory +DFF DFMT dh dhandler diff --git a/src/types/CodepointWidthDetector.cpp b/src/types/CodepointWidthDetector.cpp index 9eb637d00..de29ba196 100644 --- a/src/types/CodepointWidthDetector.cpp +++ b/src/types/CodepointWidthDetector.cpp @@ -19,39 +19,12 @@ namespace return range.upperBound < searchTerm; } - static constexpr std::array s_wideAndAmbiguousTable{ - // generated from http://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt - // anything not present here is presumed to be Narrow. - // - // GH #900 - Supplemented with emoji codepoints from https://www.unicode.org/Public/13.0.0/ucd/emoji/emoji-data.txt - // Emojis in 0x2010 - 0x2B59 used to be marked as Ambiguous in GetQuickCharWidth() in order to - // force a font lookup, but since we default all Ambiguous width to Narrow, those emojis always - // came out looking squished/tiny. They've been moved into this table and marked as Wide. - // - // === UCD Definitions === - // EA - EastAsianWidth - // Emoji - Emoji - // EPres - Emoji Presentation - // ======================= - // - // This table has been partially regenerated from the Unicode Character Database as of 13.0, with - // the following rules: - // Codepoints whose EA is "W", "F" are Wide - // Codepoints whose EA is "A" are Ambiguous - // Codepoints where Emoji=Y and EPres=Y are Emoji, therefore Wide - // - - // Codepoints where Emoji=Y but EPres=*N* are only Emoji when followed - // by U+FE0F variation selector 15. - // - // There are a couple of codepoints that Microsoft specifically gave an emoji representation - // even if it's not specified as an emoji in the standard. I'll list the ones I'm aware of in this comment in case - // we decide to add them in the future: - // 0x261A-0x261C, 0x261E-0x261F - // 0x2661, - // 0x2662, - // 0x2664, - // 0x2666 0x2710, - // 0x270E 0x2765 0x1f000 - 0x1f02b except 0x1f004 0x1f594 + // Generated by Generate-CodepointWidthsFromUCD.ps1 -Pack:True -Full:False -NoOverrides:False + // on 10/25/2020 7:32:04 AM (UTC) from Unicode 13.0.0. + // 321205 (0x4E6B5) codepoints covered. + // 240 (0xF0) codepoints overridden. + // Override path: .\src\types\unicode_width_overrides.xml + static constexpr std::array s_wideAndAmbiguousTable{ UnicodeRange{ 0xa1, 0xa1, CodepointWidth::Ambiguous }, UnicodeRange{ 0xa4, 0xa4, CodepointWidth::Ambiguous }, UnicodeRange{ 0xa7, 0xa8, CodepointWidth::Ambiguous }, @@ -176,16 +149,14 @@ namespace UnicodeRange{ 0x22a5, 0x22a5, CodepointWidth::Ambiguous }, UnicodeRange{ 0x22bf, 0x22bf, CodepointWidth::Ambiguous }, UnicodeRange{ 0x2312, 0x2312, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x231a, 0x231b, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x231a, 0x231b, CodepointWidth::Wide }, UnicodeRange{ 0x2329, 0x232a, CodepointWidth::Wide }, - UnicodeRange{ 0x23e9, 0x23ec, CodepointWidth::Wide }, // Emoji=Y EPres=Y - UnicodeRange{ 0x23f0, 0x23f0, CodepointWidth::Wide }, // Emoji=Y EPres=Y - UnicodeRange{ 0x23f3, 0x23f3, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x23e9, 0x23ec, CodepointWidth::Wide }, + UnicodeRange{ 0x23f0, 0x23f0, CodepointWidth::Wide }, + UnicodeRange{ 0x23f3, 0x23f3, CodepointWidth::Wide }, UnicodeRange{ 0x2460, 0x24e9, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x24eb, 0x254b, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2550, 0x2573, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2580, 0x258f, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2592, 0x2595, CodepointWidth::Ambiguous }, + UnicodeRange{ 0x24eb, 0x24ff, CodepointWidth::Ambiguous }, + UnicodeRange{ 0x2500, 0x259f, CodepointWidth::Narrow }, // box-drawing and block elements require 1-cell alignment UnicodeRange{ 0x25a0, 0x25a1, CodepointWidth::Ambiguous }, UnicodeRange{ 0x25a3, 0x25a9, CodepointWidth::Ambiguous }, UnicodeRange{ 0x25b2, 0x25b3, CodepointWidth::Ambiguous }, @@ -197,61 +168,61 @@ namespace UnicodeRange{ 0x25ce, 0x25d1, CodepointWidth::Ambiguous }, UnicodeRange{ 0x25e2, 0x25e5, CodepointWidth::Ambiguous }, UnicodeRange{ 0x25ef, 0x25ef, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x25fd, 0x25fe, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x25fd, 0x25fe, CodepointWidth::Wide }, UnicodeRange{ 0x2605, 0x2606, CodepointWidth::Ambiguous }, UnicodeRange{ 0x2609, 0x2609, CodepointWidth::Ambiguous }, UnicodeRange{ 0x260e, 0x260f, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2614, 0x2615, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x2614, 0x2615, CodepointWidth::Wide }, UnicodeRange{ 0x261c, 0x261c, CodepointWidth::Ambiguous }, UnicodeRange{ 0x261e, 0x261e, CodepointWidth::Ambiguous }, UnicodeRange{ 0x2640, 0x2640, CodepointWidth::Ambiguous }, UnicodeRange{ 0x2642, 0x2642, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2648, 0x2653, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x2648, 0x2653, CodepointWidth::Wide }, UnicodeRange{ 0x2660, 0x2661, CodepointWidth::Ambiguous }, UnicodeRange{ 0x2663, 0x2665, CodepointWidth::Ambiguous }, UnicodeRange{ 0x2667, 0x266a, CodepointWidth::Ambiguous }, UnicodeRange{ 0x266c, 0x266d, CodepointWidth::Ambiguous }, UnicodeRange{ 0x266f, 0x266f, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x267f, 0x267f, CodepointWidth::Wide }, // Emoji=Y EPres=Y - UnicodeRange{ 0x2693, 0x2693, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x267f, 0x267f, CodepointWidth::Wide }, + UnicodeRange{ 0x2693, 0x2693, CodepointWidth::Wide }, UnicodeRange{ 0x269e, 0x269f, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x26a1, 0x26a1, CodepointWidth::Wide }, // Emoji=Y EPres=Y - UnicodeRange{ 0x26aa, 0x26ab, CodepointWidth::Wide }, // Emoji=Y EPres=Y - UnicodeRange{ 0x26bd, 0x26be, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x26a1, 0x26a1, CodepointWidth::Wide }, + UnicodeRange{ 0x26aa, 0x26ab, CodepointWidth::Wide }, + UnicodeRange{ 0x26bd, 0x26be, CodepointWidth::Wide }, UnicodeRange{ 0x26bf, 0x26bf, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x26c4, 0x26c5, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x26c4, 0x26c5, CodepointWidth::Wide }, UnicodeRange{ 0x26c6, 0x26cd, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x26ce, 0x26ce, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x26ce, 0x26ce, CodepointWidth::Wide }, UnicodeRange{ 0x26cf, 0x26d3, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x26d4, 0x26d4, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x26d4, 0x26d4, CodepointWidth::Wide }, UnicodeRange{ 0x26d5, 0x26e1, CodepointWidth::Ambiguous }, UnicodeRange{ 0x26e3, 0x26e3, CodepointWidth::Ambiguous }, UnicodeRange{ 0x26e8, 0x26e9, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x26ea, 0x26ea, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x26ea, 0x26ea, CodepointWidth::Wide }, UnicodeRange{ 0x26eb, 0x26f1, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x26f2, 0x26f3, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x26f2, 0x26f3, CodepointWidth::Wide }, UnicodeRange{ 0x26f4, 0x26f4, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x26f5, 0x26f5, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x26f5, 0x26f5, CodepointWidth::Wide }, UnicodeRange{ 0x26f6, 0x26f9, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x26fa, 0x26fa, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x26fa, 0x26fa, CodepointWidth::Wide }, UnicodeRange{ 0x26fb, 0x26fc, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x26fd, 0x26fd, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x26fd, 0x26fd, CodepointWidth::Wide }, UnicodeRange{ 0x26fe, 0x26ff, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2705, 0x2705, CodepointWidth::Wide }, // Emoji=Y EPres=Y - UnicodeRange{ 0x270a, 0x270b, CodepointWidth::Wide }, // Emoji=Y EPres=Y - UnicodeRange{ 0x2728, 0x2728, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x2705, 0x2705, CodepointWidth::Wide }, + UnicodeRange{ 0x270a, 0x270b, CodepointWidth::Wide }, + UnicodeRange{ 0x2728, 0x2728, CodepointWidth::Wide }, UnicodeRange{ 0x273d, 0x273d, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x274c, 0x274c, CodepointWidth::Wide }, // Emoji=Y EPres=Y - UnicodeRange{ 0x274e, 0x274e, CodepointWidth::Wide }, // Emoji=Y EPres=Y - UnicodeRange{ 0x2753, 0x2755, CodepointWidth::Wide }, // Emoji=Y EPres=Y - UnicodeRange{ 0x2757, 0x2757, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x274c, 0x274c, CodepointWidth::Wide }, + UnicodeRange{ 0x274e, 0x274e, CodepointWidth::Wide }, + UnicodeRange{ 0x2753, 0x2755, CodepointWidth::Wide }, + UnicodeRange{ 0x2757, 0x2757, CodepointWidth::Wide }, UnicodeRange{ 0x2776, 0x277f, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2795, 0x2797, CodepointWidth::Wide }, // Emoji=Y EPres=Y - UnicodeRange{ 0x27b0, 0x27b0, CodepointWidth::Wide }, // Emoji=Y EPres=Y - UnicodeRange{ 0x27bf, 0x27bf, CodepointWidth::Wide }, // Emoji=Y EPres=Y - UnicodeRange{ 0x2b1b, 0x2b1c, CodepointWidth::Wide }, // Emoji=Y EPres=Y - UnicodeRange{ 0x2b50, 0x2b50, CodepointWidth::Wide }, // Emoji=Y EPres=Y - UnicodeRange{ 0x2b55, 0x2b55, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x2795, 0x2797, CodepointWidth::Wide }, + UnicodeRange{ 0x27b0, 0x27b0, CodepointWidth::Wide }, + UnicodeRange{ 0x27bf, 0x27bf, CodepointWidth::Wide }, + UnicodeRange{ 0x2b1b, 0x2b1c, CodepointWidth::Wide }, + UnicodeRange{ 0x2b50, 0x2b50, CodepointWidth::Wide }, + UnicodeRange{ 0x2b55, 0x2b55, CodepointWidth::Wide }, UnicodeRange{ 0x2b56, 0x2b59, CodepointWidth::Ambiguous }, UnicodeRange{ 0x2e80, 0x2e99, CodepointWidth::Wide }, UnicodeRange{ 0x2e9b, 0x2ef3, CodepointWidth::Wide }, @@ -260,15 +231,14 @@ namespace UnicodeRange{ 0x3000, 0x303e, CodepointWidth::Wide }, UnicodeRange{ 0x3041, 0x3096, CodepointWidth::Wide }, UnicodeRange{ 0x3099, 0x30ff, CodepointWidth::Wide }, - UnicodeRange{ 0x3105, 0x312e, CodepointWidth::Wide }, + UnicodeRange{ 0x3105, 0x312f, CodepointWidth::Wide }, UnicodeRange{ 0x3131, 0x318e, CodepointWidth::Wide }, - UnicodeRange{ 0x3190, 0x31ba, CodepointWidth::Wide }, - UnicodeRange{ 0x31c0, 0x31e3, CodepointWidth::Wide }, + UnicodeRange{ 0x3190, 0x31e3, CodepointWidth::Wide }, UnicodeRange{ 0x31f0, 0x321e, CodepointWidth::Wide }, UnicodeRange{ 0x3220, 0x3247, CodepointWidth::Wide }, UnicodeRange{ 0x3248, 0x324f, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x3250, 0x32fe, CodepointWidth::Wide }, - UnicodeRange{ 0x3300, 0x4dbf, CodepointWidth::Wide }, + UnicodeRange{ 0x3250, 0x4dbf, CodepointWidth::Wide }, + UnicodeRange{ 0x4dc0, 0x4dff, CodepointWidth::Narrow }, // hexagrams are historically narrow UnicodeRange{ 0x4e00, 0xa48c, CodepointWidth::Wide }, UnicodeRange{ 0xa490, 0xa4c6, CodepointWidth::Wide }, UnicodeRange{ 0xa960, 0xa97c, CodepointWidth::Wide }, @@ -277,75 +247,79 @@ namespace UnicodeRange{ 0xf900, 0xfaff, CodepointWidth::Wide }, UnicodeRange{ 0xfe00, 0xfe0f, CodepointWidth::Ambiguous }, UnicodeRange{ 0xfe10, 0xfe19, CodepointWidth::Wide }, + UnicodeRange{ 0xfe20, 0xfe2f, CodepointWidth::Narrow }, // narrow combining ligatures (split into left/right halves, which take 2 columns together) UnicodeRange{ 0xfe30, 0xfe52, CodepointWidth::Wide }, UnicodeRange{ 0xfe54, 0xfe66, CodepointWidth::Wide }, UnicodeRange{ 0xfe68, 0xfe6b, CodepointWidth::Wide }, UnicodeRange{ 0xff01, 0xff60, CodepointWidth::Wide }, UnicodeRange{ 0xffe0, 0xffe6, CodepointWidth::Wide }, UnicodeRange{ 0xfffd, 0xfffd, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x16fe0, 0x16fe1, CodepointWidth::Wide }, - UnicodeRange{ 0x17000, 0x187ec, CodepointWidth::Wide }, - UnicodeRange{ 0x18800, 0x18af2, CodepointWidth::Wide }, + UnicodeRange{ 0x16fe0, 0x16fe4, CodepointWidth::Wide }, + UnicodeRange{ 0x16ff0, 0x16ff1, CodepointWidth::Wide }, + UnicodeRange{ 0x17000, 0x187f7, CodepointWidth::Wide }, + UnicodeRange{ 0x18800, 0x18cd5, CodepointWidth::Wide }, + UnicodeRange{ 0x18d00, 0x18d08, CodepointWidth::Wide }, UnicodeRange{ 0x1b000, 0x1b11e, CodepointWidth::Wide }, + UnicodeRange{ 0x1b150, 0x1b152, CodepointWidth::Wide }, + UnicodeRange{ 0x1b164, 0x1b167, CodepointWidth::Wide }, UnicodeRange{ 0x1b170, 0x1b2fb, CodepointWidth::Wide }, - UnicodeRange{ 0x1f004, 0x1f004, CodepointWidth::Wide }, // Emoji=Y EPres=Y - UnicodeRange{ 0x1f0cf, 0x1f0cf, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x1f004, 0x1f004, CodepointWidth::Wide }, + UnicodeRange{ 0x1f0cf, 0x1f0cf, CodepointWidth::Wide }, UnicodeRange{ 0x1f100, 0x1f10a, CodepointWidth::Ambiguous }, UnicodeRange{ 0x1f110, 0x1f12d, CodepointWidth::Ambiguous }, UnicodeRange{ 0x1f130, 0x1f169, CodepointWidth::Ambiguous }, UnicodeRange{ 0x1f170, 0x1f18d, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x1f18e, 0x1f18e, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x1f18e, 0x1f18e, CodepointWidth::Wide }, UnicodeRange{ 0x1f18f, 0x1f190, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x1f191, 0x1f19a, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x1f191, 0x1f19a, CodepointWidth::Wide }, UnicodeRange{ 0x1f19b, 0x1f1ac, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x1f1e6, 0x1f1ff, CodepointWidth::Wide }, // Emoji=Y EPres=Y - UnicodeRange{ 0x1f200, 0x1f202, CodepointWidth::Wide }, + UnicodeRange{ 0x1f1e6, 0x1f202, CodepointWidth::Wide }, UnicodeRange{ 0x1f210, 0x1f23b, CodepointWidth::Wide }, UnicodeRange{ 0x1f240, 0x1f248, CodepointWidth::Wide }, - UnicodeRange{ 0x1f250, 0x1f251, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x1f250, 0x1f251, CodepointWidth::Wide }, UnicodeRange{ 0x1f260, 0x1f265, CodepointWidth::Wide }, - UnicodeRange{ 0x1f300, 0x1f320, CodepointWidth::Wide }, // Emoji=Y EPres=Y - UnicodeRange{ 0x1f32d, 0x1f335, CodepointWidth::Wide }, // Emoji=Y EPres=Y - UnicodeRange{ 0x1f337, 0x1f37c, CodepointWidth::Wide }, // Emoji=Y EPres=Y - UnicodeRange{ 0x1f37e, 0x1f393, CodepointWidth::Wide }, // Emoji=Y EPres=Y - UnicodeRange{ 0x1f3a0, 0x1f3ca, CodepointWidth::Wide }, // Emoji=Y EPres=Y - UnicodeRange{ 0x1f3cf, 0x1f3d3, CodepointWidth::Wide }, // Emoji=Y EPres=Y - UnicodeRange{ 0x1f3e0, 0x1f3f0, CodepointWidth::Wide }, // Emoji=Y EPres=Y - UnicodeRange{ 0x1f3f4, 0x1f3f4, CodepointWidth::Wide }, // Emoji=Y EPres=Y - UnicodeRange{ 0x1f3f8, 0x1f43e, CodepointWidth::Wide }, // Emoji=Y EPres=Y - UnicodeRange{ 0x1f440, 0x1f440, CodepointWidth::Wide }, // Emoji=Y EPres=Y - UnicodeRange{ 0x1f442, 0x1f4fc, CodepointWidth::Wide }, // Emoji=Y EPres=Y - UnicodeRange{ 0x1f4ff, 0x1f53d, CodepointWidth::Wide }, // Emoji=Y EPres=Y - UnicodeRange{ 0x1f54b, 0x1f54e, CodepointWidth::Wide }, // Emoji=Y EPres=Y - UnicodeRange{ 0x1f550, 0x1f567, CodepointWidth::Wide }, // Emoji=Y EPres=Y - UnicodeRange{ 0x1f57a, 0x1f57a, CodepointWidth::Wide }, // Emoji=Y EPres=Y - UnicodeRange{ 0x1f595, 0x1f596, CodepointWidth::Wide }, // Emoji=Y EPres=Y - UnicodeRange{ 0x1f5a4, 0x1f5a4, CodepointWidth::Wide }, // Emoji=Y EPres=Y - UnicodeRange{ 0x1f5fb, 0x1f64f, CodepointWidth::Wide }, // Emoji=Y EPres=Y - UnicodeRange{ 0x1f680, 0x1f6c5, CodepointWidth::Wide }, // Emoji=Y EPres=Y - UnicodeRange{ 0x1f6cc, 0x1f6cc, CodepointWidth::Wide }, // Emoji=Y EPres=Y - UnicodeRange{ 0x1f6d0, 0x1f6d2, CodepointWidth::Wide }, // Emoji=Y EPres=Y - UnicodeRange{ 0x1f6d5, 0x1f6d7, CodepointWidth::Wide }, // Emoji=Y EPres=Y - UnicodeRange{ 0x1f6eb, 0x1f6ec, CodepointWidth::Wide }, // Emoji=Y EPres=Y - UnicodeRange{ 0x1f6f4, 0x1f6fc, CodepointWidth::Wide }, // Emoji=Y EPres=Y - UnicodeRange{ 0x1f7e0, 0x1f7eb, CodepointWidth::Wide }, // Emoji=Y EPres=Y - UnicodeRange{ 0x1f90c, 0x1f93a, CodepointWidth::Wide }, // Emoji=Y EPres=Y - UnicodeRange{ 0x1f93c, 0x1f945, CodepointWidth::Wide }, // Emoji=Y EPres=Y - UnicodeRange{ 0x1f947, 0x1f978, CodepointWidth::Wide }, // Emoji=Y EPres=Y - UnicodeRange{ 0x1f97a, 0x1f9cb, CodepointWidth::Wide }, // Emoji=Y EPres=Y - UnicodeRange{ 0x1f9cd, 0x1f9ff, CodepointWidth::Wide }, // Emoji=Y EPres=Y - UnicodeRange{ 0x1fa70, 0x1fa74, CodepointWidth::Wide }, // Emoji=Y EPres=Y - UnicodeRange{ 0x1fa78, 0x1fa7a, CodepointWidth::Wide }, // Emoji=Y EPres=Y - UnicodeRange{ 0x1fa80, 0x1fa86, CodepointWidth::Wide }, // Emoji=Y EPres=Y - UnicodeRange{ 0x1fa90, 0x1faa8, CodepointWidth::Wide }, // Emoji=Y EPres=Y - UnicodeRange{ 0x1fab0, 0x1fab6, CodepointWidth::Wide }, // Emoji=Y EPres=Y - UnicodeRange{ 0x1fac0, 0x1fac2, CodepointWidth::Wide }, // Emoji=Y EPres=Y - UnicodeRange{ 0x1fad0, 0x1fad6, CodepointWidth::Wide }, // Emoji=Y EPres=Y + UnicodeRange{ 0x1f300, 0x1f320, CodepointWidth::Wide }, + UnicodeRange{ 0x1f32d, 0x1f335, CodepointWidth::Wide }, + UnicodeRange{ 0x1f337, 0x1f37c, CodepointWidth::Wide }, + UnicodeRange{ 0x1f37e, 0x1f393, CodepointWidth::Wide }, + UnicodeRange{ 0x1f3a0, 0x1f3ca, CodepointWidth::Wide }, + UnicodeRange{ 0x1f3cf, 0x1f3d3, CodepointWidth::Wide }, + UnicodeRange{ 0x1f3e0, 0x1f3f0, CodepointWidth::Wide }, + UnicodeRange{ 0x1f3f4, 0x1f3f4, CodepointWidth::Wide }, + UnicodeRange{ 0x1f3f8, 0x1f43e, CodepointWidth::Wide }, + UnicodeRange{ 0x1f440, 0x1f440, CodepointWidth::Wide }, + UnicodeRange{ 0x1f442, 0x1f4fc, CodepointWidth::Wide }, + UnicodeRange{ 0x1f4ff, 0x1f53d, CodepointWidth::Wide }, + UnicodeRange{ 0x1f54b, 0x1f54e, CodepointWidth::Wide }, + UnicodeRange{ 0x1f550, 0x1f567, CodepointWidth::Wide }, + UnicodeRange{ 0x1f57a, 0x1f57a, CodepointWidth::Wide }, + UnicodeRange{ 0x1f595, 0x1f596, CodepointWidth::Wide }, + UnicodeRange{ 0x1f5a4, 0x1f5a4, CodepointWidth::Wide }, + UnicodeRange{ 0x1f5fb, 0x1f64f, CodepointWidth::Wide }, + UnicodeRange{ 0x1f680, 0x1f6c5, CodepointWidth::Wide }, + UnicodeRange{ 0x1f6cc, 0x1f6cc, CodepointWidth::Wide }, + UnicodeRange{ 0x1f6d0, 0x1f6d2, CodepointWidth::Wide }, + UnicodeRange{ 0x1f6d5, 0x1f6d7, CodepointWidth::Wide }, + UnicodeRange{ 0x1f6eb, 0x1f6ec, CodepointWidth::Wide }, + UnicodeRange{ 0x1f6f4, 0x1f6fc, CodepointWidth::Wide }, + UnicodeRange{ 0x1f7e0, 0x1f7eb, CodepointWidth::Wide }, + UnicodeRange{ 0x1f90c, 0x1f93a, CodepointWidth::Wide }, + UnicodeRange{ 0x1f93c, 0x1f945, CodepointWidth::Wide }, + UnicodeRange{ 0x1f947, 0x1f978, CodepointWidth::Wide }, + UnicodeRange{ 0x1f97a, 0x1f9cb, CodepointWidth::Wide }, + UnicodeRange{ 0x1f9cd, 0x1f9ff, CodepointWidth::Wide }, + UnicodeRange{ 0x1fa70, 0x1fa74, CodepointWidth::Wide }, + UnicodeRange{ 0x1fa78, 0x1fa7a, CodepointWidth::Wide }, + UnicodeRange{ 0x1fa80, 0x1fa86, CodepointWidth::Wide }, + UnicodeRange{ 0x1fa90, 0x1faa8, CodepointWidth::Wide }, + UnicodeRange{ 0x1fab0, 0x1fab6, CodepointWidth::Wide }, + UnicodeRange{ 0x1fac0, 0x1fac2, CodepointWidth::Wide }, + UnicodeRange{ 0x1fad0, 0x1fad6, CodepointWidth::Wide }, UnicodeRange{ 0x20000, 0x2fffd, CodepointWidth::Wide }, UnicodeRange{ 0x30000, 0x3fffd, CodepointWidth::Wide }, UnicodeRange{ 0xe0100, 0xe01ef, CodepointWidth::Ambiguous }, UnicodeRange{ 0xf0000, 0xffffd, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x100000, 0x10fffd, CodepointWidth::Ambiguous } + UnicodeRange{ 0x100000, 0x10fffd, CodepointWidth::Ambiguous }, }; } diff --git a/src/types/convert.cpp b/src/types/convert.cpp index dc7b71908..3e9d8e149 100644 --- a/src/types/convert.cpp +++ b/src/types/convert.cpp @@ -354,179 +354,16 @@ std::deque> SynthesizeNumpadEvents(const wchar_t wch, // May-01-2019 MiNiksa Forced lookup-via-renderer for retroactively recategorized emoji // that used to be narrow but now might be wide. (approx x2194-x2b55, not inclusive) // Also forced block characters segment (x2580-x259F) to narrow +// Oct-25-2020 DuHowett Replaced the entire table with a set of overrides that get built into +// CodepointWidthDetector (unicode_width_overrides.xml) CodepointWidth GetQuickCharWidth(const wchar_t wch) noexcept { - // 0x00-0x1F is ambiguous by font if (0x20 <= wch && wch <= 0x7e) { /* ASCII */ return CodepointWidth::Narrow; } - // 0x80 - 0x0451 varies from narrow to ambiguous by character and font (Unicode 9.0) - else if (0x0452 <= wch && wch <= 0x10FF) - { - // From Unicode 9.0, this range is narrow (assorted languages) - return CodepointWidth::Narrow; - } - else if (0x1100 <= wch && wch <= 0x115F) - { - // From Unicode 9.0, Hangul Choseong is wide - return CodepointWidth::Wide; - } - else if (0x1160 <= wch && wch <= 0x200F) - { - // From Unicode 9.0, this range is narrow (assorted languages) - return CodepointWidth::Narrow; - } - // 0x2500 - 0x257F is the box drawing character range - - // Technically, these are ambiguous width characters, but applications that - // use them generally assume that they're narrow to ensure proper alignment. - else if (0x2500 <= wch && wch <= 0x257F) - { - return CodepointWidth::Narrow; - } - // 0x2580 - 0x259F is the block element characters. - // Technically these are ambiguous width, but many many things assume they're narrow. - else if (0x2580 <= wch && wch <= 0x259F) - { - return CodepointWidth::Narrow; - } - else if (0x2B5A <= wch && wch <= 0x2E44) - { - // From Unicode 9.0, this range is narrow (assorted languages) - return CodepointWidth::Narrow; - } - else if (0x2E80 <= wch && wch <= 0x303e) - { - // From Unicode 9.0, this range is wide (assorted languages) - return CodepointWidth::Wide; - } - else if (0x3041 <= wch && wch <= 0x3094) - { - /* Hiragana */ - return CodepointWidth::Wide; - } - else if (0x30a1 <= wch && wch <= 0x30f6) - { - /* Katakana */ - return CodepointWidth::Wide; - } - else if (0x3105 <= wch && wch <= 0x312c) - { - /* Bopomofo */ - return CodepointWidth::Wide; - } - else if (0x3131 <= wch && wch <= 0x318e) - { - /* Hangul Elements */ - return CodepointWidth::Wide; - } - else if (0x3190 <= wch && wch <= 0x3247) - { - // From Unicode 9.0, this range is wide - return CodepointWidth::Wide; - } - else if (0x3251 <= wch && wch <= 0xA4C6) - { - // This exception range is narrow width hexagrams. - if (0x4DC0 <= wch && wch <= 0x4DFF) - { - return CodepointWidth::Narrow; - } - else - { - // From Unicode 9.0, this range is wide - // CJK Unified Ideograph and Yi and Reserved. - // Includes Han Ideographic range. - return CodepointWidth::Wide; - } - } - else if (0xA4D0 <= wch && wch <= 0xABF9) - { - // This exception range is wide Hangul Choseong - if (0xA960 <= wch && wch <= 0xA97C) - { - return CodepointWidth::Wide; - } - else - { - // From Unicode 9.0, this range is narrow (assorted languages) - return CodepointWidth::Narrow; - } - } - else if (0xac00 <= wch && wch <= 0xd7a3) - { - /* Korean Hangul Syllables */ - return CodepointWidth::Wide; - } - else if (0xD7B0 <= wch && wch <= 0xD7FB) - { - // From Unicode 9.0, this range is narrow - // Hangul Jungseong and Hangul Jongseong - return CodepointWidth::Narrow; - } - // 0xD800-0xDFFF is reserved for UTF-16 surrogate pairs. - // 0xE000-0xF8FF is reserved for private use characters and is therefore always ambiguous. - else if (0xF900 <= wch && wch <= 0xFAFF) - { - // From Unicode 9.0, this range is wide - // CJK Compatibility Ideographs - // Includes Han Compatibility Ideographs - return CodepointWidth::Wide; - } - else if (0xFB00 <= wch && wch <= 0xFDFD) - { - // From Unicode 9.0, this range is narrow (assorted languages) - return CodepointWidth::Narrow; - } - else if (0xFE10 <= wch && wch <= 0xFE6B) - { - // This exception range has narrow combining ligatures - if (0xFE20 <= wch && wch <= 0xFE2F) - { - return CodepointWidth::Narrow; - } - else - { - // From Unicode 9.0, this range is wide - // Presentation forms - return CodepointWidth::Wide; - } - } - else if (0xFE70 <= wch && wch <= 0xFEFF) - { - // From Unicode 9.0, this range is narrow - return CodepointWidth::Narrow; - } - else if (0xff01 <= wch && wch <= 0xff5e) - { - /* Fullwidth ASCII variants */ - return CodepointWidth::Wide; - } - else if (0xff61 <= wch && wch <= 0xff9f) - { - /* Halfwidth Katakana variants */ - return CodepointWidth::Narrow; - } - else if ((0xffa0 <= wch && wch <= 0xffbe) || - (0xffc2 <= wch && wch <= 0xffc7) || - (0xffca <= wch && wch <= 0xffcf) || - (0xffd2 <= wch && wch <= 0xffd7) || - (0xffda <= wch && wch <= 0xffdc)) - { - /* Halfwidth Hangul variants */ - return CodepointWidth::Narrow; - } - else if (0xffe0 <= wch && wch <= 0xffe6) - { - /* Fullwidth symbol variants */ - return CodepointWidth::Wide; - } - // Currently we do not support codepoints above 0xffff - else - { - return CodepointWidth::Invalid; - } + return CodepointWidth::Invalid; } wchar_t Utf16ToUcs2(const std::wstring_view charData) diff --git a/src/types/unicode_width_overrides.xml b/src/types/unicode_width_overrides.xml new file mode 100644 index 000000000..142038869 --- /dev/null +++ b/src/types/unicode_width_overrides.xml @@ -0,0 +1,9 @@ + + + + + + + + + diff --git a/tools/Generate-CodepointWidthsFromUCD.ps1 b/tools/Generate-CodepointWidthsFromUCD.ps1 index 788e53892..d1eb56ea6 100644 --- a/tools/Generate-CodepointWidthsFromUCD.ps1 +++ b/tools/Generate-CodepointWidthsFromUCD.ps1 @@ -133,6 +133,11 @@ Class UnicodeRange : System.IComparable { Return $false } + # Comments are different: do not merge + If ($this.Comment -ne $Other.Comment) { + Return $false + } + # Flags are different: do not merge If ($this.Flags -ne $Other.Flags) { Return $false @@ -261,6 +266,7 @@ If (-not $NoOverrides) { " // {0} (0x{0:X}) codepoints covered." -f $c If (-not $NoOverrides) { " // {0} (0x{0:X}) codepoints overridden." -f $overrideCount +" // Override path: {0}" -f $OverridePath } " static constexpr std::array s_wideAndAmbiguousTable{{" -f $ranges.Count ForEach($_ in $ranges) {