From 4f8acb4b9fbd61196d0a5287749a58abe0fbaa28 Mon Sep 17 00:00:00 2001 From: Chester Liu Date: Sat, 4 Apr 2020 08:56:22 +0800 Subject: [PATCH] Make CodepointWidthDetector::GetWidth faster (#3727) This is a subset of #3578 which I think is harmless and the first step towards making things right. References #3546 #3578 ## Detailed Description of the Pull Request / Additional comments For more robust Unicode support, `CodepointWidthDetector` should provide concrete width information rather than a simple boolean of `IsWide`. Currently only `IsWide` is widely used and optimized using quick lookup table and fallback cache. This PR moves those optimization into `GetWidth`. ## Validation Steps Performed API remains unchanged. Things are not broken. --- src/types/CodepointWidthDetector.cpp | 125 +++++++++++++---------- src/types/inc/CodepointWidthDetector.hpp | 3 +- 2 files changed, 72 insertions(+), 56 deletions(-) diff --git a/src/types/CodepointWidthDetector.cpp b/src/types/CodepointWidthDetector.cpp index eb29a8af4..3d9b58bce 100644 --- a/src/types/CodepointWidthDetector.cpp +++ b/src/types/CodepointWidthDetector.cpp @@ -319,29 +319,47 @@ CodepointWidthDetector::CodepointWidthDetector() noexcept : } // Routine Description: -// - returns the width type of codepoint by searching the map generated from the unicode spec +// - returns the width type of codepoint as fast as we can by using quick lookup table and fallback cache. // Arguments: // - glyph - the utf16 encoded codepoint to search for // Return Value: // - the width type of the codepoint CodepointWidth CodepointWidthDetector::GetWidth(const std::wstring_view glyph) const { - if (glyph.empty()) + THROW_HR_IF(E_INVALIDARG, glyph.empty()); + if (glyph.size() == 1) { - return CodepointWidth::Invalid; + // We first attempt to look at our custom quick lookup table of char width preferences. + const auto width = GetQuickCharWidth(glyph.front()); + + // If it's invalid, the quick width had no opinion, so go to the lookup table. + if (width == CodepointWidth::Invalid) + { + return _lookupGlyphWidthWithCache(glyph); + } + // If it's ambiguous, the quick width wanted us to ask the font directly, try that if we can. + // If not, go to the lookup table. + else if (width == CodepointWidth::Ambiguous) + { + if (_pfnFallbackMethod) + { + return _checkFallbackViaCache(glyph) ? CodepointWidth::Wide : CodepointWidth::Ambiguous; + } + else + { + return _lookupGlyphWidthWithCache(glyph); + } + } + // Otherwise, return Width as it is. + else + { + return width; + } } - - const auto codepoint = _extractCodepoint(glyph); - const auto it = std::lower_bound(s_wideAndAmbiguousTable.begin(), s_wideAndAmbiguousTable.end(), codepoint); - - // For characters that are not _in_ the table, lower_bound will return the nearest item that is. - // We must check its bounds to make sure that our hit was a true hit. - if (it != s_wideAndAmbiguousTable.end() && codepoint >= it->lowerBound && codepoint <= it->upperBound) + else { - return it->width; + return _lookupGlyphWidthWithCache(glyph); } - - return CodepointWidth::Narrow; } // Routine Description: @@ -369,74 +387,71 @@ bool CodepointWidthDetector::IsWide(const wchar_t wch) const noexcept // - true if codepoint is wide bool CodepointWidthDetector::IsWide(const std::wstring_view glyph) const { - THROW_HR_IF(E_INVALIDARG, glyph.empty()); - if (glyph.size() == 1) - { - // We first attempt to look at our custom quick lookup table of char width preferences. - const auto width = GetQuickCharWidth(glyph.front()); - - // If it's invalid, the quick width had no opinion, so go to the lookup table. - if (width == CodepointWidth::Invalid) - { - return _lookupIsWide(glyph); - } - // If it's ambiguous, the quick width wanted us to ask the font directly, try that if we can. - // If not, go to the lookup table. - else if (width == CodepointWidth::Ambiguous) - { - if (_pfnFallbackMethod) - { - return _checkFallbackViaCache(glyph); - } - else - { - return _lookupIsWide(glyph); - } - } - // Otherwise, return Wide as True and Narrow as False. - else - { - return width == CodepointWidth::Wide; - } - } - else - { - return _lookupIsWide(glyph); - } + return GetWidth(glyph) == CodepointWidth::Wide; } // Routine Description: -// - checks if codepoint is wide using fallback methods. +// - returns the width type of codepoint by searching the map generated from the unicode spec +// Arguments: +// - glyph - the utf16 encoded codepoint to search for +// Return Value: +// - the width type of the codepoint +CodepointWidth CodepointWidthDetector::_lookupGlyphWidth(const std::wstring_view glyph) const +{ + if (glyph.empty()) + { + return CodepointWidth::Invalid; + } + + const auto codepoint = _extractCodepoint(glyph); + const auto it = std::lower_bound(s_wideAndAmbiguousTable.begin(), s_wideAndAmbiguousTable.end(), codepoint); + + // For characters that are not _in_ the table, lower_bound will return the nearest item that is. + // We must check its bounds to make sure that our hit was a true hit. + if (it != s_wideAndAmbiguousTable.end() && codepoint >= it->lowerBound && codepoint <= it->upperBound) + { + return it->width; + } + + return CodepointWidth::Narrow; +} + +// Routine Description: +// - returns the width type of codepoint using fallback methods. // Arguments: // - glyph - the utf16 encoded codepoint to check width of // Return Value: -// - true if codepoint is wide or if it can't be confirmed to be narrow -bool CodepointWidthDetector::_lookupIsWide(const std::wstring_view glyph) const noexcept +// - the width type of the codepoint +CodepointWidth CodepointWidthDetector::_lookupGlyphWidthWithCache(const std::wstring_view glyph) const noexcept { try { // Use our generated table to try to lookup the width based on the Unicode standard. - const CodepointWidth width = GetWidth(glyph); + const CodepointWidth width = _lookupGlyphWidth(glyph); // If it's ambiguous, then ask the font if we can. if (width == CodepointWidth::Ambiguous) { if (_pfnFallbackMethod) { - return _checkFallbackViaCache(glyph); + return _checkFallbackViaCache(glyph) ? CodepointWidth::Wide : CodepointWidth::Ambiguous; + } + else + { + return CodepointWidth::Ambiguous; } } - // If it's not ambiguous, it should say wide or narrow. Turn that into True = Wide or False = Narrow. + // If it's not ambiguous, it should say wide or narrow. else { - return width == CodepointWidth::Wide; + return width; } } CATCH_LOG(); // If we got this far, we couldn't figure it out. // It's better to be too wide than too narrow. - return true; + return CodepointWidth::Wide; } // Routine Description: diff --git a/src/types/inc/CodepointWidthDetector.hpp b/src/types/inc/CodepointWidthDetector.hpp index 0dc7ac8b9..ae3a927e3 100644 --- a/src/types/inc/CodepointWidthDetector.hpp +++ b/src/types/inc/CodepointWidthDetector.hpp @@ -41,7 +41,8 @@ public: #endif private: - bool _lookupIsWide(const std::wstring_view glyph) const noexcept; + CodepointWidth _lookupGlyphWidth(const std::wstring_view glyph) const; + CodepointWidth _lookupGlyphWidthWithCache(const std::wstring_view glyph) const noexcept; bool _checkFallbackViaCache(const std::wstring_view glyph) const; static unsigned int _extractCodepoint(const std::wstring_view glyph) noexcept;