Make CodepointWidthDetector::GetWidth faster (#3727)

This is a subset of #3578 which I think is harmless and the first step towards making things right.
References #3546 #3578 

## Detailed Description of the Pull Request / Additional comments

For more robust Unicode support, `CodepointWidthDetector` should provide concrete width information rather than a simple boolean of `IsWide`. Currently only `IsWide` is widely used and optimized using quick lookup table and fallback cache. This PR moves those optimization into `GetWidth`.

## Validation Steps Performed

API remains unchanged. Things are not broken.
This commit is contained in:
Chester Liu 2020-04-04 08:56:22 +08:00 committed by GitHub
parent 94215535cd
commit 4f8acb4b9f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 72 additions and 56 deletions

View file

@ -319,29 +319,47 @@ CodepointWidthDetector::CodepointWidthDetector() noexcept :
}
// Routine Description:
// - returns the width type of codepoint by searching the map generated from the unicode spec
// - returns the width type of codepoint as fast as we can by using quick lookup table and fallback cache.
// Arguments:
// - glyph - the utf16 encoded codepoint to search for
// Return Value:
// - the width type of the codepoint
CodepointWidth CodepointWidthDetector::GetWidth(const std::wstring_view glyph) const
{
if (glyph.empty())
THROW_HR_IF(E_INVALIDARG, glyph.empty());
if (glyph.size() == 1)
{
return CodepointWidth::Invalid;
// We first attempt to look at our custom quick lookup table of char width preferences.
const auto width = GetQuickCharWidth(glyph.front());
// If it's invalid, the quick width had no opinion, so go to the lookup table.
if (width == CodepointWidth::Invalid)
{
return _lookupGlyphWidthWithCache(glyph);
}
// If it's ambiguous, the quick width wanted us to ask the font directly, try that if we can.
// If not, go to the lookup table.
else if (width == CodepointWidth::Ambiguous)
{
if (_pfnFallbackMethod)
{
return _checkFallbackViaCache(glyph) ? CodepointWidth::Wide : CodepointWidth::Ambiguous;
}
else
{
return _lookupGlyphWidthWithCache(glyph);
}
}
// Otherwise, return Width as it is.
else
{
return width;
}
}
const auto codepoint = _extractCodepoint(glyph);
const auto it = std::lower_bound(s_wideAndAmbiguousTable.begin(), s_wideAndAmbiguousTable.end(), codepoint);
// For characters that are not _in_ the table, lower_bound will return the nearest item that is.
// We must check its bounds to make sure that our hit was a true hit.
if (it != s_wideAndAmbiguousTable.end() && codepoint >= it->lowerBound && codepoint <= it->upperBound)
else
{
return it->width;
return _lookupGlyphWidthWithCache(glyph);
}
return CodepointWidth::Narrow;
}
// Routine Description:
@ -369,74 +387,71 @@ bool CodepointWidthDetector::IsWide(const wchar_t wch) const noexcept
// - true if codepoint is wide
bool CodepointWidthDetector::IsWide(const std::wstring_view glyph) const
{
THROW_HR_IF(E_INVALIDARG, glyph.empty());
if (glyph.size() == 1)
{
// We first attempt to look at our custom quick lookup table of char width preferences.
const auto width = GetQuickCharWidth(glyph.front());
// If it's invalid, the quick width had no opinion, so go to the lookup table.
if (width == CodepointWidth::Invalid)
{
return _lookupIsWide(glyph);
}
// If it's ambiguous, the quick width wanted us to ask the font directly, try that if we can.
// If not, go to the lookup table.
else if (width == CodepointWidth::Ambiguous)
{
if (_pfnFallbackMethod)
{
return _checkFallbackViaCache(glyph);
}
else
{
return _lookupIsWide(glyph);
}
}
// Otherwise, return Wide as True and Narrow as False.
else
{
return width == CodepointWidth::Wide;
}
}
else
{
return _lookupIsWide(glyph);
}
return GetWidth(glyph) == CodepointWidth::Wide;
}
// Routine Description:
// - checks if codepoint is wide using fallback methods.
// - returns the width type of codepoint by searching the map generated from the unicode spec
// Arguments:
// - glyph - the utf16 encoded codepoint to search for
// Return Value:
// - the width type of the codepoint
CodepointWidth CodepointWidthDetector::_lookupGlyphWidth(const std::wstring_view glyph) const
{
if (glyph.empty())
{
return CodepointWidth::Invalid;
}
const auto codepoint = _extractCodepoint(glyph);
const auto it = std::lower_bound(s_wideAndAmbiguousTable.begin(), s_wideAndAmbiguousTable.end(), codepoint);
// For characters that are not _in_ the table, lower_bound will return the nearest item that is.
// We must check its bounds to make sure that our hit was a true hit.
if (it != s_wideAndAmbiguousTable.end() && codepoint >= it->lowerBound && codepoint <= it->upperBound)
{
return it->width;
}
return CodepointWidth::Narrow;
}
// Routine Description:
// - returns the width type of codepoint using fallback methods.
// Arguments:
// - glyph - the utf16 encoded codepoint to check width of
// Return Value:
// - true if codepoint is wide or if it can't be confirmed to be narrow
bool CodepointWidthDetector::_lookupIsWide(const std::wstring_view glyph) const noexcept
// - the width type of the codepoint
CodepointWidth CodepointWidthDetector::_lookupGlyphWidthWithCache(const std::wstring_view glyph) const noexcept
{
try
{
// Use our generated table to try to lookup the width based on the Unicode standard.
const CodepointWidth width = GetWidth(glyph);
const CodepointWidth width = _lookupGlyphWidth(glyph);
// If it's ambiguous, then ask the font if we can.
if (width == CodepointWidth::Ambiguous)
{
if (_pfnFallbackMethod)
{
return _checkFallbackViaCache(glyph);
return _checkFallbackViaCache(glyph) ? CodepointWidth::Wide : CodepointWidth::Ambiguous;
}
else
{
return CodepointWidth::Ambiguous;
}
}
// If it's not ambiguous, it should say wide or narrow. Turn that into True = Wide or False = Narrow.
// If it's not ambiguous, it should say wide or narrow.
else
{
return width == CodepointWidth::Wide;
return width;
}
}
CATCH_LOG();
// If we got this far, we couldn't figure it out.
// It's better to be too wide than too narrow.
return true;
return CodepointWidth::Wide;
}
// Routine Description:

View file

@ -41,7 +41,8 @@ public:
#endif
private:
bool _lookupIsWide(const std::wstring_view glyph) const noexcept;
CodepointWidth _lookupGlyphWidth(const std::wstring_view glyph) const;
CodepointWidth _lookupGlyphWidthWithCache(const std::wstring_view glyph) const noexcept;
bool _checkFallbackViaCache(const std::wstring_view glyph) const;
static unsigned int _extractCodepoint(const std::wstring_view glyph) noexcept;