Replace CodepointWidthDetector's runtime table with a static one (#2368)

This commit replaces CodepointWidthDetector's
dynamically-generated map with a static constexpr one that's compiled
into the binary.

It also almost totally removes the notion of an `Invalid` width. We
definitely had gaps in our character coverage where we'd report a
character as invalid, but we'd then flatten that down to `Narrow` when
asked. By combining the not-present state and the narrow state, we get
to save a significant chunk of data.

I've tested this by feeding it every codepoint from 0 through 0x10FFFF
(and then some) and making sure the results 100% match the old code's outputs.

|------------------------------|---------------|----------------|
| Metric                       | Then          | Now            |
|------------------------------|---------------|----------------|
| disk space                   | 56k (`.text`) | 3k (`.rdata`)  |
| runtime memory (allocations) | 1088          | 0              |
| runtime memory (bytes)       | 51k           | ~0             |
| memory behavior              | not shared    | fully shared   |
| lookup time                  | ~31ns         | ~9ns           |
| first hit penalty            | ~170000ns     | 0ns            |
| lines of code                | 1088          | 285            |
| clarity                      | extreme       | slightly worse |
|------------------------------|---------------|----------------|

I also took a moment and cleaned up a stray boolean that we didn't need.
This commit is contained in:
Dustin L. Howett (MSFT) 2019-08-16 10:54:17 -07:00 committed by GitHub
parent becdd16008
commit 16e1e29a12
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 318 additions and 1207 deletions

View file

@ -32,32 +32,12 @@ class CodepointWidthDetectorTests
{
TEST_CLASS(CodepointWidthDetectorTests);
// Checks when the detector's internal _map gets filled in: it must be empty
// right after construction, still empty after an IsWide query for
// UNICODE_SPACE, and non-empty once GetWidth has been called for `emoji`.
// NOTE(review): `emoji` and UNICODE_SPACE are defined outside this view.
TEST_METHOD(CodepointWidthDetectDefersMapPopulation)
{
CodepointWidthDetector widthDetector;
// freshly constructed: nothing has been loaded into the map yet
VERIFY_IS_TRUE(widthDetector._map.empty());
// this query is expected to leave the map untouched
widthDetector.IsWide(UNICODE_SPACE);
VERIFY_IS_TRUE(widthDetector._map.empty());
// now force checking
widthDetector.GetWidth(emoji);
VERIFY_IS_FALSE(widthDetector._map.empty());
}
// Verifies that a default-constructed detector reports `emoji` as wide.
// NOTE(review): `emoji` is defined outside this view.
TEST_METHOD(CanLookUpEmoji)
{
CodepointWidthDetector widthDetector;
VERIFY_IS_TRUE(widthDetector.IsWide(emoji));
}
// Exercises UnicodeRangeCompare on two search terms. Both operands are built
// with the single-argument UnicodeRange constructor, so neither is a bounds
// range and the comparison reduces to SearchTerm() < SearchTerm().
TEST_METHOD(TestUnicodeRangeCompare)
{
CodepointWidthDetector::UnicodeRangeCompare compare;
// test comparing 2 search terms
CodepointWidthDetector::UnicodeRange a{ 0x10 };
CodepointWidthDetector::UnicodeRange b{ 0x15 };
// 0x10 < 0x15, so a must order before b
VERIFY_IS_TRUE(static_cast<bool>(compare(a, b)));
}
TEST_METHOD(CanExtractCodepoint)
{
CodepointWidthDetector widthDetector;

File diff suppressed because it is too large Load diff

View file

@ -22,79 +22,6 @@ static_assert(sizeof(unsigned int) == sizeof(wchar_t) * 2,
// use to measure the width of a codepoint
class CodepointWidthDetector final
{
protected:
// Key type for CodepointWidthDetector's internal map. An instance is either a
// stored range (built from a lower and an upper bound) or a single-codepoint
// search term (built from one value). _isBounds records which kind it is, and
// every accessor fail-fasts when called on the wrong kind.
class UnicodeRange final
{
private:
    unsigned int _lowerBound;
    unsigned int _upperBound;
    bool _isBounds;

public:
    // Creates a range entry spanning lowerBound..upperBound.
    UnicodeRange(const unsigned int lowerBound, const unsigned int upperBound) :
        _lowerBound(lowerBound),
        _upperBound(upperBound),
        _isBounds(true)
    {
    }

    // Creates a search term; both bounds collapse onto the searched value.
    UnicodeRange(const unsigned int searchTerm) :
        _lowerBound(searchTerm),
        _upperBound(searchTerm),
        _isBounds(false)
    {
    }

    // True for a range entry, false for a search term.
    bool IsBounds() const noexcept
    {
        return _isBounds;
    }

    // Lower end of a range entry. Fail-fasts on a search term.
    unsigned int LowerBound() const
    {
        FAIL_FAST_IF(!_isBounds);
        return _lowerBound;
    }

    // Upper end of a range entry. Fail-fasts on a search term.
    unsigned int UpperBound() const
    {
        FAIL_FAST_IF(!_isBounds);
        return _upperBound;
    }

    // The searched value. Fail-fasts on a range entry.
    unsigned int SearchTerm() const
    {
        FAIL_FAST_IF(_isBounds);
        // a search term keeps its value in _lowerBound (and _upperBound)
        return _lowerBound;
    }
};
// Ordering predicate for UnicodeRange keys. It lets a single-codepoint search
// term be compared against stored ranges:
//   range vs term   -> the range orders first when its upper bound is below the term
//   term  vs range  -> the term orders first when it is below the range's lower bound
//   range vs range  -> ordered by lower bound
//   term  vs term   -> ordered by value
struct UnicodeRangeCompare final
{
    bool operator()(const UnicodeRange& a, const UnicodeRange& b) const
    {
        const bool rhsIsRange = b.IsBounds();

        if (a.IsBounds())
        {
            // left side is a stored range
            return rhsIsRange ? (a.LowerBound() < b.LowerBound()) :
                                (a.UpperBound() < b.SearchTerm());
        }

        // left side is a search term
        return rhsIsRange ? (a.SearchTerm() < b.LowerBound()) :
                            (a.SearchTerm() < b.SearchTerm());
    }
};
public:
CodepointWidthDetector() = default;
CodepointWidthDetector(const CodepointWidthDetector&) = delete;
@ -115,11 +42,8 @@ public:
private:
bool _lookupIsWide(const std::wstring_view glyph) const noexcept;
bool _checkFallbackViaCache(const std::wstring_view glyph) const;
unsigned int _extractCodepoint(const std::wstring_view glyph) const noexcept;
void _populateUnicodeSearchMap();
static unsigned int _extractCodepoint(const std::wstring_view glyph) noexcept;
mutable std::map<std::wstring, bool> _fallbackCache;
std::map<UnicodeRange, CodepointWidth, UnicodeRangeCompare> _map;
std::function<bool(std::wstring_view)> _pfnFallbackMethod;
bool _hasFallback = false;
};