Move Attr, Cwid, Data into nested type. Unify Damage interface to support returning damage for one column (replace off+size with min/max damage) and for multiple cols.

2021-07-27 18:46:44 -05:00 · 2021-07-27 18:46:44 -05:00 · 596a8155ca
parent 58ad0a34fc
commit 596a8155ca
2 changed files with 156 additions and 148 deletions
--- a/src/buffer/out/Row.cpp
+++ b/src/buffer/out/Row.cpp
@ -16,13 +16,15 @@
 // Return Value:
 // - constructed object
 ROW::ROW(const SHORT /*rowId*/, const unsigned short rowWidth, const TextAttribute fillAttribute, TextBuffer* const /*pParent*/) :
-    _attrRow{ rowWidth, fillAttribute },
+    _data{
+        std::wstring(rowWidth, UNICODE_SPACE),
+        { { gsl::narrow_cast<uint8_t>(1), gsl::narrow_cast<uint16_t>(rowWidth) } },
+        { rowWidth, fillAttribute },
+        rowWidth
+    },
    _lineRendition{ LineRendition::SingleWidth },
    _wrapForced{ false },
-    _doubleBytePadded{ false },
-    _rowWidth(rowWidth),
-    _cwid(_rowWidth, 1),
-    _data(_rowWidth, UNICODE_SPACE)
+    _doubleBytePadded{ false }
 {
 }

@ -37,11 +39,11 @@ bool ROW::Reset(const TextAttribute Attr)
    _lineRendition = LineRendition::SingleWidth;
    _wrapForced = false;
    _doubleBytePadded = false;
-    _cwid.replace(0, _rowWidth, { 1, _rowWidth }); // replace entire RLE with one run
-    _data.replace(0, _rowWidth, _rowWidth, UNICODE_SPACE);
+    _data._cwid.replace(0, _data._width, { 1, _data._width }); // replace entire RLE with one run
+    _data._data.replace(0, _data._width, _data._width, UNICODE_SPACE);
    try
    {
-        _attrRow.Reset(Attr);
+        _data._attrRow.Reset(Attr);
    }
    catch (...)
    {
@ -59,20 +61,20 @@ bool ROW::Reset(const TextAttribute Attr)
 // - S_OK if successful, otherwise relevant error
 [[nodiscard]] HRESULT ROW::Resize(const unsigned short width)
 {
-    _data.resize(width, L' ');
-    auto oldEnd{ _cwid.size() };
-    _cwid.resize_trailing_extent(width);
+    _data._data.resize(width, L' ');
+    auto oldEnd{ _data._cwid.size() };
+    _data._cwid.resize_trailing_extent(width);
    if (width > oldEnd)
    {
-        _cwid.replace(oldEnd, width, { 1, gsl::narrow_cast<uint16_t>(width - oldEnd) });
+        _data._cwid.replace(oldEnd, width, { 1, gsl::narrow_cast<uint16_t>(width - oldEnd) });
    }
    try
    {
-        _attrRow.Resize(width);
+        _data._attrRow.Resize(width);
    }
    CATCH_RETURN();

-    _rowWidth = width;
+    _data._width = width;

    return S_OK;
 }
@ -85,7 +87,7 @@ bool ROW::Reset(const TextAttribute Attr)
 // - <none>
 void ROW::ClearColumn(const size_t column)
 {
-    THROW_HR_IF(E_INVALIDARG, column >= _rowWidth);
+    THROW_HR_IF(E_INVALIDARG, column >= _data._width);
    WriteGlyphAtMeasured(column, 1, L" ");
 }

@ -100,11 +102,11 @@ void ROW::ClearColumn(const size_t column)
 // - iterator to first cell that was not written to this row.
 OutputCellIterator ROW::WriteCells(OutputCellIterator it, const size_t index, const std::optional<bool> wrap, std::optional<size_t> limitRight)
 {
-    THROW_HR_IF(E_INVALIDARG, index >= _rowWidth);
-    THROW_HR_IF(E_INVALIDARG, limitRight.value_or(0) >= _rowWidth);
+    THROW_HR_IF(E_INVALIDARG, index >= _data._width);
+    THROW_HR_IF(E_INVALIDARG, limitRight.value_or(0) >= _data._width);

    // If we're given a right-side column limit, use it. Otherwise, the write limit is the final column index available in the char row.
-    const auto finalColumnInRow = limitRight.value_or(_rowWidth - 1);
+    const auto finalColumnInRow = limitRight.value_or(_data._width - 1);

    auto currentColor = it->TextAttr();
    uint16_t colorUses = 0;
@ -126,7 +128,7 @@ OutputCellIterator ROW::WriteCells(OutputCellIterator it, const size_t index, co
            {
                // Otherwise, commit this color into the run and save off the new one.
                // Now commit the new color runs into the attr row.
-                _attrRow.Replace(colorStarts, currentIndex, currentColor);
+                _data._attrRow.Replace(colorStarts, currentIndex, currentColor);
                currentColor = it->TextAttr();
                colorUses = 1;
                colorStarts = currentIndex;
@ -196,7 +198,7 @@ OutputCellIterator ROW::WriteCells(OutputCellIterator it, const size_t index, co
    // Now commit the final color into the attr row
    if (colorUses)
    {
-        _attrRow.Replace(colorStarts, currentIndex, currentColor);
+        _data._attrRow.Replace(colorStarts, currentIndex, currentColor);
    }

    return it;
--- a/src/buffer/out/Row.hpp
+++ b/src/buffer/out/Row.hpp
@ -42,7 +42,7 @@ class ROW final
 public:
    ROW(const SHORT rowId, const unsigned short rowWidth, const TextAttribute fillAttribute, TextBuffer* const pParent);

-    size_t size() const noexcept { return _rowWidth; }
+    size_t size() const noexcept { return _data._width; }

    void SetWrapForced(const bool wrap) noexcept { _wrapForced = wrap; }
    bool WasWrapForced() const noexcept { return _wrapForced; }
@ -50,8 +50,8 @@ public:
    void SetDoubleBytePadded(const bool doubleBytePadded) noexcept { _doubleBytePadded = doubleBytePadded; }
    bool WasDoubleBytePadded() const noexcept { return _doubleBytePadded; }

-    const ATTR_ROW& GetAttrRow() const noexcept { return _attrRow; }
-    ATTR_ROW& GetAttrRow() noexcept { return _attrRow; }
+    const ATTR_ROW& GetAttrRow() const noexcept { return _data._attrRow; }
+    ATTR_ROW& GetAttrRow() noexcept { return _data._attrRow; }

    LineRendition GetLineRendition() const noexcept { return _lineRendition; }
    void SetLineRendition(const LineRendition lineRendition) noexcept { _lineRendition = lineRendition; }
@ -60,7 +60,7 @@ public:
    [[nodiscard]] HRESULT Resize(const unsigned short width);

    void ClearColumn(const size_t column);
-    std::wstring GetText() const { return _data; }
+    std::wstring GetText() const { return _data._data; }

    OutputCellIterator WriteCells(OutputCellIterator it, const size_t index, const std::optional<bool> wrap = std::nullopt, std::optional<size_t> limitRight = std::nullopt);

@ -69,127 +69,135 @@ public:
    friend class RowTests;
 #endif

+    struct DamageRanges
+    {
+        size_t dataOffset;
+        size_t dataLength;
+        uint16_t firstColumn;
+        uint16_t lastColumnExclusive;
+    };
+
+    struct RowData
+    {
+        std::wstring _data;
+        til::small_rle<uint8_t, uint16_t, 3> _cwid;
+        ATTR_ROW _attrRow;
+        unsigned short _width;
+
+        DamageRanges _damageForColumn(size_t col) const
+        {
+            size_t currentCol{ 0 };
+            size_t currentWchar{ 0 };
+            auto it{ _cwid.runs().cbegin() };
+            while (it != _cwid.runs().cend())
+            {
+                // Each compressed pair tells us how many columns x N wchar_t
+                const auto colsCoveredByRun{ it->value * it->length };
+                if (currentCol + colsCoveredByRun > col)
+                {
+                    // We want to break out of the loop to manually handle this run, because
+                    // we've determined that it is the run that covers the column of interest.
+                    break;
+                }
+                currentCol += colsCoveredByRun;
+                currentWchar += it->length;
+                it++;
+            }
+
+            if (it == _cwid.runs().cend())
+            {
+                // this is an interesting case- somebody requested a column we cannot answer for.
+                // The string might actually have data, and the caller might be interested in where that data is.
+                // Ideally, we would return the index of the first char out-of-bounds, and the length of the remaining data as a single unit.
+                // We can't answer for how much space it takes up, though.
+                return { currentWchar, _data.size() - currentWchar, 0u, 0u };
+            }
+            // currentWchar is how many wchar_t we are into the string before processing this run
+            // currentCol is how many columns we've covered before processing this run
+
+            // We are *guaranteed* that the hit is in this run -- no need to check it->length
+            // col-currentCol is how many columns are left unaccounted for (how far into this run we need to go)
+            const auto colsLeftToCountInCurrentRun{ col - currentCol };
+            currentWchar += colsLeftToCountInCurrentRun / it->value; // one wch per column unit -- rounds down (correct behavior)
+
+            size_t lenInWchars{ 1 }; // the first hit takes up one wchar
+
+            // We use this to determine if we have exhausted every column this run can cough up.
+            // colsLeftToCountInCurrentRun is a 0-based column offset, but colsCoveredByRun is a count (the glyph at offset 0 already consumes N columns, etc.)
+            // therefore, we reindex colsLeftToCountInCurrentRun (as colsConsumedFromRun) and compare it to colsCoveredByRun
+            const auto colsConsumedFromRun{ colsLeftToCountInCurrentRun + it->value };
+            const auto colsCoveredByRun{ it->value * it->length };
+            // If we *have* consumed every column this run can provide, we must check the run after it:
+            // if it contributes "0" columns, it is actually a set of trailing code units.
+            if (colsConsumedFromRun >= colsCoveredByRun && it != _cwid.runs().cend())
+            {
+                const auto nextRunIt{ it + 1 };
+                if (nextRunIt != _cwid.runs().cend() && nextRunIt->value == 0)
+                {
+                    // we were at the boundary of a column run, so if the next one is 0 it tells us that each
+                    // wchar after it is a trailer
+                    lenInWchars += nextRunIt->length;
+                }
+            }
+
+            return {
+                currentWchar, // wchar start
+                lenInWchars, // wchar size
+                gsl::narrow_cast<uint16_t>(col - (colsLeftToCountInCurrentRun % it->value)), // Column damage to the left (where we overlapped the right of a wide glyph)
+                gsl::narrow_cast<uint16_t>(col - (colsLeftToCountInCurrentRun % it->value) + it->value), // Column damage to the right (where we overlapped the left of a wide glyph)
+            };
+        }
+
+        DamageRanges _damageForColumns(size_t col, size_t ncols) const
+        {
+            // When we want to replace a column, or set of columns, with a glyph, we need to:
+            // * Figure out the physical extent of the character in that cell (UTF-16 code units).
+            // * Figure out the columnar extent of the character in that cell (how many columns it covers).
+            //  * In the simple case (1->1, 2->2), there will be no damage.
+            //  * In the complex case (2->1, 1->2, 2->2 with middle overlap), there *WILL* be damage.
+            // * Replace the physical character data in that cell with the new character data.
+            // * Insert padding characters to the left and right to account for damage.
+            //
+            // ## DAMAGE
+            // Damage is measured in the number of columns to the left
+            // and right of the new glyph that are now NO LONGER VALID because
+            // they were double-width characters that are being cut in half,
+            // or single-width characters that are collateral damage from stomping
+            // them with a double-width character.
+            auto damage{ _damageForColumn(col) };
+
+            while (damage.lastColumnExclusive < col + ncols)
+            {
+                auto nextDamage{ _damageForColumn(damage.lastColumnExclusive) };
+                // *INVARIANT* the beginning of the next column range must have a different beginning byte
+                // This column began at a different data index, so we have to delete its data too.
+                // Since it's contiguous, just increment len.
+                damage.dataLength += nextDamage.dataLength;
+                damage.lastColumnExclusive = nextDamage.lastColumnExclusive;
+            }
+
+            return damage;
+        }
+    };
+
 private:
-    ATTR_ROW _attrRow;
+    RowData _data;
    LineRendition _lineRendition;
-    unsigned short _rowWidth;
    // Occurs when the user runs out of text in a given row and we're forced to wrap the cursor to the next line
    bool _wrapForced;
    // Occurs when the user runs out of text to support a double byte character and we're forced to the next line
    bool _doubleBytePadded;

-    struct ColumnLookupResult
-    {
-        size_t dataOffset;
-        size_t dataLength;
-        uint8_t columnOffsetWithinGlyph;
-        uint8_t numberOfColumns;
-    };
-
-public:
-    std::wstring _data;
-    til::small_rle<uint8_t, uint16_t, 3> _cwid;
-
-    ColumnLookupResult _indicesForCol(size_t col) const
-    {
-        size_t currentCol{ 0 };
-        size_t currentWchar{ 0 };
-        auto it{ _cwid.runs().cbegin() };
-        while (it != _cwid.runs().cend())
-        {
-            // Each compressed pair tells us how many columns x N wchar_t
-            const auto colsCoveredByRun{ it->value * it->length };
-            if (currentCol + colsCoveredByRun > col)
-            {
-                // We want to break out of the loop to manually handle this run, because
-                // we've determined that it is the run that covers the column of interest.
-                break;
-            }
-            currentCol += colsCoveredByRun;
-            currentWchar += it->length;
-            it++;
-        }
-
-        if (it == _cwid.runs().cend())
-        {
-            // this is an interesting case- somebody requested a column we cannot answer for.
-            // The string might actually have data, and the caller might be interested in where that data is.
-            // Ideally, we would return the index of the first char out-of-bounds, and the length of the remaining data as a single unit.
-            // We can't answer for how much space it takes up, though.
-            return { currentWchar, _data.size() - currentWchar, 0u, 0u };
-        }
-        // currentWchar is how many wchar_t we are into the string before processing this run
-        // currentCol is how many columns we've covered before processing this run
-
-        // We are *guaranteed* that the hit is in this run -- no need to check it->length
-        // col-currentCol is how many columns are left unaccounted for (how far into this run we need to go)
-        const auto colsLeftToCountInCurrentRun{ col - currentCol };
-        currentWchar += colsLeftToCountInCurrentRun / it->value; // one wch per column unit -- rounds down (correct behavior)
-
-        size_t lenInWchars{ 1 }; // the first hit takes up one wchar
-
-        // We use this to determine if we have exhausted every column this run can cough up.
-        // colsLeftToCountInCurrentRun is 0-indexed, but colsConsumedByRun is 1-indexed (index 0 consumes N columns, etc.)
-        // therefore, we reindex colsLeftToCountInCurrentRun and compare it to colsConsumedByRun
-        const auto colsConsumedFromRun{ colsLeftToCountInCurrentRun + it->value };
-        const auto colsCoveredByRun{ it->value * it->length };
-        // If we *have* consumed every column this run can provide, we must check the run after it:
-        // if it contributes "0" columns, it is actually a set of trailing code units.
-        if (colsConsumedFromRun >= colsCoveredByRun && it != _cwid.runs().cend())
-        {
-            const auto nextRunIt{ it + 1 };
-            if (nextRunIt != _cwid.runs().cend() && nextRunIt->value == 0)
-            {
-                // we were at the boundary of a column run, so if the next one is 0 it tells us that each
-                // wchar after it is a trailer
-                lenInWchars += nextRunIt->length;
-            }
-        }
-
-        return {
-            currentWchar, // wchar start
-            lenInWchars, // wchar size
-            colsLeftToCountInCurrentRun % it->value, // how far into the wide glyph we were (if we are partway through a 2-wide or 3-wide glyph)
-            it->value // how many columns is the thing we hit?
-        };
-    }
-
 public:
    std::wstring_view GlyphAt(size_t col) const
    {
-        const auto lookup{ _indicesForCol(col) };
-        return { _data.data() + lookup.dataOffset, lookup.dataLength };
+        const auto lookup{ _data._damageForColumn(col) };
+        return { _data._data.data() + lookup.dataOffset, lookup.dataLength };
    }

    std::pair<size_t, size_t> WriteGlyphAtMeasured(size_t col, size_t ncols, std::wstring_view glyph)
    {
-        // When we want to replace a column, or set of columns, with a glyph, we need to:
-        // * Figure out the physical extent of the character in that cell (UTF-16 code units).
-        // * Figure out the columnar extent of the character in that cell (how many columns it covers).
-        //  * In the simple case (1->1, 2->2), there will be no damage.
-        //  * In the complex case (2->1, 1->2, 2->2 with middle overlap), there *WILL* be damage.
-        // * Replace the physical character data in that cell with the new character data.
-        // * Insert padding characters to the left and right to account for damage.
-        //
-        // ## DAMAGE
-        // Damage is measured in the number of columns to the left
-        // and right of the new glyph that are now NO LONGER VALID because
-        // they were double-width characters that are being cut in half,
-        // or single-width characters that are collateral damage from stomping
-        // them with a double-width character.
-        auto [begin, len, off, cols]{ _indicesForCol(col) };
-        const auto minDamageColumn{ col - off }; // Column damage to the left (where we overlapped the right of a wide glyph)
-        auto maxDamageColumnExclusive{ minDamageColumn + cols }; // Column damage to the right (where we overlapped the left of a wide glyph)
-
-        while (maxDamageColumnExclusive < col + ncols)
-        {
-            auto [nbegin, nlen, noff, newcols]{ _indicesForCol(maxDamageColumnExclusive) };
-            // *INVARIANT* the beginning of the next column range must have a different beginning byte
-            // This column began at a different data index, so we have to delete its data too.
-            // Since it's contiguous, just increment len.
-            len += nlen;
-            maxDamageColumnExclusive += newcols;
-        }
+        const auto [begin, len, minDamageColumn, maxDamageColumnExclusive]{ _data._damageForColumns(col, ncols) };

        if (minDamageColumn == col && maxDamageColumnExclusive == col + ncols)
        {
@ -197,12 +205,12 @@ public:
            // We can replace the code units in the data directly, and we can replace the
            // column counts with [col, 0, 0...] (with as many zeroes as we need to account
            // for any code units past the first.)
-            _data.replace(begin, len, glyph);
-            typename decltype(_cwid)::rle_type newRuns[]{
+            _data._data.replace(begin, len, glyph);
+            typename decltype(_data._cwid)::rle_type newRuns[]{
                { gsl::narrow_cast<uint8_t>(ncols), 1 },
                { 0, gsl::narrow_cast<uint16_t>(glyph.size() - 1) },
            };
-            _cwid.replace(gsl::narrow_cast<uint16_t>(begin), gsl::narrow_cast<uint16_t>(begin + len), gsl::make_span(&newRuns[0], glyph.size() == 1 ? 1 : 2));
+            _data._cwid.replace(gsl::narrow_cast<uint16_t>(begin), gsl::narrow_cast<uint16_t>(begin + len), gsl::make_span(&newRuns[0], glyph.size() == 1 ? 1 : 2));
        }
        else
        {
@ -226,7 +234,7 @@ public:
            //  is one column wide. We have
            //  to insert [1]s for each
            //  damaged column.
-            boost::container::small_vector<typename decltype(_cwid)::rle_type, 4> newRuns;
+            boost::container::small_vector<typename decltype(_data._cwid)::rle_type, 4> newRuns;
            if (col - minDamageColumn)
            {
                newRuns.emplace_back((uint8_t)1, gsl::narrow_cast<uint16_t>(col - minDamageColumn));
@ -240,12 +248,8 @@ public:
            {
                newRuns.emplace_back((uint8_t)1, gsl::narrow_cast<uint16_t>(maxDamageColumnExclusive - (col + ncols)));
            }
-            _data.replace(begin, len, replacement);
-            _cwid.replace(gsl::narrow_cast<uint16_t>(begin), gsl::narrow_cast<uint16_t>(begin + len), gsl::make_span(newRuns));
-        }
-        if (_cwid.size() != _data.size())
-        {
-            _cwid.resize_trailing_extent(gsl::narrow_cast<uint16_t>(_data.size()));
+            _data._data.replace(begin, len, replacement);
+            _data._cwid.replace(gsl::narrow_cast<uint16_t>(begin), gsl::narrow_cast<uint16_t>(begin + len), gsl::make_span(newRuns));
        }

        // Distance from requested column to final
@ -255,20 +259,22 @@ public:

    DbcsAttribute DbcsAttrAt(size_t col) const
    {
-        auto [begin, len, off, ncols] = _indicesForCol(col);
-        if (ncols == 1)
+        const auto [begin, len, first, lastE] = _data._damageForColumn(col);
+        if (lastE - first == 1)
        {
+            // The glyph under this column is only one column wide.
            return DbcsAttribute{ DbcsAttribute::Attribute::Single };
        }
-        else if (off >= 1)
+        else if (first != col)
        {
+            // The glyph under this column is >1 col wide, and we're bisecting it
            return DbcsAttribute{ DbcsAttribute::Attribute::Trailing };
        }
-        else if (off == 0)
+        else
        {
+            // The glyph under this column is >1 col wide, and we're at the head
            return DbcsAttribute{ DbcsAttribute::Attribute::Leading };
        }
-        return DbcsAttribute{ DbcsAttribute::Attribute::Single };
    }

    // Method Description:
@ -281,7 +287,7 @@ public:
    // - the delimiter class for the given char
    const DelimiterClass DelimiterClassAt(const size_t column, const std::wstring_view wordDelimiters) const
    {
-        THROW_HR_IF(E_INVALIDARG, column >= _rowWidth);
+        THROW_HR_IF(E_INVALIDARG, column >= _data._width);

        const auto glyph = *GlyphAt(column).begin();
        if (glyph <= UNICODE_SPACE)
@ -298,7 +304,7 @@ public:
        }
    }

-    size_t _maxc{};
+    uint16_t _maxc{};
    size_t MeasureRight() const
    {
        return _maxc;