Move Attr, Cwid, Data into nested type. Unify Damage interface to support returning damage for one column (replace off+size with min/max damage) and for multiple cols.

This commit is contained in:
Dustin Howett 2021-07-27 18:46:44 -05:00
parent 58ad0a34fc
commit 596a8155ca
2 changed files with 156 additions and 148 deletions

View file

@ -16,13 +16,15 @@
// Return Value:
// - constructed object
// NOTE(review): this listing appears to be a diff with its +/- markers stripped, so the
// initializer list below mixes the pre-change members (_attrRow, _rowWidth, _cwid and a
// std::wstring _data) with the post-change nested _data aggregate -- confirm against the real file.
ROW::ROW(const SHORT /*rowId*/, const unsigned short rowWidth, const TextAttribute fillAttribute, TextBuffer* const /*pParent*/) :
_attrRow{ rowWidth, fillAttribute },
// Aggregate-initializes the nested row data; the four initializers follow the order
// text, column widths, attributes, width.
_data{
std::wstring(rowWidth, UNICODE_SPACE), // text: rowWidth copies of UNICODE_SPACE
{ { gsl::narrow_cast<uint8_t>(1), gsl::narrow_cast<uint16_t>(rowWidth) } }, // widths: one run of rowWidth code units, each 1 column wide
{ rowWidth, fillAttribute }, // attribute row built from the width and the fill attribute
rowWidth // row width
},
_lineRendition{ LineRendition::SingleWidth },
_wrapForced{ false },
_doubleBytePadded{ false },
_rowWidth(rowWidth),
_cwid(_rowWidth, 1),
_data(_rowWidth, UNICODE_SPACE)
_doubleBytePadded{ false }
{
}
@ -37,11 +39,11 @@ bool ROW::Reset(const TextAttribute Attr)
_lineRendition = LineRendition::SingleWidth;
_wrapForced = false;
_doubleBytePadded = false;
_cwid.replace(0, _rowWidth, { 1, _rowWidth }); // replace entire RLE with one run
_data.replace(0, _rowWidth, _rowWidth, UNICODE_SPACE);
_data._cwid.replace(0, _data._width, { 1, _data._width }); // replace entire RLE with one run
_data._data.replace(0, _data._width, _data._width, UNICODE_SPACE);
try
{
_attrRow.Reset(Attr);
_data._attrRow.Reset(Attr);
}
catch (...)
{
@ -59,20 +61,20 @@ bool ROW::Reset(const TextAttribute Attr)
// - S_OK if successful, otherwise relevant error
[[nodiscard]] HRESULT ROW::Resize(const unsigned short width)
{
// NOTE(review): each statement below appears twice -- once against the pre-change members
// and once against the nested _data aggregate -- because this listing is a diff without +/- markers.
// Resize the text to `width` code units, padding any newly added ones with spaces.
// NOTE(review): this sizes the string in code units, which presumably assumes one code unit
// per column -- confirm how rows holding multi-code-unit glyphs are expected to behave.
_data.resize(width, L' ');
// Remember the extent of the width map before it is resized.
auto oldEnd{ _cwid.size() };
_cwid.resize_trailing_extent(width);
_data._data.resize(width, L' ');
auto oldEnd{ _data._cwid.size() };
_data._cwid.resize_trailing_extent(width);
if (width > oldEnd)
{
// The row grew: mark every newly added code unit as being one column wide.
_cwid.replace(oldEnd, width, { 1, gsl::narrow_cast<uint16_t>(width - oldEnd) });
_data._cwid.replace(oldEnd, width, { 1, gsl::narrow_cast<uint16_t>(width - oldEnd) });
}
try
{
_attrRow.Resize(width);
_data._attrRow.Resize(width);
}
// CATCH_RETURN is a macro defined elsewhere; presumably it turns an exception from the
// attribute resize into a failure HRESULT and returns it -- confirm.
CATCH_RETURN();
// Reached only when the attribute resize did not return early. Note that the text and the
// width map have already been modified by this point.
_rowWidth = width;
_data._width = width;
return S_OK;
}
@ -85,7 +87,7 @@ bool ROW::Reset(const TextAttribute Attr)
// - <none>
void ROW::ClearColumn(const size_t column)
{
// Reject any column at or beyond the row width. (The check appears in both its pre-change
// and post-change form in this listing.)
THROW_HR_IF(E_INVALIDARG, column >= _rowWidth);
THROW_HR_IF(E_INVALIDARG, column >= _data._width);
// Clearing is implemented as writing a single-column space glyph at the column.
WriteGlyphAtMeasured(column, 1, L" ");
}
@ -100,11 +102,11 @@ void ROW::ClearColumn(const size_t column)
// - iterator to first cell that was not written to this row.
OutputCellIterator ROW::WriteCells(OutputCellIterator it, const size_t index, const std::optional<bool> wrap, std::optional<size_t> limitRight)
{
THROW_HR_IF(E_INVALIDARG, index >= _rowWidth);
THROW_HR_IF(E_INVALIDARG, limitRight.value_or(0) >= _rowWidth);
THROW_HR_IF(E_INVALIDARG, index >= _data._width);
THROW_HR_IF(E_INVALIDARG, limitRight.value_or(0) >= _data._width);
// If we're given a right-side column limit, use it. Otherwise, the write limit is the final column index available in the char row.
const auto finalColumnInRow = limitRight.value_or(_rowWidth - 1);
const auto finalColumnInRow = limitRight.value_or(_data._width - 1);
auto currentColor = it->TextAttr();
uint16_t colorUses = 0;
@ -126,7 +128,7 @@ OutputCellIterator ROW::WriteCells(OutputCellIterator it, const size_t index, co
{
// Otherwise, commit this color into the run and save off the new one.
// Now commit the new color runs into the attr row.
_attrRow.Replace(colorStarts, currentIndex, currentColor);
_data._attrRow.Replace(colorStarts, currentIndex, currentColor);
currentColor = it->TextAttr();
colorUses = 1;
colorStarts = currentIndex;
@ -196,7 +198,7 @@ OutputCellIterator ROW::WriteCells(OutputCellIterator it, const size_t index, co
// Now commit the final color into the attr row
if (colorUses)
{
_attrRow.Replace(colorStarts, currentIndex, currentColor);
_data._attrRow.Replace(colorStarts, currentIndex, currentColor);
}
return it;

View file

@ -42,7 +42,7 @@ class ROW final
public:
ROW(const SHORT rowId, const unsigned short rowWidth, const TextAttribute fillAttribute, TextBuffer* const pParent);
size_t size() const noexcept { return _rowWidth; }
size_t size() const noexcept { return _data._width; }
void SetWrapForced(const bool wrap) noexcept { _wrapForced = wrap; }
bool WasWrapForced() const noexcept { return _wrapForced; }
@ -50,8 +50,8 @@ public:
void SetDoubleBytePadded(const bool doubleBytePadded) noexcept { _doubleBytePadded = doubleBytePadded; }
bool WasDoubleBytePadded() const noexcept { return _doubleBytePadded; }
const ATTR_ROW& GetAttrRow() const noexcept { return _attrRow; }
ATTR_ROW& GetAttrRow() noexcept { return _attrRow; }
const ATTR_ROW& GetAttrRow() const noexcept { return _data._attrRow; }
ATTR_ROW& GetAttrRow() noexcept { return _data._attrRow; }
LineRendition GetLineRendition() const noexcept { return _lineRendition; }
void SetLineRendition(const LineRendition lineRendition) noexcept { _lineRendition = lineRendition; }
@ -60,7 +60,7 @@ public:
[[nodiscard]] HRESULT Resize(const unsigned short width);
void ClearColumn(const size_t column);
std::wstring GetText() const { return _data; }
std::wstring GetText() const { return _data._data; }
OutputCellIterator WriteCells(OutputCellIterator it, const size_t index, const std::optional<bool> wrap = std::nullopt, std::optional<size_t> limitRight = std::nullopt);
@ -69,127 +69,135 @@ public:
friend class RowTests;
#endif
// Result of looking up one or more columns in a row: which code units hold the affected
// glyph(s) and which columns those glyphs occupy.
// The field order matters: values of this type are brace-initialized positionally and
// unpacked with structured bindings.
struct DamageRanges
{
// Index of the first affected code unit in the row's text.
size_t dataOffset;
// Number of affected code units, starting at dataOffset.
size_t dataLength;
// First column covered by the affected glyph(s).
uint16_t firstColumn;
// One past the last column covered by the affected glyph(s).
uint16_t lastColumnExclusive;
};
// Storage for one row of text, plus the lookups that map a column index onto
// the UTF-16 code units that draw it.
struct RowData
{
    // The row's text as UTF-16 code units.
    std::wstring _data;
    // Run-length-encoded column widths for the code units in _data. For each
    // run, `value` is the number of columns one code unit occupies and
    // `length` is how many consecutive code units share that width. A width
    // of 0 marks trailing code units that belong to the glyph before them.
    til::small_rle<uint8_t, uint16_t, 3> _cwid;
    // Text attributes for the row.
    ATTR_ROW _attrRow;
    // Width of the row.
    unsigned short _width;

    // Method Description:
    // - Locates the glyph that covers a single column.
    // Arguments:
    // - col: zero-based column to look up.
    // Return Value:
    // - The code-unit range of the glyph (dataOffset/dataLength) and the
    //   half-open column range [firstColumn, lastColumnExclusive) it occupies.
    //   For an in-range column lastColumnExclusive is always at least 1.
    // - If col lies beyond the columns described by _cwid: the offset of the
    //   first code unit past the described data, the number of code units
    //   remaining in _data, and an empty column range of { 0, 0 }. We cannot
    //   answer for how many columns that remaining data takes up.
    DamageRanges _damageForColumn(size_t col) const
    {
        const auto& runs = _cwid.runs();

        size_t currentCol{ 0 };
        size_t currentWchar{ 0 };
        auto it{ runs.cbegin() };
        for (; it != runs.cend(); ++it)
        {
            // Each run tells us how many columns x N wchar_t it covers.
            // Zero-width (trailer) runs cover no columns and are always stepped over.
            const size_t colsCoveredByRun{ static_cast<size_t>(it->value) * it->length };
            if (currentCol + colsCoveredByRun > col)
            {
                // This is the run that covers the column of interest; handle it below.
                break;
            }
            currentCol += colsCoveredByRun;
            currentWchar += it->length;
        }

        if (it == runs.cend())
        {
            // Somebody requested a column we cannot answer for. The string might still
            // have data, so report where it starts and how much of it is left.
            return { currentWchar, _data.size() - currentWchar, 0u, 0u };
        }

        // currentWchar / currentCol are the code units / columns consumed *before* this run.
        // The run we stopped on has a non-zero width, so the divisions below are safe.
        const size_t colsIntoRun{ col - currentCol };
        // How far into a wide glyph the requested column sits (0 when we hit its head).
        const size_t colsIntoGlyph{ colsIntoRun % it->value };
        currentWchar += colsIntoRun / it->value; // one wchar per width unit -- rounds down (correct behavior)
        size_t lenInWchars{ 1 }; // the glyph's leading code unit

        // colsIntoRun is 0-indexed; adding one glyph width turns it into the number of
        // columns of this run consumed once the hit glyph is included.
        const size_t colsConsumedFromRun{ colsIntoRun + it->value };
        const size_t colsCoveredByRun{ static_cast<size_t>(it->value) * it->length };
        if (colsConsumedFromRun >= colsCoveredByRun)
        {
            // We hit the last glyph of this run. If the next run is zero-width, every
            // code unit in it is a trailer that belongs to the glyph we hit.
            const auto nextRunIt{ it + 1 };
            if (nextRunIt != runs.cend() && nextRunIt->value == 0)
            {
                lenInWchars += nextRunIt->length;
            }
        }

        const size_t firstColumn{ col - colsIntoGlyph };
        return {
            currentWchar, // wchar start
            lenInWchars, // wchar size
            gsl::narrow_cast<uint16_t>(firstColumn), // Column damage to the left (where we overlapped the right of a wide glyph)
            gsl::narrow_cast<uint16_t>(firstColumn + it->value), // Column damage to the right (where we overlapped the left of a wide glyph)
        };
    }

    // Method Description:
    // - Computes the damage caused by replacing the columns [col, col + ncols).
    //   Damage is the full extent of every glyph touched by that range: a wide
    //   glyph that is only partially covered is damaged in its entirety, so the
    //   returned column range may start before col and end after col + ncols.
    // Arguments:
    // - col: first column being replaced.
    // - ncols: number of columns being replaced.
    // Return Value:
    // - The contiguous code-unit range and the half-open column range covering
    //   every glyph touched. If the requested range extends past the columns the
    //   row can answer for, the result stops at the last column that could be
    //   resolved; if col itself cannot be resolved, the out-of-range result of
    //   _damageForColumn is returned as-is.
    DamageRanges _damageForColumns(size_t col, size_t ncols) const
    {
        auto damage{ _damageForColumn(col) };
        if (damage.lastColumnExclusive == 0)
        {
            // Only the out-of-range result has an empty column range. There is
            // nothing to extend, and walking on from "column 0" would be wrong.
            return damage;
        }

        while (damage.lastColumnExclusive < col + ncols)
        {
            const auto nextDamage{ _damageForColumn(damage.lastColumnExclusive) };
            if (nextDamage.lastColumnExclusive <= damage.lastColumnExclusive)
            {
                // No forward progress: we ran off the end of the row. Without this
                // check the out-of-range result would reset lastColumnExclusive to 0
                // and the walk would restart from the first column, never terminating.
                break;
            }
            // *INVARIANT* the next glyph begins at a different data index, and it is
            // contiguous with what we have so far, so just extend the length.
            damage.dataLength += nextDamage.dataLength;
            damage.lastColumnExclusive = nextDamage.lastColumnExclusive;
        }
        return damage;
    }
};
private:
ATTR_ROW _attrRow;
RowData _data;
LineRendition _lineRendition;
unsigned short _rowWidth;
// Occurs when the user runs out of text in a given row and we're forced to wrap the cursor to the next line
bool _wrapForced;
// Occurs when the user runs out of text to support a double byte character and we're forced to the next line
bool _doubleBytePadded;
// Result of the pre-change column lookup (_indicesForCol).
// The field order matters: values are brace-initialized positionally and unpacked
// with structured bindings.
struct ColumnLookupResult
{
// Index of the glyph's first code unit in the row's text.
size_t dataOffset;
// Number of code units that make up the glyph.
size_t dataLength;
// How far into a wide glyph the requested column sits (0 at the glyph's head).
uint8_t columnOffsetWithinGlyph;
// How many columns the glyph that was hit occupies.
uint8_t numberOfColumns;
};
public:
std::wstring _data;
til::small_rle<uint8_t, uint16_t, 3> _cwid;
// Pre-change lookup of the glyph covering a single column.
// Walks the run-length-encoded column widths until it reaches the run that contains `col`,
// then reports where that glyph's code units live, how far into the glyph `col` sits, and
// how many columns the glyph occupies. When `col` is beyond every run, it reports the offset
// just past the described data, the count of remaining code units, and zero for both column fields.
ColumnLookupResult _indicesForCol(size_t col) const
{
size_t currentCol{ 0 };
size_t currentWchar{ 0 };
auto it{ _cwid.runs().cbegin() };
while (it != _cwid.runs().cend())
{
// Each compressed pair tells us how many columns x N wchar_t
const auto colsCoveredByRun{ it->value * it->length };
if (currentCol + colsCoveredByRun > col)
{
// We want to break out of the loop to manually handle this run, because
// we've determined that it is the run that covers the column of interest.
break;
}
currentCol += colsCoveredByRun;
currentWchar += it->length;
it++;
}
if (it == _cwid.runs().cend())
{
// this is an interesting case- somebody requested a column we cannot answer for.
// The string might actually have data, and the caller might be interested in where that data is.
// Ideally, we would return the index of the first char out-of-bounds, and the length of the remaining data as a single unit.
// We can't answer for how much space it takes up, though.
return { currentWchar, _data.size() - currentWchar, 0u, 0u };
}
// currentWchar is how many wchar_t we are into the string before processing this run
// currentCol is how many columns we've covered before processing this run
// We are *guaranteed* that the hit is in this run -- no need to check it->length
// col-currentCol is how many columns are left unaccounted for (how far into this run we need to go)
// Zero-width runs never satisfy the break condition above, so it->value is non-zero here
// and the division and modulo below are safe.
const auto colsLeftToCountInCurrentRun{ col - currentCol };
currentWchar += colsLeftToCountInCurrentRun / it->value; // one wch per column unit -- rounds down (correct behavior)
size_t lenInWchars{ 1 }; // the first hit takes up one wchar
// We use this to determine if we have exhausted every column this run can cough up.
// colsLeftToCountInCurrentRun is 0-indexed, but colsConsumedByRun is 1-indexed (index 0 consumes N columns, etc.)
// therefore, we reindex colsLeftToCountInCurrentRun and compare it to colsConsumedByRun
const auto colsConsumedFromRun{ colsLeftToCountInCurrentRun + it->value };
const auto colsCoveredByRun{ it->value * it->length };
// If we *have* consumed every column this run can provide, we must check the run after it:
// if it contributes "0" columns, it is actually a set of trailing code units.
// (The iterator was already compared against cend() above, so the second half of this
// condition is always true at this point.)
if (colsConsumedFromRun >= colsCoveredByRun && it != _cwid.runs().cend())
{
const auto nextRunIt{ it + 1 };
if (nextRunIt != _cwid.runs().cend() && nextRunIt->value == 0)
{
// we were at the boundary of a column run, so if the next one is 0 it tells us that each
// wchar after it is a trailer
lenInWchars += nextRunIt->length;
}
}
// NOTE(review): the third initializer is a size_t expression stored into a uint8_t field,
// which is a narrowing conversion inside a braced initializer -- confirm how this was built.
return {
currentWchar, // wchar start
lenInWchars, // wchar size
colsLeftToCountInCurrentRun % it->value, // how far into the wide glyph we were (if we are partway through a 2-wide or 3-wide glyph)
it->value // how many columns is the thing we hit?
};
}
public:
// Returns a view over the code units of the glyph that covers `col`.
// The view points directly into the row's text storage; it does not copy.
// (The body appears in both its pre-change and post-change form in this listing.)
std::wstring_view GlyphAt(size_t col) const
{
const auto lookup{ _indicesForCol(col) };
return { _data.data() + lookup.dataOffset, lookup.dataLength };
const auto lookup{ _data._damageForColumn(col) };
return { _data._data.data() + lookup.dataOffset, lookup.dataLength };
}
std::pair<size_t, size_t> WriteGlyphAtMeasured(size_t col, size_t ncols, std::wstring_view glyph)
{
// When we want to replace a column, or set of columns, with a glyph, we need to:
// * Figure out the physical extent of the character in that cell (UTF-16 code units).
// * Figure out the columnar extent of the character in that cell (how many columns it covers).
// * In the simple case (1->1, 2->2), there will be no damage.
// * In the complex case (2->1, 1->2, 2->2 with middle overlap), there *WILL* be damage.
// * Replace the physical character data in that cell with the new character data.
// * Insert padding characters to the left and right to account for damage.
//
// ## DAMAGE
// Damage is measured in the number of columns to the left
// and right of the new glyph that are now NO LONGER VALID because
// they were double-width characters that are being cut in half,
// or single-width characters that are collateral damage from stomping
// them with a double-width character.
auto [begin, len, off, cols]{ _indicesForCol(col) };
const auto minDamageColumn{ col - off }; // Column damage to the left (where we overlapped the right of a wide glyph)
auto maxDamageColumnExclusive{ minDamageColumn + cols }; // Column damage to the right (where we overlapped the left of a wide glyph)
while (maxDamageColumnExclusive < col + ncols)
{
auto [nbegin, nlen, noff, newcols]{ _indicesForCol(maxDamageColumnExclusive) };
// *INVARIANT* the beginning of the next column range must have a different beginning byte
// This column began at a different data index, so we have to delete its data too.
// Since it's contiguous, just increment len.
len += nlen;
maxDamageColumnExclusive += newcols;
}
const auto [begin, len, minDamageColumn, maxDamageColumnExclusive]{ _data._damageForColumns(col, ncols) };
if (minDamageColumn == col && maxDamageColumnExclusive == col + ncols)
{
@ -197,12 +205,12 @@ public:
// We can replace the code units in the data directly, and we can replace the
// column counts with [col, 0, 0...] (with as many zeroes as we need to account
// for any code units past the first.)
_data.replace(begin, len, glyph);
typename decltype(_cwid)::rle_type newRuns[]{
_data._data.replace(begin, len, glyph);
typename decltype(_data._cwid)::rle_type newRuns[]{
{ gsl::narrow_cast<uint8_t>(ncols), 1 },
{ 0, gsl::narrow_cast<uint16_t>(glyph.size() - 1) },
};
_cwid.replace(gsl::narrow_cast<uint16_t>(begin), gsl::narrow_cast<uint16_t>(begin + len), gsl::make_span(&newRuns[0], glyph.size() == 1 ? 1 : 2));
_data._cwid.replace(gsl::narrow_cast<uint16_t>(begin), gsl::narrow_cast<uint16_t>(begin + len), gsl::make_span(&newRuns[0], glyph.size() == 1 ? 1 : 2));
}
else
{
@ -226,7 +234,7 @@ public:
// is one column wide. We have
// to insert [1]s for each
// damaged column.
boost::container::small_vector<typename decltype(_cwid)::rle_type, 4> newRuns;
boost::container::small_vector<typename decltype(_data._cwid)::rle_type, 4> newRuns;
if (col - minDamageColumn)
{
newRuns.emplace_back((uint8_t)1, gsl::narrow_cast<uint16_t>(col - minDamageColumn));
@ -240,12 +248,8 @@ public:
{
newRuns.emplace_back((uint8_t)1, gsl::narrow_cast<uint16_t>(maxDamageColumnExclusive - (col + ncols)));
}
_data.replace(begin, len, replacement);
_cwid.replace(gsl::narrow_cast<uint16_t>(begin), gsl::narrow_cast<uint16_t>(begin + len), gsl::make_span(newRuns));
}
if (_cwid.size() != _data.size())
{
_cwid.resize_trailing_extent(gsl::narrow_cast<uint16_t>(_data.size()));
_data._data.replace(begin, len, replacement);
_data._cwid.replace(gsl::narrow_cast<uint16_t>(begin), gsl::narrow_cast<uint16_t>(begin + len), gsl::make_span(newRuns));
}
// Distance from requested column to final
@ -255,20 +259,22 @@ public:
// Classifies the cell at `col` as Single, Leading or Trailing, based on the width of the
// glyph covering it and on whether `col` is that glyph's first column.
// NOTE(review): each lookup/condition below appears in both its pre-change form (off/ncols)
// and its post-change form (first/lastE); this listing is a diff without +/- markers.
DbcsAttribute DbcsAttrAt(size_t col) const
{
auto [begin, len, off, ncols] = _indicesForCol(col);
if (ncols == 1)
const auto [begin, len, first, lastE] = _data._damageForColumn(col);
if (lastE - first == 1)
{
// The glyph under this column is only one column wide.
return DbcsAttribute{ DbcsAttribute::Attribute::Single };
}
else if (off >= 1)
else if (first != col)
{
// The glyph under this column is >1 col wide, and we're bisecting it
return DbcsAttribute{ DbcsAttribute::Attribute::Trailing };
}
else if (off == 0)
else
{
// The glyph under this column is >1 col wide, and we're at the head
return DbcsAttribute{ DbcsAttribute::Attribute::Leading };
}
// Fallback for the pre-change if/else-if chain; with the post-change trailing `else`
// every path above already returns.
return DbcsAttribute{ DbcsAttribute::Attribute::Single };
}
// Method Description:
@ -281,7 +287,7 @@ public:
// - the delimiter class for the given char
const DelimiterClass DelimiterClassAt(const size_t column, const std::wstring_view wordDelimiters) const
{
THROW_HR_IF(E_INVALIDARG, column >= _rowWidth);
THROW_HR_IF(E_INVALIDARG, column >= _data._width);
const auto glyph = *GlyphAt(column).begin();
if (glyph <= UNICODE_SPACE)
@ -298,7 +304,7 @@ public:
}
}
size_t _maxc{};
uint16_t _maxc{};
size_t MeasureRight() const
{
return _maxc;