terminal/src/inc/til/rle.h
2021-04-27 09:27:31 -07:00

1162 lines
50 KiB
C++

// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.
#pragma once
#ifdef UNIT_TESTING
class RunLengthEncodingTests;
#endif
namespace til // Terminal Implementation Library. Also: "Today I Learned"
{
namespace details
{
// Read-only random access iterator that walks a sequence of (value, length)
// runs as if every run had been expanded into `length` copies of `value`.
// - ParentIt: iterator over the underlying run storage. Its value_type must be
//   a pair-like type exposing first_type/second_type and .first/.second.
template<typename ParentIt>
class rle_const_iterator
{
    // If you use this as a sample for your own iterator, this looks
    // a bit daunting. But it's almost entirely boilerplate.
    // All you actually have to fill in is:
    // A. size_type might not be necessary for you. It can be inferred
    //    from our parent so I defined it.
    // 1. value_type, pointer, reference, and difference type. These
    //    specify the overall types. They're generally what you want to see
    //    when someone does *iterator or iterator-> or it1 - it2.
    //    If you have half an idea of what those return types should be,
    //    define them at the top or better yet, infer them from the underlying
    //    data source.
    // 2. Fill in operator*() and operator->() pointing directly at the data value.
    // 3. Fill in inc() and dec(). That gives you ++it, it++, --it, and it--.
    // 4. Fill in operator+=(). That gives you +=, +, -=, and -.
    //    ALTERNATIVE 3/4. You might be able to just define += and then feed the rest into it
    //    depending on your circumstance.
    // 5. Fill in operator-() for a difference between two instances.
    // 6. Fill in operator[] to go to the offset like an array index.
    // 7. Fill in operator== for equality. Gets == and != in one shot.
    // 8. Fill in operator< for comparison. Also covers > and <= and >=.
    // Congrats, you have a const_iterator. Go implement the non-const
    // inheriting from this. It's super simple once this is done.
private:
    using size_type = typename ParentIt::value_type::second_type;

public:
    using iterator_category = std::random_access_iterator_tag;
    using value_type = typename ParentIt::value_type::first_type;
    using pointer = const value_type*;
    using reference = const value_type&;
    using difference_type = typename ParentIt::difference_type;

    // Starts on the first position (usage 1) of the run that `it` points at.
    rle_const_iterator(ParentIt it) :
        _it(it),
        _usage(1)
    {
    }

    [[nodiscard]] reference operator*() const noexcept
    {
        return _it->first;
    }

    [[nodiscard]] pointer operator->() const noexcept
    {
        return &operator*();
    }

    rle_const_iterator& operator++() noexcept
    {
        inc();
        return *this;
    }

    rle_const_iterator operator++(int) noexcept
    {
        rle_const_iterator tmp = *this;
        inc();
        return tmp;
    }

    rle_const_iterator& operator--() noexcept
    {
        dec();
        return *this;
    }

    rle_const_iterator operator--(int) noexcept
    {
        rle_const_iterator tmp = *this;
        dec();
        return tmp;
    }

    // Moves by `offset` expanded positions (negative moves backward),
    // hopping across run boundaries as needed.
    rle_const_iterator& operator+=(const difference_type offset) noexcept
    {
        // TODO: Optional iterator debug
        if (offset < 0) // negative direction
        {
            // Running count of how much more we need to move, as a magnitude.
            auto move = static_cast<difference_type>(-offset);
            while (move > 0)
            {
                // A run that is 6 long where we are on the 4th position
                // (_usage, starts at 1) lets us move 3 to the left.
                const auto space = static_cast<difference_type>(_usage - 1);
                if (space >= move)
                {
                    // Deliberate narrowing: move <= space < _usage here.
                    _usage -= static_cast<size_type>(move);
                    move = 0;
                }
                else
                {
                    // "Burn out" this run: _usage steps land us on the last
                    // position of the previous run.
                    move -= static_cast<difference_type>(_usage);
                    --_it;
                    _usage = _it->second;
                }
            }
        }
        else // positive direction
        {
            auto move = static_cast<difference_type>(offset);
            while (move > 0)
            {
                // A run that is 6 long where we are on the 4th position
                // has 2 positions left.
                const auto space = static_cast<difference_type>(_it->second - _usage);
                if (space >= move)
                {
                    // Deliberate narrowing: move <= space fits in the run length type.
                    _usage += static_cast<size_type>(move);
                    move = 0;
                }
                else
                {
                    // Burn the remaining space, + 1 more to step onto the
                    // first position of the next run.
                    move -= space + 1;
                    ++_it;
                    _usage = 1;
                }
            }
        }
        return *this;
    }

    [[nodiscard]] rle_const_iterator operator+(const difference_type offset) const noexcept
    {
        rle_const_iterator tmp = *this;
        return tmp += offset;
    }

    rle_const_iterator& operator-=(const difference_type offset) noexcept
    {
        return *this += -offset;
    }

    [[nodiscard]] rle_const_iterator operator-(const difference_type offset) const noexcept
    {
        rle_const_iterator tmp = *this;
        return tmp -= offset;
    }

    // Returns the number of expanded positions between `right` and this
    // iterator; negative when this iterator is positioned before `right`.
    [[nodiscard]] difference_type operator-(const rle_const_iterator& right) const noexcept
    {
        // TODO: Optional iterator debug
        difference_type accumulation = 0;
        // Walk a copy of the right side toward our own run.
        auto tmp = right;
        // While we're pointing to a run that is RIGHT of tmp...
        while (_it > tmp._it)
        {
            // Add all remaining space in tmp's run, + 1 to step into the next run.
            accumulation += static_cast<difference_type>(tmp._it->second) - static_cast<difference_type>(tmp._usage) + 1;
            ++tmp._it;
            tmp._usage = 1;
        }
        // While we're pointing to a run that is LEFT of tmp...
        while (_it < tmp._it)
        {
            // Stepping from tmp's position to the last position of the previous
            // run costs exactly tmp's usage (not ours).
            accumulation -= static_cast<difference_type>(tmp._usage);
            --tmp._it;
            tmp._usage = tmp._it->second;
        }
        // Both iterators are now in the same run; the rest is the usage delta.
        // Cast first so a run length type narrower than difference_type cannot wrap.
        accumulation += static_cast<difference_type>(_usage) - static_cast<difference_type>(tmp._usage);
        return accumulation;
    }

    [[nodiscard]] reference operator[](const difference_type offset) const noexcept
    {
        return *operator+(offset);
    }

    [[nodiscard]] bool operator==(const rle_const_iterator& right) const noexcept
    {
        // TODO: Optional iterator debug
        return _it == right._it && _usage == right._usage;
    }

    [[nodiscard]] bool operator!=(const rle_const_iterator& right) const noexcept
    {
        return !(*this == right);
    }

    [[nodiscard]] bool operator<(const rle_const_iterator& right) const noexcept
    {
        // TODO: Optional iterator debug
        return _it < right._it || (_it == right._it && _usage < right._usage);
    }

    [[nodiscard]] bool operator>(const rle_const_iterator& right) const noexcept
    {
        return right < *this;
    }

    [[nodiscard]] bool operator<=(const rle_const_iterator& right) const noexcept
    {
        return !(right < *this);
    }

    [[nodiscard]] bool operator>=(const rle_const_iterator& right) const noexcept
    {
        return !(*this < right);
    }

private:
    void inc() noexcept
    {
        // We need the run-aware seeking logic of += even for a single step,
        // so there is no shorthand here. Forward it on.
        operator+=(1);
    }

    void dec() noexcept
    {
        // Same as inc(): single steps go through the run-aware seek.
        operator-=(1);
    }

    ParentIt _it; // the run we are currently inside
    size_type _usage; // 1-based position within that run
};
// Mutable counterpart of rle_const_iterator. All of the seeking logic lives
// in the base; this type only re-exposes it with non-const value access and
// return types of its own class.
template<typename ParentIt>
class rle_iterator : public rle_const_iterator<ParentIt>
{
public:
    // This looks like a lot, but seriously... we're defining nothing here.
    // It's literally just stripping const off of the const iterator and
    // making those accessible.
    // If you use this as a sample, all you have to change is:
    // 1. Make it inherit correctly and align that with the template.
    // 2. Fix mybase to match
    // 3. value_type needs to be whatever makes sense to come off of *iterator
    // 4. difference_type needs to come from somewhere else, probably.
    using mybase = rle_const_iterator<ParentIt>;
    using iterator_category = std::random_access_iterator_tag;
    using value_type = typename ParentIt::value_type::first_type;
    using pointer = value_type*;
    using reference = value_type&;
    using difference_type = typename ParentIt::difference_type;

    // Use base's constructor.
    using mybase::mybase;

    [[nodiscard]] reference operator*() const noexcept
    {
        const auto& readOnly = mybase::operator*();
        return const_cast<reference>(readOnly);
    }

    [[nodiscard]] pointer operator->() const noexcept
    {
        const auto* readOnly = mybase::operator->();
        return const_cast<std::remove_const_t<value_type>*>(readOnly);
    }

    rle_iterator& operator++() noexcept
    {
        mybase::operator++();
        return *this;
    }

    rle_iterator operator++(int) noexcept
    {
        rle_iterator before(*this);
        mybase::operator++();
        return before;
    }

    rle_iterator& operator--() noexcept
    {
        mybase::operator--();
        return *this;
    }

    rle_iterator operator--(int) noexcept
    {
        rle_iterator before(*this);
        mybase::operator--();
        return before;
    }

    rle_iterator& operator+=(const difference_type offset) noexcept
    {
        mybase::operator+=(offset);
        return *this;
    }

    [[nodiscard]] rle_iterator operator+(const difference_type offset) const noexcept
    {
        rle_iterator moved(*this);
        moved += offset;
        return moved;
    }

    rle_iterator& operator-=(const difference_type offset) noexcept
    {
        mybase::operator-=(offset);
        return *this;
    }

    // Use base's difference method (iterator - iterator).
    using mybase::operator-;

    [[nodiscard]] rle_iterator operator-(const difference_type offset) const noexcept
    {
        rle_iterator moved(*this);
        moved -= offset;
        return moved;
    }

    [[nodiscard]] reference operator[](const difference_type offset) const noexcept
    {
        const auto& readOnly = mybase::operator[](offset);
        return const_cast<reference>(readOnly);
    }
};
};
// Run Length Encoded data storage
// T = The type you wish to store
// S = The type of the counter value to use (max run length)
// N = (optional, default 1) The count of runs to store internally before heap alloc
template<typename T, typename S = size_t, unsigned int N = 1>
class rle
{
private:
boost::container::small_vector<std::pair<T, S>, N> _list;
S _size;
// Adopts an already-built run list together with the total length it describes.
// `list` is taken by value so callers can move into it; it is then moved
// (not copied again) into the member.
rle(boost::container::small_vector<std::pair<T, S>, N> list, S size) :
    _list(std::move(list)),
    _size(size)
{
}
public:
//using iterator = details::rle_iterator<typename decltype(_list)::iterator>;
using const_iterator = details::rle_const_iterator<typename decltype(_list)::const_iterator>;
//using reverse_iterator = std::reverse_iterator<iterator>;
using const_reverse_iterator = std::reverse_iterator<const_iterator>;
static const S npos = std::numeric_limits<S>::max();
// Creates an empty encoding: no runs and a total length of zero.
rle() :
    _list{},
    _size{ static_cast<S>(0) }
{
}
rle(const S size, const T value) :
_size(size)
{
fill(value);
}
// Reports the stored total length, i.e. how many positions the
// encoded runs are meant to describe once expanded.
S size() const noexcept
{
    return this->_size;
}
// Looks up the value stored at `position`.
// Throws E_INVALIDARG (via the two-argument overload) when out of range.
T at(S position) const
{
    // The caller does not care how much longer the value applies.
    S ignored{};
    return at(position, ignored);
}
// Looks up the value stored at `position` and reports through `applies`
// how many positions, counting this one, the same run still covers.
// Throws E_INVALIDARG when `position` is not inside the stored length.
T at(S position, S& applies) const
{
    THROW_HR_IF(E_INVALIDARG, position >= _size);
    const auto run = _at(position, applies);
    return run->first;
}
// Returns a new rle holding `count` positions starting at `offset`.
// - offset: first position to copy; must not exceed size().
// - count: number of positions to copy; clamped to what remains after
//   `offset` (npos means "through the end").
// An empty range (count of 0, or offset == size()) yields an empty rle.
// Throws E_INVALIDARG when `offset` is past the end.
[[nodiscard]] rle<T, S, N> substr(const S offset = 0, const S count = npos) const
{
    THROW_HR_IF(E_INVALIDARG, offset > _size);

    const S length = std::min(gsl::narrow_cast<S>(_size - offset), count);
    if (length == 0)
    {
        // Nothing to copy; also avoids underflowing the end index below.
        return rle<T, S, N>{};
    }

    const S startIndex = offset;
    const S endIndex = gsl::narrow_cast<S>(offset + length - 1);

    S startApplies{};
    S endApplies{};
    const auto firstRun{ _at(startIndex, startApplies) };
    const auto lastRun{ _at(endIndex, endApplies) };

    // Copy every run touched by the range, then trim the two edge runs.
    decltype(_list) substring{ firstRun, lastRun + 1 };
    // The first run only contributes what remains from startIndex onward.
    substring.front().second = startApplies;
    // The last run loses whatever lies beyond endIndex. When both edges are
    // the same run, this works on the already-trimmed length from above.
    substring.back().second = substring.back().second - endApplies + 1;

    return til::rle<T, S, N>(std::move(substring), length);
}
// Swaps `oldValue` for `newValue` in every run that currently holds it.
// Run lengths and positions are left untouched.
void replace(const T oldValue, const T newValue)
{
    for (auto it = _list.begin(); it != _list.end(); ++it)
    {
        if (it->first == oldValue)
        {
            it->first = newValue;
        }
    }
}
// Replaces `length` positions starting at `pos` with the entire contents of `rle`.
// _merge consumes (value, length) pairs, so hand it the other instance's run
// storage rather than its per-position value iterators.
// NOTE(review): passing *this as `rle` would read the list while it is being
// rewritten; callers presumably never do that, but confirm.
void replace(const S pos, const S length, const til::rle<T, S, N>& rle)
{
    _merge(rle._list.cbegin(), rle._list.cend(), pos, length);
}
// Replaces `length` positions starting at `pos` with a slice of `rle`.
// - subpos: first position of `rle` to take.
// - sublen: number of positions to take; clamped to what remains after
//   `subpos` (npos means "through the end").
// Throws E_INVALIDARG when the requested slice is empty or starts past the end.
void replace(const S pos, const S length, const til::rle<T, S, N>& rle, S subpos, S sublen = npos)
{
    THROW_HR_IF(E_INVALIDARG, subpos >= rle.size() || sublen == 0);

    // _merge consumes (value, length) pairs, so re-encode the requested slice
    // and hand over its run storage instead of per-position value iterators.
    const auto piece = rle.substr(subpos, sublen);
    _merge(piece._list.cbegin(), piece._list.cend(), pos, length);
}
// Replaces `length` positions starting at `pos` with `repeat` copies of `value`.
// When `repeat` differs from `length` the stored string grows or shrinks.
void replace(const S pos, const S length, const S repeat, const T value)
{
    // TODO: validate position in bounds?
    // The inserted run is `repeat` long; `length` is only how much it covers.
    std::pair<T, S> item{ value, repeat };
    gsl::span<std::pair<T, S>> span{ &item, 1 };
    _merge(span.begin(), span.end(), pos, length);
}
template<class Iter>
void replace(const S pos, const S length, Iter first, Iter last)
{
_merge(first, last, pos, length);
}
// Adjust the size of the run.
// If new size is bigger, the last value is extended to new width.
// If new size is smaller, the runs are cut to fit.
void resize(const S newSize)
{
THROW_HR_IF(E_INVALIDARG, 0 == newSize);
// Easy case. If the new row is longer, increase the length of the last run by how much new space there is.
if (newSize > _size)
{
// Get the attribute that covers the final column of old width.
auto& run = _list.back();
// Extend its length by the additional columns we're adding.
run.second = run.second + newSize - _size;
// Store that the new total width we represent is the new width.
_size = newSize;
}
// harder case: new row is shorter.
else
{
// Get the attribute that covers the final column of the new width
S applies = 0;
auto run = _at(newSize - 1, applies);
// applies was given to us as "how many columns left from this point forward are covered by the returned run"
// So if the original run was B5 covering a 5 size OldWidth and we have a newSize of 3
// then when we called FindAttrIndex, it returned the B5 as the pIndexedRun and a 2 for how many more segments it covers
// after and including the 3rd column.
// B5-2 = B3, which is what we desire to cover the new 3 size buffer.
run->second = run->second - applies + 1;
// Store that the new total width we represent is the new width.
_size = newSize;
// Erase segments after the one we just updated.
_list.erase(run + 1, _list.cend());
// NOTE: Under some circumstances here, we have leftover run segments in memory or blank run segments
// in memory. We're not going to waste time redimensioning the array in the heap. We're just noting that the useful
// portions of it have changed.
}
}
// Places this value in every position from start to end.
// If no start is specified, fills the entire list.
void fill(const T value, const S start = gsl::narrow_cast<S>(0))
{
const auto length = gsl::narrow_cast<S>(_size - start);
replace(start, length, length, value);
}
// Two encodings are equal when they describe the same total length with the
// same sequence of runs.
constexpr bool operator==(const rle& other) const noexcept
{
    // Pass both ends: the three-iterator form of std::equal assumes the second
    // range is at least as long as the first and would read past it otherwise.
    return _size == other._size &&
           std::equal(_list.cbegin(), _list.cend(), other._list.cbegin(), other._list.cend());
}
// Negation of operator==.
constexpr bool operator!=(const rle& other) const noexcept
{
    return !this->operator==(other);
}
/*[[nodiscard]] iterator begin() noexcept
{
return iterator(_list.begin());
}*/
// Iterator positioned on the first run of the stored list.
[[nodiscard]] const_iterator begin() const noexcept
{
    return const_iterator(_list.cbegin());
}
/*[[nodiscard]] iterator end() noexcept
{
return iterator(_list.end());
}*/
// Iterator positioned one past the last run of the stored list.
[[nodiscard]] const_iterator end() const noexcept
{
    return const_iterator(_list.cend());
}
/*[[nodiscard]] reverse_iterator rbegin() noexcept
{
return reverse_iterator(end());
}*/
// Reverse iterator built from end(), for walking back to front.
[[nodiscard]] const_reverse_iterator rbegin() const noexcept
{
    const auto last = end();
    return const_reverse_iterator(last);
}
/*[[nodiscard]] reverse_iterator rend() noexcept
{
return reverse_iterator(begin());
}*/
// Reverse iterator built from begin(); the end of a back-to-front walk.
[[nodiscard]] const_reverse_iterator rend() const noexcept
{
    const auto first = begin();
    return const_reverse_iterator(first);
}
// Same position as begin(); provided for container-style symmetry.
[[nodiscard]] const_iterator cbegin() const noexcept
{
    return const_iterator(_list.begin());
}
// Same position as end(); provided for container-style symmetry.
[[nodiscard]] const_iterator cend() const noexcept
{
    return const_iterator(_list.end());
}
// Same position as rbegin(); provided for container-style symmetry.
[[nodiscard]] const_reverse_iterator crbegin() const noexcept
{
    return const_reverse_iterator(end());
}
// Same position as rend(); provided for container-style symmetry.
[[nodiscard]] const_reverse_iterator crend() const noexcept
{
    return const_reverse_iterator(begin());
}
#ifdef UNIT_TESTING
// Renders the stored size followed by every run as "[value for length]".
// Only built for unit tests.
std::wstring to_string() const
{
    std::wstringstream stream;
    stream << std::endl;
    stream << L"Run of size " << size() << " contains:" << std::endl;

    for (auto it = _list.cbegin(); it != _list.cend(); ++it)
    {
        stream << wil::str_printf<std::wstring>(L"[%td for %td]", it->first, it->second);
        stream << L" ";
    }

    stream << std::endl;
    return stream.str();
}
#endif
protected:
// TODO: get Dustin help to not duplicate this for constness.
// Finds the run that covers `position`.
// - position: zero-based index into the expanded string; must be < _size.
// - applies: receives how many positions, counting `position` itself,
//   remain covered by the returned run.
// Returns an iterator into the stored run list. Fails fast on a violated
// precondition or when the stored runs do not reach `position`.
auto _at(S position, S& applies) const
{
    FAIL_FAST_IF(!(position < _size)); // The requested index cannot be longer than the total length described by this set of Attrs.
    FAIL_FAST_IF(!(_list.size() > 0)); // There should be a non-zero and positive number of items in the array.

    // Walk the runs from the front, summing their lengths, until the sum passes `position`.
    S totalLength = 0;
    auto runPos = _list.begin();
    for (;;)
    {
        totalLength += runPos->second;
        if (totalLength > position)
        {
            // This run is the one holding the requested position.
            break;
        }

        runPos++;
        if (!(runPos < _list.end()))
        {
            // Ran out of runs without reaching the position.
            break;
        }
    }

    // Falling off the end means the runs were not long enough to describe the entire string.
    FAIL_FAST_IF(runPos >= _list.end());

    // The remaining applicability is the total length seen so far minus the position searched for.
    FAIL_FAST_IF(!(totalLength > position)); // The length of all attributes we counted up so far should be longer than the position requested or we'll underflow.
    applies = totalLength - position;
    FAIL_FAST_IF(!(applies > 0)); // An attribute applies for >0 characters

    // MSFT: 17130145 - will restore this and add a better assert to catch the real issue.
    //FAIL_FAST_IF(!(attrApplies <= _size)); // An attribute applies for a maximum of the total length available to us
    return runPos;
}
// Mutable flavor of _at: same lookup, same checks, but returns an iterator
// through which the found run can be modified.
// - position: zero-based index into the expanded string; must be < _size.
// - applies: receives how many positions, counting `position` itself,
//   remain covered by the returned run.
auto _at(S position, S& applies)
{
    // Let the const overload do the search so the logic lives in one place...
    const auto found = static_cast<const rle&>(*this)._at(position, applies);
    // ...then turn its const iterator into a mutable one at the same offset.
    return _list.begin() + (found - _list.cbegin());
}
// Routine Description:
// - Combines the given "string" worth of value/length pairs into our
// existing internally stored "string" of pairs.
// Arguments:
// - first/last - input iterators over the string of pairs to store
// - startIndex - location in our existing string to insert/cover/replace with the new data
// - coverLength - number of compressed values in our internal storage to "lose" to or "cover" with the new data offset from startIndex
template<class Iter>
void _merge(Iter first,
Iter last,
const S startIndex,
const S givenCoverLength)
{
// Definitions:
// Existing Run = The run length encoded color array we're already storing in memory before this was called.
// Insert Run = The run length encoded color array that someone is asking us to inject into our stored memory run.
// New Run = The run length encoded color array that we have to allocate and rebuild to store internally
// which will replace Existing Run at the end of this function.
// Example:
// _size = 10.
// Existing Run: R3 -> G5 -> B2
// Insert Run: Y1 -> N1 at startIndex = 5 and coverLength = 2
// (first to last is a 2 length iteration with Y1->N1 in it)
// Final Run: R3 -> G2 -> Y1 -> N1 -> G1 -> B2
// How many "colors" are covered by all compressed pairs in the incoming new items list.
// e.g. Y2 -> N5 covers 7 total locations as YYNNNNNN if it were uncompressed.
const auto newItemsTotalCoverage = std::accumulate(first, last, (S)0, [](S value, auto& item) -> S {
return value + gsl::narrow_cast<S>(item.second);
});
// How many pairs are in the new items iterators
// e.g. Y2 -> N5 is 2 pairs
const auto newItemsSize = std::distance(first, last);
// If npos was specified as the cover length,
// we will presume covering until the end of the string.
const auto coverLength = givenCoverLength == npos ? _size - startIndex : givenCoverLength;
// ---
// In the simple case, we're given the same length of both and it's a directly replace.
// e.g. Y2->N5 and a `coverLength` of 7. This means we would do, for an original R3->G5->B2 and startIndex = 2...
// R3->G5->B2 is RRRGGGGGBB (10 columns total)
// Y2->N5 at 2 is YYNNNNN (7 columns covered)
// so the result RRYYNNNNNB or compressed is R2->Y2->N5->B1 (10 columns total)
// ---
// In the complicated cases, we're given a larger or smaller coverLength because the caller wants to
// either truncate out or pad out part of the data as they're covering up.
// e.g. Y2->N5 and a `coverLength` of 0. This means we would do, for an original R3->G5->B2 and startIndex = 2...
// R3->G5->B2 is RRRGGGGGBB (10 columns total)
// Y2->N5 at 2 is YYNNNNN (7 columns inserted where the Y is aligned, but covering nothing... so the rest slides right...)
// so the result RRYYNNNNNRGGGGGBB or compressed R2->Y2->N5->R1->G5->B1 (17 columns total)
// This is a "grow" case.
// -OR-
// e.g. Y2->N5 and a `coverLength` of 8 (or more)
// R3->G5->B2 is RRRGGGGGBB (10 columns total)
// Y2->N5 at 2 is YYNNNNN (8 columns covered truncating an extra after the final N)
// so the result RRYYNNNNN or compressed is R2->Y2->N5 (9 columns total)
// This is a "shrink" case.
// ---
// The math is done like this because S is usually unsigned and we want to floor at 0.
const auto grow = newItemsTotalCoverage > coverLength ? newItemsTotalCoverage - coverLength : 0;
const auto shrink = coverLength > newItemsTotalCoverage ? coverLength - newItemsTotalCoverage : 0;
// TODO: GH#XXXX - unlock shortcuts for grow/shrink runs.
if (coverLength == newItemsTotalCoverage)
{
// If the insertion size is 1, do some pre-processing to
// see if we can get this done quickly.
if (newItemsSize == 1)
{
// Get the new color attribute we're trying to apply
const T NewAttr = first->first;
// If the existing run was only 1 element...
// ...and the new color is the same as the old...
if (_list.size() == 1 && _list.front().first == NewAttr)
{
// ... then we don't have to do anything but return.
return;
}
// .. otherwise if we internally have a list of 2 or more and we're about to insert a single color
// it's possible that we just walk left-to-right through the row and find a quick exit.
else if (startIndex >= 0 && newItemsTotalCoverage == 1)
{
// First we try to find the run where the insertion happens, using lowerBound and upperBound to track
// where we are currently at.
const auto begin = _list.begin();
S lowerBound = 0;
S upperBound = 0;
for (S i = 0; i < _list.size(); i++)
{
const auto curr = begin + i;
upperBound += curr->second;
if (startIndex >= lowerBound && startIndex < upperBound)
{
// The run that we try to insert into has the same color as the new one.
// e.g.
// AAAAABBBBBBBCCC
// ^
// AAAAABBBBBBBCCC
//
// 'B' is the new color and '^' represents where startIndex is. We don't have to
// do anything.
if (curr->first == NewAttr)
{
return;
}
// If the current run has length of exactly one, we can simply change the attribute
// of the current run.
// e.g.
// AAAAABCCCCCCCCC
// ^
// AAAAADCCCCCCCCC
//
// Here 'D' is the new color.
if (curr->second == 1)
{
curr->first = NewAttr;
return;
}
// If the insertion happens at current run's lower boundary...
if (startIndex == lowerBound && i > 0)
{
const auto prev = std::prev(curr, 1);
// ... and the previous run has the same color as the new one, we can
// just adjust the counts in the existing two elements in our internal list.
// e.g.
// AAAAABBBBBBBCCC
// ^
// AAAAAABBBBBBCCC
//
// Here 'A' is the new color.
if (NewAttr == prev->first)
{
prev->second++;
curr->second--;
// If we just reduced the right half to zero, just erase it out of the list.
if (curr->second == 0)
{
_list.erase(curr);
}
return;
}
}
// If the insertion happens at current run's upper boundary...
if (startIndex == upperBound - 1 && i + 1 < _list.size())
{
// ...then let's try our luck with the next run if possible. This is basically the opposite
// of what we did with the previous run.
// e.g.
// AAAAAABBBBBBCCC
// ^
// AAAAABBBBBBBCCC
//
// Here 'B' is the new color.
const auto next = std::next(curr, 1);
if (NewAttr == next->first)
{
curr->second--;
next->second++;
if (curr->second == 0)
{
_list.erase(curr);
}
return;
}
}
}
// Advance one run in the _list.
lowerBound = upperBound;
// The lowerBound is larger than startIndex, which means we fail to find an early exit at the run
// where the insertion happens. We can just break out.
if (lowerBound > startIndex)
{
break;
}
}
}
}
// If we're about to cover the entire existing run with a new one, we can also make an optimization.
if (startIndex == 0 && newItemsTotalCoverage == _size)
{
// Just dump what we're given over what we have and call it a day.
_list.assign(first, last);
return;
}
}
// In the worst case scenario, we will need a new run that is the length of
// The existing run in memory + The new run in memory + 1.
// This worst case occurs when we inject a new item in the middle of an existing run like so
// Existing R3->B5->G2, Insertion Y2 starting at 5 (in the middle of the B5)
// becomes R3->B2->Y2->B1->G2.
// The original run was 3 long. The insertion run was 1 long. We need 1 more for the
// fact that an existing piece of the run was split in half (to hold the latter half).
const S cNewRun = gsl::narrow_cast<S>(_list.size() + newItemsSize + 1);
decltype(_list) newRun;
newRun.reserve(cNewRun);
// We will start analyzing from the beginning of our existing run.
// Use some iterators to keep track of where we are in walking through our runs.
// Get the existing run that we'll be updating/manipulating.
auto existingPos = _list.begin();
const auto existingEnd = _list.end();
auto incomingPos = first;
S incomingRemaining = gsl::narrow_cast<S>(newItemsSize);
S existingCoverage = 0;
// Copy the existing run into the new buffer up to the "start index" where the new run will be injected.
// If the new run starts at 0, we have nothing to copy from the beginning.
if (startIndex != 0)
{
// While we're less than the desired insertion position...
while (existingCoverage < startIndex)
{
// Add up how much length we can cover by copying an item from the existing run.
existingCoverage += existingPos->second;
// Copy it to the new run buffer and advance both pointers.
newRun.push_back(*existingPos++);
}
// When we get to this point, we've copied full segments from the original existing run
// into our new run buffer. We will have 1 or more full segments of color attributes and
// we MIGHT have to cut the last copied segment's length back depending on where the inserted
// attributes will fall in the final/new run.
// Some examples:
// - Starting with the original string R3 -> G5 -> B2
// - 1. If the insertion is Y5 at start index 3
// We are trying to get a result/final/new run of R3 -> Y5 -> B2.
// We just copied R3 to the new destination buffer and we cang skip down and start inserting the new attrs.
// - 2. If the insertion is Y3 at start index 5
// We are trying to get a result/final/new run of R3 -> G2 -> Y3 -> B2.
// We just copied R3 -> G5 to the new destination buffer with the code above.
// But the insertion is going to cut out some of the length of the G5.
// We need to fix this up below so it says G2 instead to leave room for the Y3 to fit in
// the new/final run.
// Fetch out the length so we can fix it up based on the below conditions.
S length = newRun.back().second;
// If we've covered more cells already than the start of the attributes to be inserted...
if (existingCoverage > startIndex)
{
// ..then subtract some of the length of the final cell we copied.
// We want to take remove the difference in distance between the cells we've covered in the new
// run and the insertion point.
// (This turns G5 into G2 from Example 2 just above)
length -= (existingCoverage - startIndex);
}
// Now we're still on that "last cell copied" into the new run.
// If the color of that existing copied cell matches the color of the first segment
// of the run we're about to insert, we can just increment the length to extend the coverage.
if (newRun.back().first == incomingPos->first)
{
length += incomingPos->second;
// Since the color matched, we have already "used up" part of the insert run
// and can skip it in our big "memcopy" step below that will copy the bulk of the insert run.
incomingRemaining--;
incomingPos++;
}
// We're done manipulating the length. Store it back.
newRun.back().second = length;
}
// Bulk copy the majority (or all, depending on circumstance) of the insert run into the final run buffer.
std::copy_n(incomingPos, incomingRemaining, std::back_inserter(newRun));
// We're technically done with the insert run now and have 0 remaining, but won't bother updating its pointers
// and counts any further because we won't use them.
const S endIndex = startIndex + shrink + (newItemsTotalCoverage - grow) - 1;
// Now we need to move our pointer for the original existing run forward and update our counts
// on how many cells we could have copied from the source before finishing off the new run.
while (existingCoverage <= endIndex)
{
FAIL_FAST_IF(!(existingPos != existingEnd));
existingCoverage += existingPos->second;
existingPos++;
}
// If we still have original existing run cells remaining, copy them into the final new run.
if (existingPos != existingEnd || existingCoverage != (endIndex + 1))
{
// We advanced the existing run pointer and its count to on or past the end of what the insertion run filled in.
// If this ended up being past the end of what the insertion run covers, we have to account for the cells after
// the insertion run but before the next piece of the original existing run.
// The example in this case is if we had...
// Existing Run = R3 -> G5 -> B2 -> X5
// Insert Run = Y2 @ startIndex = 7 and endIndex = 8
// ... then at this point in time, our states would look like...
// New Run so far = R3 -> G4 -> Y2
// Existing Run Pointer is at X5
// Existing run coverage count at 3 + 5 + 2 = 10.
// However, in order to get the final desired New Run
// (which is R3 -> G4 -> Y2 -> B1 -> X5)
// we would need to grab a piece of that B2 we already skipped past.
// iExistingRunCoverage = 10. endIndex = 8. endIndex+1 = 9. 10 > 9. So we skipped something.
if (existingCoverage > (endIndex + 1))
{
// Back up the existing run pointer so we can grab the piece we skipped.
existingPos--;
// If the color matches what's already in our run, just increment the count value.
// This case is slightly off from the example above. This case is for if the B2 above was actually Y2.
// That Y2 from the existing run is the same color as the Y2 we just filled a few columns left in the final run
// so we can just adjust the final run's column count instead of adding another segment here.
if (newRun.back().first == existingPos->first)
{
S length = newRun.back().second;
length += (existingCoverage - (endIndex + 1));
newRun.back().second = length;
}
else
{
// If the color didn't match, then we just need to copy the piece we skipped and adjust
// its length for the discrepancy in columns not yet covered by the final/new run.
// Move forward to a blank spot in the new run
newRun.emplace_back();
// Copy the existing run's color information to the new run
newRun.back().first = existingPos->first;
// Adjust the length of that copied color to cover only the reduced number of columns needed
// now that some have been replaced by the insert run.
newRun.back().second = existingCoverage - (endIndex + 1);
}
// Now that we're done recovering a piece of the existing run we skipped, move the pointer forward again.
existingPos++;
}
// OK. In this case, we didn't skip anything. The end of the insert run fell right at a boundary
// in columns that was in the original existing run.
// However, the next piece of the original existing run might happen to have the same color attribute
// as the final piece of what we just copied.
// As an example...
// Existing Run = R3 -> G5 -> B2.
// Insert Run = B5 @ startIndex = 3 and endIndex = 7
// New Run so far = R3 -> B5
// New Run desired when done = R3 -> B7
// Existing run pointer is on B2.
// We want to merge the 2 from the B2 into the B5 so we get B7.
else if (newRun.back().first == existingPos->first)
{
// Add the value from the existing run into the current new run position.
S length = newRun.back().second;
length += existingPos->second;
newRun.back().second = length;
// Advance the existing run position since we consumed its value and merged it in.
existingPos++;
}
// Now bulk copy any segments left in the original existing run
if (existingPos < existingEnd)
{
std::copy_n(existingPos, (existingEnd - existingPos), std::back_inserter(newRun));
}
}
// OK, phew. We're done. Now we just need to free the existing run and store the new run in its place.
_list.swap(newRun);
return;
}
#ifdef UNIT_TESTING
friend class ::RunLengthEncodingTests;
#endif
};
};
#ifdef __WEX_COMMON_H__
namespace WEX::TestExecution
{
// Lets the test framework print a til::rle<T> when a verification is reported.
template<typename T>
class VerifyOutputTraits<::til::rle<T>>
{
public:
    static WEX::Common::NoThrowString ToString(const ::til::rle<T>& object)
    {
        const auto text = object.to_string();
        return WEX::Common::NoThrowString(text.c_str());
    }
};
// Teaches the test framework how to compare two til::rle<T> instances.
// Ordering comparisons are deliberately unavailable.
template<typename T>
class VerifyCompareTraits<::til::rle<T>, ::til::rle<T>>
{
public:
    // Value equality, as defined by rle::operator==.
    static bool AreEqual(const ::til::rle<T>& expected, const ::til::rle<T>& actual) noexcept
    {
        return expected == actual;
    }

    // Identity: both references name the very same object.
    static bool AreSame(const ::til::rle<T>& expected, const ::til::rle<T>& actual) noexcept
    {
        const auto* const lhs = &expected;
        const auto* const rhs = &actual;
        return lhs == rhs;
    }

    static bool IsLessThan(const ::til::rle<T>& expectedLess, const ::til::rle<T>& expectedGreater) = delete;

    static bool IsGreaterThan(const ::til::rle<T>& expectedGreater, const ::til::rle<T>& expectedLess) = delete;

    // "Null" means equal to a default-constructed instance.
    static bool IsNull(const ::til::rle<T>& object) noexcept
    {
        return object == til::rle<T>{};
    }
};
};
#endif