a8e4bedae3
## Summary of the Pull Request Introduces `til::rle`, a vector-like container which stores elements of type T in a run length encoded format. This allows efficient compaction of repeated elements within the vector. ## References * #8000 - Supports buffer rewrite work. A re-use of `til::rle` will be useful as a column counter as we pursue NxM storage and presentation. * #3075 - The new iterators allow skipping forward by multiple units, which wasn't possible under `TextBuffer-/OutputCellIterator`. Additionally, they also allow bulk insertions. * #8787 and #410 - High probability this should be `pmr`-ified like `bitmap` for things like `chafa` and `cacafire` which are changing the run length frequently. ## PR Checklist * [x] Closes #8741 * [x] I work here. * [x] Tests added. * [x] Tests passed. ## Validation Steps Performed * [x] Ran `cacafire` in `OpenConsole.exe` and it looked beautiful * [x] Ran new suite of `RunLengthEncodingTests.cpp` Co-authored-by: Michael Niksa <miniksa@microsoft.com>
1064 lines
39 KiB
C++
1064 lines
39 KiB
C++
// Copyright (c) Microsoft Corporation.
|
|
// Licensed under the MIT license.
|
|
|
|
#pragma once
|
|
|
|
#ifdef UNIT_TESTING
|
|
class RunLengthEncodingTests;
|
|
#endif
|
|
|
|
namespace til // Terminal Implementation Library. Also: "Today I Learned"
|
|
{
|
|
namespace details
|
|
{
|
|
template<typename T, typename S, typename ParentIt>
|
|
class rle_iterator
|
|
{
|
|
public:
|
|
using iterator_category = std::random_access_iterator_tag;
|
|
using value_type = T;
|
|
using pointer = T*;
|
|
using reference = T&;
|
|
using size_type = S;
|
|
using difference_type = typename ParentIt::difference_type;
|
|
|
|
// TODO GH#10135: Enable checked iterators for _ITERATOR_DEBUG_LEVEL != 0.
|
|
explicit rle_iterator(ParentIt&& it) noexcept :
|
|
_it{ std::forward<ParentIt>(it) },
|
|
_pos{ 0 }
|
|
{
|
|
}
|
|
|
|
[[nodiscard]] reference operator*() const noexcept
|
|
{
|
|
return _it->value;
|
|
}
|
|
|
|
[[nodiscard]] pointer operator->() const noexcept
|
|
{
|
|
return &operator*();
|
|
}
|
|
|
|
rle_iterator& operator++() noexcept
|
|
{
|
|
++_pos;
|
|
if (_pos == _it->length)
|
|
{
|
|
++_it;
|
|
_pos = 0;
|
|
}
|
|
return *this;
|
|
}
|
|
|
|
rle_iterator operator++(int) noexcept
|
|
{
|
|
auto tmp = *this;
|
|
++tmp;
|
|
return tmp;
|
|
}
|
|
|
|
rle_iterator& operator--() noexcept
|
|
{
|
|
if (_pos == 0)
|
|
{
|
|
--_it;
|
|
_pos = _it->length;
|
|
}
|
|
--_pos;
|
|
return *this;
|
|
}
|
|
|
|
rle_iterator operator--(int) noexcept
|
|
{
|
|
auto tmp = *this;
|
|
--tmp;
|
|
return tmp;
|
|
}
|
|
|
|
rle_iterator& operator+=(difference_type move) noexcept
|
|
{
|
|
// Splitting our function into a forward and backward move
|
|
// makes implementing the arithmetic quite a bit simpler.
|
|
if (move >= 0)
|
|
{
|
|
while (move > 0)
|
|
{
|
|
// If we have a run like this:
|
|
// 1 1 1|2 2 2|3 3 3
|
|
// ^
|
|
// And this iterator points to ^, then space will be 2,
|
|
// as that's the number of times this iterator would continue
|
|
// yielding the number "2", if we were using operator++().
|
|
const auto space = static_cast<difference_type>(_it->length - _pos);
|
|
|
|
if (move < space)
|
|
{
|
|
// At this point: move <= std::numeric_limits<size_type>::max().
|
|
// --> the narrowing is safe.
|
|
_pos += gsl::narrow_cast<size_type>(move);
|
|
break;
|
|
}
|
|
|
|
move -= space;
|
|
++_it;
|
|
_pos = 0;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
move = -move;
|
|
|
|
while (move > 0)
|
|
{
|
|
// If we have a run like this:
|
|
// 1 1 1|2 2 2|3 3 3
|
|
// ^
|
|
// And this iterator points to ^, then space will be 1,
|
|
// as that's the number of times this iterator would continue
|
|
// yielding the number "2", if we were using operator--().
|
|
const auto space = static_cast<difference_type>(_pos);
|
|
|
|
if (move <= space)
|
|
{
|
|
// At this point: move <= std::numeric_limits<size_type>::max()
|
|
// --> the narrowing is safe.
|
|
_pos -= gsl::narrow_cast<size_type>(move);
|
|
break;
|
|
}
|
|
|
|
// When moving backwards we want to move to the last item
|
|
// in the previous run (that is: _pos == length - 1).
|
|
// --> Don't just move to the beginning of this run (-= _pos),
|
|
// but actually one item further (-= 1).
|
|
move -= static_cast<difference_type>(_pos) + 1;
|
|
--_it;
|
|
// _pos is supposed to be in the range [0, _it->length).
|
|
// --> The last position in the previous run is length - 1;
|
|
_pos = _it->length - 1;
|
|
}
|
|
}
|
|
|
|
return *this;
|
|
}
|
|
|
|
rle_iterator& operator-=(const difference_type offset) noexcept
|
|
{
|
|
return *this += -offset;
|
|
}
|
|
|
|
[[nodiscard]] rle_iterator operator+(const difference_type offset) const noexcept
|
|
{
|
|
auto tmp = *this;
|
|
return tmp += offset;
|
|
}
|
|
|
|
[[nodiscard]] rle_iterator operator-(const difference_type offset) const noexcept
|
|
{
|
|
auto tmp = *this;
|
|
return tmp -= offset;
|
|
}
|
|
|
|
[[nodiscard]] difference_type operator-(const rle_iterator& right) const noexcept
|
|
{
|
|
// If we figure out which of the two iterators is "lower" (nearer to begin()) and
|
|
// "upper" (nearer to end()), we can simplify the way we think about this algorithm:
|
|
// The distance equals the length of all runs between lower and upper,
|
|
// excluding the positions of the lower and upper iterator.
|
|
//
|
|
// For instance:
|
|
// 1 1 1|2 2 2 2|3 3|4 4 4
|
|
// ^ ^
|
|
// lower upper
|
|
// _pos == 2 _pos == 1
|
|
//
|
|
// The total distance equals the total length all runs that are covered by
|
|
// lower up until (but not including) upper (here: 9), minus the number of
|
|
// items not covered by lower (here: 2, the same as _pos), plus the ones
|
|
// covered by upper, excluding itself (here: 1, the same as _pos).
|
|
|
|
const auto negative = *this < right;
|
|
const auto& lower = negative ? *this : right;
|
|
const auto& upper = negative ? right : *this;
|
|
difference_type distance = 0;
|
|
|
|
for (auto it = lower._it; it < upper._it; ++it)
|
|
{
|
|
distance += it->length;
|
|
}
|
|
|
|
distance -= lower._pos;
|
|
distance += upper._pos;
|
|
|
|
return negative ? -distance : distance;
|
|
}
|
|
|
|
[[nodiscard]] reference operator[](const difference_type offset) const noexcept
|
|
{
|
|
return *operator+(offset);
|
|
}
|
|
|
|
[[nodiscard]] bool operator==(const rle_iterator& right) const noexcept
|
|
{
|
|
return _it == right._it && _pos == right._pos;
|
|
}
|
|
|
|
[[nodiscard]] bool operator!=(const rle_iterator& right) const noexcept
|
|
{
|
|
return !(*this == right);
|
|
}
|
|
|
|
[[nodiscard]] bool operator<(const rle_iterator& right) const noexcept
|
|
{
|
|
return _it < right._it || (_it == right._it && _pos < right._pos);
|
|
}
|
|
|
|
[[nodiscard]] bool operator>(const rle_iterator& right) const noexcept
|
|
{
|
|
return right < *this;
|
|
}
|
|
|
|
[[nodiscard]] bool operator<=(const rle_iterator& right) const noexcept
|
|
{
|
|
return !(right < *this);
|
|
}
|
|
|
|
[[nodiscard]] bool operator>=(const rle_iterator& right) const noexcept
|
|
{
|
|
return !(*this < right);
|
|
}
|
|
|
|
private:
|
|
ParentIt _it;
|
|
size_type _pos;
|
|
};
|
|
} // namespace details
|
|
|
|
// rle_pair is a simple clone of std::pair, with one difference:
// copy and move constructors and operators are explicitly defaulted.
// This allows rle_pair to be std::is_trivially_copyable, if both T and S are.
// --> rle_pair can be used with memcpy(), unlike std::pair.
//
// T is the type of the repeated value, S the type used to count repetitions.
template<typename T, typename S>
struct rle_pair
{
    using value_type = T;
    using size_type = S;

    rle_pair() = default;

    rle_pair(const rle_pair&) = default;
    rle_pair& operator=(const rle_pair&) = default;

    rle_pair(rle_pair&&) = default;
    rle_pair& operator=(rle_pair&&) = default;

    // Copies both members from the given arguments.
    constexpr rle_pair(const T& value, const S& length) noexcept(std::is_nothrow_copy_constructible_v<T> && std::is_nothrow_copy_constructible_v<S>) :
        value(value), length(length)
    {
    }

    // Moves both members out of the given arguments.
    // The exception specification must reflect what the member initializers do:
    // they move-construct T and S, so that's what we have to ask the type traits about.
    constexpr rle_pair(T&& value, S&& length) noexcept(std::is_nothrow_move_constructible_v<T> && std::is_nothrow_move_constructible_v<S>) :
        value(std::move(value)), length(std::move(length))
    {
    }

    // Exchanges the contents of this pair with the ones of other.
    constexpr void swap(rle_pair& other) noexcept(std::is_nothrow_swappable_v<T> && std::is_nothrow_swappable_v<S>)
    {
        if (this != std::addressof(other))
        {
            // std::is_nothrow_swappable_v describes the unqualified (ADL) swap call.
            // --> Use the same call here so that the noexcept specification holds
            //     true for types that provide their own swap().
            using std::swap;
            swap(value, other.value);
            swap(length, other.length);
        }
    }

    value_type value{};
    size_type length{};
};
|
|
|
|
template<typename T, typename S>
|
|
[[nodiscard]] constexpr bool operator==(const rle_pair<T, S>& lhs, const rle_pair<T, S>& rhs)
|
|
{
|
|
return lhs.value == rhs.value && lhs.length == rhs.length;
|
|
}
|
|
|
|
template<typename T, typename S>
|
|
[[nodiscard]] constexpr bool operator!=(const rle_pair<T, S>& lhs, const rle_pair<T, S>& rhs)
|
|
{
|
|
return !(lhs == rhs);
|
|
}
|
|
|
|
template<typename T, typename S = std::size_t, typename Container = std::vector<rle_pair<T, S>>>
|
|
class basic_rle
|
|
{
|
|
public:
|
|
using value_type = T;
|
|
using allocator_type = typename Container::allocator_type;
|
|
using pointer = typename Container::pointer;
|
|
using const_pointer = typename Container::const_pointer;
|
|
using reference = T&;
|
|
using const_reference = const T&;
|
|
using size_type = S;
|
|
using difference_type = S;
|
|
|
|
using const_iterator = details::rle_iterator<const T, S, typename Container::const_iterator>;
|
|
using const_reverse_iterator = std::reverse_iterator<const_iterator>;
|
|
|
|
using rle_type = rle_pair<value_type, size_type>;
|
|
using container = Container;
|
|
|
|
// We don't check anywhere whether a size_type value is negative.
|
|
// Having signed integers would break that.
|
|
static_assert(std::is_unsigned<size_type>::value, "the run length S must be unsigned");
|
|
static_assert(std::is_same<rle_type, typename Container::value_type>::value, "the value type of the Container must be rle_pair<T, S>");
|
|
|
|
constexpr basic_rle() noexcept = default;
|
|
~basic_rle() = default;
|
|
|
|
basic_rle(const basic_rle& other) = default;
|
|
basic_rle& operator=(const basic_rle& other) = default;
|
|
|
|
// Move constructor: takes over the runs of other, as well as its total length.
basic_rle(basic_rle&& other) noexcept :
    _runs(std::move(other._runs)),
    _total_length(other._total_length)
{
    // A moved-from container is only guaranteed to be in a "valid but unspecified" state.
    // We assume that a container does one of two things when it's moved from:
    // * It hands over its memory and reports .size() == 0 afterwards
    // * It stays intact (the move was effectively a copy)
    // In the first case the cached length of other has to be reset, so that
    // other remains consistent. In the second case it's still accurate.
    const bool source_was_emptied = other._runs.empty();
    if (source_was_emptied)
    {
        other._total_length = 0;
    }
}
|
|
|
|
// Move assignment: takes over the runs of other, as well as its total length.
basic_rle& operator=(basic_rle&& other) noexcept
{
    _runs = std::move(other._runs);
    _total_length = other._total_length;

    // A moved-from container is either empty or left intact.
    // Only if it was emptied does the cached length of other need to be reset.
    const bool source_was_emptied = other._runs.empty();
    if (source_was_emptied)
    {
        other._total_length = 0;
    }

    return *this;
}
|
|
|
|
basic_rle(std::initializer_list<rle_type> runs) :
|
|
_runs(runs), _total_length(0)
|
|
{
|
|
for (const auto& run : _runs)
|
|
{
|
|
_total_length += run.length;
|
|
}
|
|
}
|
|
|
|
// Constructs an instance by taking ownership of an existing container of runs.
// The runs are stored as given. The total length is the sum of their lengths.
basic_rle(container&& runs) :
    _runs(std::move(runs)), _total_length(0)
{
    size_type sum = 0;
    for (auto it = _runs.begin(); it != _runs.end(); ++it)
    {
        sum += it->length;
    }
    _total_length = sum;
}
|
|
|
|
basic_rle(const size_type length, const value_type& value) :
|
|
_total_length(length)
|
|
{
|
|
if (length)
|
|
{
|
|
_runs.emplace_back(value, length);
|
|
}
|
|
}
|
|
|
|
// Exchanges the contents (runs and cached total length) of this instance with other.
void swap(basic_rle& other) noexcept
{
    std::swap(_total_length, other._total_length);
    _runs.swap(other._runs);
}
|
|
|
|
bool empty() const noexcept
|
|
{
|
|
return _total_length == 0;
|
|
}
|
|
|
|
// Returns the total length of all runs as encoded.
|
|
size_type size() const noexcept
|
|
{
|
|
return _total_length;
|
|
}
|
|
|
|
// This method gives access to the raw run length encoded array
|
|
// and allows users of this class to iterate over those.
|
|
const container& runs() const noexcept
|
|
{
|
|
return _runs;
|
|
}
|
|
|
|
// Get the value at the position
|
|
const_reference at(size_type position) const
|
|
{
|
|
const auto begin = _runs.begin();
|
|
const auto end = _runs.end();
|
|
|
|
rle_scanner scanner(begin, end);
|
|
auto it = scanner.scan(position).first;
|
|
|
|
if (it == end)
|
|
{
|
|
throw std::out_of_range("position out of range");
|
|
}
|
|
|
|
return it->value;
|
|
}
|
|
|
|
// Returns the range [start_index, end_index) as a new vector.
|
|
// It works just like std::string::substr(), but with absolute indices.
|
|
[[nodiscard]] basic_rle slice(size_type start_index, size_type end_index) const noexcept
|
|
{
|
|
if (end_index > _total_length)
|
|
{
|
|
end_index = _total_length;
|
|
}
|
|
|
|
if (start_index >= end_index)
|
|
{
|
|
return {};
|
|
}
|
|
|
|
// Thanks to the prior conditions we can safely assume that:
|
|
// * 0 <= start_index < _total_length
|
|
// * 0 < end_index <= _total_length
|
|
// * start_index < end_index
|
|
//
|
|
// --> It's safe to subtract 1 from end_index
|
|
|
|
rle_scanner scanner(_runs.begin(), _runs.end());
|
|
auto [begin_run, start_run_pos] = scanner.scan(start_index);
|
|
auto [end_run, end_run_pos] = scanner.scan(end_index - 1);
|
|
|
|
container slice{ begin_run, end_run + 1 };
|
|
slice.back().length = end_run_pos + 1;
|
|
slice.front().length -= start_run_pos;
|
|
|
|
return { std::move(slice), static_cast<size_type>(end_index - start_index) };
|
|
}
|
|
|
|
// Replace the range [start_index, end_index) with the given value.
|
|
// If end_index is larger than size() it's set to size().
|
|
// start_index must be smaller or equal to end_index.
|
|
void replace(size_type start_index, size_type end_index, const value_type& value)
|
|
{
|
|
_check_indices(start_index, end_index);
|
|
|
|
const rle_type replacement{ value, static_cast<size_type>(end_index - start_index) };
|
|
_replace_unchecked(start_index, end_index, { &replacement, 1 });
|
|
}
|
|
|
|
// Replace the range [start_index, end_index) with the given run.
|
|
// If end_index is larger than size() it's set to size().
|
|
// start_index must be smaller or equal to end_index.
|
|
void replace(size_type start_index, size_type end_index, const rle_type& replacement)
|
|
{
|
|
replace(start_index, end_index, { &replacement, 1 });
|
|
}
|
|
|
|
// Replace the range [start_index, end_index) with replacements.
|
|
// If end_index is larger than size() it's set to size().
|
|
// start_index must be smaller or equal to end_index.
|
|
void replace(size_type start_index, size_type end_index, const gsl::span<const rle_type> replacements)
|
|
{
|
|
_check_indices(start_index, end_index);
|
|
_replace_unchecked(start_index, end_index, replacements);
|
|
}
|
|
|
|
// Replaces every instance of old_value in this vector with new_value.
|
|
void replace_values(const value_type& old_value, const value_type& new_value)
|
|
{
|
|
for (auto& run : _runs)
|
|
{
|
|
if (run.value == old_value)
|
|
{
|
|
run.value = new_value;
|
|
}
|
|
}
|
|
|
|
_compact();
|
|
}
|
|
|
|
// Adjust the size of the vector.
|
|
// If the size is being increased, the last run is extended to fill up the new vector size.
|
|
// If the size is being decreased, the trailing runs are cut off to fit.
|
|
void resize_trailing_extent(const size_type new_size)
|
|
{
|
|
if (new_size == 0)
|
|
{
|
|
_runs.clear();
|
|
}
|
|
else if (new_size < _total_length)
|
|
{
|
|
rle_scanner scanner(_runs.begin(), _runs.end());
|
|
auto [run, pos] = scanner.scan(new_size - 1);
|
|
|
|
run->length = ++pos;
|
|
|
|
_runs.erase(++run, _runs.cend());
|
|
}
|
|
else if (new_size > _total_length)
|
|
{
|
|
Expects(!_runs.empty());
|
|
auto& run = _runs.back();
|
|
|
|
run.length += new_size - _total_length;
|
|
}
|
|
|
|
_total_length = new_size;
|
|
}
|
|
|
|
constexpr bool operator==(const basic_rle& other) const noexcept
|
|
{
|
|
return _total_length == other._total_length && _runs == other._runs;
|
|
}
|
|
|
|
constexpr bool operator!=(const basic_rle& other) const noexcept
|
|
{
|
|
return !(*this == other);
|
|
}
|
|
|
|
// Returns an iterator to the first decoded element.
[[nodiscard]] const_iterator begin() const noexcept
{
    return const_iterator{ _runs.begin() };
}
|
|
|
|
// Returns an iterator past the last decoded element.
[[nodiscard]] const_iterator end() const noexcept
{
    return const_iterator{ _runs.end() };
}
|
|
|
|
// Returns a reverse iterator that starts at the last decoded element.
[[nodiscard]] const_reverse_iterator rbegin() const noexcept
{
    return const_reverse_iterator{ end() };
}
|
|
|
|
// Returns a reverse iterator that marks the position before the first decoded element.
[[nodiscard]] const_reverse_iterator rend() const noexcept
{
    return const_reverse_iterator{ begin() };
}
|
|
|
|
// Same as begin(), as this class only offers constant iterators.
[[nodiscard]] const_iterator cbegin() const noexcept
{
    return const_iterator{ _runs.begin() };
}
|
|
|
|
// Same as end(), as this class only offers constant iterators.
[[nodiscard]] const_iterator cend() const noexcept
{
    return const_iterator{ _runs.end() };
}
|
|
|
|
// Same as rbegin(), as this class only offers constant iterators.
[[nodiscard]] const_reverse_iterator crbegin() const noexcept
{
    return const_reverse_iterator{ cend() };
}
|
|
|
|
// Same as rend(), as this class only offers constant iterators.
[[nodiscard]] const_reverse_iterator crend() const noexcept
{
    return const_reverse_iterator{ cbegin() };
}
|
|
|
|
#ifdef UNIT_TESTING
|
|
[[nodiscard]] std::wstring to_string() const
|
|
{
|
|
std::wstringstream ss;
|
|
bool beginning = true;
|
|
|
|
for (const auto& run : _runs)
|
|
{
|
|
if (beginning)
|
|
{
|
|
beginning = false;
|
|
}
|
|
else
|
|
{
|
|
ss << '|';
|
|
}
|
|
|
|
for (size_t i = 0; i < run.length; ++i)
|
|
{
|
|
if (i != 0)
|
|
{
|
|
ss << ' ';
|
|
}
|
|
|
|
ss << run.value;
|
|
}
|
|
}
|
|
|
|
return ss.str();
|
|
}
|
|
#endif
|
|
|
|
private:
|
|
template<typename It>
|
|
struct rle_scanner
|
|
{
|
|
explicit rle_scanner(It begin, It end) noexcept :
|
|
it(std::move(begin)), end(std::move(end)) {}
|
|
|
|
std::pair<It, size_type> scan(size_type index) noexcept
|
|
{
|
|
run_pos = 0;
|
|
|
|
for (; it != end; ++it)
|
|
{
|
|
const size_type new_total = total + it->length;
|
|
if (new_total > index)
|
|
{
|
|
run_pos = index - total;
|
|
break;
|
|
}
|
|
|
|
total = new_total;
|
|
}
|
|
|
|
return { it, run_pos };
|
|
}
|
|
|
|
private:
|
|
It it;
|
|
const It end;
|
|
size_type run_pos = 0;
|
|
size_type total = 0;
|
|
};
|
|
|
|
// Private constructor for callers that already know the total length of the given runs.
// size is stored as is and must thus equal the sum of all run lengths.
basic_rle(container&& runs, size_type size) :
    _runs(std::move(runs)),
    _total_length(size)
{
}
|
|
|
|
void _compact()
|
|
{
|
|
auto it = _runs.begin();
|
|
const auto end = _runs.end();
|
|
|
|
if (it == end)
|
|
{
|
|
return;
|
|
}
|
|
|
|
for (auto ref = it; ++it != end; ref = it)
|
|
{
|
|
if (ref->value == it->value)
|
|
{
|
|
ref->length += it->length;
|
|
|
|
while (++it != end)
|
|
{
|
|
if (ref->value == it->value)
|
|
{
|
|
ref->length += it->length;
|
|
}
|
|
else
|
|
{
|
|
*++ref = std::move(*it);
|
|
}
|
|
}
|
|
|
|
_runs.erase(++ref, end);
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Validates a [start_index, end_index) range against the current size.
// * end_index is clamped to _total_length (note that it's passed by reference).
// * std::out_of_range is thrown if start_index is larger than the clamped end_index.
inline void _check_indices(size_type start_index, size_type& end_index)
{
    end_index = std::min(end_index, _total_length);

    // After clamping, both indices must be inside the inclusive range [0, _total_length].
    const bool valid = start_index <= end_index;
    if (!valid)
    {
        throw std::out_of_range("start_index <= end_index");
    }
}
|
|
|
|
// Replace the range [start_index, end_index) with replacements.
|
|
void _replace_unchecked(size_type start_index, size_type end_index, const gsl::span<const rle_type> replacements)
|
|
{
|
|
//
|
|
//
|
|
//
|
|
// MUST READ: How this function (mostly) works
|
|
// -------------------------------------------
|
|
//
|
|
// ## Overview
|
|
//
|
|
// Assuming this instance consists of:
|
|
// _runs == {{1, 3}, {2, 3}, {3, 3}}
|
|
// Or shown in a more visual way:
|
|
// 1 1 1|2 2 2|3 3 3
|
|
//
|
|
// If we're called with:
|
|
// _replace_unchecked(3, 6, {{1, 2}, {4, 1}, {2, 1}})
|
|
// Or shown in a more visual way:
|
|
// 1 1 1|2 2 2|3 3 3
|
|
// ^ ^ <-- the first ^ is "start_index" (inclusive) and the second "end_index" (exclusive)
|
|
// 1 1|4|2 <-- the "replacements"
|
|
//
|
|
// This results in:
|
|
// 1 1 1 1|4|2 2|3 3 3
|
|
// and _total_length increases by 1.
|
|
//
|
|
//
|
|
// ## Trivial algorithm
|
|
//
|
|
// Assuming we have the following situation:
|
|
// 1 1 1|2 2 2|3 3 3
|
|
// ^ ^
|
|
// 1 1|4|2
|
|
//
|
|
// A trivial algorithm can achieve this in 3-4 steps:
|
|
// 1. Remove the to be replaced range (marked with ^).
|
|
// The lengths of existing runs must be modified accordingly.
|
|
// Resulting in:
|
|
// 1 1|2|3 3 3
|
|
// ^ <-- the insertion point for replacements
|
|
//
|
|
// 2. (Optional) If the replaced range starts and ends within the same run,
|
|
// we need to split it up into two. An example can be found below.
|
|
// 3. Add the new replacements:
|
|
// 1 1|1 1|4|2|2|3 3 3
|
|
// 4. Join adjacent runs together (using _compact):
|
|
// 1 1 1 1|4|2 2|3 3 3
|
|
//
|
|
// An example for the optional step 2:
|
|
// 1 1 1|2 2 2|3 3 3
|
|
// ^^
|
|
// 1 1
|
|
// Resulting in:
|
|
// 1 1 1|2|1 1|2|3 3 3
|
|
// ^ ^ <-- the {2, 3} run was split up
|
|
//
|
|
// All 4 steps require elements in the underlying _runs vector to be shuffled around.
|
|
// This function is long and complex, as it determines the place of insertion
|
|
// as well as joining of adjacent runs before applying any modifications.
|
|
//
|
|
//
|
|
// ## Optimized algorithm
|
|
//
|
|
// Note: "step N" refers to the 4 steps in previous "Trivial algorithm" section.
|
|
//
|
|
// There are 3 ways to reduce the cost of the trivial algorithm.
|
|
// Before modifying the underlying _runs vector we must detect:
|
|
// * (step 2) Whether the replaced range starts and ends within the same run,
|
|
// forcing us to split up a run and **add an additional element**.
|
|
// * (step 4) "adjacent runs" which would occur after insertion.
|
|
// We must insert **one run less each** if either the first or last element
|
|
// of "replacements" is the same as it's existing successor/predecessor element.
|
|
// This fact is even true in case like this:
|
|
// 1 1|2 2|1 1
|
|
// ^ ^
|
|
// 1 1
|
|
// Resulting in a single run and the removal of 2 elements from _runs:
|
|
// 1 1 1 1 1 1
|
|
// * How many runs we need to insert in total (including the previous 2 points)
|
|
// and how many existing runs this will replace. Using this information
|
|
// we can merge removal (step 1) and insertion (step 3) together.
|
|
//
|
|
// Let's look at the example from the previous section and
|
|
// assume we apply the previously mentioned optimizations
|
|
// This allows us to detect the adjacent runs and turn this:
|
|
// 1 1 1|2 2 2|3 3 3
|
|
// ^ ^
|
|
// 1 1|4|2
|
|
// Into this:
|
|
// 1 1 1 1|2 2|3 3 3
|
|
// ^
|
|
// 4
|
|
// Our algorithm now only needs to make a single insertion into _runs.
|
|
//
|
|
// Let's look at the example for the optional step 2:
|
|
// 1 1 1|2 2 2|3 3 3
|
|
// ^^
|
|
// 1 1
|
|
// We can detect early that we need to add an additional element.
|
|
// This allows us to change it into a single insertion again:
|
|
// 1 1 1|2|3 3 3
|
|
// ^
|
|
// 1 1|2
|
|
//
|
|
// Similarly we can detect cases where we replace more runs than we insert.
|
|
// For instance:
|
|
// 1 1 1|2 2 2|3 3 3|4 4 4|5 5 5
|
|
// ^ ^
|
|
// 6 6 6
|
|
// After shortening the existing runs this is turned into a copy operation:
|
|
// 1 1 1|2|3 3 3|4 4 4|5 5 5
|
|
// ^ ^
|
|
// 6 6 6
|
|
// And a removal of the extra space:
|
|
// 1 1 1|2|6 6 6|4 4 4|5 5 5
|
|
// ^ ^
|
|
// Resulting in:
|
|
// 1 1 1|2|6 6 6|5 5 5
|
|
//
|
|
//
|
|
// ## Implementation
|
|
//
|
|
// The need to calculate the exact space requirements before insertion of new or
|
|
// removal of existing runs requires us to have our steps in a specific order.
|
|
//
|
|
// [Step1]: Detect future adjacent runs.
|
|
// As this requires us to insert up to 2 runs less.
|
|
// For instance:
|
|
// 1 1 1|2 2 2|3 3 3
|
|
// ^ ^
|
|
// 1 1
|
|
// = 1 1 1 1 1|2|3 3 3
|
|
// ^-- The first run was joined in place by increasing its length by 2.
|
|
// This continues in [Step7].
|
|
// [Step2]: Detect whether a run needs to be split in 2.
|
|
// As this requires us to insert 1 additional run.
|
|
// For instance:
|
|
// 1 1 1|2 2 2|3 3 3
|
|
// ^^
|
|
// 1 1
|
|
// = 1 1 1|2|1 1|2|3 3 3
|
|
// ^-- An additional run was inserted.
|
|
// This continues in [Step5].
|
|
// [Step3]: Adjust the lengths of existing runs.
|
|
// For instance:
|
|
// 1 1 1|2 2 2|3 3 3
|
|
// ^ ^
|
|
// 3 3
|
|
// = 1|3 3|2 2 2|3 3 3
|
|
// ^-- The first existing run was shortened by 2.
|
|
// [Step4]: Copy over as many runs into the to-be-replaced range as possible.
|
|
// [Step5]: If we split up a run, we must copy in the trailing end now.
|
|
// [Step6.1]: If we still have any remaining extra space in the to-be-replaced range we need to remove it.
|
|
// [Step6.2]: Otherwise if the space wasn't enough we need to insert the remaining runs.
|
|
// [Step7]: Apply the additional lengths for adjacent runs.
|
|
// [Step8]: Recalculate the _total_length.
|
|
//
|
|
//
|
|
//
|
|
|
|
// TODO GH#10135: Ensure replacements contains no runs with .length == 0.
|
|
|
|
rle_scanner scanner{ _runs.begin(), _runs.end() };
|
|
auto [begin, begin_pos] = scanner.scan(start_index);
|
|
auto [end, end_pos] = scanner.scan(end_index);
|
|
|
|
// This condition handles pure removals, where replacements.size() == 0.
|
|
//
|
|
// But this isn't just a shortcut optimization...
|
|
// The remaining code in this function assumes that replacements.size() != 0
|
|
// and will happily access replacements.front()/.back() for instance.
|
|
// Otherwise the logic within this if condition is identical to the rest of this function.
|
|
//
|
|
// NOTE:
|
|
// Optimally the remaining code in this method should be made compatible with empty replacements.
|
|
// Especially since this logic is extremely similar to the one below for non-empty replacements.
|
|
if (replacements.empty())
|
|
{
|
|
const size_type removed = end_index - start_index;
|
|
|
|
if (start_index != 0 && end_index != _total_length)
|
|
{
|
|
const auto previous = begin_pos ? begin : begin - 1;
|
|
if (previous->value == end->value)
|
|
{
|
|
end->length -= end_pos - (begin_pos ? begin_pos : previous->length);
|
|
begin_pos = 0;
|
|
end_pos = 0;
|
|
begin = previous;
|
|
}
|
|
}
|
|
|
|
if (begin_pos)
|
|
{
|
|
begin->length = begin_pos;
|
|
++begin;
|
|
}
|
|
if (end_pos)
|
|
{
|
|
end->length -= end_pos;
|
|
}
|
|
|
|
_runs.erase(begin, end);
|
|
_total_length -= removed;
|
|
return;
|
|
}
|
|
|
|
// [Step1]
|
|
size_type begin_additional_length = 0;
|
|
size_type end_additional_length = 0;
|
|
if (start_index != 0)
|
|
{
|
|
const auto previous = begin_pos ? begin : begin - 1;
|
|
if (previous->value == replacements.front().value)
|
|
{
|
|
begin_additional_length = begin_pos ? begin_pos : previous->length;
|
|
begin_pos = 0;
|
|
begin = previous;
|
|
}
|
|
}
|
|
if (end_index != _total_length)
|
|
{
|
|
// end already points 1 item past "end_index".
|
|
// --> No need for something analogue to "previous" above.
|
|
if (end->value == replacements.back().value)
|
|
{
|
|
end_additional_length = end->length - end_pos;
|
|
end_pos = 0;
|
|
++end;
|
|
}
|
|
}
|
|
|
|
// [Step2]
|
|
std::optional<rle_type> mid_insertion_trailer;
|
|
if (begin == end && begin_pos != 0)
|
|
{
|
|
mid_insertion_trailer.emplace(begin->value, static_cast<size_type>(begin->length - end_pos));
|
|
// mid_insertion_trailer contains the element that will be inserted past
|
|
// the to-be-replaced range. We must ensure that we don't accidentally
|
|
// adjust the length of an unrelated run and thus set end_post to 0.
|
|
end_pos = 0;
|
|
}
|
|
|
|
// [Step3]
|
|
if (begin_pos)
|
|
{
|
|
begin->length = begin_pos;
|
|
// begin is part of the to-be-replaced range.
|
|
// We've used the run begin is pointing to adjust it's length.
|
|
// --> We must increment it in order to not overwrite it in [Step4].
|
|
++begin;
|
|
}
|
|
if (end_pos)
|
|
{
|
|
// Similarly to before we must adjust the length,
|
|
// but this time we don't need to decrement end, as it's
|
|
// already pointing past the to-be-replaced range anyways.
|
|
end->length -= end_pos;
|
|
}
|
|
|
|
// NOTE: It's possible for begin > end, as we increment begin in [Step3].
|
|
const size_t available_space = begin < end ? end - begin : 0;
|
|
const size_t required_space = replacements.size() + (mid_insertion_trailer ? 1 : 0);
|
|
const auto begin_index = begin - _runs.begin();
|
|
const auto replacements_begin = replacements.begin();
|
|
const auto replacements_end = replacements.end();
|
|
|
|
// [Step4]
|
|
const auto direct_copy_end = replacements_begin + std::min(available_space, replacements.size());
|
|
begin = std::copy(replacements_begin, direct_copy_end, begin);
|
|
|
|
if (available_space >= required_space)
|
|
{
|
|
// [Step6.1]
|
|
_runs.erase(begin, end);
|
|
}
|
|
else
|
|
{
|
|
if (mid_insertion_trailer)
|
|
{
|
|
// Unfortunately there's no efficient way to express "insert an iterator range
|
|
// plus one extra element at the end" with standard vector containers.
|
|
// --> First make some space for N+1 elements using default initialization.
|
|
// Then insert the new runs and finally the mid_insertion_trailer.
|
|
_runs.insert(begin, required_space - available_space, {});
|
|
// [Step6.2]
|
|
begin = std::copy(direct_copy_end, replacements_end, _runs.begin() + begin_index);
|
|
// [Step5]
|
|
*begin = *std::move(mid_insertion_trailer);
|
|
}
|
|
else
|
|
{
|
|
// [Step6.2]
|
|
_runs.insert(begin, direct_copy_end, replacements_end);
|
|
}
|
|
}
|
|
|
|
// [Step7]
|
|
if (begin_additional_length)
|
|
{
|
|
begin = _runs.begin() + begin_index;
|
|
begin->length += begin_additional_length;
|
|
}
|
|
if (end_additional_length)
|
|
{
|
|
end = _runs.begin() + begin_index + required_space - 1;
|
|
end->length += end_additional_length;
|
|
}
|
|
|
|
// [Step8]
|
|
_total_length -= end_index - start_index;
|
|
for (const auto& run : replacements)
|
|
{
|
|
_total_length += run.length;
|
|
}
|
|
}
|
|
|
|
container _runs;
|
|
S _total_length{ 0 };
|
|
|
|
#ifdef UNIT_TESTING
|
|
friend class ::RunLengthEncodingTests;
|
|
#endif
|
|
};
|
|
|
|
template<typename T, typename S = std::size_t>
|
|
using rle = basic_rle<T, S, std::vector<rle_pair<T, S>>>;
|
|
|
|
#ifdef BOOST_CONTAINER_CONTAINER_SMALL_VECTOR_HPP
|
|
template<typename T, typename S = std::size_t, std::size_t N = 1>
|
|
using small_rle = basic_rle<T, S, boost::container::small_vector<rle_pair<T, S>, N>>;
|
|
#endif
|
|
};
|
|
|
|
#ifdef __WEX_COMMON_H__
|
|
namespace WEX::TestExecution
|
|
{
|
|
template<typename T, typename S, typename Container>
|
|
class VerifyOutputTraits<::til::basic_rle<T, S, Container>>
|
|
{
|
|
using rle_vector = ::til::basic_rle<T, S, Container>;
|
|
|
|
public:
|
|
static WEX::Common::NoThrowString ToString(const rle_vector& object)
|
|
{
|
|
return WEX::Common::NoThrowString(object.to_string().c_str());
|
|
}
|
|
};
|
|
|
|
template<typename T, typename S, typename Container>
|
|
class VerifyCompareTraits<::til::basic_rle<T, S, Container>, ::til::basic_rle<T, S, Container>>
|
|
{
|
|
using rle_vector = ::til::basic_rle<T, S, Container>;
|
|
|
|
public:
|
|
static bool AreEqual(const rle_vector& expected, const rle_vector& actual) noexcept
|
|
{
|
|
return expected == actual;
|
|
}
|
|
|
|
static bool AreSame(const rle_vector& expected, const rle_vector& actual) noexcept
|
|
{
|
|
return &expected == &actual;
|
|
}
|
|
|
|
static bool IsLessThan(const rle_vector& expectedLess, const rle_vector& expectedGreater) = delete;
|
|
static bool IsGreaterThan(const rle_vector& expectedGreater, const rle_vector& expectedLess) = delete;
|
|
static bool IsNull(const rle_vector& object) = delete;
|
|
};
|
|
};
|
|
#endif
|