// Matrix Construct
//
// Copyright (C) Matrix Construct Developers, Authors & Contributors
// Copyright (C) 2016-2019 Jason Volk
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice is present in all copies. The
// full license for this software is available in the LICENSE file.

#pragma once
#define HAVE_IRCD_LEB128_H

/// Little Endian Base 128 (unsigned) tool suite.
///
/// All functions operate on an encoded integer held in a native word of
/// type T; byte 0 of the encoding is the least significant byte of the word.
namespace ircd::uleb128
{
	// Number of bytes occupied by the encoding held in the word.
	template<class T> constexpr size_t length(const T &) noexcept;
	template<> size_t length(const uint128_t &) noexcept;
	template<> size_t length(const uint64_t &) noexcept;

	// Native integer -> encoded word.
	template<class T> constexpr T encode(T) noexcept;

	// Encoded word -> native integer.
	template<class T> constexpr T decode(T) noexcept;
}

/// Generic template to decode an unsigned LEB128. For constexprs this
/// produces zero code. Inlined it is branchless, and reasonable. Unfortunately
/// too much unrolling can be unwieldy for inlining when using larger word
/// sizes, but the use cases tend to be very high in call frequency to decode
/// many bytes: this is why we have some template specializations with
/// platform-specific optimizations; otherwise, this template is the default.
///
/// Note that the input can contain junk above the encoded integer, which will
/// be ignored. Decoding starts at the first byte of the input (regardless of
/// type T) and continues until the first byte which has its MSB clear (limited
/// by the size of the type T); bytes after the terminating byte are ignored.
///
/// @param val Word holding the encoded bytes, least significant byte first.
/// @return The decoded integer.
template<class T>
inline constexpr T
ircd::uleb128::decode(T val)
noexcept
{
	constexpr const T
	content_mask {0x7F};

	T ret(0); // destination
	uint8_t flag(content_mask);
	for(size_t i(0); i < sizeof(T); ++i)
	{
		// Sample the 7 bits of content into b. b will be zero if the control
		// flag was previously cleared, otherwise the control flag is the mask
		// for the 7 bits.
		T b(val & flag);

		// Test if the 8th bit is zero or one; if zero, this was the last byte
		// and the control flag must be cleared so every later byte samples
		// as zero. The continuation bit is widened into an all-ones or
		// all-zeros byte so this remains branchless. (Masking with the
		// complement of the control bit can never clear the low seven bits
		// of the flag, which would let junk after the terminator leak into
		// the result.)
		const uint8_t more(uint8_t(val >> 7) & uint8_t(1));
		flag &= uint8_t(0 - more);

		// Consume this byte off the source data.
		val >>= 8;

		// Shift the acquired content to its final destination offset.
		b <<= 7 * i;

		// Merge this byte with the destination.
		ret |= b;
	}

	return ret;
}

/// Generic template to encode an unsigned LEB128 integer from native type T.
/// Type T must be large enough to hold the result. For the common T=uint64_t
/// the input cannot use more than 56 bits. There is no checking if the input
/// value is too large for encoding.
///
/// @param val Native integer to encode.
/// @return Word holding the encoded bytes, least significant byte first.
template<class T>
inline constexpr T
ircd::uleb128::encode(T val)
noexcept
{
	constexpr const T
	content_mask {0x7F};

	T ret(0); // destination
	for(size_t i(0); i < sizeof(T); ++i)
	{
		// Sample the lowest 7 bits of the input.
		T b(val & content_mask);

		// Consume 7 bits off the input.
		val >>= 7;

		// Set the high order bit on this byte if the input still has more
		// left to encode after this iteration.
		b |= T(bool(val)) << 7;

		// Shift the content to its final destination offset.
		b <<= 8 * i;

		// Merge this byte with destination.
		ret |= b;
	}

	return ret;
}

/// The terminating/last byte for the encoded input is the least significant
/// byte without its MSB set. We find that by inverting the mask and
/// counting the trailing (least significant) zero bits; then add one for
/// the terminating byte itself. Note the documentation says that if the
/// argument had all zero bits then the result of clz/ctz is undefined.
#if defined(__MMX__) && !defined(RB_GENERIC) template<> inline size_t ircd::uleb128::length(const uint64_t &val) noexcept { static const int max_mask {0x0000007f}; const int mask { _mm_movemask_pi8(__m64(val)) & max_mask }; return __builtin_ctz(~mask) + 1; } #endif #if defined(__SSE2__) && !defined(RB_GENERIC) template<> inline size_t ircd::uleb128::length(const uint128_t &val) noexcept { static const int max_mask {0x00007fff}; const int mask { _mm_movemask_epi8(__m128i(val)) & max_mask }; return __builtin_ctz(~mask) + 1; } #endif /// Counts number of bytes of an LEB encoded integer contained in a word of /// type T. This is the length of the LEB encoding, not the decoded length. /// For large integers some template specializations generate optimized /// code which doesn't need to be unrolled; otherwise this template is the /// default naive loop (which generates zero code for constexprs). template inline constexpr size_t ircd::uleb128::length(const T &val) noexcept { constexpr const T control_mask {0x80}; size_t i(0); for(; i < sizeof(T) - 1; ++i) if(~val & (control_mask << (i * 8))) break; return i + 1; }