diff --git a/include/ircd/ircd.h b/include/ircd/ircd.h index e9715de5e..df9687230 100644 --- a/include/ircd/ircd.h +++ b/include/ircd/ircd.h @@ -19,6 +19,7 @@ #include "vector_view.h" #include "byte_view.h" #include "buffer/buffer.h" +#include "leb128.h" #include "allocator.h" #include "util/util.h" #include "exception.h" diff --git a/include/ircd/leb128.h b/include/ircd/leb128.h new file mode 100644 index 000000000..56499b08d --- /dev/null +++ b/include/ircd/leb128.h @@ -0,0 +1,109 @@ +// Matrix Construct +// +// Copyright (C) Matrix Construct Developers, Authors & Contributors +// Copyright (C) 2016-2019 Jason Volk +// +// Permission to use, copy, modify, and/or distribute this software for any +// purpose with or without fee is hereby granted, provided that the above +// copyright notice and this permission notice is present in all copies. The +// full license for this software is available in the LICENSE file. + +#pragma once +#define HAVE_IRCD_LEB128_H + +/// Little Endian Base 128 (unsigned) tool suite. +namespace ircd::uleb128 +{ + template constexpr size_t len(const T &) noexcept; + template constexpr T encode(T) noexcept; + template constexpr T decode(T) noexcept; +} + +/// Generic template to decode an unsigned LEB128. For constexprs this +/// produces zero code. Inlined it is branchless, and reasonable. Unfortunately +/// too much unrolling can be unwieldy for inlining, but the use cases tend to +/// be very high in call frequency to decode many bytes: this is why we have +/// some template specializations with platform-specific optimizations; +/// otherwise, this template is the default. +/// +/// Note that the input can contain junk above the encoded integer, which will +/// be ignored. Decoding starts at the first byte of the input (regardless of +/// type T) and continues until the first byte which has its MSB clear (limited +/// by the size of the type T); bytes after the terminating byte are ignored. 
// Declarations of the unsigned LEB128 tool suite. Repeating them here is a
// harmless redeclaration; it keeps the qualified definitions below well-formed
// when this section is read or compiled on its own.
namespace ircd::uleb128
{
	template<class T> constexpr size_t len(const T &) noexcept;
	template<class T> constexpr T encode(T) noexcept;
	template<class T> constexpr T decode(T) noexcept;
}

/// Decode an unsigned LEB128 integer packed into the low-order bytes of `val`.
///
/// The first encoded byte is the least significant byte of `val`. Each byte
/// contributes its low 7 bits to the result; the 8th bit is the continuation
/// flag. Decoding stops contributing content after the first byte whose
/// continuation flag is clear, so any junk in the bytes above the terminating
/// byte is ignored. At most sizeof(T) encoded bytes are examined.
///
/// The loop is branchless: every iteration runs, but once the terminator has
/// been seen the sampling mask is zero and later bytes contribute nothing.
///
/// T is expected to be an unsigned integer type.
template<class T>
inline constexpr T
ircd::uleb128::decode(T val)
noexcept
{
	constexpr const T control_mask {0x80}, content_mask {0x7F};

	T ret(0); // destination
	uint8_t flag(content_mask);
	for(size_t i(0); i < sizeof(T); ++i)
	{
		// Sample the 7 bits of content into b. b will be zero if the flag
		// was cleared by a terminating byte on an earlier iteration.
		T b(val & flag);

		// Build an all-ones mask when the continuation bit of this byte is
		// set and an all-zero mask when it is clear. Note that simply
		// AND-ing with ~(val & control_mask) cannot work here: both ~0x80
		// and ~0x00 keep the low 7 bits set, so the flag would never clear.
		const bool more((val & control_mask) != 0);
		const uint8_t keep(static_cast<uint8_t>(-static_cast<int>(more)));

		// If this was the last byte the flag becomes zero and stays zero,
		// regardless of what the remaining bytes contain.
		flag &= keep;

		// Consume this byte off the source data.
		val >>= 8;

		// Shift the acquired content to its final destination offset.
		b <<= 7 * i;

		// Merge this byte with the destination.
		ret |= b;
	}

	return ret;
}

/// Encode an integer as unsigned LEB128, packed into an integer of the same
/// type T. The first encoded byte occupies the least significant byte of the
/// result.
///
/// Exactly sizeof(T) output bytes are produced, each carrying 7 bits of the
/// input, so only the low 7 * sizeof(T) bits of `val` can be represented. If
/// higher bits are set the output is truncated and its last byte still has
/// the continuation flag set.
///
/// T is expected to be an unsigned integer type.
template<class T>
inline constexpr T
ircd::uleb128::encode(T val)
noexcept
{
	constexpr const T content_mask {0x7F};

	T ret(0); // destination
	for(size_t i(0); i < sizeof(T); ++i)
	{
		// Sample the lowest 7 bits of the input.
		T b(val & content_mask);

		// Consume 7 bits off the input.
		val >>= 7;

		// Set the high order bit on this byte if the input still has more
		// left to encode after this iteration.
		b |= T(bool(val)) << 7;

		// Shift the content to its final destination offset.
		b <<= 8 * i;

		// Merge this byte with destination.
		ret |= b;
	}

	return ret;
}

/// Count the bytes occupied by the unsigned LEB128 integer packed into the
/// low-order bytes of `val`, including the terminating byte.
///
/// Bytes are scanned from least significant upward until one is found with
/// its continuation flag (8th bit) clear. When every byte of T has the flag
/// set, no terminator exists within T and the result is sizeof(T) + 1.
template<class T>
inline constexpr size_t
ircd::uleb128::len(const T &val)
noexcept
{
	constexpr const T control_mask {0x80};

	size_t i(0);
	for(; i < sizeof(T); ++i)
	{
		// Non-zero when the continuation flag of byte i is clear.
		if(~val & (control_mask << (i * 8)))
			break;
	}

	return i + 1;
}