0
0
Fork 0
mirror of https://github.com/matrix-construct/construct synced 2024-11-16 15:00:51 +01:00

ircd: Implement unsigned LEB128.

This commit is contained in:
Jason Volk 2019-10-15 16:45:57 -07:00
parent ac9b834eca
commit 62f2f5ea8d
2 changed files with 110 additions and 0 deletions

View file

@ -19,6 +19,7 @@
#include "vector_view.h" #include "vector_view.h"
#include "byte_view.h" #include "byte_view.h"
#include "buffer/buffer.h" #include "buffer/buffer.h"
#include "leb128.h"
#include "allocator.h" #include "allocator.h"
#include "util/util.h" #include "util/util.h"
#include "exception.h" #include "exception.h"

109
include/ircd/leb128.h Normal file
View file

@ -0,0 +1,109 @@
// Matrix Construct
//
// Copyright (C) Matrix Construct Developers, Authors & Contributors
// Copyright (C) 2016-2019 Jason Volk <jason@zemos.net>
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice is present in all copies. The
// full license for this software is available in the LICENSE file.
#pragma once
#define HAVE_IRCD_LEB128_H
/// Little Endian Base 128 (unsigned) tool suite.
namespace ircd::uleb128
{
	/// Count the bytes occupied by the encoded integer found at the
	/// least-significant end of the argument.
	template<class T> constexpr size_t len(const T &) noexcept;

	/// Pack a native integer into unsigned LEB128 bytes; the first encoded
	/// byte is the least-significant byte of the result.
	template<class T> constexpr T encode(T) noexcept;

	/// Unpack unsigned LEB128 bytes (first encoded byte in the
	/// least-significant byte of the argument) into a native integer.
	template<class T> constexpr T decode(T) noexcept;
}

/// Generic template to decode an unsigned LEB128. The loop has a fixed trip
/// count of sizeof(T) and contains no conditional statements, so it is
/// usable in constant expressions and is written to be branchless when
/// inlined.
///
/// The encoded bytes are read from the least-significant byte of `val`
/// upward. Each byte contributes its low 7 bits; a byte whose MSB (0x80) is
/// clear terminates the integer. Any bytes above the terminating byte are
/// junk and are ignored. If no terminating byte is found, decoding is limited
/// by the size of the type T and all sizeof(T) bytes contribute.
///
/// @param val Integer holding the encoded bytes.
/// @return The decoded value.
template<class T>
inline constexpr T
ircd::uleb128::decode(T val)
noexcept
{
	constexpr const T control_mask {0x80}, content_mask {0x7F};

	T ret(0); // destination

	// Content mask for the current byte: 0x7F while the integer is still
	// being decoded, zero once the terminating byte has been consumed.
	T flag(content_mask);

	for(size_t i(0); i < sizeof(T); ++i)
	{
		// Sample the 7 bits of content into b. b will be zero if the
		// terminating byte was already consumed by an earlier iteration.
		T b(val & flag);

		// Reduce the control bit of this byte to exactly 0 or 1.
		const T more((val & control_mask) >> 7);

		// (0 - 1) is all-ones and leaves the flag intact; (0 - 0) is zero
		// and clears it for every following byte. The flag can only ever
		// lose bits, so once cleared it stays cleared.
		flag &= T(T(0) - more);

		// Consume this byte off the source data.
		val >>= 8;

		// Shift the acquired content to its final destination offset.
		b <<= 7 * i;

		// Merge this byte with the destination.
		ret |= b;
	}

	return ret;
}
/// Generic template to encode an integer as unsigned LEB128. The value is
/// split into 7-bit groups starting from its least-significant bits; each
/// group is stored in one byte of the result, the first group in the
/// least-significant byte. The MSB (0x80) of a byte is set when more of the
/// value remained after taking that byte's group.
///
/// Exactly sizeof(T) bytes are produced, so only the low 7 * sizeof(T) bits
/// of the value can be represented; for larger values the output is
/// truncated and the last byte keeps its MSB set.
///
/// @param val Native integer to encode.
/// @return The encoded bytes packed into an integer of the same type.
template<class T>
inline constexpr T
ircd::uleb128::encode(T val)
noexcept
{
	constexpr const T low_seven {0x7F};

	T packed(0);
	size_t pos(0);
	while(pos < sizeof(T))
	{
		// Take the next 7-bit group and drop it from the input.
		T group(val & low_seven);
		val >>= 7;

		// Whatever is left of the input decides the continuation bit.
		const T more(val != 0);
		group |= more << 7;

		// Place the finished byte at its position in the output.
		group <<= 8 * pos;
		packed |= group;

		++pos;
	}

	return packed;
}
/// Count the bytes of the encoded integer held in `val`, scanning from the
/// least-significant byte upward for the first byte whose MSB (0x80) is
/// clear. That terminating byte is included in the count.
///
/// When every one of the sizeof(T) bytes has its MSB set, no terminating
/// byte exists within the type and the result is sizeof(T) + 1.
///
/// @param val Integer holding the encoded bytes.
/// @return Number of bytes counted, between 1 and sizeof(T) + 1.
template<class T>
inline constexpr size_t
ircd::uleb128::len(const T &val)
noexcept
{
	constexpr const T continuation {0x80};

	size_t count(1);
	while(count <= sizeof(T))
	{
		// Bit position of the control bit belonging to byte (count - 1).
		const size_t shift((count - 1) * 8);

		// Non-zero exactly when that control bit is clear in val.
		const bool terminated(~val & (continuation << shift));
		if(terminated)
			return count;

		++count;
	}

	return count;
}