ircd: Implement unsigned LEB128.

2024-06-24 12:58:21 +02:00 · 2019-10-15 16:45:57 -07:00 · 2019-10-15 16:45:57 -07:00 · 62f2f5ea8d
parent ac9b834eca
commit 62f2f5ea8d
2 changed files with 110 additions and 0 deletions
--- a/include/ircd/ircd.h
+++ b/include/ircd/ircd.h
@ -19,6 +19,7 @@
 #include "vector_view.h"
 #include "byte_view.h"
 #include "buffer/buffer.h"
+#include "leb128.h"
 #include "allocator.h"
 #include "util/util.h"
 #include "exception.h"
--- a/include/ircd/leb128.h
+++ b/include/ircd/leb128.h
@ -0,0 +1,109 @@
+// Matrix Construct
+//
+// Copyright (C) Matrix Construct Developers, Authors & Contributors
+// Copyright (C) 2016-2019 Jason Volk <jason@zemos.net>
+//
+// Permission to use, copy, modify, and/or distribute this software for any
+// purpose with or without fee is hereby granted, provided that the above
+// copyright notice and this permission notice is present in all copies. The
+// full license for this software is available in the LICENSE file.
+
+#pragma once
+#define HAVE_IRCD_LEB128_H
+
+/// Little Endian Base 128 (unsigned) tool suite.
+namespace ircd::uleb128
+{
+	template<class T> constexpr size_t len(const T &) noexcept;
+	template<class T> constexpr T encode(T) noexcept;
+	template<class T> constexpr T decode(T) noexcept;
+}
+
+/// Generic template to decode an unsigned LEB128. For constexprs this
+/// produces zero code. Inlined it is branchless, and reasonable. Unfortunately
+/// too much unrolling can be unwieldy for inlining, but the use cases tend to
+/// be very high in call frequency to decode many bytes: this is why we have
+/// some template specializations with platform-specific optimizations;
+/// otherwise, this template is the default.
+///
+/// Note that the input can contain junk above the encoded integer, which will
+/// be ignored. Decoding starts at the first byte of the input (regardless of
+/// type T) and continues until the first byte which has its MSB clear (limited
+/// by the size of the type T); bytes after the terminating byte are ignored.
+template<class T>
+inline constexpr T
+ircd::uleb128::decode(T val)
+noexcept
+{
+	constexpr const T control_mask {0x80}, content_mask {0x7F};
+
+	T ret(0); // destination
+	uint8_t flag(content_mask);
+	for(size_t i(0); i < sizeof(T); ++i)
+	{
+		// Sample the 7 bits of content into b. b will be zero if the control
+		// flag was previously cleared, otherwise the control flag is the mask
+		// for the 7 bits.
+		T b(val & flag);
+
+		// test if the 8th bit is zero or one; if zero, this was the last byte
+		// and the control flag is cleared.
+		flag &= ~(val & control_mask);
+
+		// Consume this byte off the source data.
+		val >>= 8;
+
+		// Shift the acquired content to its final destination offset.
+		b <<= 7 * i;
+
+		// Merge this byte with the destination.
+		ret |= b;
+	}
+
+	return ret;
+}
+
+template<class T>
+inline constexpr T
+ircd::uleb128::encode(T val)
+noexcept
+{
+	constexpr const T content_mask {0x7F};
+
+	T ret(0); // destination
+	for(size_t i(0); i < sizeof(T); ++i)
+	{
+		// Sample the lowest 7 bits of the input.
+		T b(val & content_mask);
+
+		// Consume 7 bits off the input.
+		val >>= 7;
+
+		// Set the high order bit on this byte if the input still has more
+		// left to encode after this iteration.
+		b |= T(bool(val)) << 7;
+
+		// Shift the content to its final destination offset.
+		b <<= 8 * i;
+
+		// Merge this byte with destination.
+		ret |= b;
+	}
+
+	return ret;
+}
+
+template<class T>
+inline constexpr size_t
+ircd::uleb128::len(const T &val)
+noexcept
+{
+	constexpr const T control_mask {0x80};
+
+	size_t i(0);
+	for(; i < sizeof(T); ++i)
+		if(~val & (control_mask << (i * 8)))
+			break;
+
+	return i + 1;
+}