ircd::simd: Add template for streaming tokenization.

2025-01-13 16:33:53 +01:00 · 2021-02-20 12:31:32 -08:00 · 2021-02-20 12:31:32 -08:00 · 75865b86ad
commit 75865b86ad
parent f35e0749dd
2 changed files with 97 additions and 0 deletions
--- a/include/ircd/simd/simd.h
+++ b/include/ircd/simd/simd.h
@ -38,6 +38,7 @@
 #include "transform.h"
 #include "generate.h"
 #include "accumulate.h"
+#include "tokens.h"
 #include "print.h"

 namespace ircd
--- a/include/ircd/simd/tokens.h
+++ b/include/ircd/simd/tokens.h
@ -0,0 +1,96 @@
+// Matrix Construct
+//
+// Copyright (C) Matrix Construct Developers, Authors & Contributors
+// Copyright (C) 2016-2021 Jason Volk <jason@zemos.net>
+//
+// Permission to use, copy, modify, and/or distribute this software for any
+// purpose with or without fee is hereby granted, provided that the above
+// copyright notice and this permission notice is present in all copies. The
+// full license for this software is available in the LICENSE file.
+
+#pragma once
+#define HAVE_IRCD_SIMD_TOKENS_H
+
+namespace ircd::simd
+{
+	template<class token_block_t,
+             class input_block_t>
+	using tokens_prototype = u64x2 (token_block_t &, input_block_t, input_block_t mask);
+
+	template<class input_t,
+	         class block_t,
+	         class lambda>
+	u64x2 tokens(block_t *, const char *, const u64x2, lambda&&) noexcept;
+}
+
+/// Tokenize a character string input into fixed-width elements of the output
+/// array.
+///
+/// The input streams variably with character (byte) resolution; the return
+/// value for the input side of the closure advances the input one or more
+/// bytes for the next iteration.
+///
+/// The output streams variably with element (lane) resolution; the return
+/// value for the output side of the closure advances the output zero or more
+/// tokens at a time. Note iff the token type is u8 this value is a byte incr.
+///
+/// The closure performs the transformation for as many tokens as the block
+/// size allows for at a time. Closure returns the number of tokens written
+/// to the output block, and number of characters to consume off the input
+/// block for the next iteration.
+/// 
+template<class input_t,
+         class block_t,
+         class lambda>
+inline ircd::u64x2
+ircd::simd::tokens(block_t *const __restrict__ out_,
+                   const char *const __restrict__ in,
+                   const u64x2 max_,
+                   lambda&& closure)
+noexcept
+{
+	char *const __restrict__ &out
+	{
+		reinterpret_cast<char *__restrict__>(out_)
+	};
+
+	const u64x2 max
+	{
+		max_[0] * sizeof_lane<block_t>(),
+		max_[1]
+	};
+
+	assert(max[0] % sizeof(block_t) == 0);
+	const u64x2 consumed
+	{
+		transform<input_t>(out, in, max, [&closure]
+		(input_t &block, const auto mask)
+		{
+			const u64x2 res
+			{
+				closure(reinterpret_cast<block_t &>(block), block, mask)
+			};
+
+			// Closure returns the number of elems they filled of the output
+			// block; we convert that to bytes for the transform template.
+			assert(res[0] <= lanes<block_t>());
+			assert(res[1] <= sizeof(block_t));
+			return u64x2
+			{
+				res[0] * sizeof_lane<block_t>(),
+				res[1],
+			};
+		})
+	};
+
+	assert(consumed[0] % sizeof_lane<block_t>() == 0);
+	const u64x2 ret
+	{
+		consumed[0] / sizeof_lane<block_t>(),
+		std::min(consumed[1], max[1]),
+	};
+
+	assert(ret[0] <= max_[0]);
+	assert(ret[1] <= max_[1]);
+	return ret;
+}