// Matrix Construct // // Copyright (C) Matrix Construct Developers, Authors & Contributors // Copyright (C) 2016-2021 Jason Volk // // Permission to use, copy, modify, and/or distribute this software for any // purpose with or without fee is hereby granted, provided that the above // copyright notice and this permission notice is present in all copies. The // full license for this software is available in the LICENSE file. #pragma once #define HAVE_IRCD_SIMD_TOKENS_H namespace ircd::simd { template using tokens_prototype = u64x2 (token_block_t &, input_block_t, input_block_t mask); template u64x2 tokens(block_t *, const char *, const u64x2, lambda&&) noexcept; } /// Tokenize a character string input into fixed-width elements of the output /// array. /// /// The input streams variably with character (byte) resolution; the return /// value for the input side of the closure advances the input one or more /// bytes for the next iteration. /// /// The output streams variably with element (lane) resolution; the return /// value for the output side of the closure advances the output zero or more /// tokens at a time. Note iff the token type is u8 this value is a byte incr. /// /// The closure performs the transformation for as many tokens as the block /// size allows for at a time. Closure returns the number of tokens written /// to the output block, and number of characters to consume off the input /// block for the next iteration. /// template inline ircd::u64x2 ircd::simd::tokens(block_t *const __restrict__ out_, const char *const __restrict__ in, const u64x2 max_, lambda&& closure) noexcept { char *const __restrict__ &out { reinterpret_cast(out_) }; const u64x2 max { max_[0] * sizeof_lane(), max_[1] }; assert(max[0] % sizeof(block_t) == 0); const u64x2 consumed { transform(out, in, max, [&closure] (input_t &block, const auto mask) { const u64x2 res { closure(reinterpret_cast(block), block, mask) }; // Closure returns the number of elems they filled of the output // block; we convert that to bytes for the transform template. assert(res[0] <= lanes()); assert(res[1] <= sizeof(block_t)); return u64x2 { res[0] * sizeof_lane(), res[1], }; }) }; assert(consumed[0] % sizeof_lane() == 0); const u64x2 ret { consumed[0] / sizeof_lane(), std::min(consumed[1], max[1]), }; assert(ret[0] <= max_[0]); assert(ret[1] <= max_[1]); return ret; }