// Recovered from a git format-patch that had been flattened onto one line.
//
//   Commit:  aec21af0e409e225448f0c636ea70bf9c5144387
//   From:    Jason Volk
//   Date:    Mon, 29 Jun 2020 20:15:11 -0700
//   Subject: [PATCH] ircd::simd: Add population mask convenience template.
//   File:    include/ircd/simd/simd.h (index 18e4cd8fd..c0c9aa550 100644),
//            1 file changed, 29 insertions(+)
//   Hunks:   @@ -18,6 +18,9 @@ and @@ -34,6 +37,32 @@ namespace ircd
//
// NOTE(review): every `<...>` span was stripped from the flattened text, so
// the `template<class T>` parameter lists below are reconstructed from how
// `T` is used; confirm against the upstream commit.

#include <cstddef>
#include <type_traits>

namespace ircd::simd
{
	template<class T> T popmask(const T) noexcept;
	template<class T> size_t popcount(const T) noexcept;

	// Unchanged hunk context, kept verbatim as a comment because its template
	// parameter lists cannot be recovered from this excerpt:
	//   // xmmx shifter workarounds
	//   template T shl(const T &a) noexcept;
	//   template T shr(const T &a) noexcept;
}

// Unchanged hunk context (tail of `namespace ircd`), kept verbatim as a
// comment because `simd::lane_cast` is declared outside this excerpt:
//   using simd::lane_cast;
//   }

/// Population count over a whole vector: returns the number of set bits
/// summed across every lane of `a`.
///
/// Integral lanes are converted to their unsigned counterpart before being
/// widened for the builtin. Without that step a signed lane holding a
/// compare result (-1) is sign-extended to 64 one-bits and over-counted.
/// Non-integral lanes keep the plain implicit conversion.
template<class T>
inline size_t
ircd::simd::popcount(const T a)
noexcept
{
	using lane_type = std::decay_t<decltype(a[0])>;

	// Computed locally so this definition does not depend on a helper being
	// declared first. NOTE(review): presumably the same value the project's
	// `lanes(a)` yields for a vector type -- confirm.
	constexpr size_t lane_count
	{
		sizeof(T) / sizeof(lane_type)
	};

	size_t ret(0);
	for(size_t i(0); i < lane_count; ++i)
	{
		if constexpr(std::is_integral_v<lane_type>)
			ret += __builtin_popcountll(static_cast<std::make_unsigned_t<lane_type>>(a[i]));
		else
			ret += __builtin_popcountll(a[i]);
	}

	return ret;
}

/// Convenience template. A vector compare leaves every bit set in a lane
/// that matched (0xff for 8-bit lanes); this keeps only the least
/// significant bit of each lane, so matching lanes read as the value 1 and
/// zero lanes stay 0. Useful for accumulators or bool-like arithmetic.
template<class T>
inline T
ircd::simd::popmask(const T a)
noexcept
{
	return a & 1;
}

// Unchanged trailing hunk context; the definition it opens continues beyond
// this excerpt, so it is kept verbatim as a comment:
//   #ifdef HAVE_X86INTRIN_H
//   template
//   [[using gnu: always_inline, gnu_inline, artificial]]