From 291038a0145b5cc87e1d8004ac3b0ddc79dd9e13 Mon Sep 17 00:00:00 2001 From: Jason Volk Date: Fri, 17 Jul 2020 06:39:52 -0700 Subject: [PATCH] ircd::simd: Portabilities for clz on various x86. --- include/ircd/simd/simd.h | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/include/ircd/simd/simd.h b/include/ircd/simd/simd.h index 5ee5c9773..d2f8b0dc8 100644 --- a/include/ircd/simd/simd.h +++ b/include/ircd/simd/simd.h @@ -43,6 +43,15 @@ __attribute__((target("lzcnt"))) ircd::simd::clz(const T a) noexcept { + constexpr auto uses_bsr + { + #ifndef __LZCNT__ + true + #else + false + #endif + }; + uint ret(0), i(0); do { const auto mask @@ -50,10 +59,16 @@ noexcept boolmask(uint(ret == sizeof_lane() * 8 * i)) }; - if constexpr(sizeof_lane() <= sizeof(u16)) + if constexpr(sizeof_lane() <= sizeof(u16) && uses_bsr) + ret += (15 - __lzcnt16(__builtin_bswap16(a[i++]))) & mask; + else if constexpr(sizeof_lane() <= sizeof(u16)) ret += __lzcnt16(__builtin_bswap16(a[i++])) & mask; + else if constexpr(sizeof_lane() <= sizeof(u32) && uses_bsr) + ret += (31 - __lzcnt32(__builtin_bswap32(a[i++]))) & mask; else if constexpr(sizeof_lane() <= sizeof(u32)) ret += __lzcnt32(__builtin_bswap32(a[i++])) & mask; + else if constexpr(uses_bsr) + ret += (63 - __lzcnt64(__builtin_bswap64(a[i++]))) & mask; else ret += __lzcnt64(__builtin_bswap64(a[i++])) & mask; } @@ -70,12 +85,27 @@ __attribute__((target("lzcnt"))) ircd::simd::ctz(const T a) noexcept { + constexpr auto uses_bsr + { + #ifndef __LZCNT__ + true + #else + false + #endif + }; + uint ret(0), i(lanes()), mask(-1U); do { - if constexpr(sizeof_lane() <= sizeof(u16)) + if constexpr(sizeof_lane() <= sizeof(u16) && uses_bsr) + ret += (15 - __lzcnt16(a[--i])) & mask; + else if constexpr(sizeof_lane() <= sizeof(u16)) ret += __lzcnt16(a[--i]) & mask; + else if constexpr(sizeof_lane() <= sizeof(u32) && uses_bsr) + ret += (31 - __lzcnt32(a[--i])) & mask; else if constexpr(sizeof_lane() <= sizeof(u32)) ret += __lzcnt32(a[--i]) & mask; + else if constexpr(uses_bsr) + ret += (63 - __lzcnt64(a[--i])) & mask; else ret += __lzcnt64(a[--i]) & mask;