From 6195be54a50b0338c9d7eb783eed42a813583a0e Mon Sep 17 00:00:00 2001 From: Jason Volk Date: Mon, 6 Jul 2020 06:51:01 -0700 Subject: [PATCH] ircd::simd: Split and improve byte shifter interface. --- include/ircd/simd/shift.h | 104 ++++++++++++++++++++++++++++++++++++++ include/ircd/simd/simd.h | 75 +-------------------------- 2 files changed, 106 insertions(+), 73 deletions(-) create mode 100644 include/ircd/simd/shift.h diff --git a/include/ircd/simd/shift.h b/include/ircd/simd/shift.h new file mode 100644 index 000000000..3a8da0a41 --- /dev/null +++ b/include/ircd/simd/shift.h @@ -0,0 +1,104 @@ +// The Construct +// +// Copyright (C) The Construct Developers, Authors & Contributors +// Copyright (C) 2016-2020 Jason Volk +// +// Permission to use, copy, modify, and/or distribute this software for any +// purpose with or without fee is hereby granted, provided that the above +// copyright notice and this permission notice is present in all copies. The +// full license for this software is available in the LICENSE file. + +#pragma once +#define HAVE_IRCD_SIMD_SHIFT_H + +// xmmx shifter workarounds +namespace ircd::simd +{ + template + typename std::enable_if::type + shl(const T a) noexcept; + + template + typename std::enable_if::type + shl(const T a) noexcept; + + template + typename std::enable_if::type + shr(const T a) noexcept; + + template + typename std::enable_if::type + shr(const T a) noexcept; +} + +#ifdef HAVE_X86INTRIN_H +template +[[using gnu: always_inline, gnu_inline, artificial]] +extern inline typename std::enable_if::type +ircd::simd::shr(const T a) +noexcept +{ + static_assert + ( + b % 8 == 0, "xmmx register only shifts right at bytewise resolution." + ); + + return _mm_bsrli_si128(a, b / 8); +} +#endif + +#ifdef HAVE_X86INTRIN_H +template +[[using gnu: always_inline, gnu_inline, artificial]] +extern inline typename std::enable_if::type +ircd::simd::shl(const T a) +noexcept +{ + static_assert + ( + b % 8 == 0, "xmmx register only shifts left at bytewise resolution." + ); + + return _mm_bslli_si128(a, b / 8); +} +#endif + +#if defined(HAVE_X86INTRIN_H) && defined(__AVX2__) +template +[[using gnu: always_inline, gnu_inline, artificial]] +extern inline typename std::enable_if::type +ircd::simd::shr(const T a) +noexcept +{ + static_assert + ( + b % 8 == 0, "ymmx register only shifts right at bytewise resolution." + ); + + return _mm256_srli_si256(a, b / 8); +} +#endif + +#if defined(HAVE_X86INTRIN_H) && defined(__AVX2__) +template +[[using gnu: always_inline, gnu_inline, artificial]] +extern inline typename std::enable_if::type +ircd::simd::shl(const T a) +noexcept +{ + static_assert + ( + b % 8 == 0, "ymmx register only shifts left at bytewise resolution." + ); + + return _mm256_slli_si256(a, b / 8); +} +#endif diff --git a/include/ircd/simd/simd.h b/include/ircd/simd/simd.h index 67ea8b575..a2555e994 100644 --- a/include/ircd/simd/simd.h +++ b/include/ircd/simd/simd.h @@ -14,6 +14,7 @@ #include "type.h" #include "traits.h" #include "lane_cast.h" +#include "shift.h" #include "print.h" namespace ircd::simd @@ -24,87 +25,15 @@ namespace ircd::simd template uint popcount(const T) noexcept; template uint clz(const T) noexcept; template uint ctz(const T) noexcept; - - // xmmx shifter workarounds - template T shl(const T &a) noexcept; - template T shr(const T &a) noexcept; - template u128x1 shl(const u128x1 &a) noexcept; - template u128x1 shr(const u128x1 &a) noexcept; - template u256x1 shl(const u256x1 &a) noexcept; - template u256x1 shr(const u256x1 &a) noexcept; } namespace ircd { + using simd::lane_cast; using simd::shl; using simd::shr; - using simd::lane_cast; } -#ifdef HAVE_X86INTRIN_H -template -[[using gnu: always_inline, gnu_inline, artificial]] -extern inline ircd::u128x1 -ircd::simd::shr(const u128x1 &a) -noexcept -{ - static_assert - ( - b % 8 == 0, "xmmx register only shifts right at bytewise resolution." - ); - - return _mm_bsrli_si128(a, b / 8); -} -#endif - -#ifdef HAVE_X86INTRIN_H -template -[[using gnu: always_inline, gnu_inline, artificial]] -extern inline ircd::u128x1 -ircd::simd::shl(const u128x1 &a) -noexcept -{ - static_assert - ( - b % 8 == 0, "xmmx register only shifts right at bytewise resolution." - ); - - return _mm_bslli_si128(a, b / 8); -} -#endif - -#if defined(HAVE_X86INTRIN_H) && defined(__AVX2__) -template -[[using gnu: always_inline, gnu_inline, artificial]] -extern inline ircd::u256x1 -ircd::simd::shr(const u256x1 &a) -noexcept -{ - static_assert - ( - b % 8 == 0, "ymmx register only shifts right at bytewise resolution." - ); - - return _mm256_srli_si256(a, b / 8); -} -#endif - -#if defined(HAVE_X86INTRIN_H) && defined(__AVX2__) -template -[[using gnu: always_inline, gnu_inline, artificial]] -extern inline ircd::u256x1 -ircd::simd::shl(const u256x1 &a) -noexcept -{ - static_assert - ( - b % 8 == 0, "ymmx register only shifts right at bytewise resolution." - ); - - return _mm256_slli_si256(a, b / 8); -} -#endif - /// Convenience template. Unfortunately this drops to scalar until specific /// targets and specializations are created. template