// The Construct // // Copyright (C) The Construct Developers, Authors & Contributors // Copyright (C) 2016-2020 Jason Volk // // Permission to use, copy, modify, and/or distribute this software for any // purpose with or without fee is hereby granted, provided that the above // copyright notice and this permission notice is present in all copies. The // full license for this software is available in the LICENSE file. #pragma once #define HAVE_IRCD_SIMD_SHIFT_H // xmmx shifter interface namespace ircd::simd { template typename std::enable_if::type shl(const T a) noexcept; template typename std::enable_if::type shl(const T a) noexcept; template typename std::enable_if::type shl(const T a) noexcept; template typename std::enable_if::type shr(const T a) noexcept; template typename std::enable_if::type shr(const T a) noexcept; template typename std::enable_if::type shr(const T a) noexcept; } // xmmx shifter fallback template (internal) namespace ircd::simd { template T _shl(const T) noexcept; template T _shr(const T) noexcept; } #ifdef HAVE_X86INTRIN_H template [[using gnu: always_inline, gnu_inline, artificial]] extern inline typename std::enable_if::type ircd::simd::shr(const T a) noexcept { static_assert ( b % 8 == 0, "xmmx register only shifts right at bytewise resolution." ); return T(_mm_bsrli_si128(u128x1(a), b / 8)); } #endif #ifdef HAVE_X86INTRIN_H template [[using gnu: always_inline, gnu_inline, artificial]] extern inline typename std::enable_if::type ircd::simd::shl(const T a) noexcept { static_assert ( b % 8 == 0, "xmmx register only shifts left at bytewise resolution." ); return T(_mm_bslli_si128(u128x1(a), b / 8)); } #endif #if defined(HAVE_X86INTRIN_H) && defined(__AVX2__) template [[using gnu: always_inline, gnu_inline, artificial]] extern inline typename std::enable_if::type ircd::simd::shr(const T a) noexcept { static_assert ( b % 8 == 0, "ymmx register only shifts right at bytewise resolution." ); return T(_mm256_srli_si256(u256x1(a), b / 8)); } #else template [[using gnu: always_inline, gnu_inline, artificial]] extern inline typename std::enable_if::type ircd::simd::shr(const T a) noexcept { return _shr(a); } #endif #if defined(HAVE_X86INTRIN_H) && defined(__AVX2__) template [[using gnu: always_inline, gnu_inline, artificial]] extern inline typename std::enable_if::type ircd::simd::shl(const T a) noexcept { static_assert ( b % 8 == 0, "ymmx register only shifts left at bytewise resolution." ); return T(_mm256_slli_si256(u256x1(a), b / 8)); } #else template [[using gnu: always_inline, gnu_inline, artificial]] extern inline typename std::enable_if::type ircd::simd::shl(const T a) noexcept { return _shl(a); } #endif template [[using gnu: always_inline, gnu_inline, artificial]] extern inline typename std::enable_if::type ircd::simd::shr(const T a) noexcept { return _shr(a); } template [[using gnu: always_inline, gnu_inline, artificial]] extern inline typename std::enable_if::type ircd::simd::shl(const T a) noexcept { return _shl(a); } template [[using gnu: always_inline, gnu_inline, artificial]] extern inline T ircd::simd::_shr(const T a) noexcept { static_assert ( sizeof(T) == sizeof(V) ); static_assert ( b % 8 == 0, "[emulated] xmmx register only shifts right at bytewise resolution." ); constexpr int B { b / 8 }; V ret; for(size_t i(0); i < sizeof(V) - B; ++i) ret[i] = a[i + B]; for(size_t i(sizeof(V) - B); i < sizeof(V); ++i) ret[i] = 0; return T(ret); } template [[using gnu: always_inline, gnu_inline, artificial]] extern inline T ircd::simd::_shl(const T a) noexcept { static_assert ( sizeof(T) == sizeof(V) ); static_assert ( b % 8 == 0, "[emulated] xmmx register only shifts left at bytewise resolution." ); constexpr int B { b / 8 }; V ret; for(size_t i(0); i < B; ++i) ret[i] = 0; for(size_t i(B); i < sizeof(V); ++i) ret[i] = a[i - B]; return T(ret); }