// The Construct
//
// Copyright (C) The Construct Developers, Authors & Contributors
// Copyright (C) 2016-2020 Jason Volk
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice is present in all copies. The
// full license for this software is available in the LICENSE file.

#pragma once
#define HAVE_IRCD_SIMD_SHIFT_H

// xmmx shifter workarounds
//
// Whole-register shifts expressed in bits. The shift count `b` is a template
// argument because the underlying intrinsics take an immediate operand; it
// must be a multiple of 8 since the hardware only shifts these registers at
// byte granularity. Overloads are selected by the size of the vector type.
//
// NOTE(review): the template heads (`<int b, class T>`) and the enable_if
// predicates (`sizeof(T) == 16` / `sizeof(T) == 32`) were reconstructed from
// how `b` and `T` are used in the bodies; confirm against the rest of the
// project before relying on the exact parameter type of `b`.
namespace ircd::simd
{
	/// Shift a 16-byte (xmmx) vector left by `b` bits; `b % 8` must be 0.
	template<int b,
	         class T>
	typename std::enable_if<sizeof(T) == 16, T>::type
	shl(const T a) noexcept;

	/// Shift a 32-byte (ymmx) vector left by `b` bits; `b % 8` must be 0.
	template<int b,
	         class T>
	typename std::enable_if<sizeof(T) == 32, T>::type
	shl(const T a) noexcept;

	/// Shift a 16-byte (xmmx) vector right by `b` bits; `b % 8` must be 0.
	template<int b,
	         class T>
	typename std::enable_if<sizeof(T) == 16, T>::type
	shr(const T a) noexcept;

	/// Shift a 32-byte (ymmx) vector right by `b` bits; `b % 8` must be 0.
	template<int b,
	         class T>
	typename std::enable_if<sizeof(T) == 32, T>::type
	shr(const T a) noexcept;
}

#ifdef HAVE_X86INTRIN_H
/// Right shift of a 128-bit vector by `b` bits via _mm_bsrli_si128, which
/// shifts the whole register right by `b / 8` bytes and fills with zeros.
/// The argument is converted to u128x1 (declared elsewhere in the project)
/// for the intrinsic and the result converted back to T.
template<int b,
         class T>
[[using gnu: always_inline, gnu_inline, artificial]]
extern inline
typename std::enable_if<sizeof(T) == 16, T>::type
ircd::simd::shr(const T a)
noexcept
{
	static_assert
	(
		b % 8 == 0, "xmmx register only shifts right at bytewise resolution."
	);

	// The intrinsic counts bytes; the interface counts bits.
	return T(_mm_bsrli_si128(u128x1(a), b / 8));
}
#endif

#ifdef HAVE_X86INTRIN_H
/// Left shift of a 128-bit vector by `b` bits via _mm_bslli_si128, which
/// shifts the whole register left by `b / 8` bytes and fills with zeros.
/// The argument is converted to u128x1 (declared elsewhere in the project)
/// for the intrinsic and the result converted back to T.
template<int b,
         class T>
[[using gnu: always_inline, gnu_inline, artificial]]
extern inline
typename std::enable_if<sizeof(T) == 16, T>::type
ircd::simd::shl(const T a)
noexcept
{
	static_assert
	(
		b % 8 == 0, "xmmx register only shifts left at bytewise resolution."
	);

	// The intrinsic counts bytes; the interface counts bits.
	return T(_mm_bslli_si128(u128x1(a), b / 8));
}
#endif

#if defined(HAVE_X86INTRIN_H) && defined(__AVX2__)
/// Right shift of a 256-bit vector by `b` bits via _mm256_srli_si256.
///
/// NOTE(review): per Intel's documentation _mm256_srli_si256 shifts each
/// 128-bit lane independently by `b / 8` bytes; bytes do not cross from the
/// upper lane into the lower lane. Callers expecting a shift across the full
/// 256 bits should confirm this is the behavior they want.
template<int b,
         class T>
[[using gnu: always_inline, gnu_inline, artificial]]
extern inline
typename std::enable_if<sizeof(T) == 32, T>::type
ircd::simd::shr(const T a)
noexcept
{
	static_assert
	(
		b % 8 == 0, "ymmx register only shifts right at bytewise resolution."
	);

	// The intrinsic counts bytes; the interface counts bits.
	return T(_mm256_srli_si256(u256x1(a), b / 8));
}
#endif

#if defined(HAVE_X86INTRIN_H) && defined(__AVX2__)
/// Left shift of a 256-bit vector by `b` bits via _mm256_slli_si256.
///
/// NOTE(review): per Intel's documentation _mm256_slli_si256 shifts each
/// 128-bit lane independently by `b / 8` bytes; bytes do not cross from the
/// lower lane into the upper lane. Callers expecting a shift across the full
/// 256 bits should confirm this is the behavior they want.
template<int b,
         class T>
[[using gnu: always_inline, gnu_inline, artificial]]
extern inline
typename std::enable_if<sizeof(T) == 32, T>::type
ircd::simd::shl(const T a)
noexcept
{
	static_assert
	(
		b % 8 == 0, "ymmx register only shifts left at bytewise resolution."
	);

	// The intrinsic counts bytes; the interface counts bits.
	return T(_mm256_slli_si256(u256x1(a), b / 8));
}
#endif