0
0
Fork 0
mirror of https://github.com/matrix-construct/construct synced 2024-11-17 15:30:52 +01:00

ircd::simd: Add scatter template.

This commit is contained in:
Jason Volk 2020-09-20 06:25:29 -07:00
parent dc14f2f803
commit 4168f2899f
2 changed files with 61 additions and 0 deletions

View file

@ -0,0 +1,60 @@
// The Construct
//
// Copyright (C) The Construct Developers, Authors & Contributors
// Copyright (C) 2016-2020 Jason Volk <jason@zemos.net>
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice is present in all copies. The
// full license for this software is available in the LICENSE file.
#pragma once
#define HAVE_IRCD_SIMD_SCATTER_H
namespace ircd::simd
{
template<int scale = 1,
class value,
class index_vector,
class value_vector>
void scatter(value *, const index_vector, const u64, const value_vector) noexcept;
}
/// Scatter values to memory locations from the argument vector.
///
/// Each lane in the index vector corresponds to each lane in the operand vector.
/// Each bit in the mask corresponds to each lane as well.
///
template<int scale,
class value,
class index_vector,
class value_vector>
inline void
ircd::simd::scatter(value *const __restrict__ base,
const index_vector index,
const u64 mask,
const value_vector val)
noexcept
{
static_assert
(
lanes<index_vector>() == lanes<value_vector>()
);
const auto *const in
{
reinterpret_cast<const lane_type<value_vector> *>(&val)
};
const auto *const idx
{
reinterpret_cast<const lane_type<index_vector> *>(&index)
};
#if defined(__AVX512F__)
#pragma clang loop unroll(disable)
#endif
for(size_t i(0); i < lanes<index_vector>(); ++i)
if(mask & (1UL << i))
base[idx[i] * scale] = in[i];
}

View file

@ -21,6 +21,7 @@
#include "upper.h" #include "upper.h"
#include "mask.h" #include "mask.h"
#include "gather.h" #include "gather.h"
#include "scatter.h"
#include "shl.h" #include "shl.h"
#include "shr.h" #include "shr.h"
#include "rol.h" #include "rol.h"