mirror of
https://github.com/matrix-construct/construct
synced 2024-11-25 08:12:37 +01:00
ircd::math: Simplify fmma template.
This commit is contained in:
parent
e04d734959
commit
a476df4ca1
1 changed file with 7 additions and 13 deletions
|
@ -13,9 +13,8 @@
|
||||||
|
|
||||||
namespace ircd::math
|
namespace ircd::math
|
||||||
{
|
{
|
||||||
template<const struct fmma_opts &opts,
|
template<class T>
|
||||||
class T>
|
void fmma(T *, const T *, const T *, const struct fmma_opts &);
|
||||||
void fmma(T *, const T *, const T *, size_t = 0, size_t = 0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Options for the template.
|
/// Options for the template.
|
||||||
|
@ -24,29 +23,24 @@ struct ircd::math::fmma_opts
|
||||||
size_t cols { 0 };
|
size_t cols { 0 };
|
||||||
size_t rows { 0 };
|
size_t rows { 0 };
|
||||||
size_t tiles { 1 };
|
size_t tiles { 1 };
|
||||||
char polarity { 'x' };
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Fused Matrix-Multiply & Accumulate
|
/// Fused Matrix-Multiply & Accumulate
|
||||||
/// clang11 FMA vfmadd213ps/vfmadd231ps
|
/// clang11 FMA vfmadd213ps/vfmadd231ps
|
||||||
/// clang11 FMA4 vfmaddps
|
/// clang11 FMA4 vfmaddps
|
||||||
template<const ircd::math::fmma_opts &opts,
|
template<class T>
|
||||||
class T>
|
|
||||||
inline void
|
inline void
|
||||||
ircd::math::fmma(T *const __restrict__ out,
|
ircd::math::fmma(T *const __restrict__ out,
|
||||||
const T *const __restrict__ in,
|
const T *const __restrict__ in,
|
||||||
const T *const __restrict__ weight,
|
const T *const __restrict__ weight,
|
||||||
size_t cols,
|
const struct fmma_opts &opts)
|
||||||
size_t rows)
|
|
||||||
{
|
{
|
||||||
static const auto
|
const auto
|
||||||
|
&cols{opts.cols},
|
||||||
|
&rows{opts.rows},
|
||||||
&tiles{opts.tiles},
|
&tiles{opts.tiles},
|
||||||
&lanes{simd::lanes<T>()};
|
&lanes{simd::lanes<T>()};
|
||||||
|
|
||||||
cols = cols?: opts.cols;
|
|
||||||
rows = rows?: opts.rows;
|
|
||||||
std::swap(rows, opts.polarity == 'y'? cols: rows);
|
|
||||||
|
|
||||||
const auto width
|
const auto width
|
||||||
{
|
{
|
||||||
cols / lanes / tiles
|
cols / lanes / tiles
|
||||||
|
|
Loading…
Reference in a new issue