0
0
Fork 0
mirror of https://github.com/matrix-construct/construct synced 2024-11-25 08:12:37 +01:00

ircd::math: Simplify fmma template.

This commit is contained in:
Jason Volk 2021-04-04 12:52:26 -07:00
parent e04d734959
commit a476df4ca1

View file

@ -13,9 +13,8 @@
namespace ircd::math namespace ircd::math
{ {
template<const struct fmma_opts &opts, template<class T>
class T> void fmma(T *, const T *, const T *, const struct fmma_opts &);
void fmma(T *, const T *, const T *, size_t = 0, size_t = 0);
} }
/// Options for the template. /// Options for the template.
@ -24,29 +23,24 @@ struct ircd::math::fmma_opts
size_t cols { 0 }; size_t cols { 0 };
size_t rows { 0 }; size_t rows { 0 };
size_t tiles { 1 }; size_t tiles { 1 };
char polarity { 'x' };
}; };
/// Fused Matrix-Multiply & Accumulate /// Fused Matrix-Multiply & Accumulate
/// clang11 FMA vfmadd213ps/vfmadd231ps /// clang11 FMA vfmadd213ps/vfmadd231ps
/// clang11 FMA4 vfmaddps /// clang11 FMA4 vfmaddps
template<const ircd::math::fmma_opts &opts, template<class T>
class T>
inline void inline void
ircd::math::fmma(T *const __restrict__ out, ircd::math::fmma(T *const __restrict__ out,
const T *const __restrict__ in, const T *const __restrict__ in,
const T *const __restrict__ weight, const T *const __restrict__ weight,
size_t cols, const struct fmma_opts &opts)
size_t rows)
{ {
static const auto const auto
&cols{opts.cols},
&rows{opts.rows},
&tiles{opts.tiles}, &tiles{opts.tiles},
&lanes{simd::lanes<T>()}; &lanes{simd::lanes<T>()};
cols = cols?: opts.cols;
rows = rows?: opts.rows;
std::swap(rows, opts.polarity == 'y'? cols: rows);
const auto width const auto width
{ {
cols / lanes / tiles cols / lanes / tiles