2021-03-15 23:11:44 +01:00
|
|
|
// Matrix Construct
|
|
|
|
//
|
|
|
|
// Copyright (C) Matrix Construct Developers, Authors & Contributors
|
|
|
|
// Copyright (C) 2016-2021 Jason Volk <jason@zemos.net>
|
|
|
|
//
|
|
|
|
// Permission to use, copy, modify, and/or distribute this software for any
|
|
|
|
// purpose with or without fee is hereby granted, provided that the above
|
|
|
|
// copyright notice and this permission notice is present in all copies. The
|
|
|
|
// full license for this software is available in the LICENSE file.
|
|
|
|
|
|
|
|
#pragma once
|
|
|
|
#define HAVE_IRCD_MATH_FMMA_H
|
|
|
|
|
|
|
|
namespace ircd::math
{
	// Option block for fmma(); the full definition follows this namespace.
	struct fmma_opts;

	/// Multiply-accumulate kernel: reads `in` and `weight`, accumulates into
	/// `out`, with the iteration extents taken from `opts`.
	template<class T>
	void fmma(T *out,
	          const T *in,
	          const T *weight,
	          const fmma_opts &opts);
}
|
|
|
|
|
|
|
|
/// Options for the template.
|
|
|
|
struct ircd::math::fmma_opts
|
|
|
|
{
|
|
|
|
size_t cols { 0 };
|
|
|
|
size_t rows { 0 };
|
|
|
|
size_t tiles { 1 };
|
|
|
|
};
|
|
|
|
|
|
|
|
/// Fused Matrix-Multiply & Accumulate
|
|
|
|
/// clang11 FMA vfmadd213ps/vfmadd231ps
|
|
|
|
/// clang11 FMA4 vfmaddps
|
2021-04-04 21:52:26 +02:00
|
|
|
template<class T>
|
2021-03-15 23:11:44 +01:00
|
|
|
inline void
|
|
|
|
ircd::math::fmma(T *const __restrict__ out,
|
|
|
|
const T *const __restrict__ in,
|
|
|
|
const T *const __restrict__ weight,
|
2021-04-04 21:52:26 +02:00
|
|
|
const struct fmma_opts &opts)
|
2021-03-15 23:11:44 +01:00
|
|
|
{
|
2021-04-04 21:52:26 +02:00
|
|
|
const auto
|
|
|
|
&cols{opts.cols},
|
|
|
|
&rows{opts.rows},
|
2021-03-15 23:11:44 +01:00
|
|
|
&tiles{opts.tiles},
|
|
|
|
&lanes{simd::lanes<T>()};
|
|
|
|
|
|
|
|
const auto width
|
|
|
|
{
|
|
|
|
cols / lanes / tiles
|
|
|
|
};
|
|
|
|
|
|
|
|
const auto height
|
|
|
|
{
|
|
|
|
rows / lanes
|
|
|
|
};
|
|
|
|
|
|
|
|
for(uint i(0); i < width; i++)
|
|
|
|
for(uint j(0); j < height; j++)
|
|
|
|
for(uint t(0); t < tiles; ++t)
|
|
|
|
for(uint l(0); l < lanes; ++l)
|
|
|
|
{
|
|
|
|
const auto x
|
|
|
|
{
|
|
|
|
i * tiles + t
|
|
|
|
};
|
|
|
|
|
|
|
|
const auto y
|
|
|
|
{
|
|
|
|
x * lanes + l
|
|
|
|
};
|
|
|
|
|
|
|
|
const T mul
|
|
|
|
{
|
|
|
|
in[x][l] * weight[y * height + j]
|
|
|
|
};
|
|
|
|
|
|
|
|
out[j] += mul;
|
|
|
|
}
|
|
|
|
}
|