0
0
Fork 0
mirror of https://github.com/matrix-construct/construct synced 2024-11-25 08:12:37 +01:00

ircd::simt: Move workgroup ident values to arguments.

This commit is contained in:
Jason Volk 2021-11-05 19:26:39 -07:00
parent b6207fac74
commit a85f192066
6 changed files with 22 additions and 34 deletions

View file

@ -15,12 +15,10 @@
/// Broadcast originating from the local leader (index [0]). All threads in the
/// group participate.
inline void
ircd_simt_broadcast_f4lldr(__local float4 *const buf)
ircd_simt_broadcast_f4lldr(__local float4 *const buf,
const uint ln,
const uint li)
{
const uint
li = get_local_id(0),
ln = get_local_size(0);
for(uint stride = 1; stride < ln; stride <<= 1)
{
if(li < stride)

View file

@ -31,18 +31,16 @@ struct ircd_math_mean
///
inline void
ircd_simt_math_mean_f4lldr(__local float4 *const restrict out,
__local const float4 *const restrict in)
__local const float4 *const restrict in,
const uint ln,
const uint li)
{
const uint
li = get_local_id(0),
ln = get_local_size(0);
out[li] = in[li];
ircd_simt_reduce_add_f4lldr(out);
ircd_simt_reduce_add_f4lldr(out, ln, li);
if(li == 0)
out[li] = ircd_simt_reduce_add_f4(out[li]) / (ln * 4);
ircd_simt_broadcast_f4lldr(out);
ircd_simt_broadcast_f4lldr(out, ln, li);
}
#endif

View file

@ -17,19 +17,17 @@
inline void
ircd_simt_math_norm_f4lldr(__local float4 *const out,
__local const float4 *const in,
__local float4 *const restrict tmp)
__local float4 *const restrict tmp,
const uint ln,
const uint li)
{
const uint
li = get_local_id(0),
ln = get_local_size(0);
ircd_simt_math_mean_f4lldr(tmp, in);
ircd_simt_math_mean_f4lldr(tmp, in, ln, li);
const float4
sub_mean = in[li] - tmp[li];
tmp[li] = pow(sub_mean, 2);
ircd_simt_math_mean_f4lldr(out, tmp);
ircd_simt_math_mean_f4lldr(out, tmp, ln, li);
const float4
epsilon = 0.00001f,

View file

@ -15,12 +15,10 @@
/// Sum all elements in the buffer. All threads in the group participate;
/// result is placed in index [0], the rest of the buffer is trashed.
inline void
ircd_simt_reduce_add_f4lldr(__local float4 *const buf)
ircd_simt_reduce_add_f4lldr(__local float4 *const buf,
const uint ln,
const uint li)
{
const uint
li = get_local_id(0),
ln = get_local_size(0);
for(uint stride = ln >> 1; stride > 0; stride >>= 1)
{
barrier(CLK_LOCAL_MEM_FENCE);

View file

@ -16,12 +16,10 @@
/// the greatest value is placed in index [0], the rest of the buffer is
/// trashed.
inline void
ircd_simt_reduce_max_flldr(__local float *const buf)
ircd_simt_reduce_max_flldr(__local float *const buf,
const uint ln,
const uint li)
{
const uint
li = get_local_id(0),
ln = get_local_size(0);
for(uint stride = ln >> 1; stride > 0; stride >>= 1)
{
barrier(CLK_LOCAL_MEM_FENCE);

View file

@ -59,12 +59,10 @@ ircd_simt_sort_idx16_trick(__local ushort *const idx,
/// Sort indices in `idx` which point to values contained in `val`.
inline void
ircd_simt_sort_idx16_flldr(__local ushort *const idx,
__global const float *const val)
__global const float *const val,
const uint ln,
const uint li)
{
const uint
li = get_local_id(0),
ln = get_local_size(0);
for(uint up = 1; up < ln; up <<= 1)
{
const bool