0
0
Fork 0
mirror of https://github.com/matrix-construct/construct synced 2024-11-16 15:00:51 +01:00

ircd::simt: Fix buffer and ordering hokey-pokey.

This commit is contained in:
Jason Volk 2022-01-07 10:59:31 -08:00
parent 781ead25b5
commit 12f3eb871d
2 changed files with 14 additions and 9 deletions

View file

@ -30,17 +30,21 @@ struct ircd_math_mean
/// ln = local group size
///
inline void
// NOTE(review): this span is a rendered diff hunk whose +/- markers were
// stripped, so two variants of the function appear interleaved: one taking
// separate `out`/`in` buffers and one taking a single `buf`. Which side is
// removed and which is added is inferred from the duplicated signature
// below -- confirm against the actual commit before relying on it.
//
// Variant with separate buffers (presumably the pre-commit signature).
ircd_simt_math_mean_f4lldr(__local float4 *const restrict out,
__local const float4 *const restrict in,
// Variant with one buffer holding both the input and the result
// (presumably the post-commit signature).
ircd_simt_math_mean_f4lldr(__local float4 *const buf,
const uint ln,
const uint li)
{
// (out/in variant) Copy this item's element of `in` into `out` first.
out[li] = in[li];
// (out/in variant) Reduction helper applied to `out`; helper is not visible here.
ircd_simt_reduce_add_f4lldr(out, ln, li);
// (buf variant) Same helper applied directly to `buf`; no copy is made here,
// so the buffer must already hold the input when this function is entered.
ircd_simt_reduce_add_f4lldr(buf, ln, li);
// Only the item with li == 0 turns element 0 into the mean.
if(li == 0)
// (out/in variant) Single-statement form of the division by (ln * 4).
out[li] = ircd_simt_reduce_add_f4(out[li]) / (ln * 4);
{
const float
// Presumably collapses the four components of buf[0] into one scalar --
// ircd_simt_reduce_add_f4 is defined elsewhere; TODO confirm.
sum = ircd_simt_reduce_add_f4(buf[li]),
// Divisor ln * 4: presumably ln float4 elements times 4 components each -- TODO confirm.
res = sum / (ln * 4);
// (out/in variant) Broadcast helper applied to `out`.
ircd_simt_broadcast_f4lldr(out, ln, li);
// Scalar `res` is assigned to a float4 element.
buf[li] = res;
}
// (buf variant) Broadcast helper applied to `buf`, called by every item
// since it sits outside the li == 0 branch.
ircd_simt_broadcast_f4lldr(buf, ln, li);
}
#endif

View file

@ -21,17 +21,18 @@ ircd_simt_math_norm_f4lldr(__local float4 *const out,
const uint ln,
const uint li)
{
ircd_simt_math_mean_f4lldr(tmp, in, ln, li);
tmp[li] = in[li];
ircd_simt_math_mean_f4lldr(tmp, ln, li);
const float4
sub_mean = in[li] - tmp[li];
tmp[li] = pow(sub_mean, 2);
ircd_simt_math_mean_f4lldr(out, tmp, ln, li);
ircd_simt_math_mean_f4lldr(tmp, ln, li);
const float4
epsilon = 0.00001f,
s = sqrt(out[li] + epsilon);
s = sqrt(tmp[li] + epsilon);
out[li] = sub_mean / s;
}