0
0
Fork 0
mirror of https://github.com/matrix-construct/construct synced 2024-09-28 19:58:53 +02:00

ircd::simt: Fix buffer and ordering hokey-pokey.

This commit is contained in:
Jason Volk 2022-01-07 10:59:31 -08:00
parent 781ead25b5
commit 12f3eb871d
2 changed files with 14 additions and 9 deletions

View file

@@ -30,17 +30,21 @@ struct ircd_math_mean
/// ln = local group size /// ln = local group size
/// ///
inline void inline void
ircd_simt_math_mean_f4lldr(__local float4 *const restrict out, ircd_simt_math_mean_f4lldr(__local float4 *const buf,
__local const float4 *const restrict in,
const uint ln, const uint ln,
const uint li) const uint li)
{ {
out[li] = in[li]; ircd_simt_reduce_add_f4lldr(buf, ln, li);
ircd_simt_reduce_add_f4lldr(out, ln, li);
if(li == 0) if(li == 0)
out[li] = ircd_simt_reduce_add_f4(out[li]) / (ln * 4); {
const float
sum = ircd_simt_reduce_add_f4(buf[li]),
res = sum / (ln * 4);
ircd_simt_broadcast_f4lldr(out, ln, li); buf[li] = res;
}
ircd_simt_broadcast_f4lldr(buf, ln, li);
} }
#endif #endif

View file

@@ -21,17 +21,18 @@ ircd_simt_math_norm_f4lldr(__local float4 *const out,
const uint ln, const uint ln,
const uint li) const uint li)
{ {
ircd_simt_math_mean_f4lldr(tmp, in, ln, li); tmp[li] = in[li];
ircd_simt_math_mean_f4lldr(tmp, ln, li);
const float4 const float4
sub_mean = in[li] - tmp[li]; sub_mean = in[li] - tmp[li];
tmp[li] = pow(sub_mean, 2); tmp[li] = pow(sub_mean, 2);
ircd_simt_math_mean_f4lldr(out, tmp, ln, li); ircd_simt_math_mean_f4lldr(tmp, ln, li);
const float4 const float4
epsilon = 0.00001f, epsilon = 0.00001f,
s = sqrt(out[li] + epsilon); s = sqrt(tmp[li] + epsilon);
out[li] = sub_mean / s; out[li] = sub_mean / s;
} }