mirror of
https://github.com/matrix-construct/construct
synced 2024-11-16 15:00:51 +01:00
ircd::simt: Fix buffer and ordering hokey-pokey.
This commit is contained in:
parent
781ead25b5
commit
12f3eb871d
2 changed files with 14 additions and 9 deletions
|
@@ -30,17 +30,21 @@ struct ircd_math_mean
|
|||
/// ln = local group size
|
||||
///
|
||||
inline void
|
||||
ircd_simt_math_mean_f4lldr(__local float4 *const restrict out,
|
||||
__local const float4 *const restrict in,
|
||||
ircd_simt_math_mean_f4lldr(__local float4 *const buf,
|
||||
const uint ln,
|
||||
const uint li)
|
||||
{
|
||||
out[li] = in[li];
|
||||
ircd_simt_reduce_add_f4lldr(out, ln, li);
|
||||
ircd_simt_reduce_add_f4lldr(buf, ln, li);
|
||||
|
||||
if(li == 0)
|
||||
out[li] = ircd_simt_reduce_add_f4(out[li]) / (ln * 4);
|
||||
{
|
||||
const float
|
||||
sum = ircd_simt_reduce_add_f4(buf[li]),
|
||||
res = sum / (ln * 4);
|
||||
|
||||
ircd_simt_broadcast_f4lldr(out, ln, li);
|
||||
buf[li] = res;
|
||||
}
|
||||
|
||||
ircd_simt_broadcast_f4lldr(buf, ln, li);
|
||||
}
|
||||
#endif
|
||||
|
|
|
@@ -21,17 +21,18 @@ ircd_simt_math_norm_f4lldr(__local float4 *const out,
|
|||
const uint ln,
|
||||
const uint li)
|
||||
{
|
||||
ircd_simt_math_mean_f4lldr(tmp, in, ln, li);
|
||||
tmp[li] = in[li];
|
||||
ircd_simt_math_mean_f4lldr(tmp, ln, li);
|
||||
|
||||
const float4
|
||||
sub_mean = in[li] - tmp[li];
|
||||
|
||||
tmp[li] = pow(sub_mean, 2);
|
||||
ircd_simt_math_mean_f4lldr(out, tmp, ln, li);
|
||||
ircd_simt_math_mean_f4lldr(tmp, ln, li);
|
||||
|
||||
const float4
|
||||
epsilon = 0.00001f,
|
||||
s = sqrt(out[li] + epsilon);
|
||||
s = sqrt(tmp[li] + epsilon);
|
||||
|
||||
out[li] = sub_mean / s;
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue