mirror of
https://github.com/matrix-construct/construct
synced 2024-09-28 19:58:53 +02:00
ircd::simt: Fix buffer and ordering hokey-pokey.
This commit is contained in:
parent
781ead25b5
commit
12f3eb871d
2 changed files with 14 additions and 9 deletions
|
@ -30,17 +30,21 @@ struct ircd_math_mean
|
||||||
/// ln = local group size
|
/// ln = local group size
|
||||||
///
|
///
|
||||||
inline void
|
inline void
|
||||||
ircd_simt_math_mean_f4lldr(__local float4 *const restrict out,
|
ircd_simt_math_mean_f4lldr(__local float4 *const buf,
|
||||||
__local const float4 *const restrict in,
|
|
||||||
const uint ln,
|
const uint ln,
|
||||||
const uint li)
|
const uint li)
|
||||||
{
|
{
|
||||||
out[li] = in[li];
|
ircd_simt_reduce_add_f4lldr(buf, ln, li);
|
||||||
ircd_simt_reduce_add_f4lldr(out, ln, li);
|
|
||||||
|
|
||||||
if(li == 0)
|
if(li == 0)
|
||||||
out[li] = ircd_simt_reduce_add_f4(out[li]) / (ln * 4);
|
{
|
||||||
|
const float
|
||||||
|
sum = ircd_simt_reduce_add_f4(buf[li]),
|
||||||
|
res = sum / (ln * 4);
|
||||||
|
|
||||||
ircd_simt_broadcast_f4lldr(out, ln, li);
|
buf[li] = res;
|
||||||
|
}
|
||||||
|
|
||||||
|
ircd_simt_broadcast_f4lldr(buf, ln, li);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -21,17 +21,18 @@ ircd_simt_math_norm_f4lldr(__local float4 *const out,
|
||||||
const uint ln,
|
const uint ln,
|
||||||
const uint li)
|
const uint li)
|
||||||
{
|
{
|
||||||
ircd_simt_math_mean_f4lldr(tmp, in, ln, li);
|
tmp[li] = in[li];
|
||||||
|
ircd_simt_math_mean_f4lldr(tmp, ln, li);
|
||||||
|
|
||||||
const float4
|
const float4
|
||||||
sub_mean = in[li] - tmp[li];
|
sub_mean = in[li] - tmp[li];
|
||||||
|
|
||||||
tmp[li] = pow(sub_mean, 2);
|
tmp[li] = pow(sub_mean, 2);
|
||||||
ircd_simt_math_mean_f4lldr(out, tmp, ln, li);
|
ircd_simt_math_mean_f4lldr(tmp, ln, li);
|
||||||
|
|
||||||
const float4
|
const float4
|
||||||
epsilon = 0.00001f,
|
epsilon = 0.00001f,
|
||||||
s = sqrt(out[li] + epsilon);
|
s = sqrt(tmp[li] + epsilon);
|
||||||
|
|
||||||
out[li] = sub_mean / s;
|
out[li] = sub_mean / s;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue