mirror of
https://github.com/matrix-construct/construct
synced 2024-12-27 07:54:05 +01:00
ircd::simt: Simplify various loop/mask conditionals; minor cleanup.
This commit is contained in:
parent
1e08339955
commit
de166dfab8
2 changed files with 11 additions and 17 deletions
|
@@ -20,16 +20,11 @@ ircd_simt_math_mean_f4lldr(__local float4 *const restrict out,
|
||||||
ircd_simt_reduce_add_f4lldr(out, num, i);
|
ircd_simt_reduce_add_f4lldr(out, num, i);
|
||||||
|
|
||||||
if(i == 0)
|
if(i == 0)
|
||||||
{
|
for(uint k = 1; k < 4; ++k)
|
||||||
float numerator = 0.0f;
|
out[i][0] += out[i][k];
|
||||||
float4 numeratorv = out[i];
|
|
||||||
for(uint k = 0; k < 4; ++k)
|
|
||||||
numerator += numeratorv[k];
|
|
||||||
|
|
||||||
out[i] = numerator;
|
if(i == 0)
|
||||||
}
|
out[i] = out[i][0] / (num * 4);
|
||||||
|
|
||||||
ircd_simt_broadcast_f4lldr(out, num, i);
|
ircd_simt_broadcast_f4lldr(out, num, i);
|
||||||
const float4 numeratorv = out[i];
|
|
||||||
out[i] = numeratorv / (num * 4);
|
|
||||||
}
|
}
|
||||||
|
|
|
@@ -18,15 +18,14 @@ ircd_simt_sort_idx16_flldr(__local ushort *const idx,
|
||||||
for(uint stride = ln >> 1; stride > 0; stride >>= 1)
|
for(uint stride = ln >> 1; stride > 0; stride >>= 1)
|
||||||
{
|
{
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
barrier(CLK_LOCAL_MEM_FENCE);
|
||||||
|
if(li >= stride || val[idx[li]] >= val[idx[li + stride]])
|
||||||
|
continue;
|
||||||
|
|
||||||
if(li < stride && val[idx[li]] < val[idx[li + stride]])
|
const ushort
|
||||||
{
|
ours = idx[li],
|
||||||
const ushort
|
theirs = idx[li + stride];
|
||||||
ours = idx[li],
|
|
||||||
theirs = idx[li + stride];
|
|
||||||
|
|
||||||
idx[li] = theirs;
|
idx[li] = theirs;
|
||||||
idx[li + stride] = ours;
|
idx[li + stride] = ours;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue