mirror of
https://github.com/matrix-construct/construct
synced 2024-11-25 08:12:37 +01:00
ircd::simt: Move workgroup ident values to arguments.
This commit is contained in:
parent
b6207fac74
commit
a85f192066
6 changed files with 22 additions and 34 deletions
|
@ -15,12 +15,10 @@
|
|||
/// Broadcast originating from the local leader (index [0]). All threads in the
|
||||
/// group participate.
|
||||
inline void
|
||||
ircd_simt_broadcast_f4lldr(__local float4 *const buf)
|
||||
ircd_simt_broadcast_f4lldr(__local float4 *const buf,
|
||||
const uint ln,
|
||||
const uint li)
|
||||
{
|
||||
const uint
|
||||
li = get_local_id(0),
|
||||
ln = get_local_size(0);
|
||||
|
||||
for(uint stride = 1; stride < ln; stride <<= 1)
|
||||
{
|
||||
if(li < stride)
|
||||
|
|
|
@ -31,18 +31,16 @@ struct ircd_math_mean
|
|||
///
|
||||
inline void
|
||||
ircd_simt_math_mean_f4lldr(__local float4 *const restrict out,
|
||||
__local const float4 *const restrict in)
|
||||
__local const float4 *const restrict in,
|
||||
const uint ln,
|
||||
const uint li)
|
||||
{
|
||||
const uint
|
||||
li = get_local_id(0),
|
||||
ln = get_local_size(0);
|
||||
|
||||
out[li] = in[li];
|
||||
ircd_simt_reduce_add_f4lldr(out);
|
||||
ircd_simt_reduce_add_f4lldr(out, ln, li);
|
||||
|
||||
if(li == 0)
|
||||
out[li] = ircd_simt_reduce_add_f4(out[li]) / (ln * 4);
|
||||
|
||||
ircd_simt_broadcast_f4lldr(out);
|
||||
ircd_simt_broadcast_f4lldr(out, ln, li);
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -17,19 +17,17 @@
|
|||
inline void
|
||||
ircd_simt_math_norm_f4lldr(__local float4 *const out,
|
||||
__local const float4 *const in,
|
||||
__local float4 *const restrict tmp)
|
||||
__local float4 *const restrict tmp,
|
||||
const uint ln,
|
||||
const uint li)
|
||||
{
|
||||
const uint
|
||||
li = get_local_id(0),
|
||||
ln = get_local_size(0);
|
||||
|
||||
ircd_simt_math_mean_f4lldr(tmp, in);
|
||||
ircd_simt_math_mean_f4lldr(tmp, in, ln, li);
|
||||
|
||||
const float4
|
||||
sub_mean = in[li] - tmp[li];
|
||||
|
||||
tmp[li] = pow(sub_mean, 2);
|
||||
ircd_simt_math_mean_f4lldr(out, tmp);
|
||||
ircd_simt_math_mean_f4lldr(out, tmp, ln, li);
|
||||
|
||||
const float4
|
||||
epsilon = 0.00001f,
|
||||
|
|
|
@ -15,12 +15,10 @@
|
|||
/// Sum all elements in the buffer. All threads in the group participate;
|
||||
/// result is placed in index [0], the rest of the buffer is trashed.
|
||||
inline void
|
||||
ircd_simt_reduce_add_f4lldr(__local float4 *const buf)
|
||||
ircd_simt_reduce_add_f4lldr(__local float4 *const buf,
|
||||
const uint ln,
|
||||
const uint li)
|
||||
{
|
||||
const uint
|
||||
li = get_local_id(0),
|
||||
ln = get_local_size(0);
|
||||
|
||||
for(uint stride = ln >> 1; stride > 0; stride >>= 1)
|
||||
{
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
|
|
@ -16,12 +16,10 @@
|
|||
/// the greatest value is placed in index [0], the rest of the buffer is
|
||||
/// trashed.
|
||||
inline void
|
||||
ircd_simt_reduce_max_flldr(__local float *const buf)
|
||||
ircd_simt_reduce_max_flldr(__local float *const buf,
|
||||
const uint ln,
|
||||
const uint li)
|
||||
{
|
||||
const uint
|
||||
li = get_local_id(0),
|
||||
ln = get_local_size(0);
|
||||
|
||||
for(uint stride = ln >> 1; stride > 0; stride >>= 1)
|
||||
{
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
|
|
@ -59,12 +59,10 @@ ircd_simt_sort_idx16_trick(__local ushort *const idx,
|
|||
/// Sort indices in `idx` which point to values contained in `val`.
|
||||
inline void
|
||||
ircd_simt_sort_idx16_flldr(__local ushort *const idx,
|
||||
__global const float *const val)
|
||||
__global const float *const val,
|
||||
const uint ln,
|
||||
const uint li)
|
||||
{
|
||||
const uint
|
||||
li = get_local_id(0),
|
||||
ln = get_local_size(0);
|
||||
|
||||
for(uint up = 1; up < ln; up <<= 1)
|
||||
{
|
||||
const bool
|
||||
|
|
Loading…
Reference in a new issue