mirror of
https://github.com/matrix-construct/construct
synced 2024-11-25 00:02:34 +01:00
ircd::simt: Add optimizations enabled when cl_khr_subgroups is supported.
This commit is contained in:
parent
0fb7577066
commit
47e54547f9
3 changed files with 29 additions and 0 deletions
|
@ -33,6 +33,14 @@ inline void
|
|||
ircd_simt_broadcast_flldr(__local float *const buf,
|
||||
const uint ln,
|
||||
const uint li)
|
||||
#if defined(cl_khr_subgroups)
|
||||
{
|
||||
const float
|
||||
ret = work_group_broadcast(buf[0], 0);
|
||||
|
||||
buf[li] = ret;
|
||||
}
|
||||
#else
|
||||
{
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
|
@ -40,3 +48,4 @@ ircd_simt_broadcast_flldr(__local float *const buf,
|
|||
buf[li] = buf[0];
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
|
|
@ -39,6 +39,15 @@ inline void
|
|||
ircd_simt_reduce_add_flldr(__local float *const buf,
|
||||
const uint ln,
|
||||
const uint li)
|
||||
#if defined(cl_khr_subgroups)
|
||||
{
|
||||
const float
|
||||
ret = work_group_reduce_add(buf[li]);
|
||||
|
||||
if(li == 0)
|
||||
buf[li] = ret;
|
||||
}
|
||||
#else
|
||||
{
|
||||
for(uint stride = ln >> 1; stride > 0; stride >>= 1)
|
||||
{
|
||||
|
@ -52,6 +61,7 @@ ircd_simt_reduce_add_flldr(__local float *const buf,
|
|||
buf[li] += buf[li + 2];
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef __OPENCL_VERSION__
|
||||
/// Sum all elements in the buffer. All threads in the group participate;
|
||||
|
|
|
@ -19,6 +19,15 @@ inline void
|
|||
ircd_simt_reduce_max_flldr(__local float *const buf,
|
||||
const uint ln,
|
||||
const uint li)
|
||||
#if defined(cl_khr_subgroups)
|
||||
{
|
||||
const float
|
||||
ret = work_group_reduce_max(buf[li]);
|
||||
|
||||
if(li == 0)
|
||||
buf[li] = ret;
|
||||
}
|
||||
#else
|
||||
{
|
||||
for(uint stride = ln >> 1; stride > 0; stride >>= 1)
|
||||
{
|
||||
|
@ -34,6 +43,7 @@ ircd_simt_reduce_max_flldr(__local float *const buf,
|
|||
buf[li] = buf[li + 2];
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef __OPENCL_VERSION__
|
||||
inline void
|
||||
|
|
Loading…
Reference in a new issue