mirror of
https://github.com/matrix-construct/construct
synced 2024-11-29 02:02:38 +01:00
ircd::simt: Add optimizations from cl_khr_subgroups support.
This commit is contained in:
parent
0fb7577066
commit
47e54547f9
3 changed files with 29 additions and 0 deletions
|
@ -33,6 +33,14 @@ inline void
|
||||||
ircd_simt_broadcast_flldr(__local float *const buf,
|
ircd_simt_broadcast_flldr(__local float *const buf,
|
||||||
const uint ln,
|
const uint ln,
|
||||||
const uint li)
|
const uint li)
|
||||||
|
#if defined(cl_khr_subgroups)
|
||||||
|
{
|
||||||
|
const float
|
||||||
|
ret = work_group_broadcast(buf[0], 0);
|
||||||
|
|
||||||
|
buf[li] = ret;
|
||||||
|
}
|
||||||
|
#else
|
||||||
{
|
{
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
barrier(CLK_LOCAL_MEM_FENCE);
|
||||||
|
|
||||||
|
@ -40,3 +48,4 @@ ircd_simt_broadcast_flldr(__local float *const buf,
|
||||||
buf[li] = buf[0];
|
buf[li] = buf[0];
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
#endif
|
||||||
|
|
|
@ -39,6 +39,15 @@ inline void
|
||||||
ircd_simt_reduce_add_flldr(__local float *const buf,
|
ircd_simt_reduce_add_flldr(__local float *const buf,
|
||||||
const uint ln,
|
const uint ln,
|
||||||
const uint li)
|
const uint li)
|
||||||
|
#if defined(cl_khr_subgroups)
|
||||||
|
{
|
||||||
|
const float
|
||||||
|
ret = work_group_reduce_add(buf[li]);
|
||||||
|
|
||||||
|
if(li == 0)
|
||||||
|
buf[li] = ret;
|
||||||
|
}
|
||||||
|
#else
|
||||||
{
|
{
|
||||||
for(uint stride = ln >> 1; stride > 0; stride >>= 1)
|
for(uint stride = ln >> 1; stride > 0; stride >>= 1)
|
||||||
{
|
{
|
||||||
|
@ -52,6 +61,7 @@ ircd_simt_reduce_add_flldr(__local float *const buf,
|
||||||
buf[li] += buf[li + 2];
|
buf[li] += buf[li + 2];
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef __OPENCL_VERSION__
|
#ifdef __OPENCL_VERSION__
|
||||||
/// Sum all elements in the buffer. All threads in the group participate;
|
/// Sum all elements in the buffer. All threads in the group participate;
|
||||||
|
|
|
@ -19,6 +19,15 @@ inline void
|
||||||
ircd_simt_reduce_max_flldr(__local float *const buf,
|
ircd_simt_reduce_max_flldr(__local float *const buf,
|
||||||
const uint ln,
|
const uint ln,
|
||||||
const uint li)
|
const uint li)
|
||||||
|
#if defined(cl_khr_subgroups)
|
||||||
|
{
|
||||||
|
const float
|
||||||
|
ret = work_group_reduce_max(buf[li]);
|
||||||
|
|
||||||
|
if(li == 0)
|
||||||
|
buf[li] = ret;
|
||||||
|
}
|
||||||
|
#else
|
||||||
{
|
{
|
||||||
for(uint stride = ln >> 1; stride > 0; stride >>= 1)
|
for(uint stride = ln >> 1; stride > 0; stride >>= 1)
|
||||||
{
|
{
|
||||||
|
@ -34,6 +43,7 @@ ircd_simt_reduce_max_flldr(__local float *const buf,
|
||||||
buf[li] = buf[li + 2];
|
buf[li] = buf[li + 2];
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef __OPENCL_VERSION__
|
#ifdef __OPENCL_VERSION__
|
||||||
inline void
|
inline void
|
||||||
|
|
Loading…
Reference in a new issue