mirror of
https://github.com/matrix-construct/construct
synced 2024-12-30 17:34:04 +01:00
ircd::simt: Add dressings for cross-inclusion.
This commit is contained in:
parent
d4e41218b0
commit
5e91d51e6a
8 changed files with 40 additions and 0 deletions
|
@ -48,6 +48,7 @@
|
|||
#include "buffer/buffer.h"
|
||||
#include "vg.h"
|
||||
#include "simd/simd.h"
|
||||
#include "simt/simt.h"
|
||||
#include "allocator.h"
|
||||
#include "util/util.h"
|
||||
#include "exception.h"
|
||||
|
|
|
@ -8,6 +8,10 @@
|
|||
// copyright notice and this permission notice is present in all copies. The
|
||||
// full license for this software is available in the LICENSE file.
|
||||
|
||||
#pragma once
|
||||
#define HAVE_IRCD_SIMT_BROADCAST_H
|
||||
|
||||
#ifdef __OPENCL_C_VERSION__
|
||||
/// Broadcast originating from the local leader (index [0]). All threads in the
|
||||
/// group participate.
|
||||
inline void
|
||||
|
@ -25,3 +29,4 @@ ircd_simt_broadcast_f4lldr(__local float4 *const buf)
|
|||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -8,6 +8,10 @@
|
|||
// copyright notice and this permission notice is present in all copies. The
|
||||
// full license for this software is available in the LICENSE file.
|
||||
|
||||
#pragma once
|
||||
#define HAVE_IRCD_SIMT_MEAN_H
|
||||
|
||||
#ifdef __OPENCL_C_VERSION__
|
||||
/// Compute average of all elements in the input. The result is broadcast
|
||||
/// to all elements of the output.
|
||||
///
|
||||
|
@ -31,3 +35,4 @@ ircd_simt_math_mean_f4lldr(__local float4 *const restrict out,
|
|||
|
||||
ircd_simt_broadcast_f4lldr(out);
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -8,6 +8,10 @@
|
|||
// copyright notice and this permission notice is present in all copies. The
|
||||
// full license for this software is available in the LICENSE file.
|
||||
|
||||
#pragma once
|
||||
#define HAVE_IRCD_SIMT_NORM_H
|
||||
|
||||
#ifdef __OPENCL_C_VERSION__
|
||||
/// Normalize the input, placing the result in possibly overlapping output.
|
||||
/// This procedure requires an additional temporary buffer.
|
||||
inline void
|
||||
|
@ -33,3 +37,4 @@ ircd_simt_math_norm_f4lldr(__local float4 *const out,
|
|||
|
||||
out[li] = sub_mean / s;
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -8,6 +8,10 @@
|
|||
// copyright notice and this permission notice is present in all copies. The
|
||||
// full license for this software is available in the LICENSE file.
|
||||
|
||||
#pragma once
|
||||
#define HAVE_IRCD_SIMT_RAND_H
|
||||
|
||||
#ifdef __OPENCL_C_VERSION__
|
||||
/// Generate the next pseudo-random 64-bit sequence from the 256-bit state
|
||||
/// and update the state for the next call.
|
||||
inline ulong
|
||||
|
@ -27,7 +31,9 @@ ircd_simt_rand_xoshiro256p(ulong s[4])
|
|||
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef __OPENCL_C_VERSION__
|
||||
/// Generate the next pseudo-random 64-bit sequence from the 256-bit global
|
||||
/// state and update the state for the next call.
|
||||
inline ulong
|
||||
|
@ -43,3 +49,4 @@ ircd_simt_rand_xoshiro256pg(__global ulong s[4])
|
|||
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -8,6 +8,10 @@
|
|||
// copyright notice and this permission notice is present in all copies. The
|
||||
// full license for this software is available in the LICENSE file.
|
||||
|
||||
#pragma once
|
||||
#define HAVE_IRCD_SIMT_REDUCE_ADD_H
|
||||
|
||||
#ifdef __OPENCL_C_VERSION__
|
||||
/// Sum all elements in the buffer. All threads in the group participate;
|
||||
/// result is placed in index [0], the rest of the buffer is trashed.
|
||||
inline void
|
||||
|
@ -25,7 +29,9 @@ ircd_simt_reduce_add_f4lldr(__local float4 *const buf)
|
|||
buf[li] += buf[li + stride];
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef __OPENCL_C_VERSION__
|
||||
/// Sum the four components of the input vector `in`.
|
||||
/// The scalar sum is returned to the caller.
|
||||
inline float
|
||||
|
@ -38,3 +44,4 @@ ircd_simt_reduce_add_f4(const float4 in)
|
|||
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -8,6 +8,10 @@
|
|||
// copyright notice and this permission notice is present in all copies. The
|
||||
// full license for this software is available in the LICENSE file.
|
||||
|
||||
#pragma once
|
||||
#define HAVE_IRCD_SIMT_REDUCE_MAX_H
|
||||
|
||||
#ifdef __OPENCL_C_VERSION__
|
||||
/// Find the greatest value in the buffer. All threads in the group participate;
|
||||
/// the greatest value is placed in index [0], the rest of the buffer is
|
||||
/// trashed.
|
||||
|
@ -27,3 +31,4 @@ ircd_simt_reduce_max_flldr(__local float *const buf)
|
|||
buf[li] = buf[li + stride];
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -8,6 +8,10 @@
|
|||
// copyright notice and this permission notice is present in all copies. The
|
||||
// full license for this software is available in the LICENSE file.
|
||||
|
||||
#pragma once
|
||||
#define HAVE_IRCD_SIMT_SORT_H
|
||||
|
||||
#ifdef __OPENCL_C_VERSION__
|
||||
/// Sort indices in `idx` which point to values contained in `val`.
|
||||
inline void
|
||||
ircd_simt_sort_idx16_flldr(__local ushort *const idx,
|
||||
|
@ -31,3 +35,4 @@ ircd_simt_sort_idx16_flldr(__local ushort *const idx,
|
|||
idx[li + stride] = ours;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
|
Loading…
Reference in a new issue