0
0
Fork 0
mirror of https://github.com/matrix-construct/construct synced 2024-11-29 02:02:38 +01:00

ircd::simt: Add dressings for cross-inclusion.

This commit is contained in:
Jason Volk 2021-05-02 14:43:10 -07:00
parent d4e41218b0
commit 5e91d51e6a
8 changed files with 40 additions and 0 deletions

View file

@ -48,6 +48,7 @@
#include "buffer/buffer.h"
#include "vg.h"
#include "simd/simd.h"
#include "simt/simt.h"
#include "allocator.h"
#include "util/util.h"
#include "exception.h"

View file

@ -8,6 +8,10 @@
// copyright notice and this permission notice is present in all copies. The
// full license for this software is available in the LICENSE file.
#pragma once
#define HAVE_IRCD_SIMT_BROADCAST_H
#ifdef __OPENCL_C_VERSION__
/// Broadcast originating from the local leader (index [0]). All threads in the
/// group participate.
inline void
@ -25,3 +29,4 @@ ircd_simt_broadcast_f4lldr(__local float4 *const buf)
barrier(CLK_LOCAL_MEM_FENCE);
}
}
#endif

View file

@ -8,6 +8,10 @@
// copyright notice and this permission notice is present in all copies. The
// full license for this software is available in the LICENSE file.
#pragma once
#define HAVE_IRCD_SIMT_MEAN_H
#ifdef __OPENCL_C_VERSION__
/// Compute average of all elements in the input. The result is broadcast
/// to all elements of the output.
///
@ -31,3 +35,4 @@ ircd_simt_math_mean_f4lldr(__local float4 *const restrict out,
ircd_simt_broadcast_f4lldr(out);
}
#endif

View file

@ -8,6 +8,10 @@
// copyright notice and this permission notice is present in all copies. The
// full license for this software is available in the LICENSE file.
#pragma once
#define HAVE_IRCD_SIMT_NORM_H
#ifdef __OPENCL_C_VERSION__
/// Normalize the input, placing the result in possibly overlapping output.
/// This procedure requires an additional temporary buffer.
inline void
@ -33,3 +37,4 @@ ircd_simt_math_norm_f4lldr(__local float4 *const out,
out[li] = sub_mean / s;
}
#endif

View file

@ -8,6 +8,10 @@
// copyright notice and this permission notice is present in all copies. The
// full license for this software is available in the LICENSE file.
#pragma once
#define HAVE_IRCD_SIMT_RAND_H
#ifdef __OPENCL_C_VERSION__
/// Generate the next pseudo-random 64-bit sequence from the 256-bit state
/// and update the state for the next call.
inline ulong
@ -27,7 +31,9 @@ ircd_simt_rand_xoshiro256p(ulong s[4])
return ret;
}
#endif
#ifdef __OPENCL_C_VERSION__
/// Generate the next pseudo-random 64-bit sequence from the 256-bit global
/// state and update the state for the next call.
inline ulong
@ -43,3 +49,4 @@ ircd_simt_rand_xoshiro256pg(__global ulong s[4])
return ret;
}
#endif

View file

@ -8,6 +8,10 @@
// copyright notice and this permission notice is present in all copies. The
// full license for this software is available in the LICENSE file.
#pragma once
#define HAVE_IRCD_SIMT_REDUCE_ADD_H
#ifdef __OPENCL_C_VERSION__
/// Sum all elements in the buffer. All threads in the group participate;
/// result is placed in index [0], the rest of the buffer is trashed.
inline void
@ -25,7 +29,9 @@ ircd_simt_reduce_add_f4lldr(__local float4 *const buf)
buf[li] += buf[li + stride];
}
}
#endif
#ifdef __OPENCL_C_VERSION__
/// Sum all elements of the input vector; the scalar result is returned to
/// the caller.
inline float
@ -38,3 +44,4 @@ ircd_simt_reduce_add_f4(const float4 in)
return ret;
}
#endif

View file

@ -8,6 +8,10 @@
// copyright notice and this permission notice is present in all copies. The
// full license for this software is available in the LICENSE file.
#pragma once
#define HAVE_IRCD_SIMT_REDUCE_MAX_H
#ifdef __OPENCL_C_VERSION__
/// Find the greatest value in the buffer. All threads in the group participate;
/// the greatest value is placed in index [0], the rest of the buffer is
/// trashed.
@ -27,3 +31,4 @@ ircd_simt_reduce_max_flldr(__local float *const buf)
buf[li] = buf[li + stride];
}
}
#endif

View file

@ -8,6 +8,10 @@
// copyright notice and this permission notice is present in all copies. The
// full license for this software is available in the LICENSE file.
#pragma once
#define HAVE_IRCD_SIMT_SORT_H
#ifdef __OPENCL_C_VERSION__
/// Sort indices in `idx` which point to values contained in `val`.
inline void
ircd_simt_sort_idx16_flldr(__local ushort *const idx,
@ -31,3 +35,4 @@ ircd_simt_sort_idx16_flldr(__local ushort *const idx,
idx[li + stride] = ours;
}
}
#endif