mirror of
https://github.com/matrix-construct/construct
synced 2024-11-29 02:02:38 +01:00
ircd::simt: Start a CLC toolchest.
This commit is contained in:
parent
29e74ec9e1
commit
5962a58c1e
7 changed files with 228 additions and 0 deletions
25
include/ircd/simt/broadcast.h
Normal file
25
include/ircd/simt/broadcast.h
Normal file
|
@ -0,0 +1,25 @@
|
|||
// Matrix Construct
|
||||
//
|
||||
// Copyright (C) Matrix Construct Developers, Authors & Contributors
|
||||
// Copyright (C) 2016-2021 Jason Volk <jason@zemos.net>
|
||||
//
|
||||
// Permission to use, copy, modify, and/or distribute this software for any
|
||||
// purpose with or without fee is hereby granted, provided that the above
|
||||
// copyright notice and this permission notice is present in all copies. The
|
||||
// full license for this software is available in the LICENSE file.
|
||||
|
||||
/// Broadcast the value held at index [0] of `buf` (the local leader) to the
/// elements [1, ln). The copy fans out by doubling: on each pass the threads
/// whose `li` is below the current stride copy their own element `stride`
/// slots upward.
///
/// All threads in the group participate and must pass the same `ln`. A
/// barrier(CLK_LOCAL_MEM_FENCE) is executed after every pass, including the
/// last one. No barrier is executed before the first pass: index [0] has to
/// be written by the leader itself (or already be synchronized), and a write
/// to `buf[1]` by another thread that is not ordered before this call races
/// with the first pass.
///
/// `ln` may be any count, not only a power of two; copies whose destination
/// would fall at or beyond `buf[ln]` are skipped.
inline void
ircd_simt_broadcast_f4lldr(__local float4 *const buf,
                           const uint ln,
                           const uint li)
{
	for(uint stride = 1; stride < ln; stride <<= 1)
	{
		// Only elements which already hold the value act as a source, and
		// only when the destination is still inside the buffer.
		if(li < stride && li + stride < ln)
			buf[li + stride] = buf[li];

		barrier(CLK_LOCAL_MEM_FENCE);
	}
}
|
32
include/ircd/simt/mean.h
Normal file
32
include/ircd/simt/mean.h
Normal file
|
@ -0,0 +1,32 @@
|
|||
// Matrix Construct
|
||||
//
|
||||
// Copyright (C) Matrix Construct Developers, Authors & Contributors
|
||||
// Copyright (C) 2016-2021 Jason Volk <jason@zemos.net>
|
||||
//
|
||||
// Permission to use, copy, modify, and/or distribute this software for any
|
||||
// purpose with or without fee is hereby granted, provided that the above
|
||||
// copyright notice and this permission notice is present in all copies. The
|
||||
// full license for this software is available in the LICENSE file.
|
||||
|
||||
/// Compute the average of all `num * 4` scalar components of the `num`
/// float4 elements in `in`. The result is broadcast to all four lanes of
/// every element of `out`.
///
/// All threads in the group participate; thread `i` copies `in[i]` and
/// finally rescales `out[i]`. `out` doubles as the scratch buffer for the
/// reduction, so its contents are overwritten throughout.
inline void
ircd_simt_math_mean_f4lldr(__local float4 *const restrict out,
                           __local const float4 *const restrict in,
                           const uint num,
                           const uint i)
{
	// Stage the input and sum it lane-wise; the four lane sums land in out[0].
	out[i] = in[i];
	ircd_simt_reduce_add_f4lldr(out, num, i);

	// Only the leader collapses the four lane sums into a single scalar.
	// The other threads must leave the buffer alone here: a store to out[1]
	// by thread 1 would race with the leader reading it in the last
	// reduction pass and with the leader writing it in the first broadcast
	// pass, neither of which is separated from this point by a barrier.
	if(i == 0)
	{
		const float4
		lanes = out[0];

		const float
		total = lanes.x + lanes.y + lanes.z + lanes.w;

		out[0] = (float4)(total);
	}

	ircd_simt_broadcast_f4lldr(out, num, i);

	// Every element now holds the total; divide by the scalar count.
	const float4
	total = out[i];

	out[i] = total / (num * 4);
}
|
33
include/ircd/simt/norm.h
Normal file
33
include/ircd/simt/norm.h
Normal file
|
@ -0,0 +1,33 @@
|
|||
// Matrix Construct
|
||||
//
|
||||
// Copyright (C) Matrix Construct Developers, Authors & Contributors
|
||||
// Copyright (C) 2016-2021 Jason Volk <jason@zemos.net>
|
||||
//
|
||||
// Permission to use, copy, modify, and/or distribute this software for any
|
||||
// purpose with or without fee is hereby granted, provided that the above
|
||||
// copyright notice and this permission notice is present in all copies. The
|
||||
// full license for this software is available in the LICENSE file.
|
||||
|
||||
/// Normalize the input, placing the result in possibly overlapping output.
/// Each thread subtracts the mean of the whole input from its own element,
/// then divides by the square root of the mean squared deviation plus a
/// small epsilon (0.00001).
///
/// This procedure requires the additional temporary buffer `tmp`, which is
/// overwritten. Both means are computed by ircd_simt_math_mean_f4lldr(), so
/// all threads in the group have to call this together.
inline void
ircd_simt_math_norm_f4lldr(__local float4 *const out,
                           __local const float4 *const in,
                           __local float4 *const restrict tmp,
                           const uint num,
                           const uint i)
{
	const float4
	epsilon = 0.00001f;

	// Mean of the input; afterwards tmp[i] holds it for this thread.
	ircd_simt_math_mean_f4lldr(tmp, in, num, i);

	// Keep the centered value in private memory: `out` may overlap `in` and
	// is overwritten by the second mean below.
	const float4
	centered = in[i] - tmp[i];

	// Mean of the squared deviations; afterwards out[i] holds it.
	tmp[i] = pow(centered, 2);
	ircd_simt_math_mean_f4lldr(out, tmp, num, i);

	const float4
	deviation = sqrt(out[i] + epsilon);

	out[i] = centered / deviation;
}
|
45
include/ircd/simt/rand.h
Normal file
45
include/ircd/simt/rand.h
Normal file
|
@ -0,0 +1,45 @@
|
|||
// Matrix Construct
|
||||
//
|
||||
// Copyright (C) Matrix Construct Developers, Authors & Contributors
|
||||
// Copyright (C) 2016-2021 Jason Volk <jason@zemos.net>
|
||||
//
|
||||
// Permission to use, copy, modify, and/or distribute this software for any
|
||||
// purpose with or without fee is hereby granted, provided that the above
|
||||
// copyright notice and this permission notice is present in all copies. The
|
||||
// full license for this software is available in the LICENSE file.
|
||||
|
||||
/// Generate the next pseudo-random 64-bit value from the 256-bit state in `s`
/// and update the state in place for the next call. The state transition
/// follows the xoshiro256+ reference generator; per that reference the state
/// must be seeded so that it is not zero everywhere.
inline ulong
ircd_simt_rand_xoshiro256p(ulong s[4])
{
	// The output and the shifted word are taken from the state as it was on
	// entry, before any of the mixing below.
	const ulong
	ret = s[0] + s[3],
	ent = s[1] << 17;

	s[2] ^= s[0];
	s[3] ^= s[1];
	s[1] ^= s[2]; // mixes in s[2], not s[3], as in the reference generator
	s[0] ^= s[3];
	s[2] ^= ent;

	// Left rotation by 45 bits: (s[3] << 45) | (s[3] >> (64 - 45))
	s[3] = rotate(s[3], 45UL);

	return ret;
}
|
||||
|
||||
/// Variant of ircd_simt_rand_xoshiro256p() for a 256-bit state residing in
/// global memory: the four state words are copied into a private array, the
/// generator is stepped once on that copy, and the updated words are stored
/// back to `s`. Returns the generated 64-bit value.
///
/// The load/step/store sequence uses plain accesses, without atomics or
/// barriers.
inline ulong
ircd_simt_rand_xoshiro256pg(__global ulong s[4])
{
	// Private working copy of the global state.
	ulong state[4] =
	{
		s[0], s[1], s[2], s[3],
	};

	const ulong
	value = ircd_simt_rand_xoshiro256p(state);

	// Publish the advanced state.
	for(uint w = 0; w < 4; ++w)
		s[w] = state[w];

	return value;
}
|
42
include/ircd/simt/reduce.h
Normal file
42
include/ircd/simt/reduce.h
Normal file
|
@ -0,0 +1,42 @@
|
|||
// Matrix Construct
|
||||
//
|
||||
// Copyright (C) Matrix Construct Developers, Authors & Contributors
|
||||
// Copyright (C) 2016-2021 Jason Volk <jason@zemos.net>
|
||||
//
|
||||
// Permission to use, copy, modify, and/or distribute this software for any
|
||||
// purpose with or without fee is hereby granted, provided that the above
|
||||
// copyright notice and this permission notice is present in all copies. The
|
||||
// full license for this software is available in the LICENSE file.
|
||||
|
||||
/// Sum all `ln` elements in the buffer; each of the four lanes is summed
/// independently. All threads in the group participate and must pass the
/// same `ln`; the result is placed in index [0], the rest of the buffer is
/// trashed.
///
/// The buffer is folded onto its lower part once per pass. `ln` may be any
/// count: when the remaining length is odd, the middle element is carried
/// into the next pass rather than dropped. For a power-of-two `ln` the passes
/// are exactly those of plain halving.
///
/// A barrier(CLK_LOCAL_MEM_FENCE) precedes every pass but none follows the
/// last one, so only the thread with `li == 0` may read the result without
/// further synchronization.
inline void
ircd_simt_reduce_add_f4lldr(__local float4 *const buf,
                            const uint ln,
                            const uint li)
{
	for(uint len = ln; len > 1; )
	{
		const uint
		pairs = len >> 1,    // elements folded away by this pass
		keep = len - pairs;  // ceil(len / 2) elements remain afterwards

		barrier(CLK_LOCAL_MEM_FENCE);

		if(li < pairs)
			buf[li] += buf[li + keep];

		len = keep;
	}
}
|
||||
|
||||
/// Find the greatest value among the `ln` elements in the buffer. All threads
/// in the group participate and must pass the same `ln`; the greatest value
/// is placed in index [0], the rest of the buffer is trashed. Note that
/// despite the `f4` in its name this function takes scalar `float` elements.
///
/// The buffer is folded onto its lower part once per pass. `ln` may be any
/// count: when the remaining length is odd, the middle element is carried
/// into the next pass rather than dropped. For a power-of-two `ln` the passes
/// are exactly those of plain halving.
///
/// A barrier(CLK_LOCAL_MEM_FENCE) precedes every pass but none follows the
/// last one, so only the thread with `li == 0` may read the result without
/// further synchronization.
inline void
ircd_simt_reduce_max_f4lldr(__local float *const buf,
                            const uint ln,
                            const uint li)
{
	for(uint len = ln; len > 1; )
	{
		const uint
		pairs = len >> 1,    // elements folded away by this pass
		keep = len - pairs;  // ceil(len / 2) elements remain afterwards

		barrier(CLK_LOCAL_MEM_FENCE);

		if(li < pairs)
			buf[li] = max(buf[li], buf[li + keep]);

		len = keep;
	}
}
|
19
include/ircd/simt/simt.h
Normal file
19
include/ircd/simt/simt.h
Normal file
|
@ -0,0 +1,19 @@
|
|||
// Matrix Construct
|
||||
//
|
||||
// Copyright (C) Matrix Construct Developers, Authors & Contributors
|
||||
// Copyright (C) 2016-2021 Jason Volk <jason@zemos.net>
|
||||
//
|
||||
// Permission to use, copy, modify, and/or distribute this software for any
|
||||
// purpose with or without fee is hereby granted, provided that the above
|
||||
// copyright notice and this permission notice is present in all copies. The
|
||||
// full license for this software is available in the LICENSE file.
|
||||
|
||||
#pragma once
|
||||
#define HAVE_IRCD_SIMT_H
|
||||
|
||||
#include "broadcast.h"
|
||||
#include "reduce.h"
|
||||
#include "sort.h"
|
||||
#include "mean.h"
|
||||
#include "norm.h"
|
||||
#include "rand.h"
|
32
include/ircd/simt/sort.h
Normal file
32
include/ircd/simt/sort.h
Normal file
|
@ -0,0 +1,32 @@
|
|||
// Matrix Construct
|
||||
//
|
||||
// Copyright (C) Matrix Construct Developers, Authors & Contributors
|
||||
// Copyright (C) 2016-2021 Jason Volk <jason@zemos.net>
|
||||
//
|
||||
// Permission to use, copy, modify, and/or distribute this software for any
|
||||
// purpose with or without fee is hereby granted, provided that the above
|
||||
// copyright notice and this permission notice is present in all copies. The
|
||||
// full license for this software is available in the LICENSE file.
|
||||
|
||||
/// Sort the `ln` indices in `idx` so that the values they point to in `val`
/// are in descending order; `idx[0]` ends up referring to the greatest value.
/// Only `idx` is permuted, `val` is never written. Entries whose values
/// compare equal (or unordered) are never exchanged with each other.
///
/// This is a bitonic sorting network in which every compare-exchange moves
/// the index of the greater value to the lower slot. Because all exchanges
/// point the same way, pairs reaching beyond `ln` can simply be skipped, so
/// `ln` does not have to be a power of two.
///
/// All threads in the group participate and must pass the same `ln`; the
/// thread owning the lower slot of a pair performs the exchange, so slots
/// up to `ln - 2` need a thread with that `li`. A
/// barrier(CLK_LOCAL_MEM_FENCE) precedes every pass but none follows the
/// last one; synchronize before reading slots written by other threads.
inline void
ircd_simt_sort_idx16_flldr(__local ushort *const idx,
                           __global const float *const val,
                           const uint ln,
                           const uint li)
{
	// Merge sorted runs of `run / 2` slots into sorted runs of `run` slots
	// until one run covers all `ln` slots.
	for(uint run = 2; (run >> 1) < ln; run <<= 1)
	{
		// The first pass of a merge pairs every slot with its mirror image
		// inside the run; the following passes pair slots `mask` apart.
		uint mask = run - 1;
		for(uint step = run >> 1; step > 0; step >>= 1)
		{
			const uint
			peer = li ^ mask;

			barrier(CLK_LOCAL_MEM_FENCE);

			// The lower slot of each pair does the work; pairs that would
			// touch a slot at or beyond `ln` are left alone.
			if(li < peer && peer < ln)
			{
				const ushort
				ours = idx[li],
				theirs = idx[peer];

				if(val[ours] < val[theirs])
				{
					idx[li] = theirs;
					idx[peer] = ours;
				}
			}

			mask = step >> 1;
		}
	}
}
|
Loading…
Reference in a new issue