0
0
Fork 0
mirror of https://github.com/matrix-construct/construct synced 2025-01-16 17:46:54 +01:00

ircd::simt: Start a CLC toolchest.

This commit is contained in:
Jason Volk 2021-04-02 12:51:59 -07:00
parent 29e74ec9e1
commit 5962a58c1e
7 changed files with 228 additions and 0 deletions

View file

@ -0,0 +1,25 @@
// Matrix Construct
//
// Copyright (C) Matrix Construct Developers, Authors & Contributors
// Copyright (C) 2016-2021 Jason Volk <jason@zemos.net>
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice is present in all copies. The
// full license for this software is available in the LICENSE file.
/// Broadcast originating from the local leader (index [0]): on return every
/// element of the buffer holds a copy of buf[0]. All threads in the group
/// participate and must call this together, since every round ends on a
/// barrier.
///
/// buf - local buffer of `ln` elements; buf[0] carries the value to spread.
/// ln  - number of elements in `buf`; does not need to be a power of two.
/// li  - index of the calling thread within the group.
inline void
ircd_simt_broadcast_f4lldr(__local float4 *const buf,
                           const uint ln,
                           const uint li)
{
	// Each round copies the already-populated prefix [0, stride) onto
	// [stride, 2 * stride), doubling the populated region.
	for(uint stride = 1; stride < ln; stride <<= 1)
	{
		// The second condition clips the final round when `ln` is not a
		// power of two; without it the copy would land past the buffer end.
		if(li < stride && li + stride < ln)
			buf[li + stride] = buf[li];

		// Kept outside the conditional so every thread reaches the barrier.
		barrier(CLK_LOCAL_MEM_FENCE);
	}
}

32
include/ircd/simt/mean.h Normal file
View file

@ -0,0 +1,32 @@
// Matrix Construct
//
// Copyright (C) Matrix Construct Developers, Authors & Contributors
// Copyright (C) 2016-2021 Jason Volk <jason@zemos.net>
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice is present in all copies. The
// full license for this software is available in the LICENSE file.
/// Compute average of all elements in the input. The result is broadcast
/// to all elements of the output. All threads in the group participate.
///
/// out - local buffer of `num` vectors; used as scratch space for the
///       reduction, then filled with the mean replicated in every lane.
/// in  - local buffer of `num` vectors to average; must not alias `out`.
/// num - number of float4 elements; the divisor is `num * 4` scalars.
/// i   - index of the calling thread within the group.
inline void
ircd_simt_math_mean_f4lldr(__local float4 *const restrict out,
                           __local const float4 *const restrict in,
                           const uint num,
                           const uint i)
{
	out[i] = in[i];
	ircd_simt_reduce_add_f4lldr(out, num, i);

	// The reduction leaves its total in out[0] and does not synchronize after
	// its final step, so only the leader may touch the buffer here. Letting
	// every thread rewrite its own slot at this point would race with the
	// leader's last read and with the stores made by the broadcast below.
	if(i == 0)
	{
		const float4 sum = out[0];

		// Horizontal add of the four lanes gives the scalar total.
		float numerator = 0.0f;
		for(uint k = 0; k < 4; ++k)
			numerator += sum[k];

		// The scalar quotient is splatted across all four lanes on store.
		out[0] = numerator / (num * 4);
	}

	// Other threads stay parked on the broadcast's first barrier until the
	// leader has stored the mean and joined them.
	ircd_simt_broadcast_f4lldr(out, num, i);
}

33
include/ircd/simt/norm.h Normal file
View file

@ -0,0 +1,33 @@
// Matrix Construct
//
// Copyright (C) Matrix Construct Developers, Authors & Contributors
// Copyright (C) 2016-2021 Jason Volk <jason@zemos.net>
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice is present in all copies. The
// full license for this software is available in the LICENSE file.
/// Normalize the input: subtract the mean and divide by the square root of
/// the variance plus a small epsilon. The result may be written to an output
/// that overlaps the input. A separate temporary buffer of the same size is
/// required. All threads in the group participate.
///
/// out - local buffer of `num` vectors receiving the normalized values.
/// in  - local buffer of `num` vectors to normalize.
/// tmp - local scratch buffer of `num` vectors.
/// num - number of float4 elements.
/// i   - index of the calling thread within the group.
inline void
ircd_simt_math_norm_f4lldr(__local float4 *const out,
                           __local const float4 *const in,
                           __local float4 *const restrict tmp,
                           const uint num,
                           const uint i)
{
	// First pass: the mean of the input ends up in every slot of tmp.
	ircd_simt_math_mean_f4lldr(tmp, in, num, i);

	// Hold the centered value privately; the second pass reuses `out` as
	// scratch, and `out` is allowed to overlap `in`.
	const float4 centered = in[i] - tmp[i];

	// Second pass: the mean of the squared deviations ends up in out.
	tmp[i] = pow(centered, 2);
	ircd_simt_math_mean_f4lldr(out, tmp, num, i);

	// Epsilon keeps the divisor away from zero for constant input.
	const float4 eps = 0.00001f;
	const float4 variance = out[i];
	const float4 deviation = sqrt(variance + eps);

	out[i] = centered / deviation;
}

45
include/ircd/simt/rand.h Normal file
View file

@ -0,0 +1,45 @@
// Matrix Construct
//
// Copyright (C) Matrix Construct Developers, Authors & Contributors
// Copyright (C) 2016-2021 Jason Volk <jason@zemos.net>
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice is present in all copies. The
// full license for this software is available in the LICENSE file.
/// Generate the next pseudo-random 64-bit sequence from the 256-bit state
/// and update the state for the next call (xoshiro256+).
///
/// s - four 64-bit words of generator state, modified in place.
///
/// Returns the sum of the first and last state words as they were before
/// this call's update.
inline ulong
ircd_simt_rand_xoshiro256p(ulong s[4])
{
	const ulong
	ret = s[0] + s[3],
	ent = s[1] << 17;

	// The order of these statements matters: each line consumes words already
	// updated by the lines above it.
	s[2] ^= s[0];
	s[3] ^= s[1];
	s[1] ^= s[2];  // must fold in s[2]; using s[3] here cancels s[1] back out
	s[0] ^= s[3];
	s[2] ^= ent;
	s[3] = rotate(s[3], 45UL); // (s[3] << 45) | (s[3] >> (64 - 45));
	return ret;
}
/// Generate the next pseudo-random 64-bit sequence from a 256-bit state kept
/// in global memory, and store the advanced state back for the next call.
///
/// s - four 64-bit words of generator state in global memory.
///
/// The state is copied into a private array, advanced there, then written
/// back. Nothing here synchronizes access to `s`.
inline ulong
ircd_simt_rand_xoshiro256pg(__global ulong s[4])
{
	// Private working copy of the global state.
	ulong state[4] =
	{
		s[0], s[1], s[2], s[3]
	};

	const ulong ret = ircd_simt_rand_xoshiro256p(state);

	// Publish the advanced state.
	for(uint k = 0; k < 4; ++k)
		s[k] = state[k];

	return ret;
}

View file

@ -0,0 +1,42 @@
// Matrix Construct
//
// Copyright (C) Matrix Construct Developers, Authors & Contributors
// Copyright (C) 2016-2021 Jason Volk <jason@zemos.net>
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice is present in all copies. The
// full license for this software is available in the LICENSE file.
/// Sum all elements in the buffer. All threads in the group participate;
/// result is placed in index [0], the rest of the buffer is trashed.
///
/// buf - local buffer of `ln` vectors, reduced in place.
/// ln  - number of elements in `buf`; does not need to be a power of two.
/// li  - index of the calling thread within the group.
///
/// There is no barrier after the final step, so only the leader (li == 0)
/// can rely on reading buf[0] without further synchronization.
inline void
ircd_simt_reduce_add_f4lldr(__local float4 *const buf,
                            const uint ln,
                            const uint li)
{
	// `len` is the live prefix still holding partial sums. Each round folds
	// the upper part onto the lower part and shrinks the prefix to the
	// rounded-up half, so an odd middle element is carried into the next
	// round rather than dropped. For power-of-two `ln` the strides are exactly
	// ln/2, ln/4, ..., 1.
	for(uint len = ln; len > 1; len -= len >> 1)
	{
		const uint stride = len - (len >> 1);

		// Make the previous round's (or the caller's) stores visible first.
		barrier(CLK_LOCAL_MEM_FENCE);
		if(li + stride < len)
			buf[li] += buf[li + stride];
	}
}
/// Find the greatest value in the buffer. All threads in the group participate;
/// the greatest value is placed in index [0], the rest of the buffer is
/// trashed.
///
/// buf - local buffer of `ln` scalar floats, reduced in place. Despite the
///       `f4` in the function name, the elements are plain `float`.
/// ln  - number of elements in `buf`; does not need to be a power of two.
/// li  - index of the calling thread within the group.
///
/// There is no barrier after the final step, so only the leader (li == 0)
/// can rely on reading buf[0] without further synchronization.
inline void
ircd_simt_reduce_max_f4lldr(__local float *const buf,
                            const uint ln,
                            const uint li)
{
	// `len` is the live prefix still holding candidates. Each round folds the
	// upper part onto the lower part and shrinks the prefix to the rounded-up
	// half, so an odd middle element is carried into the next round rather
	// than dropped. For power-of-two `ln` the strides are exactly
	// ln/2, ln/4, ..., 1.
	for(uint len = ln; len > 1; len -= len >> 1)
	{
		const uint stride = len - (len >> 1);

		// Make the previous round's (or the caller's) stores visible first.
		barrier(CLK_LOCAL_MEM_FENCE);
		if(li + stride < len)
			buf[li] = max(buf[li], buf[li + stride]);
	}
}

19
include/ircd/simt/simt.h Normal file
View file

@ -0,0 +1,19 @@
// Matrix Construct
//
// Copyright (C) Matrix Construct Developers, Authors & Contributors
// Copyright (C) 2016-2021 Jason Volk <jason@zemos.net>
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice is present in all copies. The
// full license for this software is available in the LICENSE file.
#pragma once
#define HAVE_IRCD_SIMT_H
#include "broadcast.h"
#include "reduce.h"
#include "sort.h"
#include "mean.h"
#include "norm.h"
#include "rand.h"

32
include/ircd/simt/sort.h Normal file
View file

@ -0,0 +1,32 @@
// Matrix Construct
//
// Copyright (C) Matrix Construct Developers, Authors & Contributors
// Copyright (C) 2016-2021 Jason Volk <jason@zemos.net>
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice is present in all copies. The
// full license for this software is available in the LICENSE file.
/// Order the indices in `idx` by the values they point to in `val`, larger
/// values toward the front. All threads in the group participate.
///
/// idx - local buffer of `ln` 16-bit indices into `val`; permuted in place.
/// val - global array of values addressed through `idx`; never written.
/// ln  - number of entries in `idx`.
/// li  - index of the calling thread within the group.
///
/// Each round compare-exchanges entry [li] with entry [li + stride] and then
/// halves the stride. For a power-of-two `ln`, one call leaves the index of
/// the greatest value at idx[0]; the remaining entries are only partially
/// ordered.
/// NOTE(review): confirm callers need no more than that from a single call.
inline void
ircd_simt_sort_idx16_flldr(__local ushort *const idx,
                           __global const float *const val,
                           const uint ln,
                           const uint li)
{
	uint stride = ln >> 1;
	while(stride > 0)
	{
		// Wait for the exchanges of the previous round.
		barrier(CLK_LOCAL_MEM_FENCE);
		if(li < stride)
		{
			const uint peer = li + stride;
			const ushort
			lo = idx[li],
			hi = idx[peer];

			// Exchange when the farther slot refers to the larger value.
			if(val[lo] < val[hi])
			{
				idx[li] = hi;
				idx[peer] = lo;
			}
		}

		stride >>= 1;
	}
}