mirror of
https://github.com/matrix-construct/construct
synced 2024-12-12 16:43:07 +01:00
87 lines
2.1 KiB
C
87 lines
2.1 KiB
C
// Matrix Construct
|
|
//
|
|
// Copyright (C) Matrix Construct Developers, Authors & Contributors
|
|
// Copyright (C) 2016-2021 Jason Volk <jason@zemos.net>
|
|
//
|
|
// Permission to use, copy, modify, and/or distribute this software for any
|
|
// purpose with or without fee is hereby granted, provided that the above
|
|
// copyright notice and this permission notice is present in all copies. The
|
|
// full license for this software is available in the LICENSE file.
|
|
|
|
#pragma once
|
|
#define HAVE_IRCD_SIMT_SORT_H
|
|
|
|
#ifdef __OPENCL_VERSION__
|
|
inline bool
|
|
ircd_simt_sort_idx16_cmpxchg(__local ushort *const idx,
|
|
__global const float *const val,
|
|
const uint ai,
|
|
const uint bi,
|
|
const bool parity)
|
|
{
|
|
const ushort
|
|
a = idx[ai],
|
|
b = idx[bi];
|
|
|
|
const bool
|
|
lt = val[a] < val[b],
|
|
swap = (lt && !parity) || (!lt && parity);
|
|
|
|
if(swap)
|
|
{
|
|
idx[ai] = b;
|
|
idx[bi] = a;
|
|
}
|
|
|
|
return swap;
|
|
}
|
|
#endif
|
|
|
|
#ifdef __OPENCL_VERSION__
|
|
inline bool
|
|
ircd_simt_sort_idx16_trick(__local ushort *const idx,
|
|
__global const float *const val,
|
|
const uint li,
|
|
const uint stride,
|
|
const bool parity)
|
|
{
|
|
const bool
|
|
active = (li % (stride << 1)) < stride;
|
|
|
|
if(!active)
|
|
return false;
|
|
|
|
const uint
|
|
oi = li + stride;
|
|
|
|
return ircd_simt_sort_idx16_cmpxchg(idx, val, li, oi, parity);
|
|
}
|
|
#endif
|
|
|
|
#ifdef __OPENCL_VERSION__
|
|
/// Sort indices in `idx` which point to values contained in `val`.
|
|
inline void
|
|
ircd_simt_sort_idx16_flldr(__local ushort *const idx,
|
|
__global const float *const val,
|
|
const uint ln,
|
|
const uint li)
|
|
{
|
|
assert(ircd_math_is_pow2(ln));
|
|
|
|
#pragma clang loop unroll(disable)
|
|
for(uint up = 1; up < ln; up <<= 1)
|
|
{
|
|
const bool
|
|
parity = li % (up << 2) > up;
|
|
|
|
barrier(CLK_LOCAL_MEM_FENCE);
|
|
ircd_simt_sort_idx16_trick(idx, val, li, up, parity);
|
|
|
|
for(uint down = up >> 1; down > 0; down >>= 1)
|
|
{
|
|
barrier(CLK_LOCAL_MEM_FENCE);
|
|
ircd_simt_sort_idx16_trick(idx, val, li, down, parity);
|
|
}
|
|
}
|
|
}
|
|
#endif
|