0
0
Fork 0
mirror of https://github.com/matrix-construct/construct synced 2024-12-28 00:14:07 +01:00
construct/include/ircd/cl.h

305 lines
8 KiB
C++

// The Construct
//
// Copyright (C) The Construct Developers, Authors & Contributors
// Copyright (C) 2016-2021 Jason Volk <jason@zemos.net>
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice is present in all copies. The
// full license for this software is available in the LICENSE file.
#pragma once
#define HAVE_IRCD_CL_H
/// OpenCL Interface
namespace ircd::cl
{
	// Forward declarations of the types this header goes on to define.
	struct init;
	struct exec;
	struct kern;
	struct code;
	struct data;
	struct work;

	// Exception types of this subsystem, generated by IRCD_EXCEPTION.
	// NOTE(review): presumably the first macro argument is the parent type,
	// making opencl_error a cl::error and cl::error an ircd::error; confirm
	// against the macro's definition.
	IRCD_EXCEPTION(ircd::error, error)
	IRCD_EXCEPTION(error, opencl_error)

	// Callback signatures taken by the closure-accepting cl::exec
	// constructors; one is handed a read-only view, the other a writable one.
	using read_closure = std::function<void (const_buffer)>;
	using write_closure = std::function<void (mutable_buffer)>;

	// Version descriptors; note only the API descriptor is const.
	extern const info::versions version_api;
	extern info::versions version_abi;

	// Configuration items of this subsystem.
	extern conf::item<bool> profile_queue;
	extern conf::item<bool> enable;

	// Log facility of this subsystem.
	extern log::log log;

	// Maps a numeric error code to a string.
	string_view reflect_error(const int code) noexcept;

	// Device information logging, selectable by platform and device id, by
	// platform id alone, or with no selector at all.
	void log_dev_info(const uint platform_id, const uint device_id);
	void log_dev_info(const uint platform_id);
	void log_dev_info();

	// Subsystem-wide counterparts of the per-exec flush/sync options.
	void flush();
	void sync();
}
/// cl_event wrapping
struct ircd::cl::work
:instance_list<cl::work>
{
	/// Opaque event handle; null when nothing is wrapped.
	void *handle {nullptr};

	/// The ircd::ctx associated with this work; defaults to the ctx which
	/// is current when the instance is constructed.
	ctx::ctx *context {ctx::current};

	// Static setup and teardown. Only fini() carries noexcept; the
	// specifier belongs to a single declarator, so these are spelled as
	// two separate declarations.
	static void init();
	static void fini() noexcept;

public:
	// NOTE(review): the meaning of the four values is not visible in this
	// header; presumably profiling figures for the event -- confirm in the
	// implementation.
	std::array<uint64_t, 4> profile() const;

	// Wait on this work item.
	// NOTE(review): semantics of the unsigned argument (default 0) are not
	// visible here.
	void wait(const uint = 0);

	explicit work(void *const &handle); // note: RetainEvent()
	work() noexcept;

	// Movable, not copyable.
	work(work &&) noexcept;
	work(const work &) = delete;
	work &operator=(work &&) noexcept;
	work &operator=(const work &) = delete;
	~work() noexcept;
};
/// cl_mem wrapping
struct ircd::cl::data
{
	/// Opaque memory-object handle; null when nothing is wrapped.
	void *handle {nullptr};

public:
	// Observers; ptr() is host only.
	uint flags() const;
	size_t size() const;
	char *ptr() const;

	// Device read-write, with an explicit size.
	// NOTE(review): `wonly` presumably means write-only; confirm.
	data(const size_t, const mutable_buffer &, const bool wonly = false);

	// Device read-only, with an explicit size.
	data(const size_t, const const_buffer &);

	// Host read-write.
	data(const mutable_buffer &, const bool wonly = false);

	// Host read-only.
	data(const const_buffer &);

	// Subbuffer of another data, selected by a (size_t, off_t) pair.
	data(data &, const pair<size_t, off_t> &);

	// Default constructible and movable; not copyable.
	data(const data &) = delete;
	data() = default;
	data(data &&) noexcept;
	data &operator=(const data &) = delete;
	data &operator=(data &&) noexcept;
	~data() noexcept;
};
/// cl_program wrapping
struct ircd::cl::code
{
	/// Opaque program handle; null when nothing is wrapped.
	void *handle {nullptr};

public:
	// Build the program, optionally passing an options string.
	void build(const string_view &opts = {});

	// Construct from one or more binaries (explicit), from several sources,
	// or from a single source; each accepts an optional options string.
	explicit code(const vector_view<const const_buffer> &bins, const string_view &opts = {});
	code(const vector_view<const string_view> &srcs, const string_view &opts = {});
	code(const string_view &src, const string_view &opts = {});

	// Default constructible and movable; not copyable. The copy constructor
	// is already implicitly deleted by the user-declared move operations;
	// it is spelled out so this wrapper reads the same as cl::work and
	// cl::data.
	code() = default;
	code(code &&) noexcept;
	code(const code &) = delete;
	code &operator=(const code &) = delete;
	code &operator=(code &&) noexcept;
	~code() noexcept;
};
/// cl_kernel wrapping
struct ircd::cl::kern
{
	struct range;

	/// Opaque kernel handle; null when nothing is wrapped.
	void *handle {nullptr};

public:
	// Per-device queries about this kernel.
	// NOTE(review): the null default for `dev` presumably selects some
	// default device; confirm in the implementation.
	std::array<size_t, 3> compile_group_size(void *dev = nullptr) const;
	size_t preferred_group_size_multiple(void *dev = nullptr) const;
	size_t group_size(void *dev = nullptr) const;
	size_t local_mem_size(void *dev = nullptr) const;
	size_t stack_mem_size(void *dev = nullptr) const;

	// Set the argument at a position: from a cl::data, from a buffer view,
	// or from any other value (the template is defined further down this
	// header and rejects cl::data at compile time).
	void arg(const int, data &);
	void arg(const int, const const_buffer &);
	template<class T> void arg(const int, const T &);

	// Construct from a program and kernel name; the variadic form also sets
	// the trailing cl::data arguments positionally.
	template<class... argv> kern(code &, const string_view &name, argv&&...);
	kern(code &, const string_view &name);

	// Default constructible and movable; not copyable. The copy constructor
	// is already implicitly deleted by the user-declared move operations;
	// it is spelled out so this wrapper reads the same as cl::work and
	// cl::data.
	kern() = default;
	kern(kern &&) noexcept;
	kern(const kern &) = delete;
	kern &operator=(const kern &) = delete;
	kern &operator=(kern &&) noexcept;
	~kern() noexcept;
};
/// NDRangeKernel dimension range selector
struct ircd::cl::kern::range
{
	// Each selector holds five entries, all zero unless set by the user.
	std::array<size_t, 5> global { 0, 0, 0, 0, 0 };
	std::array<size_t, 5> local { 0, 0, 0, 0, 0 };
	std::array<size_t, 5> offset { 0, 0, 0, 0, 0 };
};
/// Construction enqueues the task; destruction waits for completion.
///
/// This is the clEnqueue* family expressed as constructors, with the
/// resulting cl_event wrapped by the cl::work base. An instance represents
/// the whole lifecycle of a piece of work: creation, submission, completion.
///
/// The interface is tied directly to ircd::ctx for intuitive control flow and
/// interaction with the device. By default every construction depends on the
/// last construction made on the same ircd::ctx; this gives sequential
/// consistency within each ircd::ctx and independence between different
/// ctxs. An instance destructs only when complete, otherwise the ircd::ctx
/// blocks in the destructor.
struct ircd::cl::exec
:work
{
	struct opts;

	static const opts opts_default;

	// View data written by the device to the GTT (synchronous closure).
	exec(data &, const pair<size_t, off_t> &, const read_closure &, const opts & = opts_default);

	// View buffer in the GTT which the device will read (synchronous closure).
	exec(data &, const pair<size_t, off_t> &, const write_closure &, const opts & = opts_default);

	// Copy data from the buffer to the GTT for use by the device.
	exec(data &, const const_buffer &, const opts & = opts_default);

	// Copy data written by the device to the GTT into our buffer.
	exec(data &, const mutable_buffer &, const opts & = opts_default);

	// Copy data directly between buffers.
	exec(data &, const data &, const opts & = opts_default);

	// Execute a kernel on a range.
	exec(kern &, const kern::range &, const opts & = opts_default);

	// Execute a kernel on a range; same as above with the options given
	// before the range.
	exec(kern &, const opts &, const kern::range &);

	// Execute a barrier.
	exec(const opts &);

	// No-op
	exec() = default;
};
/// Options for an exec.
struct ircd::cl::exec::opts
{
	/// Explicit list of dependencies. When given, the list replaces the
	/// default sequential behavior, so it can be used to start new dependency
	/// chains for some task concurrency on the same ircd::ctx. Giving a
	/// single reference to the last exec on the same stack is equivalent to
	/// the default.
	vector_view<cl::exec> deps;

	/// For operations which have a size; otherwise ignored, or serves as
	/// the sentinel for automatic size.
	size_t size {0};

	/// For operations which have an offset (or two); otherwise ignored.
	off_t offset[2] {0};

	/// Starts a new dependency chain; allows empty deps without an implicit
	/// dependency on the last work item constructed on the ircd::ctx.
	bool indep {false};

	/// For operations which plan to both read and write to the GTT, set to
	/// true and execute the write_closure; otherwise ignored. Can be used
	/// to de-optimize the write_closure, which is unidirectional by default.
	bool duplex {false};

	/// For operations with an optional blocking behavior; otherwise
	/// ignored. This is a thread-level blocking mechanism which does not
	/// yield the ircd::ctx; for testing/special use only.
	bool blocking {false};

	/// Perform a flush of the queue directly after submit.
	bool flush {false};

	/// Perform a sync of the queue directly after submit; this blocks in
	/// the ctor, and all work will be complete at full construction.
	bool sync {false};
};
/// Subsystem initialization object. The constructor and destructor are
/// declared here; when IRCD_USE_OPENCL is not defined this header supplies
/// empty inline definitions for both.
struct ircd::cl::init
{
	init();
	~init() noexcept;
};
#ifndef IRCD_USE_OPENCL
// Without IRCD_USE_OPENCL the init object does nothing on construction or
// destruction; both are empty inline stubs.
inline
ircd::cl::init::init()
{
}

inline
ircd::cl::init::~init()
noexcept
{
}
#endif
/// Move construction: adopt the handle and context of `other`, leaving
/// `other` with null for both.
inline
ircd::cl::work::work(work &&other)
noexcept
:handle
{
	std::exchange(other.handle, nullptr)
}
,context
{
	std::exchange(other.context, nullptr)
}
{
}
/// Move assignment: adopt the handle and context of `other`, leaving `other`
/// with null for both. Whatever this instance held beforehand is disposed of
/// by the destructor of a local temporary before returning.
///
/// The previous state is handed to a temporary instead of being released with
/// an explicit `this->~work()`. An explicit destructor call ends the lifetime
/// of the whole object, including the instance_list<cl::work> base subobject;
/// writing the members afterwards, and destroying the object again later, is
/// undefined behavior. Going through the temporary also makes
/// self-move-assignment leave the instance unchanged rather than dropping
/// the event it holds.
inline ircd::cl::work &
ircd::cl::work::operator=(work &&other)
noexcept
{
	// Empties `other`; for self-assignment this empties *this momentarily.
	work stale
	{
		std::move(other)
	};

	// Trade our previous state for the incoming one; `stale` now owns the
	// previous state and releases it through ~work() at scope exit.
	std::swap(handle, stale.handle);
	std::swap(context, stale.context);
	return *this;
}
/// Construct the kernel `name` from the program `c` by delegating to the
/// two-argument constructor, then set every trailing argument positionally:
/// the first trailing argument becomes argument 0, the next argument 1, and
/// so on. Each trailing argument must be addressable as a cl::data, since
/// its address is collected into an array of data pointers.
template<class... argv>
inline
ircd::cl::kern::kern(code &c,
                     const string_view &name,
                     argv&&... a)
:kern{c, name}
{
	constexpr uint argc {sizeof...(a)};

	data *const datas[argc] {std::addressof(a)...};

	// Walk the collected pointers in pack order with a running position.
	uint pos {0};
	for(data *const bound : datas)
		this->arg(pos++, *bound);
}
template<class T>
inline void
ircd::cl::kern::arg(const int pos,
const T &val)
{
static_assert(!std::is_same<T, cl::data>());
arg(pos, const_buffer
{
reinterpret_cast<const char *>(&val), sizeof(T)
});
}
/// Kernel execution with the options given before the range; delegates to
/// the (kern, range, opts) constructor with the last two arguments swapped.
inline
ircd::cl::exec::exec(kern &kern,
                     const opts &opts,
                     const kern::range &range)
:exec{kern, range, opts}
{
}