
ircd::cl: Refactor interface for map-only/zero-copy approach.

Jason Volk committed 2022-03-19 15:15:23 -07:00
parent aba8b6813e
commit b4e4c06890
3 changed files with 98 additions and 335 deletions
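
This commit replaces the copy-in/copy-out exec constructors with a single mapping interface driven by std::memory_order: acquire-like orders map the buffer into the GTT, release unmaps it and flushes host writes back to the device, and the data object itself now converts to a buffer view over the mapping. Below is a minimal usage sketch assuming only what this diff shows (the seq_cst map is blocking per the switch in the implementation; the example function and its flow are illustrative, not taken from separate documentation):

#include <ircd/ircd.h>

void
example(ircd::cl::data &data)
{
	using namespace ircd;

	// Map into the GTT for read+write; seq_cst additionally blocks
	// until the mapping completes, so the view below is valid.
	cl::exec
	{
		data, std::memory_order_seq_cst
	};

	// Zero-copy host access through the new conversion operator;
	// ptr() resolves to data.mapped while the mapping is live.
	const mutable_buffer buf
	{
		data
	};

	// ... read or write through buf ...

	// Unmap; release enqueues clEnqueueUnmapMemObject, which flushes
	// host writes back to the device.
	cl::exec
	{
		data, std::memory_order_release
	};
}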


@@ -17,14 +17,17 @@ struct ircd::cl::data
static conf::item<size_t> gart_page_size;
void *handle {nullptr};
void *mapped {nullptr};
public:
uint flags() const;
size_t size() const;
off_t offset() const;
char *ptr() const; // host only
size_t maps() const;
size_t refs() const;
char *ptr() const;
operator const_buffer() const;
operator mutable_buffer() const;
data(const size_t, const bool host_rd = false, const bool host_wr = false);
data(const mutable_buffer &, const bool dev_wonly = false); // host rw
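
Note the interface change above: maps() is dropped, ptr() is no longer host-only, and the new conversion operators expose const_buffer/mutable_buffer views. A standalone sketch of the ptr() resolution order introduced further down in this commit; the free-function name is hypothetical, the logic mirrors the diff (live mapping wins, CL_MEM_HOST_PTR query as fallback):

#include <CL/cl.h>

// Hypothetical stand-in for ircd::cl::data::ptr(): prefer the live
// mapping, else query the host pointer from the cl_mem object.
static char *
data_ptr(cl_mem handle, void *mapped)
{
	if(mapped)
		return static_cast<char *>(mapped);

	void *host_ptr {nullptr};
	clGetMemObjectInfo(handle, CL_MEM_HOST_PTR, sizeof(host_ptr), &host_ptr, nullptr);
	return static_cast<char *>(host_ptr);
}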


@@ -29,17 +29,8 @@ struct ircd::cl::exec
static const opts opts_default;
// View buffer in the GTT which the device will read (synchronous closure).
exec(data &, const pair<size_t, off_t> &, const write_closure & = nullptr, const opts & = opts_default);
// View data written by the device to the GTT (synchronous closure).
exec(data &, const pair<size_t, off_t> &, const read_closure &, const opts & = opts_default);
// Copy data from the buffer to the GTT for use by the device.
exec(data &, const const_buffer &, const opts & = opts_default);
// Copy data written by the device to the GTT into our buffer.
exec(data &, const mutable_buffer &, const opts & = opts_default);
exec(data &, const std::memory_order, const opts & = opts_default);
// Copy data directly between buffers.
exec(data &, const data &, const opts & = opts_default);
@@ -84,11 +75,6 @@ struct ircd::cl::exec::opts
/// dependency on the last work item constructed on the ircd::ctx.
bool indep {false};
/// For operations which plan to both read and write to the GTT, set to
/// true and execute the write_closure; otherwise ignored. Can be used
/// to de-optimize the write_closure, which is unidirectional by default.
bool duplex {false};
/// For operations that have an optional blocking behavior; otherwise
/// ignored. Note that this is a thread-level blocking mechanism and
/// does not yield the ircd::ctx; for testing/special use only.
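
The duplex knob above is removed because the std::memory_order argument on the new exec constructor subsumes it. A sketch of the order-to-flags policy, reconstructed from the switch statement in the implementation hunk below (the helper function is hypothetical; the real code computes the flags inline):

#include <atomic>
#include <CL/cl.h>

// relaxed is a no-op; release (flags == 0) takes the unmap path.
static cl_map_flags
map_flags_for(const std::memory_order order, bool &blocking)
{
	switch(order)
	{
		case std::memory_order_consume:
			return CL_MAP_READ;

		case std::memory_order_acquire:
		case std::memory_order_acq_rel:
			return CL_MAP_READ | CL_MAP_WRITE;

		case std::memory_order_seq_cst:
			blocking = true;
			return CL_MAP_READ | CL_MAP_WRITE;

		default: // relaxed, release
			return 0;
	}
}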


@@ -906,82 +906,10 @@ catch(const std::exception &e)
}
ircd::cl::exec::exec(data &data,
const mutable_buffer &buf,
const std::memory_order order,
const opts &opts)
try
{
auto &q
{
queue[0][0]
};
const size_t size
{
opts.size == -1UL?
ircd::size(buf):
opts.size
};
if(!size)
return;
assert(!this->object);
this->object = &data;
const auto deps
{
make_deps(this, opts)
};
assert(!this->handle);
call
(
clEnqueueReadBuffer,
q,
cl_mem(data.handle),
opts.blocking,
opts.offset[0],
size,
ircd::data(buf),
deps.size(),
deps.size()? deps.data(): nullptr,
reinterpret_cast<cl_event *>(&this->handle)
);
primary_stats.exec_read_bytes += size;
primary_stats.exec_read_tasks += 1;
handle_submitted(this, opts);
}
catch(const std::exception &e)
{
log::error
{
log, "Exec Read data:%p cl_mem:%p buf:%p,%zu :%s",
&data,
data.handle,
ircd::data(buf),
ircd::size(buf),
e.what(),
};
throw;
}
ircd::cl::exec::exec(data &data,
const const_buffer &buf,
const opts &opts)
try
{
const size_t size
{
opts.size == -1UL?
ircd::size(buf):
opts.size
};
if(!size)
return;
if(unlikely(run::level != run::level::RUN))
throw unavailable
{
@@ -990,281 +918,116 @@ try
};
assert(run::level == run::level::RUN);
auto &q
const auto max_size
{
queue[0][0]
};
assert(!this->object);
this->object = &data;
const auto deps
{
make_deps(this, opts)
};
assert(!this->handle);
call
(
clEnqueueWriteBuffer,
q,
cl_mem(data.handle),
opts.blocking,
opts.offset[0],
size,
mutable_cast(ircd::data(buf)),
deps.size(),
deps.size()? deps.data(): nullptr,
reinterpret_cast<cl_event *>(&this->handle)
);
primary_stats.exec_write_bytes += size;
primary_stats.exec_write_tasks += 1;
handle_submitted(this, opts);
}
catch(const std::exception &e)
{
log::error
{
log, "Exec Write data:%p cl_mem:%p buf:%p,%zu :%s",
&data,
data.handle,
ircd::data(buf),
ircd::size(buf),
e.what(),
};
throw;
}
ircd::cl::exec::exec(data &data,
const pair<size_t, off_t> &slice,
const read_closure &closure,
const opts &opts)
try
{
const auto size
{
slice.first?:
opts.size == -1UL?
data.size():
opts.size
};
if(!size)
return;
const auto offset
{
slice.second?:
opts.offset[0]
};
assert(size_t(size) <= data.size());
assert(size_t(offset) <= data.size());
auto &q
{
queue[0][0]
};
assert(!this->object);
this->object = &data;
const auto deps
{
make_deps(this, opts)
};
cl_map_flags flags {0};
flags |= CL_MAP_READ;
int err {CL_SUCCESS};
assert(!this->handle);
void *const ptr
{
clEnqueueMapBuffer
(
q,
cl_mem(data.handle),
opts.blocking,
flags,
offset,
size,
deps.size(),
deps.size()? deps.data(): nullptr,
reinterpret_cast<cl_event *>(&this->handle),
&err
)
};
throw_on_error(err);
primary_stats.exec_read_bytes += size;
primary_stats.exec_read_tasks += 1;
handle_submitted(this, opts);
assert(this->handle);
assert(ptr);
// Perform the unmapping on unwind. This is after the mapping event
// completed and the closure was called below. The unmapping event will
// replace the event handle for this exec instance until its actual dtor;
// thus the lifetime of the exec we are constructing actually represents
// the unmapping event.
const unwind unmap{[this, &data, &q, &ptr, &opts]
{
assert(!this->handle);
call
(
clEnqueueUnmapMemObject,
q,
cl_mem(data.handle),
ptr,
0, // deps
nullptr, // depslist
reinterpret_cast<cl_event *>(&this->handle)
);
handle_submitted(this, opts);
}};
// After the closure is called below, or throws, or if wait() throws,
// we free the completed map event here to allow for the unmap event.
const unwind rehandle{[this]
{
assert(this->handle);
call(clReleaseEvent, cl_event(this->handle));
this->handle = nullptr;
this->work::ts = ircd::cycles();
}};
// Wait for the mapping to complete before presenting the buffer.
wait();
if(likely(closure))
closure(const_buffer
{
reinterpret_cast<const char *>(ptr), size
});
}
catch(const std::exception &e)
{
log::error
{
log, "Exec Read Closure :%s",
e.what(),
};
throw;
}
ircd::cl::exec::exec(data &data,
const pair<size_t, off_t> &slice,
const write_closure &closure,
const opts &opts)
try
{
const auto size
{
slice.first?:
opts.size == -1UL?
data.size():
opts.size
};
if(!size)
return;
if(unlikely(run::level != run::level::RUN))
throw unavailable
{
"Unable to write to device in runlevel %s",
reflect(run::level),
};
assert(run::level == run::level::RUN);
const auto offset
{
slice.second?:
opts.offset[0]
size_t(opts.offset[0]) < max_size?
max_size - opts.offset[0]:
0UL
};
assert(size_t(size) <= data.size());
assert(size_t(offset) <= data.size());
assert(size_t(opts.offset[0]) <= data.size());
if(!size)
return;
bool read {false}, write {false}, invalidate {false};
bool blocking {opts.blocking};
switch(order)
{
case std::memory_order_relaxed:
return;
case std::memory_order_consume:
read = true;
break;
case std::memory_order_acquire:
read = true;
write = true;
break;
case std::memory_order_acq_rel:
read = true;
write = true;
break;
case std::memory_order_seq_cst:
read = true;
write = true;
blocking = true;
break;
case std::memory_order_release:
break;
}
const cl_map_flags flags
{
(boolmask<cl_map_flags>(read) & CL_MAP_READ) |
(boolmask<cl_map_flags>(write) & CL_MAP_WRITE) |
(boolmask<cl_map_flags>(invalidate) & CL_MAP_WRITE_INVALIDATE_REGION)
};
if(!flags && !data.mapped)
return;
auto &q
{
queue[0][0]
};
assert(!this->object);
this->object = &data;
const auto deps
{
make_deps(this, opts)
};
cl_map_flags flags {0};
flags |= opts.duplex || opts.blocking? CL_MAP_WRITE: CL_MAP_WRITE_INVALIDATE_REGION;
flags |= opts.duplex? CL_MAP_READ: 0;
assert(!this->object);
this->object = &data;
int err {CL_SUCCESS};
assert(!this->handle);
void *const ptr
{
clEnqueueMapBuffer
assert(flags || data.mapped);
if(flags)
data.mapped = clEnqueueMapBuffer
(
q,
cl_mem(data.handle),
opts.blocking,
blocking,
flags,
offset,
opts.offset[0],
size,
deps.size(),
deps.size()? deps.data(): nullptr,
reinterpret_cast<cl_event *>(&this->handle),
&err
)
};
throw_on_error(err);
// Account for read operation only when caller maps read/write.
primary_stats.exec_read_bytes += opts.duplex? size: 0UL;
primary_stats.exec_read_tasks += opts.duplex;
handle_submitted(this, opts);
assert(this->handle);
assert(ptr);
const unwind unmap{[this, &data, &q, &ptr, &opts, &size]
{
assert(!this->handle);
);
else
call
(
clEnqueueUnmapMemObject,
q,
cl_mem(data.handle),
ptr,
std::exchange(data.mapped, nullptr),
0, // deps
nullptr, // depslist
reinterpret_cast<cl_event *>(&this->handle)
);
primary_stats.exec_write_bytes += size;
primary_stats.exec_write_tasks += 1;
handle_submitted(this, opts);
}};
const unwind rehandle{[this]
{
assert(this->handle);
call(clReleaseEvent, cl_event(this->handle));
this->handle = nullptr;
this->work::ts = ircd::cycles();
}};
wait();
if(closure)
closure(mutable_buffer
{
reinterpret_cast<char *>(ptr), size
});
throw_on_error(err);
primary_stats.exec_read_bytes += read? size: 0;
primary_stats.exec_read_tasks += read;
primary_stats.exec_write_bytes += write || invalidate? size: 0;
primary_stats.exec_write_tasks += write || invalidate;
handle_submitted(this, opts);
assert(data.mapped || !flags);
assert(this->handle);
}
catch(const std::exception &e)
{
@@ -2314,24 +2077,18 @@ catch(const std::exception &e)
return;
}
size_t
ircd::cl::data::refs()
ircd::cl::data::operator
mutable_buffer()
const
{
assert(handle);
char buf[sizeof(size_t)] {0};
return info<uint>(clGetMemObjectInfo, cl_mem(mutable_cast(handle)), CL_MEM_REFERENCE_COUNT, buf);
return { ptr(), size() };
}
size_t
ircd::cl::data::maps()
ircd::cl::data::operator
const_buffer()
const
{
assert(handle);
char buf[sizeof(size_t)] {0};
return info<uint>(clGetMemObjectInfo, cl_mem(mutable_cast(handle)), CL_MEM_MAP_COUNT, buf);
return { ptr(), size() };
}
char *
@@ -2341,7 +2098,24 @@ const
assert(handle);
char buf[sizeof(void *)] {0};
return info<char *>(clGetMemObjectInfo, cl_mem(mutable_cast(handle)), CL_MEM_HOST_PTR, buf);
const auto ret
{
this->mapped?
static_cast<char *>(this->mapped):
info<char *>(clGetMemObjectInfo, cl_mem(mutable_cast(handle)), CL_MEM_HOST_PTR, buf)
};
return ret;
}
size_t
ircd::cl::data::refs()
const
{
assert(handle);
char buf[sizeof(size_t)] {0};
return info<uint>(clGetMemObjectInfo, cl_mem(mutable_cast(handle)), CL_MEM_REFERENCE_COUNT, buf);
}
off_t