mirror of
https://github.com/matrix-construct/construct
synced 2024-12-25 15:04:10 +01:00
ircd::cl: Replace niceness value with intensity by range partition.
This commit is contained in:
parent
508d27eb40
commit
da614e33a5
3 changed files with 73 additions and 41 deletions
|
@ -40,7 +40,7 @@ namespace ircd::cl
|
|||
extern conf::item<bool> enable;
|
||||
extern conf::item<bool> profile_queue;
|
||||
extern conf::item<uint64_t> watchdog_tsc;
|
||||
extern conf::item<milliseconds> nice_rate;
|
||||
extern conf::item<uint64_t> intensity;
|
||||
extern conf::item<std::string> path;
|
||||
extern conf::item<std::string> envs[];
|
||||
}
|
||||
|
|
|
@ -65,11 +65,14 @@ struct ircd::cl::exec::opts
|
|||
/// For operations which have an offset (or two); otherwise ignored.
|
||||
off_t offset[2] {0};
|
||||
|
||||
/// Tune the intensity of the execution. For headless deployments the
|
||||
/// maximum intensity is advised. Lesser values are more intense. The
|
||||
/// default of -1 is the maximum. The value of zero yields the ircd::ctx
|
||||
/// after submission, but does not otherwise decrease the intensity.
|
||||
int nice {-1};
|
||||
/// Tune the intensity of the execution. The value is intended to correlate
|
||||
/// with parallel resource consumption on the device by shaping the work
|
||||
/// groups submitted over the range. The minimum value of 1 will serialize
|
||||
/// execution. Values greater than the number of CU's will not increase
|
||||
/// concurrency but may still partition a large range with multiple command
|
||||
/// submissions to increase niceness. The default of zero will maximize
|
||||
/// intensity and minimize command submissions (to one).
|
||||
uint intensity {0};
|
||||
|
||||
/// Starts a new dependency chain; allowing empty deps without implicit
|
||||
/// dependency on the last work item constructed on the ircd::ctx.
|
||||
|
|
99
ircd/cl.cc
99
ircd/cl.cc
|
@ -141,11 +141,11 @@ ircd::cl::profile_queue
|
|||
{ "persist", false },
|
||||
};
|
||||
|
||||
decltype(ircd::cl::nice_rate)
|
||||
ircd::cl::nice_rate
|
||||
decltype(ircd::cl::intensity)
|
||||
ircd::cl::intensity
|
||||
{
|
||||
{ "name", "ircd.cl.nice.rate" },
|
||||
{ "default", 1L },
|
||||
{ "name", "ircd.cl.intensity" },
|
||||
{ "default", 0L },
|
||||
};
|
||||
|
||||
decltype(ircd::cl::watchdog_tsc)
|
||||
|
@ -854,13 +854,13 @@ catch(const std::exception &e)
|
|||
}
|
||||
|
||||
ircd::cl::exec::exec(kern &kern,
|
||||
const kern::range &work,
|
||||
const kern::range &range,
|
||||
const opts &opts)
|
||||
try
|
||||
{
|
||||
size_t dim(0);
|
||||
for(size_t i(0); i < work.global.size(); ++i)
|
||||
dim += work.global[i] > 0 && dim == i;
|
||||
for(size_t i(0); i < range.global.size(); ++i)
|
||||
dim += range.global[i] > 0 && dim == i;
|
||||
|
||||
if(!dim)
|
||||
return;
|
||||
|
@ -886,32 +886,67 @@ try
|
|||
assert(!this->object);
|
||||
this->object = &kern;
|
||||
|
||||
size_t global_size(range.global[0]);
|
||||
size_t local_size(range.local[0]);
|
||||
for(size_t d(1); d < dim; ++d)
|
||||
{
|
||||
global_size *= range.global[d];
|
||||
local_size *= range.local[d];
|
||||
}
|
||||
|
||||
assert(global_size % local_size == 0);
|
||||
const size_t groups
|
||||
{
|
||||
global_size / local_size
|
||||
};
|
||||
|
||||
assert(groups > 0);
|
||||
size_t intensity
|
||||
{
|
||||
cl::intensity?
|
||||
std::max(opts.intensity, uint(cl::intensity)):
|
||||
opts.intensity
|
||||
};
|
||||
|
||||
if(intensity < groups)
|
||||
while(intensity > 1 && groups % intensity != 0)
|
||||
intensity--;
|
||||
|
||||
const size_t tasks
|
||||
{
|
||||
intensity && intensity < groups?
|
||||
groups / intensity:
|
||||
1
|
||||
};
|
||||
|
||||
assert(!this->handle);
|
||||
call
|
||||
(
|
||||
clEnqueueNDRangeKernel,
|
||||
q,
|
||||
cl_kernel(kern.handle),
|
||||
dim,
|
||||
work.offset.data(),
|
||||
work.global.data(),
|
||||
work.local.data(),
|
||||
deps.size(),
|
||||
deps.size()? deps.data(): nullptr,
|
||||
addressof_handle(this)
|
||||
);
|
||||
for(size_t i(0); i < tasks; ++i)
|
||||
{
|
||||
kern::range sub_range(range);
|
||||
for(size_t d(0); d < dim; ++d)
|
||||
{
|
||||
sub_range.global[d] /= tasks;
|
||||
sub_range.offset[d] += sub_range.global[d] * i;
|
||||
}
|
||||
|
||||
size_t global_size(work.global[0]);
|
||||
for(size_t i(1); i < dim; ++i)
|
||||
global_size *= work.global[i];
|
||||
call
|
||||
(
|
||||
clEnqueueNDRangeKernel,
|
||||
q,
|
||||
cl_kernel(kern.handle),
|
||||
dim,
|
||||
sub_range.offset.data(),
|
||||
sub_range.global.data(),
|
||||
sub_range.local.data(),
|
||||
deps.size(),
|
||||
deps.size()? deps.data(): nullptr,
|
||||
i == tasks - 1? addressof_handle(this): nullptr
|
||||
);
|
||||
}
|
||||
|
||||
size_t local_size(work.local[0]);
|
||||
for(size_t i(1); i < dim; ++i)
|
||||
local_size *= work.local[i];
|
||||
|
||||
primary_stats.exec_kern_tasks += 1;
|
||||
primary_stats.exec_kern_tasks += tasks;
|
||||
primary_stats.exec_kern_threads += global_size;
|
||||
primary_stats.exec_kern_groups += global_size / local_size;
|
||||
primary_stats.exec_kern_groups += groups;
|
||||
handle_submitted(this, opts);
|
||||
}
|
||||
catch(const std::exception &e)
|
||||
|
@ -1136,12 +1171,6 @@ ircd::cl::handle_submitted(cl::exec *const &exec,
|
|||
|
||||
if(likely(!opts.blocking))
|
||||
check_submit_blocking(exec, opts);
|
||||
|
||||
if(opts.nice == 0)
|
||||
ctx::yield();
|
||||
|
||||
if(opts.nice > 0)
|
||||
ctx::sleep(opts.nice * milliseconds(nice_rate));
|
||||
}
|
||||
|
||||
/// Checks if the OpenCL runtime blocked this thread to sound the alarms.
|
||||
|
|
Loading…
Reference in a new issue