mirror of
https://github.com/matrix-construct/construct
synced 2024-12-27 16:04:15 +01:00
ircd:🆑 Replace niceness value with intensity by range partition.
This commit is contained in:
parent
508d27eb40
commit
da614e33a5
3 changed files with 73 additions and 41 deletions
|
@ -40,7 +40,7 @@ namespace ircd::cl
|
||||||
extern conf::item<bool> enable;
|
extern conf::item<bool> enable;
|
||||||
extern conf::item<bool> profile_queue;
|
extern conf::item<bool> profile_queue;
|
||||||
extern conf::item<uint64_t> watchdog_tsc;
|
extern conf::item<uint64_t> watchdog_tsc;
|
||||||
extern conf::item<milliseconds> nice_rate;
|
extern conf::item<uint64_t> intensity;
|
||||||
extern conf::item<std::string> path;
|
extern conf::item<std::string> path;
|
||||||
extern conf::item<std::string> envs[];
|
extern conf::item<std::string> envs[];
|
||||||
}
|
}
|
||||||
|
|
|
@ -65,11 +65,14 @@ struct ircd::cl::exec::opts
|
||||||
/// For operations which have an offset (or two); otherwise ignored.
|
/// For operations which have an offset (or two); otherwise ignored.
|
||||||
off_t offset[2] {0};
|
off_t offset[2] {0};
|
||||||
|
|
||||||
/// Tune the intensity of the execution. For headless deployments the
|
/// Tune the intensity of the execution. The value is intended to correlate
|
||||||
/// maximum intensity is advised. Lesser values are more intense. The
|
/// with parallel resource consumption on the device by shaping the work
|
||||||
/// default of -1 is the maximum. The value of zero yields the ircd::ctx
|
/// groups submitted over the range. The minimum value of 1 will serialize
|
||||||
/// after submission, but does not otherwise decrease the intensity.
|
/// execution. Values greater than the number of CU's will not increase
|
||||||
int nice {-1};
|
/// concurrency but may still partition a large range with multiple command
|
||||||
|
/// submissions to increase niceness. The default of zero will maximize
|
||||||
|
/// intensity and minimize command submissions (to one).
|
||||||
|
uint intensity {0};
|
||||||
|
|
||||||
/// Starts a new dependency chain; allowing empty deps without implicit
|
/// Starts a new dependency chain; allowing empty deps without implicit
|
||||||
/// dependency on the last work item constructed on the ircd::ctx.
|
/// dependency on the last work item constructed on the ircd::ctx.
|
||||||
|
|
99
ircd/cl.cc
99
ircd/cl.cc
|
@ -141,11 +141,11 @@ ircd::cl::profile_queue
|
||||||
{ "persist", false },
|
{ "persist", false },
|
||||||
};
|
};
|
||||||
|
|
||||||
decltype(ircd::cl::nice_rate)
|
decltype(ircd::cl::intensity)
|
||||||
ircd::cl::nice_rate
|
ircd::cl::intensity
|
||||||
{
|
{
|
||||||
{ "name", "ircd.cl.nice.rate" },
|
{ "name", "ircd.cl.intensity" },
|
||||||
{ "default", 1L },
|
{ "default", 0L },
|
||||||
};
|
};
|
||||||
|
|
||||||
decltype(ircd::cl::watchdog_tsc)
|
decltype(ircd::cl::watchdog_tsc)
|
||||||
|
@ -854,13 +854,13 @@ catch(const std::exception &e)
|
||||||
}
|
}
|
||||||
|
|
||||||
ircd::cl::exec::exec(kern &kern,
|
ircd::cl::exec::exec(kern &kern,
|
||||||
const kern::range &work,
|
const kern::range &range,
|
||||||
const opts &opts)
|
const opts &opts)
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
size_t dim(0);
|
size_t dim(0);
|
||||||
for(size_t i(0); i < work.global.size(); ++i)
|
for(size_t i(0); i < range.global.size(); ++i)
|
||||||
dim += work.global[i] > 0 && dim == i;
|
dim += range.global[i] > 0 && dim == i;
|
||||||
|
|
||||||
if(!dim)
|
if(!dim)
|
||||||
return;
|
return;
|
||||||
|
@ -886,32 +886,67 @@ try
|
||||||
assert(!this->object);
|
assert(!this->object);
|
||||||
this->object = &kern;
|
this->object = &kern;
|
||||||
|
|
||||||
|
size_t global_size(range.global[0]);
|
||||||
|
size_t local_size(range.local[0]);
|
||||||
|
for(size_t d(1); d < dim; ++d)
|
||||||
|
{
|
||||||
|
global_size *= range.global[d];
|
||||||
|
local_size *= range.local[d];
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(global_size % local_size == 0);
|
||||||
|
const size_t groups
|
||||||
|
{
|
||||||
|
global_size / local_size
|
||||||
|
};
|
||||||
|
|
||||||
|
assert(groups > 0);
|
||||||
|
size_t intensity
|
||||||
|
{
|
||||||
|
cl::intensity?
|
||||||
|
std::max(opts.intensity, uint(cl::intensity)):
|
||||||
|
opts.intensity
|
||||||
|
};
|
||||||
|
|
||||||
|
if(intensity < groups)
|
||||||
|
while(intensity > 1 && groups % intensity != 0)
|
||||||
|
intensity--;
|
||||||
|
|
||||||
|
const size_t tasks
|
||||||
|
{
|
||||||
|
intensity && intensity < groups?
|
||||||
|
groups / intensity:
|
||||||
|
1
|
||||||
|
};
|
||||||
|
|
||||||
assert(!this->handle);
|
assert(!this->handle);
|
||||||
call
|
for(size_t i(0); i < tasks; ++i)
|
||||||
(
|
{
|
||||||
clEnqueueNDRangeKernel,
|
kern::range sub_range(range);
|
||||||
q,
|
for(size_t d(0); d < dim; ++d)
|
||||||
cl_kernel(kern.handle),
|
{
|
||||||
dim,
|
sub_range.global[d] /= tasks;
|
||||||
work.offset.data(),
|
sub_range.offset[d] += sub_range.global[d] * i;
|
||||||
work.global.data(),
|
}
|
||||||
work.local.data(),
|
|
||||||
deps.size(),
|
|
||||||
deps.size()? deps.data(): nullptr,
|
|
||||||
addressof_handle(this)
|
|
||||||
);
|
|
||||||
|
|
||||||
size_t global_size(work.global[0]);
|
call
|
||||||
for(size_t i(1); i < dim; ++i)
|
(
|
||||||
global_size *= work.global[i];
|
clEnqueueNDRangeKernel,
|
||||||
|
q,
|
||||||
|
cl_kernel(kern.handle),
|
||||||
|
dim,
|
||||||
|
sub_range.offset.data(),
|
||||||
|
sub_range.global.data(),
|
||||||
|
sub_range.local.data(),
|
||||||
|
deps.size(),
|
||||||
|
deps.size()? deps.data(): nullptr,
|
||||||
|
i == tasks - 1? addressof_handle(this): nullptr
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
size_t local_size(work.local[0]);
|
primary_stats.exec_kern_tasks += tasks;
|
||||||
for(size_t i(1); i < dim; ++i)
|
|
||||||
local_size *= work.local[i];
|
|
||||||
|
|
||||||
primary_stats.exec_kern_tasks += 1;
|
|
||||||
primary_stats.exec_kern_threads += global_size;
|
primary_stats.exec_kern_threads += global_size;
|
||||||
primary_stats.exec_kern_groups += global_size / local_size;
|
primary_stats.exec_kern_groups += groups;
|
||||||
handle_submitted(this, opts);
|
handle_submitted(this, opts);
|
||||||
}
|
}
|
||||||
catch(const std::exception &e)
|
catch(const std::exception &e)
|
||||||
|
@ -1136,12 +1171,6 @@ ircd::cl::handle_submitted(cl::exec *const &exec,
|
||||||
|
|
||||||
if(likely(!opts.blocking))
|
if(likely(!opts.blocking))
|
||||||
check_submit_blocking(exec, opts);
|
check_submit_blocking(exec, opts);
|
||||||
|
|
||||||
if(opts.nice == 0)
|
|
||||||
ctx::yield();
|
|
||||||
|
|
||||||
if(opts.nice > 0)
|
|
||||||
ctx::sleep(opts.nice * milliseconds(nice_rate));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Checks if the OpenCL runtime blocked this thread to sound the alarms.
|
/// Checks if the OpenCL runtime blocked this thread to sound the alarms.
|
||||||
|
|
Loading…
Reference in a new issue