0
0
Fork 0
mirror of https://github.com/matrix-construct/construct synced 2024-06-10 22:18:54 +02:00

ircd::gpt: Split debug related; improve flush options; minor cleanup.

This commit is contained in:
Jason Volk 2021-05-14 05:50:45 -07:00
parent 179d9abcf7
commit c3cb249f78
6 changed files with 179 additions and 122 deletions

View file

@ -22,6 +22,8 @@ namespace ircd::gpt::pipe
extern code *default_code;
extern desc *default_desc;
void generate(task &);
void init(), fini() noexcept;
};

View file

@ -15,7 +15,8 @@
///
struct ircd_gpt_gate
{
// NOTE(review): this text is a rendered commit diff, so removed and added
// lines appear together without +/- markers. `code[8]` looks like the
// pre-change member and `offset` + `code[7]` its replacement; confirm
// against the real header before relying on this layout.
ushort code[8];
// Input position of the gate: the host-side generate() loop copies input
// tokens only while its input index is below this value, then appends the
// gate's codes.
ushort offset;
// Token codes appended for this gate; the host-side loop stops at the
// first zero entry.
ushort code[7];
}
__attribute__((aligned(16)));

View file

@ -26,11 +26,14 @@ struct ircd_gpt_opts
/// Reference to the model (currently not available in device software).
#ifndef __cplusplus
const intptr_t model;
const void *model;
#else
const ircd::gpt::model::decoder *model;
#endif
/// Seed for the task's PRNG.
ulong seed;
/// Limit number of output tokens. Default of -1 is unlimited; the number
/// of tokens generated will be limited by other factors.
uint limit;
@ -81,18 +84,21 @@ struct ircd_gpt_opts
/// Number of possible target n-grams.
uint logits;
/// Seed for the task's PRNG.
ulong seed;
/// Training steps
ulong training_steps;
uint training_steps;
/// Validation steps
ulong validation_steps;
uint validation_steps;
/// Testing steps
uint testing_steps;
/// Target label
ushort label;
/// Bitbar toggling various debug modes
ushort debug;
/// Learning rate
float alpha;
@ -115,7 +121,7 @@ __attribute__((aligned(4096)));
#ifdef __cplusplus
// Exposes the global options struct under the ircd::gpt namespace as `opts`.
namespace ircd::gpt
{
// NOTE(review): both alias lines come from a rendered diff; the first looks
// like the removed spelling and the second (explicitly global-qualified) its
// replacement. Both name the same type.
using opts = struct ircd_gpt_opts;
using opts = ::ircd_gpt_opts;
}
static_assert(sizeof(struct ircd_gpt_opts) == 4096);

View file

@ -39,6 +39,8 @@ namespace ircd::gpt
static u16 argmax(const float *, const opts &);
static void embed(float *, const u16 token, const u16 position, const opts &);
static void generate_debug(task &, const uint &, const uint &);
static f32
logit alignas(64) [65536],
embeds alignas(64) [1024 * 768],
@ -91,21 +93,76 @@ ircd::gpt::generate(const vector_view<u16> &out,
// NOTE(review): this span is taken from a rendered commit diff; removed and
// added lines are interleaved without +/- markers (e.g. the plain input copy
// loop and the `k < 8` gate loop look like the pre-change code, the
// offset-aware `k < 7` loop like the replacement). Read accordingly.

// Reset the token ring before loading the input.
ctrl.tokens.count = 0;
ctrl.tokens.head = 0;
for(uint j(0); j < in.size(); ++j)
ctrl.token[ctrl.tokens.count++] = in[j];
// Index of the next input token not yet copied; shared across all gates.
uint j(0);
for(uint i(0); i < opts.gates; ++i)
for(uint k(0); k < 8; ++k)
{
const auto &gate
{
opts.gate[i]
};
// Copy input tokens positioned before this gate's offset, never exceeding
// the token buffer capacity.
while(j < in.size() && j < gate.offset && ctrl.tokens.count < opts.buffer_tokens)
ctrl.token[ctrl.tokens.count++] = in[j++];
// Append the gate's codes; a zero code or a full buffer ends the gate.
for(uint k(0); k < 7; ++k)
{
if(ctrl.tokens.count >= opts.buffer_tokens)
break;
if(opts.gate[i].code[k] == 0)
if(gate.code[k] == 0)
break;
ctrl.token[ctrl.tokens.count] = opts.gate[i].code[k];
ctrl.tokens.count++;
ctrl.token[ctrl.tokens.count++] = gate.code[k];
}
}
// Copy whatever input remains after the last gate, space permitting.
while(j < in.size() && ctrl.tokens.count < opts.buffer_tokens)
ctrl.token[ctrl.tokens.count++] = in[j++];
// Token count before generation; ring indices below this are treated as
// input (not copied to `out`) by the loop that follows.
const size_t in_size
{
ctrl.tokens.count
};
generate(task);
// Walk the token ring starting at ctrl.tokens.head, copying the generated
// tokens into `out` until it is full or `halt` is set.
for(uint i(0); i < ctrl.tokens.count && ret < out.size() && !halt; ++i)
{
const auto j
{
(i + ctrl.tokens.head) % opts.buffer_tokens
};
const auto tok
{
ctrl.token[j]
};
if(j >= in_size)
out[ret++] = tok;
// Debug bit 0x01 enables the per-token debug report at all.
// NOTE(review): the visible constructor hunk initializes `debug` to 0x01,
// so the `likely` hint on this skip path disagrees with the default; confirm.
if(likely(~opts.debug & 0x01))
continue;
// Debug bit 0x02 additionally reports the input tokens; without it only
// generated tokens reach generate_debug().
if(likely(~opts.debug & 0x02))
if(j < in_size)
continue;
// NOTE(review): `j` passed here is already offset by ctrl.tokens.head and
// wrapped; the visible generate_debug() applies the same transform to its
// index argument again; confirm which index is intended.
generate_debug(task, j, in_size);
}
ctx::interruption_point();
return vector_view<u16>
{
out, ret
};
}
void
ircd::gpt::generate(task &task)
{
const auto &opts(*task.opts);
auto &ctrl(*task.ctrl);
const size_t in_size
{
@ -153,11 +210,10 @@ ircd::gpt::generate(const vector_view<u16> &out,
ctrl.cert.last = ctrl.cert.mean;
ctrl.prop = false;
pipe::default_model->invalid = true;
return {};
return;
}
cycles = 0;
milliseconds last_time {0};
util::timer stopwatch;
{
const prof::scope_cycles task_cycles
@ -165,66 +221,69 @@ ircd::gpt::generate(const vector_view<u16> &out,
cycles
};
generate(task);
pipe::generate(task);
}
last_time = stopwatch.at<milliseconds>();
const milliseconds last_time
{
stopwatch.at<milliseconds>()
};
ctrl.epic.elapsed += last_time.count();
}
for(uint j(0); j < ctrl.tokens.count && ret < out.size() && !halt; ++j)
// Formats a one-line report for a single token of the task's token ring
// (mean/last loss, perplexity and certainty values, label match flag, the
// token's text and code, elapsed time) and logs it at DEBUG level.
//
// task     Task whose `opts` and `ctrl` are read.
// i        Index which is offset by ctrl.tokens.head and wrapped by
//          opts.buffer_tokens to select the token.
// in_size  Number of tokens that were input; used to derive `bsz`.
//
// NOTE(review): this span is taken from a rendered commit diff. Lines of the
// removed inline debug code (references to `out`, `ret`, `last_time`,
// `cycles`, `continue`, the non-static buffers and the first format string)
// are interleaved with the new function body without +/- markers.
void
ircd::gpt::generate_debug(task &task,
const uint &i,
const uint &in_size)
{
const auto &opts(*task.opts);
auto &ctrl(*task.ctrl);
// NOTE(review): the visible caller passes an index that has already been
// offset by ctrl.tokens.head and wrapped by opts.buffer_tokens; repeating
// the transform here looks like a double offset whenever head != 0; confirm.
const auto j
{
const auto tok
{
ctrl.token[j]
};
(i + ctrl.tokens.head) % opts.buffer_tokens
};
if(j >= in_size)
out[ret++] = tok;
if(j < in_size)
continue;
static char dbuf[512] {0};
char report[1536] {0};
char tmbuf[4][64] {0};
const size_t bsz(ctrl.tokens.count - in_size);
const size_t report_size = snprintf
(
report, sizeof(report),
"%-3u %4u:%-4u %4lu:%-4lu %6.1f%% %5.1fP %6.3fL [%c%c%c] %5u %6.3fL %6.2fP %5.1f%% %s %04x %8s %8s | %8s",
j,
ret - 1,
ctrl.tokens.count,
ctrl.epic.epoch,
ctrl.epic.cycle,
std::clamp(ctrl.cert.mean * 100.0f, 0.0f, 100.0f),
std::clamp(ctrl.perp.mean, 0.0f, 100.0f),
std::clamp(ctrl.loss.mean, 0.0f, 99.99f),
opts.label == tok? '+': ' ',
' ', // flag place
' ', // flag place
opts.label,
std::clamp(ctrl.loss.last, 0.0f, 99.99f),
std::clamp(ctrl.perp.last, 0.0f, 100.0f),
std::clamp(ctrl.cert.last * 100.0f, 0.0f, 100.0f),
vocab::debug(dbuf, tok).c_str(),
tok,
pretty(tmbuf[0], milliseconds(last_time / bsz), 1).c_str(),
pretty(tmbuf[1], si(cycles / bsz), 1).c_str(),
pretty(tmbuf[2], milliseconds(ctrl.epic.elapsed), 1).c_str()
);
log::logf
{
log, log::level::DEBUG,
"%s",
string_view{report, report_size}
};
}
ctx::interruption_point();
return vector_view<u16>
const auto tok
{
out, ret
ctrl.token[j]
};
// Scratch buffers have static storage, so they are shared by every call.
// NOTE(review): that makes this function non-reentrant; confirm it is never
// entered from more than one context at a time.
static char dbuf[512];
static char report[1536];
static char tmbuf[4][64];
// Count of tokens beyond the input.
// NOTE(review): this is zero when ctrl.tokens.count == in_size, and the
// `0ms / bsz` and `0UL / bsz` expressions below would then divide by zero.
// The visible caller reaches this function for input tokens when debug bit
// 0x02 is set; confirm whether a guard is needed.
const size_t bsz(ctrl.tokens.count - in_size);
const size_t report_size = snprintf
(
report, sizeof(report),
"%-3u %-4u %4lu:%-4lu %6.1f%% %5.1fP %6.3fL [%c%c%c] %5u %6.3fL %6.2fP %5.1f%% %s %04x %8s %8s | %8s",
j,
ctrl.tokens.count,
ctrl.epic.epoch,
ctrl.epic.cycle,
std::clamp(ctrl.cert.mean * 100.0f, 0.0f, 100.0f),
std::clamp(ctrl.perp.mean, 0.0f, 100.0f),
std::clamp(ctrl.loss.mean, 0.0f, 99.99f),
opts.label == tok? '+': ' ',
' ', // flag place
' ', // flag place
opts.label,
std::clamp(ctrl.loss.last, 0.0f, 99.99f),
std::clamp(ctrl.perp.last, 0.0f, 100.0f),
std::clamp(ctrl.cert.last * 100.0f, 0.0f, 100.0f),
vocab::debug(dbuf, tok).c_str(),
tok,
// The two per-token timing columns are fed constant zero numerators here,
// unlike the interleaved older lines which used last_time and cycles.
pretty(tmbuf[0], milliseconds(0ms / bsz), 1).c_str(),
pretty(tmbuf[1], si(0UL / bsz), 1).c_str(),
pretty(tmbuf[2], milliseconds(ctrl.epic.elapsed), 1).c_str()
);
// report_size is snprintf's return value, used as the logged view's length.
// NOTE(review): snprintf returns the untruncated length, which can exceed
// sizeof(report); confirm the view cannot overrun the buffer.
log::logf
{
log, log::level::DEBUG,
"%s",
string_view{report, report_size}
};
}
@ -849,7 +908,11 @@ ircd_gpt_opts::ircd_gpt_opts(const ircd::gpt::model::decoder *const model)
noexcept
:model
{
model
model?: ircd::gpt::model::default_model
}
,seed
{
1234567890UL
}
,limit
{
@ -915,10 +978,6 @@ noexcept
{
50257
}
,seed
{
1234567890UL
}
,training_steps
{
250000
@ -927,10 +986,18 @@ noexcept
{
5000
}
,testing_steps
{
5000
}
,label
{
198
}
,debug
{
0x01
}
,alpha
{
0.001f

View file

@ -600,8 +600,6 @@ ircd_gpt_leave(__global struct ircd_gpt_task *const ctrl,
if(li != 0)
return;
// On the last cycle, with no prior call or error code set, indicate
// a nominal exit condition.
if(ctrl->epic.cycle + 1 >= opts->limit)
ctrl->epic.epoch += 1;
@ -620,8 +618,7 @@ ircd_gpt_lm_result(__global struct ircd_gpt_task *const ctrl,
__global const float *const restrict logit)
{
// To read from cells other than idx[0] we need this barrier.
if(opts->top_k > 1)
barrier(CLK_LOCAL_MEM_FENCE);
barrier(CLK_LOCAL_MEM_FENCE);
// Mask for write-leader
if(li != 0)
@ -661,9 +658,10 @@ ircd_gpt_lm_result(__global struct ircd_gpt_task *const ctrl,
loss_sum = ctrl->loss.sum[0] + ctrl->loss.sum[1] + ctrl->loss.sum[2] + loss,
perp_sum = ctrl->perp.sum[0] + ctrl->perp.sum[1] + ctrl->perp.sum[2] + perp,
cert_sum = ctrl->cert.sum[0] + ctrl->cert.sum[1] + ctrl->cert.sum[2] + cert,
loss_mean = loss_sum / (ctrl->epic.epoch + 1.0f),
perp_mean = perp_sum / (ctrl->epic.epoch + 1.0f),
cert_mean = cert_sum / (ctrl->epic.epoch + 1.0f);
mean_div = ctrl->epic.epoch + 1.0f,
loss_mean = loss_sum / mean_div,
perp_mean = perp_sum / mean_div,
cert_mean = cert_sum / mean_div;
ctrl->loss.last = loss;
ctrl->loss.sum[sum_sel] += loss;

View file

@ -12,14 +12,13 @@ namespace ircd::gpt::pipe
{
// NOTE(review): taken from a rendered commit diff; the two
// `static ircd::cl::exec::opts` lists are most likely the removed and the
// added declaration shown together, and the `flush_cycles` declaration
// appears to be removed along with its definition; confirm.
static void profile_dumplog(pipe::exec &);
static ircd::cl::exec::opts
negative_opts, positive_opts, selfattn_opts,
cathode_opts, anode_opts, lmhead_opts, lmamax_opts,
backprop_opts;
extern conf::item<size_t> flush_cycles;
extern conf::item<size_t> queue_cycles;
extern const ircd::run::changed handle_quit;
// Option sets for the pipe's cl::exec operations. In the visible hunks,
// init() sets `flush` on send_ctrl_opts, lmamax_opts and recv_ctrl_opts, and
// the exec constructor passes the send_*_opts / recv_ctrl_opts objects to its
// release_* / acquire_ctrl members.
static ircd::cl::exec::opts
send_opts_opts, send_ctrl_opts, send_coil_opts, send_head_opts,
anode_opts, negative_opts, positive_opts, cathode_opts,
lmhead_opts, lmamax_opts, backprop_opts, recv_ctrl_opts;
}
decltype(ircd::gpt::pipe::queue_cycles)
@ -29,13 +28,6 @@ ircd::gpt::pipe::queue_cycles
{ "default", 1L, },
};
// Configuration item "ircd.gpt.pipe.flush" (default 1). In the visible
// pipe::generate hunk it both enables an explicit cl::flush() and acts as the
// interval, in cycles, between flushes (`cycle % flush_cycles == 0`).
// NOTE(review): the enclosing hunk header shrinks from 13 to 6 lines, so this
// definition appears to be removed by the commit; confirm.
decltype(ircd::gpt::pipe::flush_cycles)
ircd::gpt::pipe::flush_cycles
{
{ "name", "ircd.gpt.pipe.flush" },
{ "default", 1L, },
};
// Out-of-line definition of pipe::default_model, written without an
// initializer. The visible init() hunk dereferences it
// (`*pipe::default_model`), and pipe::generate asserts it is set.
decltype(ircd::gpt::pipe::default_model)
ircd::gpt::pipe::default_model;
@ -75,6 +67,13 @@ ircd::gpt::pipe::init()
*pipe::default_code, *pipe::default_model
};
//XXX
send_ctrl_opts.flush = true;
send_ctrl_opts.nice = 1;
lmamax_opts.flush = true;
lmamax_opts.nice = 2;
recv_ctrl_opts.flush = true;
log::debug
{
log, "Pipe initialized from model:%p data:%p code:%p desc:%p",
@ -113,7 +112,7 @@ noexcept
//
void
ircd::gpt::generate(task &task)
ircd::gpt::pipe::generate(task &task)
{
assert(pipe::default_model);
@ -131,9 +130,10 @@ ircd::gpt::generate(task &task)
ctrl.epic.cycle = 0;
ctrl.epic.host_tsc = prof::cycles();
volatile const size_t tokens(ctrl.tokens.count);
volatile const auto epoch(ctrl.epic.epoch);
volatile size_t cycle(ctrl.epic.cycle);
const auto tokens(ctrl.tokens.count);
const auto epoch(ctrl.epic.epoch);
volatile auto cycle(ctrl.epic.cycle);
std::deque<pipe::exec> list;
for(; cycle < opts.limit; ++cycle)
@ -150,23 +150,6 @@ ircd::gpt::generate(task &task)
task, tokens + cycle, rel, acq
);
// Conditions for a cl::flush here
const bool flush
{
// Flushing here is enabled by the configuration
pipe::flush_cycles
// Skip flushing on cycles already performing IO or waiting.
&& !acq && list.size() <= pipe::queue_cycles
// The configuration item can specify an interval greater than
// one between flushes.
&& cycle % pipe::flush_cycles == 0
};
if(flush)
cl::flush();
if(ctx::interruption_requested())
if(acq || termination(ctx::cur()))
break;
@ -301,19 +284,19 @@ ircd::gpt::pipe::exec::exec(task &task,
}
,release_opts
{
desc->opts, send_opts
desc->opts, send_opts, send_opts_opts,
}
,release_ctrl
{
desc->ctrl, send_ctrl
desc->ctrl, send_ctrl, send_ctrl_opts
}
,release_coil
{
desc->model->decode->master[0], send_coil
desc->model->decode->master[0], send_coil, send_coil_opts
}
,release_head
{
desc->model->embed->master[0], send_head
desc->model->embed->master[0], send_head, send_head_opts
}
,lm_embed
{
@ -364,7 +347,7 @@ ircd::gpt::pipe::exec::exec(task &task,
}
,acquire_ctrl
{
desc->ctrl, recv_ctrl
desc->ctrl, recv_ctrl, recv_ctrl_opts
}
{
if(release && desc->model->invalid)