mirror of
https://github.com/matrix-construct/construct
synced 2024-06-10 22:18:54 +02:00
ircd::gpt: Split debug related; improve flush options; minor cleanup.
This commit is contained in:
parent
179d9abcf7
commit
c3cb249f78
|
@ -22,6 +22,8 @@ namespace ircd::gpt::pipe
|
|||
extern code *default_code;
|
||||
extern desc *default_desc;
|
||||
|
||||
void generate(task &);
|
||||
|
||||
void init(), fini() noexcept;
|
||||
};
|
||||
|
||||
|
|
|
@ -15,7 +15,8 @@
|
|||
///
|
||||
struct ircd_gpt_gate
|
||||
{
|
||||
ushort code[8];
|
||||
ushort offset;
|
||||
ushort code[7];
|
||||
}
|
||||
__attribute__((aligned(16)));
|
||||
|
||||
|
|
|
@ -26,11 +26,14 @@ struct ircd_gpt_opts
|
|||
|
||||
/// Reference to the model (currently not available in device software).
|
||||
#ifndef __cplusplus
|
||||
const intptr_t model;
|
||||
const void *model;
|
||||
#else
|
||||
const ircd::gpt::model::decoder *model;
|
||||
#endif
|
||||
|
||||
/// Seed for the task's PRNG.
|
||||
ulong seed;
|
||||
|
||||
/// Limit number of output tokens. Default of -1 is unlimited; the number
|
||||
/// of tokens generated will be limited by other factors.
|
||||
uint limit;
|
||||
|
@ -81,18 +84,21 @@ struct ircd_gpt_opts
|
|||
/// Number of possible target n-grams.
|
||||
uint logits;
|
||||
|
||||
/// Seed for the task's PRNG.
|
||||
ulong seed;
|
||||
|
||||
/// Training steps
|
||||
ulong training_steps;
|
||||
uint training_steps;
|
||||
|
||||
/// Validation steps
|
||||
ulong validation_steps;
|
||||
uint validation_steps;
|
||||
|
||||
/// Testing steps
|
||||
uint testing_steps;
|
||||
|
||||
/// Target label
|
||||
ushort label;
|
||||
|
||||
/// Bitmask toggling various debug modes
|
||||
ushort debug;
|
||||
|
||||
/// Learning rate
|
||||
float alpha;
|
||||
|
||||
|
@ -115,7 +121,7 @@ __attribute__((aligned(4096)));
|
|||
#ifdef __cplusplus
|
||||
namespace ircd::gpt
|
||||
{
|
||||
using opts = struct ircd_gpt_opts;
|
||||
using opts = ::ircd_gpt_opts;
|
||||
}
|
||||
|
||||
static_assert(sizeof(struct ircd_gpt_opts) == 4096);
|
||||
|
|
203
ircd/gpt.cc
203
ircd/gpt.cc
|
@ -39,6 +39,8 @@ namespace ircd::gpt
|
|||
static u16 argmax(const float *, const opts &);
|
||||
static void embed(float *, const u16 token, const u16 position, const opts &);
|
||||
|
||||
static void generate_debug(task &, const uint &, const uint &);
|
||||
|
||||
static f32
|
||||
logit alignas(64) [65536],
|
||||
embeds alignas(64) [1024 * 768],
|
||||
|
@ -91,21 +93,76 @@ ircd::gpt::generate(const vector_view<u16> &out,
|
|||
ctrl.tokens.count = 0;
|
||||
ctrl.tokens.head = 0;
|
||||
|
||||
for(uint j(0); j < in.size(); ++j)
|
||||
ctrl.token[ctrl.tokens.count++] = in[j];
|
||||
|
||||
uint j(0);
|
||||
for(uint i(0); i < opts.gates; ++i)
|
||||
for(uint k(0); k < 8; ++k)
|
||||
{
|
||||
const auto &gate
|
||||
{
|
||||
opts.gate[i]
|
||||
};
|
||||
|
||||
while(j < in.size() && j < gate.offset && ctrl.tokens.count < opts.buffer_tokens)
|
||||
ctrl.token[ctrl.tokens.count++] = in[j++];
|
||||
|
||||
for(uint k(0); k < 7; ++k)
|
||||
{
|
||||
if(ctrl.tokens.count >= opts.buffer_tokens)
|
||||
break;
|
||||
|
||||
if(opts.gate[i].code[k] == 0)
|
||||
if(gate.code[k] == 0)
|
||||
break;
|
||||
|
||||
ctrl.token[ctrl.tokens.count] = opts.gate[i].code[k];
|
||||
ctrl.tokens.count++;
|
||||
ctrl.token[ctrl.tokens.count++] = gate.code[k];
|
||||
}
|
||||
}
|
||||
|
||||
while(j < in.size() && ctrl.tokens.count < opts.buffer_tokens)
|
||||
ctrl.token[ctrl.tokens.count++] = in[j++];
|
||||
|
||||
const size_t in_size
|
||||
{
|
||||
ctrl.tokens.count
|
||||
};
|
||||
|
||||
generate(task);
|
||||
|
||||
for(uint i(0); i < ctrl.tokens.count && ret < out.size() && !halt; ++i)
|
||||
{
|
||||
const auto j
|
||||
{
|
||||
(i + ctrl.tokens.head) % opts.buffer_tokens
|
||||
};
|
||||
|
||||
const auto tok
|
||||
{
|
||||
ctrl.token[j]
|
||||
};
|
||||
|
||||
if(j >= in_size)
|
||||
out[ret++] = tok;
|
||||
|
||||
if(likely(~opts.debug & 0x01))
|
||||
continue;
|
||||
|
||||
if(likely(~opts.debug & 0x02))
|
||||
if(j < in_size)
|
||||
continue;
|
||||
|
||||
generate_debug(task, j, in_size);
|
||||
}
|
||||
|
||||
ctx::interruption_point();
|
||||
return vector_view<u16>
|
||||
{
|
||||
out, ret
|
||||
};
|
||||
}
|
||||
|
||||
void
|
||||
ircd::gpt::generate(task &task)
|
||||
{
|
||||
const auto &opts(*task.opts);
|
||||
auto &ctrl(*task.ctrl);
|
||||
|
||||
const size_t in_size
|
||||
{
|
||||
|
@ -153,11 +210,10 @@ ircd::gpt::generate(const vector_view<u16> &out,
|
|||
ctrl.cert.last = ctrl.cert.mean;
|
||||
ctrl.prop = false;
|
||||
pipe::default_model->invalid = true;
|
||||
return {};
|
||||
return;
|
||||
}
|
||||
|
||||
cycles = 0;
|
||||
milliseconds last_time {0};
|
||||
util::timer stopwatch;
|
||||
{
|
||||
const prof::scope_cycles task_cycles
|
||||
|
@ -165,66 +221,69 @@ ircd::gpt::generate(const vector_view<u16> &out,
|
|||
cycles
|
||||
};
|
||||
|
||||
generate(task);
|
||||
pipe::generate(task);
|
||||
}
|
||||
last_time = stopwatch.at<milliseconds>();
|
||||
|
||||
const milliseconds last_time
|
||||
{
|
||||
stopwatch.at<milliseconds>()
|
||||
};
|
||||
|
||||
ctrl.epic.elapsed += last_time.count();
|
||||
}
|
||||
|
||||
for(uint j(0); j < ctrl.tokens.count && ret < out.size() && !halt; ++j)
|
||||
void
|
||||
ircd::gpt::generate_debug(task &task,
|
||||
const uint &i,
|
||||
const uint &in_size)
|
||||
{
|
||||
const auto &opts(*task.opts);
|
||||
auto &ctrl(*task.ctrl);
|
||||
|
||||
const auto j
|
||||
{
|
||||
const auto tok
|
||||
{
|
||||
ctrl.token[j]
|
||||
};
|
||||
(i + ctrl.tokens.head) % opts.buffer_tokens
|
||||
};
|
||||
|
||||
if(j >= in_size)
|
||||
out[ret++] = tok;
|
||||
|
||||
if(j < in_size)
|
||||
continue;
|
||||
|
||||
static char dbuf[512] {0};
|
||||
char report[1536] {0};
|
||||
char tmbuf[4][64] {0};
|
||||
const size_t bsz(ctrl.tokens.count - in_size);
|
||||
const size_t report_size = snprintf
|
||||
(
|
||||
report, sizeof(report),
|
||||
"%-3u %4u:%-4u %4lu:%-4lu %6.1f%% %5.1fP %6.3fL [%c%c%c] %5u %6.3fL %6.2fP %5.1f%% %s %04x %8s %8s | %8s",
|
||||
j,
|
||||
ret - 1,
|
||||
ctrl.tokens.count,
|
||||
ctrl.epic.epoch,
|
||||
ctrl.epic.cycle,
|
||||
std::clamp(ctrl.cert.mean * 100.0f, 0.0f, 100.0f),
|
||||
std::clamp(ctrl.perp.mean, 0.0f, 100.0f),
|
||||
std::clamp(ctrl.loss.mean, 0.0f, 99.99f),
|
||||
opts.label == tok? '+': ' ',
|
||||
' ', // flag place
|
||||
' ', // flag place
|
||||
opts.label,
|
||||
std::clamp(ctrl.loss.last, 0.0f, 99.99f),
|
||||
std::clamp(ctrl.perp.last, 0.0f, 100.0f),
|
||||
std::clamp(ctrl.cert.last * 100.0f, 0.0f, 100.0f),
|
||||
vocab::debug(dbuf, tok).c_str(),
|
||||
tok,
|
||||
pretty(tmbuf[0], milliseconds(last_time / bsz), 1).c_str(),
|
||||
pretty(tmbuf[1], si(cycles / bsz), 1).c_str(),
|
||||
pretty(tmbuf[2], milliseconds(ctrl.epic.elapsed), 1).c_str()
|
||||
);
|
||||
|
||||
log::logf
|
||||
{
|
||||
log, log::level::DEBUG,
|
||||
"%s",
|
||||
string_view{report, report_size}
|
||||
};
|
||||
}
|
||||
|
||||
ctx::interruption_point();
|
||||
return vector_view<u16>
|
||||
const auto tok
|
||||
{
|
||||
out, ret
|
||||
ctrl.token[j]
|
||||
};
|
||||
|
||||
static char dbuf[512];
|
||||
static char report[1536];
|
||||
static char tmbuf[4][64];
|
||||
const size_t bsz(ctrl.tokens.count - in_size);
|
||||
const size_t report_size = snprintf
|
||||
(
|
||||
report, sizeof(report),
|
||||
"%-3u %-4u %4lu:%-4lu %6.1f%% %5.1fP %6.3fL [%c%c%c] %5u %6.3fL %6.2fP %5.1f%% %s %04x %8s %8s | %8s",
|
||||
j,
|
||||
ctrl.tokens.count,
|
||||
ctrl.epic.epoch,
|
||||
ctrl.epic.cycle,
|
||||
std::clamp(ctrl.cert.mean * 100.0f, 0.0f, 100.0f),
|
||||
std::clamp(ctrl.perp.mean, 0.0f, 100.0f),
|
||||
std::clamp(ctrl.loss.mean, 0.0f, 99.99f),
|
||||
opts.label == tok? '+': ' ',
|
||||
' ', // flag place
|
||||
' ', // flag place
|
||||
opts.label,
|
||||
std::clamp(ctrl.loss.last, 0.0f, 99.99f),
|
||||
std::clamp(ctrl.perp.last, 0.0f, 100.0f),
|
||||
std::clamp(ctrl.cert.last * 100.0f, 0.0f, 100.0f),
|
||||
vocab::debug(dbuf, tok).c_str(),
|
||||
tok,
|
||||
pretty(tmbuf[0], milliseconds(0ms / bsz), 1).c_str(),
|
||||
pretty(tmbuf[1], si(0UL / bsz), 1).c_str(),
|
||||
pretty(tmbuf[2], milliseconds(ctrl.epic.elapsed), 1).c_str()
|
||||
);
|
||||
|
||||
log::logf
|
||||
{
|
||||
log, log::level::DEBUG,
|
||||
"%s",
|
||||
string_view{report, report_size}
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -849,7 +908,11 @@ ircd_gpt_opts::ircd_gpt_opts(const ircd::gpt::model::decoder *const model)
|
|||
noexcept
|
||||
:model
|
||||
{
|
||||
model
|
||||
model?: ircd::gpt::model::default_model
|
||||
}
|
||||
,seed
|
||||
{
|
||||
1234567890UL
|
||||
}
|
||||
,limit
|
||||
{
|
||||
|
@ -915,10 +978,6 @@ noexcept
|
|||
{
|
||||
50257
|
||||
}
|
||||
,seed
|
||||
{
|
||||
1234567890UL
|
||||
}
|
||||
,training_steps
|
||||
{
|
||||
250000
|
||||
|
@ -927,10 +986,18 @@ noexcept
|
|||
{
|
||||
5000
|
||||
}
|
||||
,testing_steps
|
||||
{
|
||||
5000
|
||||
}
|
||||
,label
|
||||
{
|
||||
198
|
||||
}
|
||||
,debug
|
||||
{
|
||||
0x01
|
||||
}
|
||||
,alpha
|
||||
{
|
||||
0.001f
|
||||
|
|
|
@ -600,8 +600,6 @@ ircd_gpt_leave(__global struct ircd_gpt_task *const ctrl,
|
|||
if(li != 0)
|
||||
return;
|
||||
|
||||
// On the last cycle, with no prior call or error code set, indicate
|
||||
// a nominal exit condition.
|
||||
if(ctrl->epic.cycle + 1 >= opts->limit)
|
||||
ctrl->epic.epoch += 1;
|
||||
|
||||
|
@ -620,8 +618,7 @@ ircd_gpt_lm_result(__global struct ircd_gpt_task *const ctrl,
|
|||
__global const float *const restrict logit)
|
||||
{
|
||||
// To read from cells other than idx[0] we need this barrier.
|
||||
if(opts->top_k > 1)
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
// Mask for write-leader
|
||||
if(li != 0)
|
||||
|
@ -661,9 +658,10 @@ ircd_gpt_lm_result(__global struct ircd_gpt_task *const ctrl,
|
|||
loss_sum = ctrl->loss.sum[0] + ctrl->loss.sum[1] + ctrl->loss.sum[2] + loss,
|
||||
perp_sum = ctrl->perp.sum[0] + ctrl->perp.sum[1] + ctrl->perp.sum[2] + perp,
|
||||
cert_sum = ctrl->cert.sum[0] + ctrl->cert.sum[1] + ctrl->cert.sum[2] + cert,
|
||||
loss_mean = loss_sum / (ctrl->epic.epoch + 1.0f),
|
||||
perp_mean = perp_sum / (ctrl->epic.epoch + 1.0f),
|
||||
cert_mean = cert_sum / (ctrl->epic.epoch + 1.0f);
|
||||
mean_div = ctrl->epic.epoch + 1.0f,
|
||||
loss_mean = loss_sum / mean_div,
|
||||
perp_mean = perp_sum / mean_div,
|
||||
cert_mean = cert_sum / mean_div;
|
||||
|
||||
ctrl->loss.last = loss;
|
||||
ctrl->loss.sum[sum_sel] += loss;
|
||||
|
|
|
@ -12,14 +12,13 @@ namespace ircd::gpt::pipe
|
|||
{
|
||||
static void profile_dumplog(pipe::exec &);
|
||||
|
||||
static ircd::cl::exec::opts
|
||||
negative_opts, positive_opts, selfattn_opts,
|
||||
cathode_opts, anode_opts, lmhead_opts, lmamax_opts,
|
||||
backprop_opts;
|
||||
|
||||
extern conf::item<size_t> flush_cycles;
|
||||
extern conf::item<size_t> queue_cycles;
|
||||
extern const ircd::run::changed handle_quit;
|
||||
|
||||
static ircd::cl::exec::opts
|
||||
send_opts_opts, send_ctrl_opts, send_coil_opts, send_head_opts,
|
||||
anode_opts, negative_opts, positive_opts, cathode_opts,
|
||||
lmhead_opts, lmamax_opts, backprop_opts, recv_ctrl_opts;
|
||||
}
|
||||
|
||||
decltype(ircd::gpt::pipe::queue_cycles)
|
||||
|
@ -29,13 +28,6 @@ ircd::gpt::pipe::queue_cycles
|
|||
{ "default", 1L, },
|
||||
};
|
||||
|
||||
decltype(ircd::gpt::pipe::flush_cycles)
|
||||
ircd::gpt::pipe::flush_cycles
|
||||
{
|
||||
{ "name", "ircd.gpt.pipe.flush" },
|
||||
{ "default", 1L, },
|
||||
};
|
||||
|
||||
decltype(ircd::gpt::pipe::default_model)
|
||||
ircd::gpt::pipe::default_model;
|
||||
|
||||
|
@ -75,6 +67,13 @@ ircd::gpt::pipe::init()
|
|||
*pipe::default_code, *pipe::default_model
|
||||
};
|
||||
|
||||
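//XXX: flush/nice values below are hard-coded; presumably these should become configurable (TODO confirm)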
|
||||
send_ctrl_opts.flush = true;
|
||||
send_ctrl_opts.nice = 1;
|
||||
lmamax_opts.flush = true;
|
||||
lmamax_opts.nice = 2;
|
||||
recv_ctrl_opts.flush = true;
|
||||
|
||||
log::debug
|
||||
{
|
||||
log, "Pipe initialized from model:%p data:%p code:%p desc:%p",
|
||||
|
@ -113,7 +112,7 @@ noexcept
|
|||
//
|
||||
|
||||
void
|
||||
ircd::gpt::generate(task &task)
|
||||
ircd::gpt::pipe::generate(task &task)
|
||||
{
|
||||
assert(pipe::default_model);
|
||||
|
||||
|
@ -131,9 +130,10 @@ ircd::gpt::generate(task &task)
|
|||
|
||||
ctrl.epic.cycle = 0;
|
||||
ctrl.epic.host_tsc = prof::cycles();
|
||||
volatile const size_t tokens(ctrl.tokens.count);
|
||||
volatile const auto epoch(ctrl.epic.epoch);
|
||||
volatile size_t cycle(ctrl.epic.cycle);
|
||||
|
||||
const auto tokens(ctrl.tokens.count);
|
||||
const auto epoch(ctrl.epic.epoch);
|
||||
volatile auto cycle(ctrl.epic.cycle);
|
||||
|
||||
std::deque<pipe::exec> list;
|
||||
for(; cycle < opts.limit; ++cycle)
|
||||
|
@ -150,23 +150,6 @@ ircd::gpt::generate(task &task)
|
|||
task, tokens + cycle, rel, acq
|
||||
);
|
||||
|
||||
// Conditions for a cl::flush here
|
||||
const bool flush
|
||||
{
|
||||
// Flushing here is enabled by the configuration
|
||||
pipe::flush_cycles
|
||||
|
||||
// Skip flushing on cycles already performing IO or waiting.
|
||||
&& !acq && list.size() <= pipe::queue_cycles
|
||||
|
||||
// The configuration item can specify an interval greater than
|
||||
// one between flushes.
|
||||
&& cycle % pipe::flush_cycles == 0
|
||||
};
|
||||
|
||||
if(flush)
|
||||
cl::flush();
|
||||
|
||||
if(ctx::interruption_requested())
|
||||
if(acq || termination(ctx::cur()))
|
||||
break;
|
||||
|
@ -301,19 +284,19 @@ ircd::gpt::pipe::exec::exec(task &task,
|
|||
}
|
||||
,release_opts
|
||||
{
|
||||
desc->opts, send_opts
|
||||
desc->opts, send_opts, send_opts_opts,
|
||||
}
|
||||
,release_ctrl
|
||||
{
|
||||
desc->ctrl, send_ctrl
|
||||
desc->ctrl, send_ctrl, send_ctrl_opts
|
||||
}
|
||||
,release_coil
|
||||
{
|
||||
desc->model->decode->master[0], send_coil
|
||||
desc->model->decode->master[0], send_coil, send_coil_opts
|
||||
}
|
||||
,release_head
|
||||
{
|
||||
desc->model->embed->master[0], send_head
|
||||
desc->model->embed->master[0], send_head, send_head_opts
|
||||
}
|
||||
,lm_embed
|
||||
{
|
||||
|
@ -364,7 +347,7 @@ ircd::gpt::pipe::exec::exec(task &task,
|
|||
}
|
||||
,acquire_ctrl
|
||||
{
|
||||
desc->ctrl, recv_ctrl
|
||||
desc->ctrl, recv_ctrl, recv_ctrl_opts
|
||||
}
|
||||
{
|
||||
if(release && desc->model->invalid)
|
||||
|
|
Loading…
Reference in a new issue