// Matrix Construct
//
// Copyright (C) Matrix Construct Developers, Authors & Contributors
// Copyright (C) 2016-2021 Jason Volk <jason@zemos.net>
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice is present in all copies. The
// full license for this software is available in the LICENSE file.

decltype(ircd::gpt::log)
ircd::gpt::log
{
	"gpt"
};

//
// debug
//

void
ircd::gpt::log_debug_prof(const opts &opts,
                          const ctrl &ctrl,
                          const pipe::prof &prof)
{
	static char
	buf[2][512];

	const auto head
	{
		debug_head(buf[0], opts, ctrl)
	};

	for(uint i(0); i < prof.stages; ++i)
	{
		if(!std::get<1>(prof.info[i]))
			continue;

		log::logf
		{
			log, log::level::DEBUG,
			"%s %2u: %s",
			head,
			i,
			pipe::debug(buf[1], prof, i),
		};
	}
}

void
ircd::gpt::log_debug_topn(const opts &opts,
                          const ctrl &ctrl)
{
	static char
	buf[2][512];

	const auto head
	{
		debug_head(buf[0], opts, ctrl)
	};

	for(uint i(0); i < opts.top_n; ++i)
		log::logf
		{
			log, log::level::DEBUG,
			"%s %s",
			head,
			debug_top(buf[1], opts, ctrl, i),
		};
}

void
ircd::gpt::log_debug_labels(const opts &opts,
                            const ctrl &ctrl)
{
	static char
	buf[2][512];

	const auto head
	{
		debug_head(buf[0], opts, ctrl)
	};

	for(uint i(0); i < opts.labels; ++i)
		log::logf
		{
			log, log::level::DEBUG,
			"%s %s",
			head,
			debug_label(buf[1], opts, ctrl, i, 1),
		};
}

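// Log a histogram of attention targets: tally, over every layer and
// attention head, how often each token position appears in ctrl.attn, then
// print the tallies in descending order with a '|' bar scaled to the count
// (capped at 80 columns).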
void
ircd::gpt::log_debug_attns_top(const opts &opts,
                               const ctrl &ctrl)
{
	static char
	buf[8][512];

	const auto head
	{
		debug_head(buf[0], opts, ctrl)
	};

	std::map<uint, uint> tokm;
	for(uint i(0); i < opts.layers; ++i)
		for(uint j(0); j < opts.attn_rank; ++j)
			tokm[ctrl.attn[i][j]]++;

	std::vector<std::pair<uint, uint>> tok(begin(tokm), end(tokm));
	std::sort(begin(tok), end(tok), [&tokm]
	(const auto &a, const auto &b)
	{
		return b.second < a.second;
	});

	for(const auto &[idx, score] : tok)
	{
		const auto barsz
		{
			std::min(score, std::min(80U, uint(sizeof(buf[2]) - 1)))
		};

		memset(buf[2], '|', barsz);
		buf[2][barsz] = '\0';

		log::logf
		{
			log, log::level::DEBUG,
			"%s %s [%3u] %s %-3u",
			head,
			vocab::debug(buf[1], ctrl.token[idx], 1),
			idx,
			buf[2],
			score,
		};
	}
}

void
ircd::gpt::log_debug_attns(const opts &opts,
                           const ctrl &ctrl)
{
	static char
	buf[2][512];

	const auto head
	{
		debug_head(buf[0], opts, ctrl)
	};

	for(uint i(0); i < ctrl.count; ++i)
		log::logf
		{
			log, log::level::DEBUG,
			"%s %s",
			head,
			debug_attn(buf[1], opts, ctrl, i),
		};
}

void
ircd::gpt::log_debug_token(const opts &opts,
                           const ctrl &ctrl,
                           const uint i)
{
	static char
	buf[2][512];

	log::logf
	{
		log, log::level::DEBUG,
		"%s %s",
		debug_head(buf[0], opts, ctrl),
		debug_token_at(buf[1], opts, ctrl, i),
	};
}

void
ircd::gpt::log_debug(const opts &opts,
                     const ctrl &ctrl)
{
	static char
	buf[2][512];

	log::logf
	{
		log, log::level::DEBUG,
		"%s %s",
		debug_head(buf[0], opts, ctrl),
		debug(buf[1], opts, ctrl),
	};
}

///////////////////////////////////////////////////////////////////////////////
//
// gpt::task
//

void
ircd::gpt::reset(task &task)
noexcept
{
	clear(task);
	seed(task);
}

void
ircd::gpt::clear(task &task)
noexcept
{
	assert(task.ctrl);
	memset(task.ctrl, 0x0, sizeof(gpt::ctrl));
}

void
ircd::gpt::seed(task &task)
noexcept
{
	assert(task.opts);
	seed(task, task.opts->seed);
}

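// Reinitialize the task's random state from a caller-supplied seed. The
// first two words of ctrl->rand take the seed directly; the last two are
// fixed constants (65537 and all-ones), presumably so the generator state
// never starts out all-zero.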
void
ircd::gpt::seed(task &task,
                const uint64_t &val)
noexcept
{
	assert(task.ctrl);
	task.ctrl->rand[0] = val;
	task.ctrl->rand[1] = val;
	task.ctrl->rand[2] = 65537;
	task.ctrl->rand[3] = -1UL;
}

//
// gpt::task::task
//

ircd::gpt::task::task(const gpt::opts *const opts,
                      gpt::ctrl *const ctrl)
try
:opts
{
	opts
}
,ctrl
{
	ctrl
}
,code
{
	pipe::default_code?:
	(pipe::default_code = std::make_shared<pipe::code>())
}
,model
{
	std::make_unique<pipe::model>
	(
		*const_cast<const gpt::model::decoder *>(gpt::model::default_model)
	)
}
,desc
{
	this->opts,
	this->ctrl,
	*this->model,
	*this->code,
}
{
	assert(aligned(opts, size_t(cl::data::gart_page_size)));
	assert(aligned(ctrl, size_t(cl::data::gart_page_size)));

	seed(*this, this->opts->seed);
}
catch(const std::exception &e)
{
	log::error
	{
		log, "Task ctor :%s", e.what()
	};

	throw;
}

ircd::gpt::task::~task()
noexcept
{
}

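// Convenience text-to-text entry point: tokenize the input string, run the
// token-level operator() to generate a continuation, then detokenize the
// newly generated tokens into the caller's output buffer. The fixed
// 1024-token scratch buffers bound the size of a single invocation.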
ircd::string_view
ircd::gpt::task::operator()(const mutable_buffer &out,
                            const string_view &in)
{
	u16 input_buf[1024];
	const auto input_tokens
	{
		gpt::vocab::tokenize(input_buf, in)
	};

	u16 output_buf[1024];
	const auto output_tokens
	{
		operator()(output_buf, input_tokens)
	};

	const auto output
	{
		gpt::vocab::detokenize(out, output_tokens)
	};

	return output;
}

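// Token-level generation. The prompt is appended to the control page's token
// buffer (token 628 is expanded into two 198 tokens, i.e. what appears to be
// a double-newline merge split into two newlines, matching the "\n\n"
// handling in samp::tokenize()), then a samp within a step within an epoch
// is spun until it reports completion; only the newly generated tokens are
// returned through the output view.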
ircd::vector_view<ircd::u16>
ircd::gpt::task::operator()(const vector_view<u16> &out,
                            const vector_view<const u16> &in)
{
	assert(this->opts);
	const auto &opts{*this->opts};

	assert(this->ctrl);
	auto &ctrl{*this->ctrl};

	size_t in_i(0);
	for(; in_i < in.size() && ctrl.count < opts.buffer_tokens; in_i++)
		if(in[in_i] == 628)
		{
			ctrl.token[ctrl.count++] = 198;
			ctrl.token[ctrl.count++] = 198;
		}
		else ctrl.token[ctrl.count++] = in[in_i];

	const auto in_count
	{
		ctrl.count
	};

	gpt::epoch epoch
	{
		*this,
	};

	gpt::step step
	{
		epoch
	};

	gpt::samp samp
	{
		step
	};

	bool halt {false}; do
	{
		halt = samp();
	}
	while(!halt);

	size_t out_i(0);
	for(; out_i < out.size() && in_count + out_i < ctrl.count; out_i++)
		out[out_i] = ctrl.token[in_count + out_i];

	return vector_view<u16>
	{
		out, out_i
	};
}

bool
ircd::gpt::task::operator()()
{
	gpt::epoch epoch
	{
		*this
	};

	while(!epoch())
		ctx::interruption_point();

	return done();
}

bool
ircd::gpt::task::done()
const noexcept
{
	return false;
}

///////////////////////////////////////////////////////////////////////////////
//
// epoch
//

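// Scratch space used by gpt::samp::tokenize() below: records the running
// token count at the end of each "\n\n"-delimited phrase so a sample can be
// truncated back to the last phrase that still fits the context window.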
namespace ircd::gpt
{
	static thread_local u16 marker alignas(64) [1024];
}

//
// epoch::epoch
//

ircd::gpt::epoch::epoch(gpt::task &task)
:task
{
	task
}
,desc
{
	task.desc
}
,opts
{
	*task.opts
}
,ctrl
{
	*task.ctrl
}
,id
{
	ctrl.clk.epoch
}
,start
{
	0
}
,stop
{
	std::min(start + uint(opts.batch_size), gpt::model::default_data.size())
}
,moment
{
	gpt::model::default_moment[0],
	gpt::model::default_moment[1],
}
{
	assert(task.opts);
	assert(task.ctrl);

	ctrl.clk.step = 0;
}

ircd::gpt::epoch::~epoch()
noexcept
{
	if(opts.debug & 0x80000000U)
		log_debug_prof(opts, ctrl, this->profile);
}

bool
ircd::gpt::epoch::operator()()
{
	gpt::step step
	{
		*this
	};

	while(!step())
		ctx::interruption_point();

	if(!step.backpropagate())
		throw error
		{
			"Failed to backprop."
		};

	return done();
}

bool
ircd::gpt::epoch::done()
const noexcept
{
	return ctrl.clk.epoch != id;
}

void
ircd::gpt::epoch::profile_accumulate(const pipe::prof &profile)
{
	for(size_t i(0); i < profile.ts.size(); ++i)
		for(size_t j(0); j < profile.phases; ++j)
			this->profile.ts[i][j] += profile.ts[i][j];
}

///////////////////////////////////////////////////////////////////////////////
//
// step::step
//

ircd::gpt::step::step(gpt::epoch &epoch)
:epoch
{
	epoch
}
,desc
{
	epoch.desc
}
,opts
{
	epoch.opts
}
,ctrl
{
	epoch.ctrl
}
,id
{
	ctrl.clk.step
}
,start
{
	ctrl.clk.step * opts.batch_size
}
{
	assert(opts.batch_size > 0);

	ctrl.clk.samp = 0;
	ctrl.hit = 0;
	ctrl.miss = 0;
	ctrl.target.ppl = {{0}};
	ctrl.target.loss = {{0}};
	ctrl.select.ppl = {{0}};
	ctrl.select.loss = {{0}};

	for(uint i(0); i < opts.labels; ++i)
	{
		ctrl.label[i].ppl = {{0}};
		ctrl.label[i].loss = {{0}};
	}
}

ircd::gpt::step::~step()
noexcept
{
	if(opts.debug & 0x40000000U)
		log_debug_prof(opts, ctrl, this->profile);
}

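// Host-side update pass run once per step. The mean of the target and
// selected-token losses is compared against the best target loss seen so
// far; on improvement the best values are latched, otherwise the update
// direction ("tack") flips for the next pass. When opts.alpha is zero the
// pass only logs and returns; without an improvement it returns false, which
// epoch::operator() treats as a failed backprop. Otherwise the live decoder
// is checkpointed, backprop() is invoked with the signed loss as its scale,
// and the parameter buffer is synced for the device.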
bool
ircd::gpt::step::backpropagate()
{
	const auto hit
	{
		ctrl.target.logit.token == ctrl.select.logit.token
	};

	const auto select_loss_mean
	{
		ctrl.select.loss.mean
	};

	const auto target_loss_mean
	{
		ctrl.target.loss.mean
	};

	const auto loss_mean
	{
		(target_loss_mean + select_loss_mean) / 2.0f
	};

	static float mean_best { 10000.0f }, target_mean_best { 10000.0f };
	static ulong hit_best;
	static bool tack, last_tack;
	last_tack = tack;

	const auto loss
	{
		loss_mean
	};

	const bool improve_global
	{
		target_loss_mean < target_mean_best
	};

	const bool improve
	{
		improve_global
	};

	if(improve)
		mean_best = loss,
		target_mean_best = target_loss_mean,
		hit_best = ctrl.hit;
	else
		tack = !tack;

	const auto grad
	{
		!tack? loss : -loss
	};

	const auto steps
	{
		(opts.training_steps + opts.validation_steps + opts.testing_steps) / opts.batch_size
	};

	const auto step
	{
		this->epoch.id * steps + this->id
	};

	log::logf
	{
		log, improve? log::level::INFO: log::level::ERROR,
		"epoch:%u step:%u completed range[%u -> %zu] dsid:%u target:%-10.7f select:%-10.7f loss:%-10.7f [ %10.7f ] hit:%u miss:%u",
		this->epoch.id,
		step,
		this->start,
		this->start + opts.batch_size,
		this->id * opts.batch_size + ctrl.clk.samp,
		target_loss_mean,
		select_loss_mean,
		loss,
		grad * opts.alpha,
		ctrl.hit,
		ctrl.miss,
	};

	if(!opts.alpha)
		return true;

	if(!improve)
		return false;

	cl::exec
	{
		desc.model->decode->master[0], std::memory_order_acq_rel
	};

	auto &model
	{
		*mutable_cast(desc.model->decode_const)
	};

	const mutable_buffer model_buffer
	{
		reinterpret_cast<char *>(&model),
		sizeof(gpt::model::decoder) * 3
	};

	const mutable_buffer checkpoint_buffer
	{
		reinterpret_cast<char *>(&model) + sizeof(gpt::model::decoder) * 3,
		sizeof(gpt::model::decoder) * 3
	};

	if(improve)
		copy(checkpoint_buffer, model_buffer);
	else
		copy(model_buffer, checkpoint_buffer);

	ircd::timer stopwatch;
	backprop(opts, step, grad, model, epoch.moment);
	allocator::sync(model_buffer);

	char pbuf[1][32];
	log::logf
	{
		log, improve? log::level::DEBUG: log::level::ERROR,
		"backpropagation step:%u lr:%-8.6f mean:%-10.7f$L hits:%-5u Tbest:%-10.7f$L Mbest:%-10.7f$L Hbest:%-5lu grad:{ %10.7f$L } %s",
		step,
		opts.alpha,
		loss_mean,
		ctrl.hit,
		target_mean_best,
		mean_best,
		hit_best,
		grad,
		pretty(pbuf[0], stopwatch.at<milliseconds>(), 1),
	};

	return true;
}

bool
ircd::gpt::step::operator()()
{
	gpt::samp samp
	{
		*this
	};

	while(!samp())
		ctx::interruption_point();

	return done();
}

bool
ircd::gpt::step::done()
const noexcept
{
	return ctrl.clk.step != id;
}

void
ircd::gpt::step::profile_accumulate(const pipe::prof &profile)
{
	for(size_t i(0); i < profile.ts.size(); ++i)
		for(size_t j(0); j < profile.phases; ++j)
			this->profile.ts[i][j] += profile.ts[i][j];

	epoch.profile_accumulate(profile);
}

///////////////////////////////////////////////////////////////////////////////
//
// samp::samp
//

ircd::gpt::samp::samp(gpt::step &step)
:step
{
	step
}
,desc
{
	step.desc
}
,opts
{
	step.opts
}
,ctrl
{
	step.ctrl
}
,id
{
	ctrl.clk.samp
}
,accept
{
	-1
}
,dispatch
{
	1
}
,cycle
{
	0
}
,tokens
{
	ctrl.count?:
	tokenize()
}
,count
{
	opts.limit < 0?
		std::min(std::abs(opts.limit), int(tokens)):
	opts.limit > 0?
		tokens:
		1U
}
{
	desc.cached = 0;

	ctrl.clk.cycle = cycle;
	ctrl.dispatch = dispatch;
	ctrl.accept = accept;
	ctrl.count = count;
	ctrl.tokens = tokens;
	ctrl.magic = 0xDEADBEEF;

	for(uint i(0); i < opts.labels; ++i)
	{
		ctrl.label[i].ppl = {{0}};
		ctrl.label[i].loss = {{0}};
	}

	assert(ctrl.count > 0);
	assert(ctrl.count < opts.context_tokens);
	assert(ctrl.count <= ctrl.tokens);

	if(opts.debug & 0x01)
		for(uint j(0); j < ctrl.count; ++j)
			log_debug_token(opts, ctrl, j);
}

ircd::gpt::samp::~samp()
noexcept
{
	if(run::level != run::level::RUN)
		return;

	if(!desc.ctrl.mapped)
	{
		cl::exec
		{
			desc.ctrl, std::memory_order_acq_rel
		};

		assert(ctrl.magic != 0xC7012C70UL);
		assert(ctrl.magic == 0xDEADBEEF);
	}

	if(opts.debug & 0x04)
		log_debug(opts, ctrl);

	if(opts.debug & 0x40)
		log_debug_labels(opts, ctrl);

	if(opts.debug & 0x20000000U)
		log_debug_prof(opts, ctrl, this->profile);
}

bool
ircd::gpt::samp::operator()()
{
	ctx::interruption_point();

	if(dispatche())
		return false;

	while(!queue.empty())
	{
		const unwind pop{[this]
		{
			queue.pop_front();
		}};

		if(evaluate(queue.front()))
			break;
	}

	return done();
}

bool
ircd::gpt::samp::done()
const noexcept
{
	return accept >= 0 || !dispatch;
}

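// Build the token sequence for this sample from the dataset. The JSON text
// at index (step.start + clk.samp) is unescaped, split on "\n\n", and each
// phrase is tokenized through gpt::vocab with two newline tokens (198)
// appended; per-phrase boundaries are saved in the thread_local marker[] so
// the sequence can be trimmed back to the last phrase fitting within
// opts.context_tokens, and the rest of the buffer is padded with 198.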
uint
ircd::gpt::samp::tokenize()
{
	const auto idx
	{
		step.start + ctrl.clk.samp
	};

	const gpt::model::text text
	{
		gpt::model::default_data.at(idx)
	};

	const json::string input
	{
		json::get<"text"_>(text)
	};

	thread_local char str_buf[16_KiB];
	const string_view str
	{
		json::unescape(str_buf, input)
	};

	assert(!empty(str));
	static const auto delim
	{
		"\n\n"_sv
	};

	const int phrases
	(
		ircd::token_count(str, delim)
	);

	uint count(0);
	int p(phrases);
	assert(p >= 0);

	if(startswith(str, delim))
	{
		ctrl.token[count++] = 198;
		ctrl.token[count++] = 198;
	}

	ircd::tokens(str, delim, [this, &count, &p, &phrases]
	(const string_view &phrase) noexcept -> bool
	{
		assert(!empty(phrase));
		const vector_view<u16> buf
		{
			ctrl.token + count, opts.buffer_tokens - count
		};

		const auto in
		{
			gpt::vocab::tokenize(buf, phrase)
		};

		if(count + size(in) + 2 > opts.context_tokens)
			return false;

		count += size(in);
		ctrl.token[count++] = 198;
		ctrl.token[count++] = 198;

		assert(p > 0);
		marker[--p] = count;
		return true;
	});

	for(assert(p >= 0); p < phrases; ++p)
		if(marker[p] <= opts.context_tokens)
			break;

	assert(p <= phrases);
	count = marker[p];

	for(uint i(count); i < opts.buffer_tokens; ++i)
		ctrl.token[i] = 198;

	if(!endswith(str, delim))
		count -= 2;

	assert(count > 0);
	assert(count <= opts.context_tokens);
	return count;
}

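// Queue one pipeline cycle if any dispatches remain: a pipe::cycle is
// emplaced, the cached token count is published to the descriptor, and the
// tokens/count/cycle/dispatch counters are advanced. Returns true when a
// cycle was actually dispatched so operator() yields before evaluating; the
// trailing 'e' in the name presumably just avoids clashing with the
// 'dispatch' counter member.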
bool
ircd::gpt::samp::dispatche()
{
	if(!dispatch)
		return false;

	assert(accept < 0);
	assert(count > 0);
	assert(tokens >= count);
	assert(cycle < count);
	assert(dispatch > 0);

	if(cycle == 0)
	{
		ctrl.prof.acquired = 0;
		ctrl.prof.released = prof::cycles();
	}

	queue.emplace_back(*this);
	desc.cached = tokens;
	tokens += count >= tokens;
	count += 1;
	cycle += 1;
	dispatch -= 1;
	return true;
}

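// Evaluate a completed pipeline cycle. The frame is acquired from the cycle
// and retire() reports whether this sample has finished dispatching; if not,
// evaluation returns early. Once a token has been accepted the frame is
// copied back into the control page (see the driver workaround below) and
// the clocks advance: samp always, step and epoch when their respective
// batch and step-count boundaries are crossed.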
bool
ircd::gpt::samp::evaluate(pipe::cycle &cycle)
{
	const auto &frame
	{
		acquire(cycle)
	};

	if(!retire(cycle, frame))
		return false;

	const uint
	batch_size = opts.batch_size,
	samps = opts.training_steps + opts.validation_steps + opts.testing_steps,
	steps = samps / batch_size;

	const bool
	accepting = accept >= 0,
	cycling = !accepting,
	sampling = accepting,
	stepping = sampling && (frame.clk.samp + 1) >= batch_size,
	epoching = stepping && (frame.clk.step + 1) >= steps;

	if(!accepting)
		return true;

	cl::exec
	{
		desc.ctrl, std::memory_order_acq_rel
	};

	// Workaround buggy drivers which flake on write-back to user ptrs.
	// We manually copy the last frame out to ctrl. On working systems ctrl
	// can be acquired by changing the fence to std::memory_order_acquire.
	memcpy(&ctrl, &frame, sizeof(gpt::ctrl));

	assert(ctrl.magic != 0xDEADBEEF);
	assert(ctrl.magic == 0xC7012C70UL);

	ctrl.prof.acquired = prof::cycles();
	ctrl.clk.cycle += cycling;
	ctrl.clk.samp += sampling;
	ctrl.clk.step += stepping;
	ctrl.clk.epoch += epoching;
	return true;
}

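// Fold a returned frame back into this sample: latch the device's accept and
// dispatch results, accumulate queue profiling when enabled, and emit any
// debug output selected by the opts.debug bitmask. Further dispatching is
// then masked off if the server is leaving the RUN level, an interruption is
// pending, or a token has been accepted; the return value indicates that no
// dispatches remain.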
bool
ircd::gpt::samp::retire(pipe::cycle &cycle,
                        const gpt::ctrl &frame)
{
	assert(accept < 0);
	accept = frame.accept;
	dispatch = frame.dispatch;

	if(cl::profile_queue)
	{
		const pipe::prof profile
		{
			cycle
		};

		if(opts.debug & 0x10000000U)
			log_debug_prof(opts, frame, profile);

		profile_accumulate(profile);
	}

	if(opts.debug & 0x02)
		log_debug(opts, frame);

	if(opts.debug & 0x20)
		log_debug_labels(opts, frame);

	if(opts.debug & 0x10)
		log_debug_topn(opts, frame);

	if(opts.debug & 0x200)
		log_debug_attns_top(opts, frame);

	dispatch &= boolmask<uint>(ircd::run::level == run::level::RUN);
	dispatch &= boolmask<uint>(!ctx::interruption_requested());
	dispatch &= boolmask<uint>(accept < 0);
	const bool finished
	{
		dispatch == 0
	};

	return finished;
}

void
ircd::gpt::samp::profile_accumulate(const pipe::prof &profile)
{
	for(size_t i(0); i < profile.ts.size(); ++i)
		for(size_t j(0); j < profile.phases; ++j)
			this->profile.ts[i][j] += profile.ts[i][j];

	step.profile_accumulate(profile);
}

///////////////////////////////////////////////////////////////////////////////
//
// ctrl
//

ircd::string_view
ircd::gpt::debug_top(const mutable_buffer &out,
                     const opts &opts,
                     const ctrl &ctrl,
                     const uint i)
{
	thread_local char buf[2][256];

	assert(opts.top_n > i);
	const auto &top
	{
		ctrl.top[i]
	};

	return fmt::sprintf
	{
		out, "%s T%02d %s",
		vocab::debug(buf[0], top.token, 1),
		i,
		debug(buf[1], opts, top),
	};
}

ircd::string_view
ircd::gpt::debug_label(const mutable_buffer &out,
                       const opts &opts,
                       const ctrl &ctrl,
                       const uint i,
                       const uint fmt)
{
	thread_local char buf[2][256];

	assert(opts.labels > i);
	const auto &label
	{
		ctrl.label[i]
	};

	return fmt::sprintf
	{
		out, "%s L%02d %s",
		vocab::debug(buf[0], label.logit.token, 1),
		i,
		debug(buf[1], opts, label, fmt),
	};
}

ircd::string_view
ircd::gpt::debug_attn(const mutable_buffer &out,
                      const opts &opts,
                      const ctrl &ctrl,
                      const uint ti)
{
	thread_local char buf[4][256];
	assert(ti < ctrl.count);

	memset(buf[1], 0x0, sizeof(buf[1]));
	for(uint i(0); i < opts.layers; ++i)
	{
		const auto f{[&](const auto &a) { return a == ti; }};
		if(std::none_of(ctrl.attn[i], ctrl.attn[i] + opts.attn_rank, f))
			continue;

		strlcat{buf[1], fmt::sprintf
		{
			buf[2], " %1x[", uint(i)
		}};

		for(uint j(0); j < opts.attn_rank; ++j)
			if(ctrl.attn[i][j] == ti)
				strlcat{buf[1], fmt::sprintf
				{
					buf[2], "%1x", uint(j)
				}};

		strlcat{buf[1], "]"_sv};
	}

	return fmt::sprintf
	{
		out, "%s [%3u] <-%s",
		vocab::debug(buf[0], ctrl.token[ti], 1),
		ti,
		string_view{buf[1]},
	};
}

ircd::string_view
ircd::gpt::debug(const mutable_buffer &out,
                 const opts &opts,
                 const ctrl &ctrl)
{
	thread_local char
	buf[8][128],
	tmbuf[4][32];

	int top_idx {-1};
	for(uint i(0); i < opts.top_n; ++i)
		if(ctrl.top[i].token == ctrl.select.logit.token)
		{
			top_idx = i;
			break;
		}

	return fmt::sprintf
	{
		out, "%s %s %c T%02d %3u %6.2f%% %10.7f$L %c %s %s %s",
		vocab::debug(buf[0], ctrl.select.logit.token, 1),
		debug(buf[1], opts, ctrl.select),
		ctrl.target.logit.token == ctrl.top[0].token? '=' : ' ',
		top_idx,
		ctrl.hit,
		(ctrl.hit / float(ctrl.hit + ctrl.miss)) * 100.0f,
		ctrl.target.loss.mean - ctrl.select.loss.mean,
		ctrl.target.logit.token == ctrl.select.logit.token? '=' : ' ',
		debug(buf[2], opts, ctrl.target),
		vocab::debug(buf[3], ctrl.target.logit.token, 1),
		debug(buf[4], opts, ctrl.prof),
	};
}

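// Render one label's statistics. The bar expresses log2(65536) minus the
// label's mean loss as a percentage of log2(opts.logits), drawn with '|'
// characters capped at 66 columns; fmt==1 selects the long format including
// the percentage and bar, otherwise only the logit summary and mean loss are
// printed.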
ircd::string_view
ircd::gpt::debug(const mutable_buffer &out,
                 const opts &opts,
                 const ctrl_label &label,
                 const uint fmt)
{
	thread_local char buf[64], bar[128];

	const auto diff
	{
		log2f(65536) - label.loss.mean
	};

	const auto pct
	{
		(diff / log2f(opts.logits)) * 100.0f
	};

	const auto barsz
	{
		std::min(uint(pct), std::min(66U, uint(sizeof(bar) - 1)))
	};

	memset(bar, '|', barsz);
	bar[barsz] = '\0';

	return fmt::sprintf
	{
		out,
		fmt == 1?
			"%s %10.7f$La %6.2f%% %s":
			"%s %10.7f$La",
		debug(buf, opts, label.logit, fmt),
		label.loss.mean,
		pct,
		string_view{bar},
	};
}

ircd::string_view
ircd::gpt::debug(const mutable_buffer &out,
                 const opts &opts,
                 const ctrl_logit &logit,
                 const uint fmt)
{
	return fmt::sprintf
	{
		out, "%6.2f%% %10.7f$L %4.1f$P",
		logit.samax * 100.0f,
		+0.0f - logf(logit.samax),
		(1.0f - logit.samax) * log2f(opts.logits),
	};
}

ircd::string_view
ircd::gpt::debug(const mutable_buffer &out,
                 const opts &opts,
                 const ctrl_prof &prof)
{
	thread_local char buf[1][32];

	const auto kern_cycles
	{
		prof.finished - prof.entered
	};

	const auto host_cycles
	{
		prof.acquired - prof.released
	};

	return fmt::sprintf
	{
		out, "%s",
		kern_cycles > 0?
			pretty(buf[0], si(kern_cycles), 1):
		host_cycles > 0?
			pretty(buf[0], si(host_cycles), 1):
			string_view{},
	};
}

ircd::string_view
ircd::gpt::debug_head(const mutable_buffer &out,
                      const opts &opts,
                      const ctrl &ctrl)
{
	thread_local char head[64];

	assert(ctrl.count > 0);
	return fmt::sprintf
	{
		out, "%s[%4u]-%1u",
		debug_head(head, opts, ctrl.clk),
		ctrl.count - 1,
		ctrl.dispatch,
	};
}

ircd::string_view
ircd::gpt::debug_head(const mutable_buffer &out,
                      const opts &opts,
                      const ctrl_clk &clk)
{
	return fmt::sprintf
	{
		out, "%02u:%06u|%04u|%04u|%04u",
		clk.epoch,
		clk.step * opts.batch_size + clk.samp,
		clk.step,
		clk.samp,
		clk.cycle,
	};
}

ircd::string_view
ircd::gpt::debug_token(const mutable_buffer &out,
                       const opts &opts,
                       const ctrl &ctrl,
                       const uint fmt)
{
	assert(ctrl.count > 0);
	const auto pos
	{
		ctrl.count - 1
	};

	return debug_token_at(out, opts, ctrl, pos, fmt);
}

ircd::string_view
ircd::gpt::debug_token_at(const mutable_buffer &out,
                          const opts &opts,
                          const ctrl &ctrl,
                          const uint i,
                          const uint fmt)
{
	const auto &token
	{
		ctrl.token[i]
	};

	return vocab::debug(out, token, fmt);
}

///////////////////////////////////////////////////////////////////////////////
//
// opts
//

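// Default options. The decoder geometry below (12 layers, 12 attention
// heads, 768 embedding elements) matches the smallest GPT-2 release, with a
// 512-token context selected by default; the training defaults pair a batch
// of 32 with Adam-style beta/epsilon parameters and a small fixed learning
// rate (alpha).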
ircd_gpt_opts::ircd_gpt_opts()
noexcept
:seed
{
	1234567890UL
}
,top_k
{
	16
}
,top_p
{
	0.90f
}
,top_n
{
	0
}
,labels
{
	0
}
,frames
{
	8
}
,limit
{
	-1
}
,debug
{
	0x00
}
,accept
{
	{ 198, 198, ushort(-1), },
	{ 0, 0, 0, ushort(-1), },
	{ ushort(-1), },
	{ ushort(-1), },
}
,batch_size
{
	32
}
,training_steps
{
	250000
}
,validation_steps
{
	5000
}
,testing_steps
{
	5000
}
,alpha
{
	0.00002
}
,beta
{
	0.9f,
	0.999f,
}
,epsilon
{
	0.00001
}
,lambda
{
	0.5
}
,logits
{
	50256
}
,buffer_tokens
{
	1024 - 16 // XXX
}
,context_tokens
{
	512 // 1024
}
,layers
{
	12
}
,lanes
{
	4
}
,embed_elems
{
	768
}
,embed_width
{
	embed_elems / lanes
}
,attn_rank
{
	12
}
,attn_mult
{
	3
}
,attn_elems
{
	embed_elems * attn_mult
}
,attn_fcon_width
{
	attn_elems / lanes
}
,attn_fcon_height
{
	embed_elems / lanes
}
,attn_proj_width
{
	embed_elems / lanes
}
,attn_proj_height
{
	embed_elems / lanes
}
,attn_self_elems
{
	(uint(powl(context_tokens, 2)) / 2) * attn_rank
}
,ffnn_mult
{
	4
}
,ffnn_elems
{
	embed_elems * ffnn_mult
}
,ffnn_fcon_width
{
	ffnn_elems / lanes
}
,ffnn_fcon_height
{
	embed_elems / lanes
}
,ffnn_proj_width
{
	embed_elems / lanes
}
,ffnn_proj_height
{
	ffnn_elems / lanes
}
{
}