// Source: construct/ircd/gpt_pipe.cc
// Mirror of https://github.com/matrix-construct/construct (synced 2024-11-26 08:42:34 +01:00)
// Tensor Construct
//
// Copyright (C) Matrix Construct Developers, Authors & Contributors
// Copyright (C) 2016-2021 Jason Volk <jason@zemos.net>
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice is present in all copies. The
// full license for this software is available in the LICENSE file.
namespace ircd::gpt::pipe
{
static void handle_quit() noexcept;
2021-04-02 22:01:38 +02:00
extern const ircd::run::changed quit_handler;
2021-03-30 03:22:42 +02:00
}
/// Storage for the pipe's default code object; handle_quit() calls reset()
/// on it at shutdown.
decltype(ircd::gpt::pipe::default_code) ircd::gpt::pipe::default_code;
2022-10-07 00:05:52 +02:00
[[gnu::visibility("hidden")]]
decltype(ircd::gpt::pipe::quit_handler)
ircd::gpt::pipe::quit_handler
2021-03-30 03:22:42 +02:00
{
run::level::QUIT, handle_quit
2021-03-30 03:22:42 +02:00
};
[[gnu::cold]]
2021-03-30 03:22:42 +02:00
void
ircd::gpt::pipe::handle_quit()
2021-03-30 03:22:42 +02:00
noexcept
{
if constexpr(!IRCD_USE_OPENCL)
return;
const auto pending
{
cl::work::list.size()
};
2021-03-30 03:22:42 +02:00
if(pending)
log::warning
{
log, "Waiting for %zu pending tasks to leave the pipe...",
pending,
};
cl::sync();
2022-06-20 03:59:29 +02:00
ctx::yield();
pipe::default_code.reset();
2021-03-30 03:22:42 +02:00
}
//
2022-06-20 03:59:29 +02:00
// pipe::prof
2021-03-30 03:22:42 +02:00
//
2022-06-20 03:59:29 +02:00
/// Format the whole profile into `buf`, one line per stage, each line
/// terminated by '\n'. Returns the view of what was written, as reported by
/// the window_buffer.
ircd::string_view
ircd::gpt::pipe::debug(const mutable_buffer &buf,
                       const prof &p)
{
	window_buffer window(buf);
	for(uint i(0); i < p.stages; ++i)
		window([&p, &i](auto buf)
		{
			// Return the byte count consumed by this stage's line and
			// its trailing newline.
			size_t ret(0);
			ret += consume(buf, size(debug(buf, p, i)));
			ret += consume(buf, copy(buf, '\n'));
			return ret;
		});

	return window.completed();
}
/// Format a single line for stage `i` into `buf`: the two elements of
/// info[i] (printed with %-20s and %04x) followed by the five phase
/// durations of ts[i] rendered through pretty(). `i` must be a valid index
/// into both p.info and p.ts (asserted).
ircd::string_view
ircd::gpt::pipe::debug(const mutable_buffer &buf,
                       const prof &p,
                       const size_t &i)
{
	using phase = prof::phase;

	assert(i < p.info.size());
	assert(i < p.ts.size());

	// One scratch buffer per pretty() call; all five results are consumed
	// by the same sprintf so they cannot share storage.
	char tbuf[5][32];
	return fmt::sprintf
	{
		buf, "%-20s %04x [ %10s %10s %10s %10s %10s ]",
		std::get<0>(p.info[i]),
		std::get<1>(p.info[i]),
		pretty(tbuf[0], p.ts[i][phase::QUEUE], 1),
		pretty(tbuf[1], p.ts[i][phase::SUBMIT], 1),
		pretty(tbuf[2], p.ts[i][phase::START], 1),
		pretty(tbuf[3], p.ts[i][phase::END], 1),
		pretty(tbuf[4], p.ts[i][phase::COMPLETE], 1),
	};
}
2021-04-02 22:01:38 +02:00
2022-06-20 03:59:29 +02:00
//
// prof::prof
//
/// Per-stage info tuples; filled once by init_info().
decltype(ircd::gpt::pipe::prof::info)
ircd::gpt::pipe::prof::info;

/// Per-stage name storage handed to each stage's name() by init_info().
decltype(ircd::gpt::pipe::prof::name)
ircd::gpt::pipe::prof::name;

/// Set by the first prof(const cycle &) construction so init_info() runs
/// only once.
[[gnu::visibility("hidden")]]
decltype(ircd::gpt::pipe::prof::init)
ircd::gpt::pipe::prof::init;
/// Default construction: every phase of every stage is set to a zero
/// duration.
ircd::gpt::pipe::prof::prof()
noexcept
{
	for(uint s(0); s < stages; ++s)
	{
		auto &row(ts[s]);
		for(uint ph(0); ph < phases; ++ph)
			row[ph] = 0ns;
	}
}
ircd::gpt::pipe::prof::prof(const cycle &c)
{
if(!std::exchange(init, true))
init_info(c);
if(!cl::profile_queue)
return;
for(uint i(0); i < stages; ++i)
2021-04-02 22:01:38 +02:00
{
2022-06-20 03:59:29 +02:00
const cl::work::prof p
2021-04-02 22:01:38 +02:00
{
2022-06-20 03:59:29 +02:00
c.stage[i]
};
2021-04-02 22:01:38 +02:00
ts[i][phase::QUEUE] = p[phase::SUBMIT] > p[phase::QUEUE]?
p[phase::SUBMIT] - p[phase::QUEUE]: 0ns;
ts[i][phase::SUBMIT] = p[phase::START] > p[phase::SUBMIT]?
p[phase::START] - p[phase::SUBMIT]: 0ns;
ts[i][phase::START] = p[phase::END] > p[phase::START]?
p[phase::END] - p[phase::START]: 0ns;
ts[i][phase::END] = p[phase::END] > p[phase::QUEUE]?
p[phase::END] - p[phase::QUEUE]: 0ns;
ts[i][phase::COMPLETE] = p[phase::COMPLETE] > p[phase::QUEUE]?
p[phase::COMPLETE] - p[phase::QUEUE]: 0ns;
2021-04-02 22:01:38 +02:00
}
2022-06-20 03:59:29 +02:00
}
2021-04-02 22:01:38 +02:00
2022-06-20 03:59:29 +02:00
[[gnu::visibility("hidden")]]
void
ircd::gpt::pipe::prof::init_info(const cycle &c)
{
static_assert
(
name.size() >= stages
);
2021-04-02 22:01:38 +02:00
2022-06-20 03:59:29 +02:00
for(uint i(0); i < stages; ++i)
info[i] = info_type
{
c.stage[i].name(name[i]),
c.stage[i].type(),
};
2021-04-02 22:01:38 +02:00
}
2022-06-20 03:59:29 +02:00
///////////////////////////////////////////////////////////////////////////////
//
// pipe::cycle
//
/// Wait for the cycle to finish and return a reference to the control page
/// found in the cycle's output frame. The returned reference points into
/// cycle.desc.frame[cycle.frame].
const ircd::gpt::ctrl &
ircd::gpt::pipe::acquire(cycle &cycle)
{
	// Some tail stages may not be active each cycle; search backwards for
	// the last stage which has a handle.
	const auto last_exec
	{
		std::find_if(std::rbegin(cycle.stage), std::rend(cycle.stage), []
		(const auto &work)
		{
			return work.handle;
		})
	};

	assert(last_exec != std::rend(cycle.stage));

	// Block here for results; the ircd::ctx will yield.
	last_exec->wait();

	// Get the pointer to the output buffer.
	const auto ctrl
	{
		reinterpret_cast<const gpt::ctrl *>(cycle.desc.frame[cycle.frame].ptr())
	};

	// Check the output is a valid control page and return to user.
	assert(ctrl);
	assert(ctrl->magic != 0xDEADBEEF);
	assert(ctrl->magic == 0xC7012C70UL);
	return *ctrl;
}
//
2022-06-20 03:59:29 +02:00
// pipe::cycle::cycle
2021-03-30 03:22:42 +02:00
//
2022-06-20 03:59:29 +02:00
ircd::gpt::pipe::cycle::cycle(gpt::samp &samp)
2021-03-30 03:22:42 +02:00
:desc
{
2022-06-20 03:59:29 +02:00
samp.desc
2021-03-30 03:22:42 +02:00
}
2022-06-20 03:59:29 +02:00
,tick
2021-03-30 03:22:42 +02:00
{
2022-06-20 03:59:29 +02:00
samp.cycle
2021-03-30 03:22:42 +02:00
}
2022-06-20 03:59:29 +02:00
,count
{
2022-06-20 03:59:29 +02:00
samp.count
}
2022-06-20 03:59:29 +02:00
,tokens
{
2022-06-20 03:59:29 +02:00
samp.tokens
}
2022-06-20 03:59:29 +02:00
,cached
2021-03-30 03:22:42 +02:00
{
2022-06-20 03:59:29 +02:00
desc.cached
2021-03-30 03:22:42 +02:00
}
2022-06-20 03:59:29 +02:00
,frame
2021-03-30 03:22:42 +02:00
{
2022-06-20 03:59:29 +02:00
tick % samp.opts.frames
2021-03-30 03:22:42 +02:00
}
2022-06-20 03:59:29 +02:00
,range
2021-03-30 03:22:42 +02:00
{
samp.opts,
2022-06-20 03:59:29 +02:00
tick,
count,
tokens,
cached,
true,
((false) && gpt::model::cache_shared)
2021-03-30 03:22:42 +02:00
}
2022-06-20 03:59:29 +02:00
,stage
2021-03-30 03:22:42 +02:00
{
2022-06-20 03:59:29 +02:00
cl::exec // data
{
desc.opts, std::memory_order_release
},
cl::exec // data
{
desc.ctrl, std::memory_order_release
},
cl::exec // data
{
desc.frame[frame], std::memory_order_release
},
cl::exec // data
{
desc.model->decode->master[0], std::memory_order_release
},
cl::exec // Initial kernel
{
desc.alloc, range.alloc,
},
cl::exec // Initial cycle kernel
{
desc.enter, range.select,
2022-06-20 03:59:29 +02:00
},
cl::exec // Compute token and positional embeddings.
{
desc.lm_embed, range.embed,
},
// Forward Pass
cl::exec { desc.layer[0x00]->attn, range.attn },
cl::exec { desc.layer[0x00]->ffnn, range.ffnn },
cl::exec { desc.layer[0x01]->attn, range.attn },
cl::exec { desc.layer[0x01]->ffnn, range.ffnn },
cl::exec { desc.layer[0x02]->attn, range.attn },
cl::exec { desc.layer[0x02]->ffnn, range.ffnn },
cl::exec { desc.layer[0x03]->attn, range.attn },
cl::exec { desc.layer[0x03]->ffnn, range.ffnn },
cl::exec { desc.layer[0x04]->attn, range.attn },
cl::exec { desc.layer[0x04]->ffnn, range.ffnn },
cl::exec { desc.layer[0x05]->attn, range.attn },
cl::exec { desc.layer[0x05]->ffnn, range.ffnn },
cl::exec { desc.layer[0x06]->attn, range.attn },
cl::exec { desc.layer[0x06]->ffnn, range.ffnn },
cl::exec { desc.layer[0x07]->attn, range.attn },
cl::exec { desc.layer[0x07]->ffnn, range.ffnn },
cl::exec { desc.layer[0x08]->attn, range.attn },
cl::exec { desc.layer[0x08]->ffnn, range.ffnn },
cl::exec { desc.layer[0x09]->attn, range.attn },
cl::exec { desc.layer[0x09]->ffnn, range.ffnn },
cl::exec { desc.layer[0x0a]->attn, range.attn },
cl::exec { desc.layer[0x0a]->ffnn, range.ffnn },
cl::exec { desc.layer[0x0b]->attn, range.attn },
cl::exec { desc.layer[0x0b]->ffnn, range.fffnn },
cl::exec // Final normalization.
{
desc.lm_norm, range.fnorm
},
cl::exec // Compute language logits.
{
desc.lm_logit, range.logit
},
cl::exec // Statistics on the logits.
{
desc.lm_logsm, range.logsm
},
cl::exec // Select next token.
{
desc.lm_select, range.select
},
cl::exec // Backpropagate
{
desc.lm_prop_embed, range.prop_embed
},
cl::exec // Backpropagate
{
desc.lm_prop_norm, range.prop_norm
},
// Backward Pass
cl::exec { desc.layer[0x0b]->prop_ffnn, range.prop_ffnn },
cl::exec { desc.layer[0x0b]->prop_attn, range.prop_attn },
cl::exec { desc.layer[0x0a]->prop_ffnn, range.prop_ffnn },
cl::exec { desc.layer[0x0a]->prop_attn, range.prop_attn },
cl::exec { desc.layer[0x09]->prop_ffnn, range.prop_ffnn },
cl::exec { desc.layer[0x09]->prop_attn, range.prop_attn },
cl::exec { desc.layer[0x08]->prop_ffnn, range.prop_ffnn },
cl::exec { desc.layer[0x08]->prop_attn, range.prop_attn },
cl::exec { desc.layer[0x07]->prop_ffnn, range.prop_ffnn },
cl::exec { desc.layer[0x07]->prop_attn, range.prop_attn },
cl::exec { desc.layer[0x06]->prop_ffnn, range.prop_ffnn },
cl::exec { desc.layer[0x06]->prop_attn, range.prop_attn },
cl::exec { desc.layer[0x05]->prop_ffnn, range.prop_ffnn },
cl::exec { desc.layer[0x05]->prop_attn, range.prop_attn },
cl::exec { desc.layer[0x04]->prop_ffnn, range.prop_ffnn },
cl::exec { desc.layer[0x04]->prop_attn, range.prop_attn },
cl::exec { desc.layer[0x03]->prop_ffnn, range.prop_ffnn },
cl::exec { desc.layer[0x03]->prop_attn, range.prop_attn },
cl::exec { desc.layer[0x02]->prop_ffnn, range.prop_ffnn },
cl::exec { desc.layer[0x02]->prop_attn, range.prop_attn },
cl::exec { desc.layer[0x01]->prop_ffnn, range.prop_ffnn },
cl::exec { desc.layer[0x01]->prop_attn, range.prop_attn },
cl::exec { desc.layer[0x00]->prop_ffnn, range.prop_ffnn },
cl::exec { desc.layer[0x00]->prop_attn, range.prop_attn },
cl::exec // Final kernel
{
desc.leave[frame], range.select
},
cl::exec // Frame out
{
desc.frame[frame], std::memory_order_consume
},
2021-03-30 03:22:42 +02:00
}
{
}
2022-06-20 03:59:29 +02:00
/// Out-of-line destructor; no work beyond destroying the members.
ircd::gpt::pipe::cycle::~cycle()
noexcept
{
}
2022-06-20 03:59:29 +02:00
//////////////////////////////////////////////////////////////////////////////
//
// pipe::range
//
ircd::gpt::pipe::range::range(const opts &opts,
const uint tick,
2022-06-20 03:59:29 +02:00
const uint count,
const uint tokens,
const uint cached,
const bool fwd,
const bool rev)
noexcept
:_full
{
{ opts.embed_width * (tokens - cached) },
{ opts.embed_width },
{ opts.embed_width * cached },
2021-03-30 03:22:42 +02:00
}
2022-06-20 03:59:29 +02:00
,_last
2021-03-30 03:22:42 +02:00
{
{ opts.embed_width * 1 },
{ opts.embed_width },
{ opts.embed_width * (count - 1) },
2021-03-30 03:22:42 +02:00
}
2022-06-20 03:59:29 +02:00
,alloc
2021-03-30 03:22:42 +02:00
{
{ opts.embed_width * (tick == 0) },
{ opts.embed_width },
2021-03-30 03:22:42 +02:00
}
2022-06-20 03:59:29 +02:00
,embed
2021-03-30 03:22:42 +02:00
{
2022-06-20 03:59:29 +02:00
fwd?
_full:
cl::kern::range{},
2021-04-02 22:01:38 +02:00
}
2022-06-20 03:59:29 +02:00
,attn
{
2022-06-20 03:59:29 +02:00
fwd?
_full:
cl::kern::range{},
}
2022-06-20 03:59:29 +02:00
,ffnn
{
2022-06-20 03:59:29 +02:00
fwd?
_full:
cl::kern::range{},
}
2022-06-20 03:59:29 +02:00
,fffnn
2021-04-02 22:01:38 +02:00
{
2022-06-20 03:59:29 +02:00
fwd && tokens > count?
_full:
fwd?
_last:
cl::kern::range{},
2021-03-30 03:22:42 +02:00
}
2022-06-20 03:59:29 +02:00
,fnorm
2021-04-02 22:01:38 +02:00
{
2022-06-20 03:59:29 +02:00
fwd?
_last:
cl::kern::range{},
2021-04-02 22:01:38 +02:00
}
,logit
2021-04-02 22:01:38 +02:00
{
{ pad_to(opts.logits, 64L) * int(fwd) },
{ 64L },
2021-04-02 22:01:38 +02:00
}
2022-06-20 03:59:29 +02:00
,logsm
{
{ 256UL * int(fwd) },
{ 256UL },
}
2022-06-20 03:59:29 +02:00
,select
2021-03-30 03:22:42 +02:00
{
{ 256UL * int(fwd) },
{ 256UL },
2021-03-30 03:22:42 +02:00
}
2022-06-20 03:59:29 +02:00
,prop_embed
2021-03-30 03:22:42 +02:00
{
{ opts.embed_width * int(rev) },
{ opts.embed_width },
2021-03-30 03:22:42 +02:00
}
2022-06-20 03:59:29 +02:00
,prop_norm
2021-03-30 03:22:42 +02:00
{
{ opts.embed_width * int(rev) },
{ opts.embed_width },
2021-03-30 03:22:42 +02:00
}
2022-06-20 03:59:29 +02:00
,prop_attn
2021-03-30 03:22:42 +02:00
{
{ opts.embed_width * int(rev) },
{ opts.embed_width },
2021-03-30 03:22:42 +02:00
}
2022-06-20 03:59:29 +02:00
,prop_ffnn
2021-03-30 03:22:42 +02:00
{
{ opts.embed_width * int(rev) },
{ opts.embed_width },
2021-03-30 03:22:42 +02:00
}
{
}
2022-06-20 03:59:29 +02:00
///////////////////////////////////////////////////////////////////////////////
2021-03-30 03:22:42 +02:00
//
// pipe::desc
//
2022-06-20 03:59:29 +02:00
ircd::gpt::pipe::desc::desc(const gpt::opts *const &opt,
gpt::ctrl *const &ctrl_,
pipe::model &model,
pipe::code &code)
2021-03-30 03:22:42 +02:00
:model
{
&model
}
,code
{
&code
}
2022-06-20 03:59:29 +02:00
,opts
{
2022-06-20 03:59:29 +02:00
const_buffer
{
reinterpret_cast<const char *>(opt),
sizeof(gpt::opts)
},
}
,ctrl
{
mutable_buffer
2022-06-20 03:59:29 +02:00
{
reinterpret_cast<char *>(ctrl_),
2022-06-20 03:59:29 +02:00
sizeof(gpt::ctrl)
},
}
,master
{
0
2022-06-20 03:59:29 +02:00
+ opt->layers * opt->context_tokens * opt->attn_elems * sizeof(float)
+ opt->context_tokens * opt->embed_elems * sizeof(float)
+ 65536 * sizeof(float)
2022-06-20 03:59:29 +02:00
+ opt->layers * opt->attn_self_elems * sizeof(float)
}
2022-06-20 03:59:29 +02:00
,state
2021-03-30 03:22:42 +02:00
{
master,
{
2022-06-20 03:59:29 +02:00
opt->layers * opt->context_tokens * opt->attn_elems * sizeof(float),
off_t(0),
2022-06-20 03:59:29 +02:00
}
}
,accum
{
master,
{
opt->context_tokens * opt->embed_elems * sizeof(float),
state.offset() + off_t(state.size()),
},
2021-03-30 03:22:42 +02:00
}
2021-04-02 22:01:38 +02:00
,logit
2021-03-30 03:22:42 +02:00
{
master,
{
65536 * sizeof(float),
accum.offset() + off_t(accum.size()),
},
2021-03-30 03:22:42 +02:00
}
2022-06-20 03:59:29 +02:00
,attns
{
master,
{
2022-06-20 03:59:29 +02:00
opt->layers * opt->attn_self_elems * sizeof(float),
logit.offset() + off_t(logit.size())
}
}
2022-06-20 03:59:29 +02:00
,frame
2021-03-30 03:22:42 +02:00
{
2022-06-20 03:59:29 +02:00
// size, read, write, }, // idx
{ sizeof(gpt::ctrl), true, false, }, // 0
{ sizeof(gpt::ctrl), true, false, }, // 1
{ sizeof(gpt::ctrl), true, false, }, // 2
{ sizeof(gpt::ctrl), true, false, }, // 3
{ sizeof(gpt::ctrl), true, false, }, // 4
{ sizeof(gpt::ctrl), true, false, }, // 5
{ sizeof(gpt::ctrl), true, false, }, // 6
{ sizeof(gpt::ctrl), true, false, }, // 7
2021-03-30 03:22:42 +02:00
}
2022-06-20 03:59:29 +02:00
,alloc
2021-03-30 03:22:42 +02:00
{
2022-06-20 03:59:29 +02:00
code,
"ircd_gpt_alloc",
model.decode->master[0],
master,
opts,
ctrl,
frame[0],
frame[1],
frame[2],
frame[3],
frame[4],
frame[5],
frame[6],
frame[7],
}
,enter
{
code,
"ircd_gpt_enter",
model.decode->master[0],
state,
master,
opts,
ctrl,
2021-03-30 03:22:42 +02:00
}
2021-04-02 22:01:38 +02:00
,lm_embed
2021-03-30 03:22:42 +02:00
{
code,
2021-04-02 22:01:38 +02:00
"ircd_gpt_lm_embed",
2021-03-30 03:22:42 +02:00
ctrl,
opts,
accum,
2022-06-20 03:59:29 +02:00
model.decode->embed.pos.param,
model.decode->embed.token.param,
2021-03-30 03:22:42 +02:00
}
2021-04-02 22:01:38 +02:00
,lm_norm
2021-03-30 03:22:42 +02:00
{
code,
2021-04-02 22:01:38 +02:00
"ircd_gpt_lm_norm",
2021-03-30 03:22:42 +02:00
ctrl,
opts,
accum,
2022-06-20 03:59:29 +02:00
model.decode->embed.norm.bias.param,
model.decode->embed.norm.weight.param,
2021-03-30 03:22:42 +02:00
}
2021-04-02 22:01:38 +02:00
,lm_logit
2021-03-30 03:22:42 +02:00
{
code,
2021-04-02 22:01:38 +02:00
"ircd_gpt_lm_logit",
2021-03-30 03:22:42 +02:00
ctrl,
opts,
logit,
accum,
2022-06-20 03:59:29 +02:00
model.decode->embed.pos.param,
model.decode->embed.token.param,
2021-03-30 03:22:42 +02:00
}
,lm_logsm
{
code,
"ircd_gpt_lm_logsm",
ctrl,
opts,
logit,
}
2021-04-02 22:01:38 +02:00
,lm_select
2021-03-30 03:22:42 +02:00
{
code,
2021-04-02 22:01:38 +02:00
"ircd_gpt_lm_select",
2021-03-30 03:22:42 +02:00
ctrl,
opts,
logit,
2022-06-20 03:59:29 +02:00
attns,
2021-03-30 03:22:42 +02:00
}
2022-06-20 03:59:29 +02:00
,lm_prop_embed
2021-04-17 20:59:30 +02:00
{
code,
2022-06-20 03:59:29 +02:00
"ircd_gpt_lm_embed_prop",
2021-04-17 20:59:30 +02:00
ctrl,
opts,
2022-06-20 03:59:29 +02:00
model.decode->embed.pos.param,
model.decode->embed.pos.moment[0],
model.decode->embed.pos.moment[1],
model.decode->embed.token.param,
model.decode->embed.token.moment[0],
model.decode->embed.token.moment[1],
2021-04-17 20:59:30 +02:00
}
2022-06-20 03:59:29 +02:00
,lm_prop_norm
2021-04-17 20:59:30 +02:00
{
code,
2022-06-20 03:59:29 +02:00
"ircd_gpt_norm_prop",
2021-04-17 20:59:30 +02:00
ctrl,
opts,
2022-06-20 03:59:29 +02:00
model.decode->embed.norm.bias.param,
model.decode->embed.norm.bias.moment[0],
model.decode->embed.norm.bias.moment[1],
model.decode->embed.norm.weight.param,
model.decode->embed.norm.weight.moment[0],
model.decode->embed.norm.weight.moment[1],
}
,leave
{
{
code,
"ircd_gpt_leave",
model.decode->master[0],
state,
master,
opts,
ctrl,
frame[0],
},
{
code,
"ircd_gpt_leave",
model.decode->master[0],
state,
master,
opts,
ctrl,
frame[1],
},
{
code,
"ircd_gpt_leave",
model.decode->master[0],
state,
master,
opts,
ctrl,
frame[2],
},
{
code,
"ircd_gpt_leave",
model.decode->master[0],
state,
master,
opts,
ctrl,
frame[3],
},
{
code,
"ircd_gpt_leave",
model.decode->master[0],
state,
master,
opts,
ctrl,
frame[4],
},
{
code,
"ircd_gpt_leave",
model.decode->master[0],
state,
master,
opts,
ctrl,
frame[5],
},
{
code,
"ircd_gpt_leave",
model.decode->master[0],
state,
master,
opts,
ctrl,
frame[6],
},
{
code,
"ircd_gpt_leave",
model.decode->master[0],
state,
master,
opts,
ctrl,
frame[7],
},
2021-04-17 20:59:30 +02:00
}
2021-04-02 22:01:38 +02:00
,layer
{
2022-06-20 03:59:29 +02:00
std::make_unique<struct desc::layer>(*this, opt, 0x00),
std::make_unique<struct desc::layer>(*this, opt, 0x01),
std::make_unique<struct desc::layer>(*this, opt, 0x02),
std::make_unique<struct desc::layer>(*this, opt, 0x03),
std::make_unique<struct desc::layer>(*this, opt, 0x04),
std::make_unique<struct desc::layer>(*this, opt, 0x05),
std::make_unique<struct desc::layer>(*this, opt, 0x06),
std::make_unique<struct desc::layer>(*this, opt, 0x07),
std::make_unique<struct desc::layer>(*this, opt, 0x08),
std::make_unique<struct desc::layer>(*this, opt, 0x09),
std::make_unique<struct desc::layer>(*this, opt, 0x0a),
std::make_unique<struct desc::layer>(*this, opt, 0x0b),
2021-04-02 22:01:38 +02:00
}
2021-03-30 03:22:42 +02:00
{
}
//
// pipe::desc::layer
//
ircd::gpt::pipe::desc::layer::layer(pipe::desc &desc,
2022-06-20 03:59:29 +02:00
const gpt::opts *const &opts,
const uint laynum)
:state
{
desc.state,
{
2022-06-20 03:59:29 +02:00
opts->context_tokens * opts->attn_elems * sizeof(float),
laynum * opts->context_tokens * opts->attn_elems * sizeof(float),
}
}
2022-06-20 03:59:29 +02:00
,attns
{
desc.attns,
{
opts->attn_self_elems * sizeof(float),
laynum * opts->attn_self_elems * sizeof(float),
}
}
,attn
2021-03-30 03:22:42 +02:00
{
*desc.code,
2021-04-02 22:01:38 +02:00
"ircd_gpt_attn_fcon",
2021-03-30 03:22:42 +02:00
desc.ctrl,
desc.opts,
2022-06-20 03:59:29 +02:00
laynum,
state,
2021-03-30 03:22:42 +02:00
desc.accum,
2022-06-20 03:59:29 +02:00
desc.model->decode->layer[laynum].attn.norm.bias.param,
desc.model->decode->layer[laynum].attn.norm.weight.param,
desc.model->decode->layer[laynum].attn.fcon.bias.param,
desc.model->decode->layer[laynum].attn.fcon.weight.param,
2021-03-30 03:22:42 +02:00
}
2022-06-20 03:59:29 +02:00
,ffnn
2021-03-30 03:22:42 +02:00
{
*desc.code,
2021-04-02 22:01:38 +02:00
"ircd_gpt_coil",
2021-03-30 03:22:42 +02:00
desc.ctrl,
desc.opts,
2022-06-20 03:59:29 +02:00
laynum,
2021-03-30 03:22:42 +02:00
desc.accum,
2022-06-20 03:59:29 +02:00
attns,
state,
2022-06-20 03:59:29 +02:00
desc.model->decode->layer[laynum].attn.proj.bias.param,
desc.model->decode->layer[laynum].attn.proj.weight.param,
desc.model->decode->layer[laynum].ffnn.norm.bias.param,
desc.model->decode->layer[laynum].ffnn.norm.weight.param,
desc.model->decode->layer[laynum].ffnn.fcon.bias.param,
desc.model->decode->layer[laynum].ffnn.fcon.weight.param,
desc.model->decode->layer[laynum].ffnn.proj.bias.param,
desc.model->decode->layer[laynum].ffnn.proj.weight.param,
2021-04-17 20:59:30 +02:00
}
2022-06-20 03:59:29 +02:00
,prop_attn
2021-04-17 20:59:30 +02:00
{
*desc.code,
"ircd_gpt_coil_prop_attn",
desc.ctrl,
desc.opts,
2022-06-20 03:59:29 +02:00
desc.model->decode->layer[laynum].attn.norm.bias.param,
desc.model->decode->layer[laynum].attn.norm.bias.moment[0],
desc.model->decode->layer[laynum].attn.norm.bias.moment[1],
desc.model->decode->layer[laynum].attn.norm.weight.param,
desc.model->decode->layer[laynum].attn.norm.weight.moment[0],
desc.model->decode->layer[laynum].attn.norm.weight.moment[1],
desc.model->decode->layer[laynum].attn.fcon.bias.param,
desc.model->decode->layer[laynum].attn.fcon.bias.moment[0],
desc.model->decode->layer[laynum].attn.fcon.bias.moment[1],
desc.model->decode->layer[laynum].attn.fcon.weight.param,
desc.model->decode->layer[laynum].attn.fcon.weight.moment[0],
desc.model->decode->layer[laynum].attn.fcon.weight.moment[1],
desc.model->decode->layer[laynum].attn.proj.bias.param,
desc.model->decode->layer[laynum].attn.proj.bias.moment[0],
desc.model->decode->layer[laynum].attn.proj.bias.moment[1],
desc.model->decode->layer[laynum].attn.proj.weight.param,
desc.model->decode->layer[laynum].attn.proj.weight.moment[0],
desc.model->decode->layer[laynum].attn.proj.weight.moment[1],
}
,prop_ffnn
2021-04-17 20:59:30 +02:00
{
*desc.code,
"ircd_gpt_coil_prop_ffnn",
desc.ctrl,
desc.opts,
2022-06-20 03:59:29 +02:00
desc.model->decode->layer[laynum].ffnn.norm.bias.param,
desc.model->decode->layer[laynum].ffnn.norm.bias.moment[0],
desc.model->decode->layer[laynum].ffnn.norm.bias.moment[1],
desc.model->decode->layer[laynum].ffnn.norm.weight.param,
desc.model->decode->layer[laynum].ffnn.norm.weight.moment[0],
desc.model->decode->layer[laynum].ffnn.norm.weight.moment[1],
desc.model->decode->layer[laynum].ffnn.fcon.bias.param,
desc.model->decode->layer[laynum].ffnn.fcon.bias.moment[0],
desc.model->decode->layer[laynum].ffnn.fcon.bias.moment[1],
desc.model->decode->layer[laynum].ffnn.fcon.weight.param,
desc.model->decode->layer[laynum].ffnn.fcon.weight.moment[0],
desc.model->decode->layer[laynum].ffnn.fcon.weight.moment[1],
desc.model->decode->layer[laynum].ffnn.proj.bias.param,
desc.model->decode->layer[laynum].ffnn.proj.bias.moment[0],
desc.model->decode->layer[laynum].ffnn.proj.bias.moment[1],
desc.model->decode->layer[laynum].ffnn.proj.weight.param,
desc.model->decode->layer[laynum].ffnn.proj.weight.moment[0],
desc.model->decode->layer[laynum].ffnn.proj.weight.moment[1],
2021-03-30 03:22:42 +02:00
}
{
}
///////////////////////////////////////////////////////////////////////////////
//
// model
//
//
// pipe::model::model
//
2022-06-20 03:59:29 +02:00
/// Construct the pipe model over a mutable decoder. Both the const and the
/// mutable pointers record the decoder's address, and the pipe-side decoder
/// is built with the mutable overload.
ircd::gpt::pipe::model::model(gpt::model::decoder &decoder)
:decode_const
{
	std::addressof(decoder)
}
,decode_mutable
{
	std::addressof(decoder)
}
,decode
{
	std::make_unique<model::decoder>(decoder)
}
{
}
2022-06-20 03:59:29 +02:00
ircd::gpt::pipe::model::model(const gpt::model::decoder &decoder)
:decode_const
2021-03-30 03:22:42 +02:00
{
2022-06-20 03:59:29 +02:00
std::addressof(decoder)
2021-03-30 03:22:42 +02:00
}
2022-06-20 03:59:29 +02:00
,decode
2021-03-30 03:22:42 +02:00
{
2022-06-20 03:59:29 +02:00
std::make_unique<model::decoder>(decoder)
2021-03-30 03:22:42 +02:00
}
{
}
/// Out-of-line destructor; members are destroyed with no additional work.
ircd::gpt::pipe::model::~model()
noexcept = default;
//
// pipe::model::decoder
//
ircd::gpt::pipe::model::decoder::decoder(gpt::model::decoder &decoder)
:master
{
// params
{
2022-06-20 03:59:29 +02:00
mutable_buffer
{
2022-06-20 03:59:29 +02:00
reinterpret_cast<char *>(&decoder) + sizeof(gpt::model::decoder) * 0,
sizeof(gpt::model::decoder)
}
},
// first moment
{
2022-06-20 03:59:29 +02:00
mutable_buffer
{
reinterpret_cast<char *>(&decoder) + sizeof(gpt::model::decoder) * 1,
sizeof(gpt::model::decoder)
}
},
// second moment
{
2022-06-20 03:59:29 +02:00
mutable_buffer
{
reinterpret_cast<char *>(&decoder) + sizeof(gpt::model::decoder) * 2,
sizeof(gpt::model::decoder)
}
},
}
2022-06-20 03:59:29 +02:00
,layer
{
{ master, sizeof(gpt::model::block) * 0x00, decoder.layer[0x00], 0x00, },
{ master, sizeof(gpt::model::block) * 0x01, decoder.layer[0x01], 0x01, },
{ master, sizeof(gpt::model::block) * 0x02, decoder.layer[0x02], 0x02, },
{ master, sizeof(gpt::model::block) * 0x03, decoder.layer[0x03], 0x03, },
{ master, sizeof(gpt::model::block) * 0x04, decoder.layer[0x04], 0x04, },
{ master, sizeof(gpt::model::block) * 0x05, decoder.layer[0x05], 0x05, },
{ master, sizeof(gpt::model::block) * 0x06, decoder.layer[0x06], 0x06, },
{ master, sizeof(gpt::model::block) * 0x07, decoder.layer[0x07], 0x07, },
{ master, sizeof(gpt::model::block) * 0x08, decoder.layer[0x08], 0x08, },
{ master, sizeof(gpt::model::block) * 0x09, decoder.layer[0x09], 0x09, },
{ master, sizeof(gpt::model::block) * 0x0a, decoder.layer[0x0a], 0x0a, },
{ master, sizeof(gpt::model::block) * 0x0b, decoder.layer[0x0b], 0x0b, },
}
2022-06-20 03:59:29 +02:00
,embed
{
master,
2022-06-20 03:59:29 +02:00
off_t(offsetof(gpt::model::decoder, embed)),
decoder.embed,
}
{
}
2021-03-30 03:22:42 +02:00
ircd::gpt::pipe::model::decoder::decoder(const gpt::model::decoder &decoder)
2021-04-02 22:01:38 +02:00
:master
{
// params
2021-04-02 22:01:38 +02:00
{
2022-06-20 03:59:29 +02:00
const_buffer
{
2022-06-20 03:59:29 +02:00
reinterpret_cast<const char *>(&decoder),
sizeof(gpt::model::decoder)
}
},
2021-04-02 22:01:38 +02:00
}
2022-06-20 03:59:29 +02:00
,layer
2021-04-02 22:01:38 +02:00
{
2022-06-20 03:59:29 +02:00
{ master, off_t(offsetof(gpt::model::decoder, layer[0x00])), decoder.layer[0x00], 0x00, },
{ master, off_t(offsetof(gpt::model::decoder, layer[0x01])), decoder.layer[0x01], 0x01, },
{ master, off_t(offsetof(gpt::model::decoder, layer[0x02])), decoder.layer[0x02], 0x02, },
{ master, off_t(offsetof(gpt::model::decoder, layer[0x03])), decoder.layer[0x03], 0x03, },
{ master, off_t(offsetof(gpt::model::decoder, layer[0x04])), decoder.layer[0x04], 0x04, },
{ master, off_t(offsetof(gpt::model::decoder, layer[0x05])), decoder.layer[0x05], 0x05, },
{ master, off_t(offsetof(gpt::model::decoder, layer[0x06])), decoder.layer[0x06], 0x06, },
{ master, off_t(offsetof(gpt::model::decoder, layer[0x07])), decoder.layer[0x07], 0x07, },
{ master, off_t(offsetof(gpt::model::decoder, layer[0x08])), decoder.layer[0x08], 0x08, },
{ master, off_t(offsetof(gpt::model::decoder, layer[0x09])), decoder.layer[0x09], 0x09, },
{ master, off_t(offsetof(gpt::model::decoder, layer[0x0a])), decoder.layer[0x0a], 0x0a, },
{ master, off_t(offsetof(gpt::model::decoder, layer[0x0b])), decoder.layer[0x0b], 0x0b, },
2021-03-30 03:22:42 +02:00
}
2022-06-20 03:59:29 +02:00
,embed
2021-03-30 03:22:42 +02:00
{
2021-04-02 22:01:38 +02:00
master,
2022-06-20 03:59:29 +02:00
off_t(offsetof(gpt::model::decoder, embed)),
decoder.embed,
2021-03-30 03:22:42 +02:00
}
{
}
/// Out-of-line destructor; members are destroyed with no additional work.
ircd::gpt::pipe::model::decoder::~decoder()
noexcept = default;
//
2022-06-20 03:59:29 +02:00
// pipe::model::embed
//
2022-06-20 03:59:29 +02:00
/// Bind the embed section of a mutable decoder.
///
/// @param master  array of master buffers forwarded to each member.
/// @param offset  byte offset of the embed section within the decoder;
///                member offsets are added to it via offsetof().
/// @param embed   host-side embed structure whose fields are wrapped.
ircd::gpt::pipe::model::embed::embed(cl::data *const master,
                                     const off_t offset,
                                     gpt::model::embed &embed)
:norm
{
	master,
	offset + off_t(offsetof(gpt::model::embed, norm)) + off_t(offsetof(gpt::model::norm, bias)),
	mutable_buffer{embed.norm.bias.elem},
	offset + off_t(offsetof(gpt::model::embed, norm)) + off_t(offsetof(gpt::model::norm, weight)),
	mutable_buffer{embed.norm.weight.elem},
}
,pos
{
	master,
	offset + off_t(offsetof(gpt::model::embed, pos)),
	mutable_buffer{embed.pos}
}
,token
{
	master,
	offset + off_t(offsetof(gpt::model::embed, token)),
	mutable_buffer{embed.token}
}
{
}
2022-06-20 03:59:29 +02:00
/// Bind the embed section of a read-only decoder. Same layout arithmetic as
/// the mutable overload, but the fields are wrapped as const buffers.
///
/// @param master  array of master buffers forwarded to each member.
/// @param offset  byte offset of the embed section within the decoder.
/// @param embed   host-side embed structure whose fields are wrapped.
ircd::gpt::pipe::model::embed::embed(cl::data *const master,
                                     const off_t offset,
                                     const gpt::model::embed &embed)
:norm
{
	master,
	offset + off_t(offsetof(gpt::model::embed, norm)) + off_t(offsetof(gpt::model::norm, bias)),
	const_buffer{embed.norm.bias.elem},
	offset + off_t(offsetof(gpt::model::embed, norm)) + off_t(offsetof(gpt::model::norm, weight)),
	const_buffer{embed.norm.weight.elem},
}
,pos
{
	master,
	offset + off_t(offsetof(gpt::model::embed, pos)),
	const_buffer{embed.pos}
}
,token
{
	master,
	offset + off_t(offsetof(gpt::model::embed, token)),
	const_buffer{embed.token}
}
{
}
2021-03-30 03:22:42 +02:00
//
// pipe::model::block
//
/// Bind one mutable transformer block: its attn and ffnn sections are
/// located at `offset` plus their offsetof() within gpt::model::block.
///
/// @param master  array of master buffers forwarded to attn and ffnn.
/// @param offset  byte offset of this block within the decoder.
/// @param block   host-side block structure.
/// @param layer   layer index; not used by this constructor.
ircd::gpt::pipe::model::block::block(cl::data *const master,
                                     const off_t offset,
                                     gpt::model::block &block,
                                     const size_t layer)
:attn
{
	master,
	offset + off_t(offsetof(gpt::model::block, attn)),
	block.attn,
}
,ffnn
{
	master,
	offset + off_t(offsetof(gpt::model::block, ffnn)),
	block.ffnn,
}
{
}
/// Bind one read-only transformer block: its attn and ffnn sections are
/// located at `offset` plus their offsetof() within gpt::model::block.
///
/// @param master  array of master buffers forwarded to attn and ffnn.
/// @param offset  byte offset of this block within the decoder.
/// @param block   host-side block structure.
/// @param layer   layer index; not used by this constructor.
ircd::gpt::pipe::model::block::block(cl::data *const master,
                                     const off_t offset,
                                     const gpt::model::block &block,
                                     const size_t layer)
:attn
{
	master,
	offset + off_t(offsetof(gpt::model::block, attn)),
	block.attn,
}
,ffnn
{
	master,
	offset + off_t(offsetof(gpt::model::block, ffnn)),
	block.ffnn,
}
{
}
2021-03-30 03:22:42 +02:00
//
// pipe::model::ffnn
//
/// Bind the feed-forward section of a mutable block. Each tensor (norm,
/// fcon, proj) receives the offset and a mutable buffer for its bias and
/// for its weight.
///
/// @param master  array of master buffers forwarded to each tensor.
/// @param offset  byte offset of the ffnn section within the decoder.
/// @param ffnn    host-side ffnn structure whose fields are wrapped.
ircd::gpt::pipe::model::ffnn::ffnn(cl::data *const master,
                                   const off_t offset,
                                   gpt::model::ffnn &ffnn)
:norm
{
	master,
	offset + off_t(offsetof(gpt::model::ffnn, norm)) + off_t(offsetof(gpt::model::norm, bias)),
	mutable_buffer{ffnn.norm.bias.elem},
	offset + off_t(offsetof(gpt::model::ffnn, norm)) + off_t(offsetof(gpt::model::norm, weight)),
	mutable_buffer{ffnn.norm.weight.elem},
}
,fcon
{
	master,
	offset + off_t(offsetof(gpt::model::ffnn, fcon_bias)),
	mutable_buffer{ffnn.fcon_bias.fcon},
	offset + off_t(offsetof(gpt::model::ffnn, fcon_weight)),
	mutable_buffer{ffnn.fcon_weight},
}
,proj
{
	master,
	offset + off_t(offsetof(gpt::model::ffnn, proj_bias)),
	mutable_buffer{ffnn.proj_bias.elem},
	offset + off_t(offsetof(gpt::model::ffnn, proj_weight)),
	mutable_buffer{ffnn.proj_weight},
}
{
}
/// Bind the feed-forward section of a read-only block. Same layout
/// arithmetic as the mutable overload, with const buffers.
///
/// @param master  array of master buffers forwarded to each tensor.
/// @param offset  byte offset of the ffnn section within the decoder.
/// @param ffnn    host-side ffnn structure whose fields are wrapped.
ircd::gpt::pipe::model::ffnn::ffnn(cl::data *const master,
                                   const off_t offset,
                                   const gpt::model::ffnn &ffnn)
:norm
{
	master,
	offset + off_t(offsetof(gpt::model::ffnn, norm)) + off_t(offsetof(gpt::model::norm, bias)),
	const_buffer{ffnn.norm.bias.elem},
	offset + off_t(offsetof(gpt::model::ffnn, norm)) + off_t(offsetof(gpt::model::norm, weight)),
	const_buffer{ffnn.norm.weight.elem},
}
,fcon
{
	master,
	offset + off_t(offsetof(gpt::model::ffnn, fcon_bias)),
	const_buffer{ffnn.fcon_bias.fcon},
	offset + off_t(offsetof(gpt::model::ffnn, fcon_weight)),
	const_buffer{ffnn.fcon_weight},
}
,proj
{
	master,
	offset + off_t(offsetof(gpt::model::ffnn, proj_bias)),
	const_buffer{ffnn.proj_bias.elem},
	offset + off_t(offsetof(gpt::model::ffnn, proj_weight)),
	const_buffer{ffnn.proj_weight},
}
{
}
//
// pipe::model::attn
//
/// Bind the attention section of a mutable block. Each tensor (norm, fcon,
/// proj) receives the offset and a mutable buffer for its bias and for its
/// weight.
///
/// @param master  array of master buffers forwarded to each tensor.
/// @param offset  byte offset of the attn section within the decoder.
/// @param attn    host-side attn structure whose fields are wrapped.
ircd::gpt::pipe::model::attn::attn(cl::data *const master,
                                   const off_t offset,
                                   gpt::model::attn &attn)
:norm
{
	master,
	offset + off_t(offsetof(gpt::model::attn, norm)) + off_t(offsetof(gpt::model::norm, bias)),
	mutable_buffer{attn.norm.bias.elem},
	offset + off_t(offsetof(gpt::model::attn, norm)) + off_t(offsetof(gpt::model::norm, weight)),
	mutable_buffer{attn.norm.weight.elem},
}
,fcon
{
	master,
	offset + off_t(offsetof(gpt::model::attn, fcon_bias)),
	mutable_buffer{attn.fcon_bias.fcon},
	offset + off_t(offsetof(gpt::model::attn, fcon_weight)),
	mutable_buffer{attn.fcon_weight},
}
,proj
{
	master,
	offset + off_t(offsetof(gpt::model::attn, proj_bias)),
	mutable_buffer{attn.proj_bias.elem},
	offset + off_t(offsetof(gpt::model::attn, proj_weight)),
	mutable_buffer{attn.proj_weight},
}
{
}
/// Bind the attention section of a read-only block. Same layout arithmetic
/// as the mutable overload, with const buffers.
///
/// @param master  array of master buffers forwarded to each tensor.
/// @param offset  byte offset of the attn section within the decoder.
/// @param attn    host-side attn structure whose fields are wrapped.
ircd::gpt::pipe::model::attn::attn(cl::data *const master,
                                   const off_t offset,
                                   const gpt::model::attn &attn)
:norm
{
	master,
	offset + off_t(offsetof(gpt::model::attn, norm)) + off_t(offsetof(gpt::model::norm, bias)),
	const_buffer{attn.norm.bias.elem},
	offset + off_t(offsetof(gpt::model::attn, norm)) + off_t(offsetof(gpt::model::norm, weight)),
	const_buffer{attn.norm.weight.elem},
}
,fcon
{
	master,
	offset + off_t(offsetof(gpt::model::attn, fcon_bias)),
	const_buffer{attn.fcon_bias.fcon},
	offset + off_t(offsetof(gpt::model::attn, fcon_weight)),
	const_buffer{attn.fcon_weight},
}
,proj
{
	master,
	offset + off_t(offsetof(gpt::model::attn, proj_bias)),
	const_buffer{attn.proj_bias.elem},
	offset + off_t(offsetof(gpt::model::attn, proj_weight)),
	const_buffer{attn.proj_weight},
}
{
}
//
// pipe::model::tensor
//
/// Bind a bias/weight pair over mutable host memory. Each of the two
/// matrices is constructed from `master`, its own offset and its buffer.
///
/// @param master         array of master buffers forwarded to each matrix.
/// @param bias_offset    byte offset of the bias within the decoder.
/// @param bias           host buffer of the bias.
/// @param weight_offset  byte offset of the weight within the decoder.
/// @param weight         host buffer of the weight.
ircd::gpt::pipe::model::tensor::tensor(cl::data *const master,
                                       const off_t bias_offset,
                                       const mutable_buffer &bias,
                                       const off_t weight_offset,
                                       const mutable_buffer &weight)
:bias
{
	master,
	bias_offset,
	bias,
}
,weight
{
	master,
	weight_offset,
	weight,
}
{
}
/// Bind a bias/weight pair over read-only host memory. Each of the two
/// matrices is constructed from `master`, its own offset and its buffer.
///
/// @param master         array of master buffers forwarded to each matrix.
/// @param bias_offset    byte offset of the bias within the decoder.
/// @param bias           host buffer of the bias.
/// @param weight_offset  byte offset of the weight within the decoder.
/// @param weight         host buffer of the weight.
ircd::gpt::pipe::model::tensor::tensor(cl::data *const master,
                                       const off_t bias_offset,
                                       const const_buffer &bias,
                                       const off_t weight_offset,
                                       const const_buffer &weight)
:bias
{
	master,
	bias_offset,
	bias,
}
,weight
{
	master,
	weight_offset,
	weight,
}
{
}
2021-04-02 22:01:38 +02:00
//
// pipe::model::matrix
2021-04-02 22:01:38 +02:00
//
ircd::gpt::pipe::model::matrix::matrix(cl::data *const master,
const off_t offset,
const mutable_buffer &param)
:param
2021-04-02 22:01:38 +02:00
{
master[0],
{
2022-06-20 03:59:29 +02:00
pad_to(ircd::size(param), 4096),
offset,
},
2021-04-02 22:01:38 +02:00
}
,moment
2021-04-02 22:01:38 +02:00
{
// first moment
{
master[1],
{
2022-06-20 03:59:29 +02:00
pad_to(ircd::size(param), 4096),
offset,
},
},
2021-04-02 22:01:38 +02:00
// second moment
{
master[2],
{
2022-06-20 03:59:29 +02:00
pad_to(ircd::size(param), 4096),
offset,
},
},
2021-04-02 22:01:38 +02:00
}
{
2022-06-20 03:59:29 +02:00
assert(aligned(offset, 4096));
2021-04-02 22:01:38 +02:00
}
/// Bind one parameter matrix over read-only host memory. Only `param` is
/// built (from master[0]); the moments are not initialized by this
/// overload. The size is the byte size of `param` padded up to a multiple
/// of 4096.
///
/// @param master  array of master buffers; only master[0] is used.
/// @param offset  byte offset within master[0]; must be 4096-aligned
///                (asserted).
/// @param param   host buffer; only its size is read here.
ircd::gpt::pipe::model::matrix::matrix(cl::data *const master,
                                       const off_t offset,
                                       const const_buffer &param)
:param
{
	master[0],
	{
		pad_to(ircd::size(param), 4096),
		offset,
	},
}
{
	assert(aligned(offset, 4096));
}