0
0
Fork 0
mirror of https://github.com/matrix-construct/construct synced 2024-12-27 16:04:15 +01:00

ircd::gpt: Add adaptive moment state to pipe model; move task prelim defs.

This commit is contained in:
Jason Volk 2021-04-17 11:53:50 -07:00
parent 4ed6582472
commit 14a1561cad
3 changed files with 439 additions and 86 deletions

View file

@ -13,6 +13,7 @@
struct ircd::gpt::pipe::model struct ircd::gpt::pipe::model
{ {
struct matrix;
struct tensor; struct tensor;
struct norm; struct norm;
struct proj; struct proj;
@ -27,56 +28,99 @@ struct ircd::gpt::pipe::model
std::unique_ptr<model::language> embed; std::unique_ptr<model::language> embed;
model(const gpt::model::decoder &, const gpt::model::embed &); model(const gpt::model::decoder &, const gpt::model::embed &);
model(gpt::model::decoder &, gpt::model::embed &);
~model() noexcept; ~model() noexcept;
}; };
struct ircd::gpt::pipe::model::matrix
{
cl::data
param, // Weights
moment[2]; // Adaptive moment estimations
matrix(cl::data *, const off_t, const const_buffer &param);
matrix(cl::data *, const off_t, const mutable_buffer &param);
};
struct ircd::gpt::pipe::model::tensor struct ircd::gpt::pipe::model::tensor
{ {
cl::data bias, weight; matrix
bias,
weight;
tensor(const const_buffer &bias, const const_buffer &weight); tensor(cl::data *, const off_t, const const_buffer &bias, const const_buffer &weight);
tensor(cl::data &, const off_t, const const_buffer &bias, const const_buffer &weight); tensor(cl::data *, const off_t, const mutable_buffer &bias, const mutable_buffer &weight);
}; };
struct ircd::gpt::pipe::model::attn struct ircd::gpt::pipe::model::attn
{ {
tensor norm, fcon, proj; tensor
cl::data mask; norm,
fcon,
proj;
attn(cl::data &, const off_t, const gpt::model::norm &, const gpt::model::attn &); cl::data
mask;
attn(cl::data *, const off_t, const gpt::model::norm &, const gpt::model::attn &);
attn(cl::data *, const off_t, gpt::model::norm &, gpt::model::attn &);
}; };
struct ircd::gpt::pipe::model::ffnn struct ircd::gpt::pipe::model::ffnn
{ {
tensor norm, fcon, proj; tensor
norm,
fcon,
proj;
ffnn(cl::data &, const off_t, const gpt::model::norm &, const gpt::model::ffnn &); ffnn(cl::data *, const off_t, const gpt::model::norm &, const gpt::model::ffnn &);
ffnn(cl::data *, const off_t, gpt::model::norm &, gpt::model::ffnn &);
}; };
struct ircd::gpt::pipe::model::block struct ircd::gpt::pipe::model::block
{ {
cl::data master; // Single layer memory roots
cl::data
master[3];
// Layer units
model::attn attn; model::attn attn;
model::ffnn ffnn; model::ffnn ffnn;
block(cl::data &, const off_t, const gpt::model::block &, const size_t); block(cl::data *, const off_t, const gpt::model::block &, const size_t);
block(cl::data *, const off_t, gpt::model::block &, const size_t);
block(const gpt::model::block &, const size_t); block(const gpt::model::block &, const size_t);
block(gpt::model::block &, const size_t);
}; };
struct ircd::gpt::pipe::model::decoder struct ircd::gpt::pipe::model::decoder
{ {
cl::data master; // Combined-layer memory roots
model::block block[12]; cl::data
master[3];
// Layer blocks
model::block
block[12];
// Final norm
tensor norm; tensor norm;
decoder(const gpt::model::decoder &); decoder(const gpt::model::decoder &);
decoder(gpt::model::decoder &);
~decoder() noexcept; ~decoder() noexcept;
}; };
struct ircd::gpt::pipe::model::language struct ircd::gpt::pipe::model::language
{ {
cl::data pos, token; cl::data
master[3];
matrix
pos,
token;
language(const gpt::model::embed &); language(const gpt::model::embed &);
language(gpt::model::embed &);
~language() noexcept; ~language() noexcept;
}; };

View file

@ -628,3 +628,50 @@ ircd::gpt::gelu(f32x4 &out,
{ {
out = 0.5 * in * (1.0 + tanh(in * f32(0.7978845608) * (1.0 + f32(0.044715) * in * in))); out = 0.5 * in * (1.0 + tanh(in * f32(0.7978845608) * (1.0 + f32(0.044715) * in * in)));
} }
//
// gpt::task
//
// Bind the task to its options and control block, then reset the control
// block: the whole structure is zeroed, the first two random-state words take
// the configured seed and the last two are set to -1UL.
ircd::gpt::task::task(const gpt::opts *const opts,
                      struct ircd_gpt_task *const ctrl)
:opts{opts}
,ctrl{ctrl}
{
	// Clear every field before seeding anything.
	memset(this->ctrl, 0x0, sizeof(ircd_gpt_task));

	auto &state(this->ctrl->rand);
	const auto &seed(this->opts->seed);

	state[0] = seed;
	state[1] = seed;
	state[2] = -1UL;
	state[3] = -1UL;
}
// The destructor body is empty; nothing is done with opts or ctrl here.
ircd::gpt::task::~task()
noexcept
{}
//
// hypercall
//
// Map a hypercall code to its short name. Codes other than the three handled
// below yield the "??????" placeholder.
ircd::string_view
ircd::gpt::reflect(const enum ircd_gpt_hypercall code)
noexcept
{
	if(code == IRCD_GPT_ACCEPT)
		return "ACCEPT";

	if(code == IRCD_GPT_ECOMPLETE)
		return "ECOMPLETE";

	if(code == IRCD_GPT_ETOKENS)
		return "ETOKENS";

	return "??????";
}

View file

@ -585,6 +585,19 @@ ircd::gpt::pipe::desc::layer::layer(pipe::desc &desc,
// pipe::model::model // pipe::model::model
// //
// Build the pipe model from non-const decoder and embedding parameters. Both
// are forwarded as non-const lvalues to the constructors of the heap-allocated
// decoder and language components.
ircd::gpt::pipe::model::model(gpt::model::decoder &decoder,
                              gpt::model::embed &embed)
:decode(std::make_unique<model::decoder>(decoder))
,embed(std::make_unique<model::language>(embed))
{}
ircd::gpt::pipe::model::model(const gpt::model::decoder &decoder, ircd::gpt::pipe::model::model(const gpt::model::decoder &decoder,
const gpt::model::embed &embed) const gpt::model::embed &embed)
:decode :decode
@ -607,16 +620,57 @@ noexcept
// pipe::model::language // pipe::model::language
// //
ircd::gpt::pipe::model::language::language(const gpt::model::embed &embed) ircd::gpt::pipe::model::language::language(gpt::model::embed &embed)
:pos :master
{ {
sizeof(embed.pos), // params
const_buffer{embed.pos} {
sizeof(embed), mutable_buffer
{
reinterpret_cast<char *>(&embed),
sizeof(embed),
}
},
// first moment
{
sizeof(embed), mutable_buffer{},
},
// second moment
{
sizeof(embed), mutable_buffer{},
},
}
,pos
{
master, 0, mutable_buffer{embed.pos}
} }
,token ,token
{ {
sizeof(embed.token), master, sizeof(embed.pos), mutable_buffer{embed.token}
const_buffer{embed.token} }
{
}
ircd::gpt::pipe::model::language::language(const gpt::model::embed &embed)
:master
{
{
sizeof(embed), const_buffer
{
reinterpret_cast<const char *>(&embed),
sizeof(embed),
}
},
}
,pos
{
master, 0, const_buffer{embed.pos}
}
,token
{
master, sizeof(embed.pos), const_buffer{embed.token}
} }
{ {
} }
@ -630,14 +684,66 @@ noexcept
// pipe::model::decoder // pipe::model::decoder
// //
// Construct the decoder pipe from mutable parameters. Three master regions are
// created, each sized for twelve blocks plus the final norm: the first is
// given a buffer spanning decoder.layer and decoder.f, the other two (first
// and second moment) are given empty buffers. Every block and the final norm
// are then constructed against those masters at their byte offsets.
ircd::gpt::pipe::model::decoder::decoder(gpt::model::decoder &decoder)
:master
{
	// params
	{
		sizeof(gpt::model::block) * 12 + sizeof(gpt::model::norm),
		mutable_buffer
		{
			reinterpret_cast<char *>(decoder.layer),
			sizeof(decoder.layer) + sizeof(decoder.f)
		}
	},

	// first moment
	{
		sizeof(gpt::model::block) * 12 + sizeof(gpt::model::norm),
		mutable_buffer{}
	},

	// second moment
	{
		sizeof(gpt::model::block) * 12 + sizeof(gpt::model::norm),
		mutable_buffer{}
	},
}
,block
{
	// { masters, byte offset of the layer, layer parameters, layer index }
	{ master, sizeof(gpt::model::block) * 0,  decoder.layer[0],  0  },
	{ master, sizeof(gpt::model::block) * 1,  decoder.layer[1],  1  },
	{ master, sizeof(gpt::model::block) * 2,  decoder.layer[2],  2  },
	{ master, sizeof(gpt::model::block) * 3,  decoder.layer[3],  3  },
	{ master, sizeof(gpt::model::block) * 4,  decoder.layer[4],  4  },
	{ master, sizeof(gpt::model::block) * 5,  decoder.layer[5],  5  },
	{ master, sizeof(gpt::model::block) * 6,  decoder.layer[6],  6  },
	{ master, sizeof(gpt::model::block) * 7,  decoder.layer[7],  7  },
	{ master, sizeof(gpt::model::block) * 8,  decoder.layer[8],  8  },
	{ master, sizeof(gpt::model::block) * 9,  decoder.layer[9],  9  },
	{ master, sizeof(gpt::model::block) * 10, decoder.layer[10], 10 },
	{ master, sizeof(gpt::model::block) * 11, decoder.layer[11], 11 },
}
,norm
{
	// The final norm follows the twelve blocks.
	master,
	off_t(sizeof(gpt::model::block) * 12),
	mutable_buffer{decoder.f.bias},
	mutable_buffer{decoder.f.weight},
}
{}
ircd::gpt::pipe::model::decoder::decoder(const gpt::model::decoder &decoder) ircd::gpt::pipe::model::decoder::decoder(const gpt::model::decoder &decoder)
:master :master
{ {
// params
{
sizeof(gpt::model::block) * 12 + sizeof(gpt::model::norm), const_buffer sizeof(gpt::model::block) * 12 + sizeof(gpt::model::norm), const_buffer
{ {
reinterpret_cast<const char *>(decoder.layer), reinterpret_cast<const char *>(decoder.layer),
sizeof(decoder.layer) + sizeof(decoder.f) sizeof(decoder.layer) + sizeof(decoder.f)
} }
},
} }
,block ,block
{ {
@ -673,14 +779,58 @@ noexcept
// pipe::model::block // pipe::model::block
// //
// Construct a single layer from mutable parameters with its own master
// regions. Three regions of sizeof(block) are created: the first is given a
// buffer over the block itself, the other two (first and second moment) are
// given empty buffers. The `layer` argument is not used in this body.
ircd::gpt::pipe::model::block::block(gpt::model::block &block,
                                     const size_t layer)
:master
{
	// params
	{
		sizeof(block),
		mutable_buffer
		{
			reinterpret_cast<char *>(&block),
			sizeof(block)
		}
	},

	// first moment
	{
		sizeof(block),
		mutable_buffer{}
	},

	// second moment
	{
		sizeof(block),
		mutable_buffer{}
	},
}
,attn
{
	// The attention unit starts at the beginning of the region.
	master,
	0,
	block.ln1,
	block.attn,
}
,ffnn
{
	// The feed-forward unit is placed after ln1 and attn.
	master,
	off_t(sizeof(block.ln1) + sizeof(block.attn)),
	block.ln2,
	block.ffnn,
}
{}
ircd::gpt::pipe::model::block::block(const gpt::model::block &block, ircd::gpt::pipe::model::block::block(const gpt::model::block &block,
const size_t layer) const size_t layer)
:master :master
{ {
// params
{
sizeof(block), const_buffer sizeof(block), const_buffer
{ {
reinterpret_cast<const char *>(&block), sizeof(block) reinterpret_cast<const char *>(&block), sizeof(block)
} }
}
} }
,attn ,attn
{ {
@ -692,14 +842,35 @@ ircd::gpt::pipe::model::block::block(const gpt::model::block &block,
,ffnn ,ffnn
{ {
master, master,
sizeof(block.ln1) + sizeof(block.attn), off_t(sizeof(block.ln1) + sizeof(block.attn)),
block.ln2, block.ln2,
block.ffnn, block.ffnn,
} }
{ {
} }
// Offset-based constructor (mutable parameters): no master region is set up
// here; the attention and feed-forward units are constructed against the
// caller-supplied `master` regions starting at byte `offset`. The `layer`
// argument is not used in this body.
ircd::gpt::pipe::model::block::block(cl::data &master, ircd::gpt::pipe::model::block::block(cl::data *const master,
const off_t offset,
gpt::model::block &block,
const size_t layer)
:attn
{
master,
offset,
block.ln1,
block.attn,
}
,ffnn
{
master,
// The feed-forward unit is placed after ln1 and attn within the block.
offset + off_t(sizeof(block.ln1) + sizeof(block.attn)),
block.ln2,
block.ffnn,
}
{
}
ircd::gpt::pipe::model::block::block(cl::data *const master,
const off_t offset, const off_t offset,
const gpt::model::block &block, const gpt::model::block &block,
const size_t layer) const size_t layer)
@ -724,7 +895,45 @@ ircd::gpt::pipe::model::block::block(cl::data &master,
// pipe::model::ffnn // pipe::model::ffnn
// //
// Feed-forward unit over mutable parameters. Three tensors are constructed
// against the caller-supplied `master` regions, at consecutive byte offsets
// starting from `offset`: the layer norm, the fully-connected layer, then the
// projection.
ircd::gpt::pipe::model::ffnn::ffnn(cl::data &master, ircd::gpt::pipe::model::ffnn::ffnn(cl::data *const master,
const off_t offset,
gpt::model::norm &norm,
gpt::model::ffnn &ffnn)
:norm
{
master,
offset,
mutable_buffer{norm.bias},
mutable_buffer{norm.weight},
}
,fcon
{
master,
// fc parameters follow the norm
offset + off_t(sizeof(norm)),
mutable_buffer{ffnn.fc_bias},
mutable_buffer{ffnn.fc_weight},
}
,proj
{
master,
// projection parameters follow the norm and both fc arrays
offset + off_t(sizeof(norm) + sizeof(ffnn.fc_bias) + sizeof(ffnn.fc_weight)),
mutable_buffer{ffnn.proj_bias},
mutable_buffer{ffnn.proj_weight},
}
{
// Layout check: the address of proj_weight must equal the address of
// norm.bias advanced by the sizes of everything the offsets above assume
// precedes it, i.e. `norm` and `ffnn` must be laid out contiguously in
// that order.
always_assert
(
ircd::data(const_buffer{ffnn.proj_weight})
==
ircd::data(const_buffer{norm.bias}) +
sizeof(norm) +
sizeof(ffnn.fc_bias) +
sizeof(ffnn.fc_weight) +
ircd::size(const_buffer{ffnn.proj_bias})
);
}
ircd::gpt::pipe::model::ffnn::ffnn(cl::data *const master,
const off_t offset, const off_t offset,
const gpt::model::norm &norm, const gpt::model::norm &norm,
const gpt::model::ffnn &ffnn) const gpt::model::ffnn &ffnn)
@ -766,7 +975,54 @@ ircd::gpt::pipe::model::ffnn::ffnn(cl::data &master,
// pipe::model::attn // pipe::model::attn
// //
// Attention unit over mutable parameters. The norm, fully-connected and
// projection tensors are constructed against the caller-supplied `master`
// regions at byte offsets relative to `offset`; the mask is a window into
// master[0] only.
ircd::gpt::pipe::model::attn::attn(cl::data &master, ircd::gpt::pipe::model::attn::attn(cl::data *const master,
const off_t offset,
gpt::model::norm &norm,
gpt::model::attn &attn)
:norm
{
master,
offset,
mutable_buffer{norm.bias},
mutable_buffer{norm.weight},
}
,fcon
{
master,
// attention weights follow the norm
offset + off_t(sizeof(norm)),
mutable_buffer{attn.attn_bias},
mutable_buffer{attn.attn_weight},
}
,proj
{
master,
// projection parameters follow the norm, the attention arrays and attn.bias
offset + off_t(sizeof(norm) + sizeof(attn.attn_bias) + sizeof(attn.attn_weight) + sizeof(attn.bias)),
mutable_buffer{attn.proj_bias},
mutable_buffer{attn.proj_weight},
}
,mask
{
master[0],
{
// {size, offset}: attn.bias sits between the attention arrays and the
// projection parameters
sizeof(attn.bias),
offset + off_t(sizeof(norm) + sizeof(attn.attn_bias) + sizeof(attn.attn_weight)),
},
}
{
// Layout check: the address of proj_weight must equal the address of
// norm.bias advanced by the sizes of everything the offsets above assume
// precedes it (including attn.bias), i.e. `norm` and `attn` must be laid
// out contiguously in that order.
always_assert
(
ircd::data(const_buffer{attn.proj_weight})
==
ircd::data(const_buffer{norm.bias}) +
sizeof(norm) +
sizeof(attn.bias) +
sizeof(attn.attn_bias) +
sizeof(attn.attn_weight) +
ircd::size(const_buffer{attn.proj_bias})
);
}
ircd::gpt::pipe::model::attn::attn(cl::data *const master,
const off_t offset, const off_t offset,
const gpt::model::norm &norm, const gpt::model::norm &norm,
const gpt::model::attn &attn) const gpt::model::attn &attn)
@ -793,7 +1049,7 @@ ircd::gpt::pipe::model::attn::attn(cl::data &master,
} }
,mask ,mask
{ {
master, master[0],
{ {
sizeof(attn.bias), sizeof(attn.bias),
offset + off_t(sizeof(norm) + sizeof(attn.attn_bias) + sizeof(attn.attn_weight)), offset + off_t(sizeof(norm) + sizeof(attn.attn_bias) + sizeof(attn.attn_weight)),
@ -817,86 +1073,92 @@ ircd::gpt::pipe::model::attn::attn(cl::data &master,
// pipe::model::tensor // pipe::model::tensor
// //
ircd::gpt::pipe::model::tensor::tensor(const const_buffer &bias, ircd::gpt::pipe::model::tensor::tensor(cl::data *const master,
const const_buffer &weight) const off_t offset,
const mutable_buffer &bias,
const mutable_buffer &weight)
:bias :bias
{ {
ircd::size(bias), master,
offset,
bias, bias,
} }
,weight ,weight
{ {
ircd::size(weight), master,
off_t(offset + ircd::size(bias)),
weight, weight,
} }
{ {
} }
ircd::gpt::pipe::model::tensor::tensor(cl::data &master, ircd::gpt::pipe::model::tensor::tensor(cl::data *const master,
const off_t offset, const off_t offset,
const const_buffer &bias, const const_buffer &bias,
const const_buffer &weight) const const_buffer &weight)
:bias :bias
{ {
master, master,
{ offset,
ircd::size(bias), // size bias,
offset, // offset
},
} }
,weight ,weight
{ {
master, master,
off_t(offset + ircd::size(bias)),
weight,
}
{
}
//
// pipe::model::matrix
//
ircd::gpt::pipe::model::matrix::matrix(cl::data *const master,
const off_t offset,
const mutable_buffer &param)
:param
{
master[0],
{ {
ircd::size(weight), // size ircd::size(param),
offset + ircd::size(bias), // offset offset,
} },
} }
,moment
{ {
} // first moment
//
// gpt::task
//
ircd::gpt::task::task(const gpt::opts *const opts,
struct ircd_gpt_task *const ctrl)
:opts
{
opts
}
,ctrl
{
ctrl
}
{
memset(this->ctrl, 0x0, sizeof(ircd_gpt_task));
this->ctrl->rand[0] = this->opts->seed;
this->ctrl->rand[1] = this->opts->seed;
this->ctrl->rand[2] = -1UL;
this->ctrl->rand[3] = -1UL;
}
ircd::gpt::task::~task()
noexcept
{
}
//
// hypercall
//
ircd::string_view
ircd::gpt::reflect(const enum ircd_gpt_hypercall code)
noexcept
{
switch(code)
{ {
case IRCD_GPT_ACCEPT: return "ACCEPT"; master[1],
case IRCD_GPT_ECOMPLETE: return "ECOMPLETE"; {
case IRCD_GPT_ETOKENS: return "ETOKENS"; ircd::size(param),
} offset,
},
},
return "??????"; // second moment
{
master[2],
{
ircd::size(param),
offset,
},
},
}
{
}
// Const-parameter overload: only `param` is given an initializer, as a window
// of ircd::size(param) bytes at `offset` within master[0]. The moment members
// receive no initializer in this constructor.
ircd::gpt::pipe::model::matrix::matrix(cl::data *const master,
const off_t offset,
const const_buffer &param)
:param
{
master[0],
{
ircd::size(param), // size
offset, // offset
},
}
{
} }