mirror of https://github.com/matrix-construct/construct

ircd::gpt::model: Optimize left-attention mask.

commit ce9abfb321
parent 9ca95591ff
Jason Volk committed 2021-09-15 02:26:10 -07:00
7 changed files with 11 additions and 79 deletions
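The change drops the per-layer bool bias alignas(align) [1024][1024] table (and the h.%u.attn.bias.json checkpoint file that filled it) in favor of an index comparison computed at the point of use. The sketch below is not repository code; it only illustrates, under the assumption that the stored table encodes the usual lower-triangular left/causal mask of 1.0/0.0 values (the removed init_h_attn_bias loader asserts the values are exactly "1.0" or "0.0"), that the lookup and the comparison agree, so the roughly 1 MiB-per-layer table (about 12 MiB across the 12 layers) never needs to be stored or uploaded.

// Hedged sketch, not repository code: if the stored attention bias table is
// the usual lower-triangular (left/causal) mask, every lookup bias[q][k] can
// be replaced by the comparison k <= q, so the table itself is redundant.
#include <cassert>
#include <cstddef>
#include <vector>

int main()
{
	constexpr std::size_t ctx = 1024;            // context length used throughout the diff
	std::vector<char> bias(ctx * ctx);           // stands in for bool bias[1024][1024]

	for(std::size_t q = 0; q < ctx; ++q)         // query (row) position
		for(std::size_t k = 0; k < ctx; ++k)     // key (column) position
			bias[q * ctx + k] = k <= q;          // 1 on and below the diagonal

	for(std::size_t q = 0; q < ctx; ++q)
		for(std::size_t k = 0; k < ctx; ++k)
			assert(static_cast<bool>(bias[q * ctx + k]) == (k <= q));

	// ~1 MiB of bool per layer, ~12 MiB over 12 layers, now never allocated.
	return 0;
}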

View file

@@ -33,9 +33,6 @@ struct ircd::gpt::model::attn
 		attn_bias alignas(align) [2304],
 		attn_weight alignas(align) [768][2304];
-	bool
-		bias alignas(align) [1024][1024];
 	float
 		proj_bias alignas(align) [768],
 		proj_weight alignas(align) [768][768];

View file

@@ -60,9 +60,6 @@ struct ircd::gpt::pipe::model::attn
 		fcon,
 		proj;
-	cl::data
-		mask;
 	attn(cl::data *, const off_t, const gpt::model::norm &, const gpt::model::attn &);
 	attn(cl::data *, const off_t, gpt::model::norm &, gpt::model::attn &);
 };

View file

@@ -11,12 +11,6 @@
 #pragma once
 #define HAVE_IRCD_GPT_TOKEN_H
-struct ircd_gpt_attn_mask
-{
-	bool
-		token[1024];
-};
 union ircd_gpt_token
 {
 	float

View file

@@ -28,7 +28,7 @@ namespace ircd::gpt
 	static void norm(f32x4 *, const f32x4 *, const f32x4 *, const f32x4 *, const f32);
 	static void vals(float (&)[12][1024][64], const float (&)[12][1024][1024], const float (&)[3][1024][12][64], const size_t);
 	static void pare(float (&)[12][1024][1024], const float (&)[3][1024][12][64], const size_t);
-	static void mask(float (&)[12][1024][1024], const float (&)[12][1024][1024], const bool (&)[1024][1024], const size_t);
+	static void mask(float (&)[12][1024][1024], const float (&)[12][1024][1024], const size_t);
 	static void smax(float (&)[12][1024][1024], const float (&)[12][1024][1024], const size_t);
 	static void attn(float (&)[3][1024][12][64], const float *const, const size_t, const model::decoder &, const uint layer);
 	static void ffnn(float *, const float *, const model::decoder &, const uint layer);
@@ -173,7 +173,7 @@ ircd::gpt::coil(float *__restrict__ accum,
 		attn(qkv, accum, tokens, decoder, i);
 		pare(state, qkv, tokens);
-		mask(state, state, layer.attn.bias, tokens);
+		mask(state, state, tokens);
 		smax(state, state, tokens);
 		vals(attns, state, qkv, tokens);
@@ -287,7 +287,6 @@ ircd::gpt::pare(float (&__restrict__ out)[12][1024][1024],
 void
 ircd::gpt::mask(float (&__restrict__ out)[12][1024][1024],
                 const float (&__restrict__ in)[12][1024][1024],
-                const bool (&__restrict__ bias)[1024][1024],
                 const size_t num)
 {
 	static const float masked
@@ -299,7 +298,7 @@ ircd::gpt::mask(float (&__restrict__ out)[12][1024][1024],
 	for(uint j(0); j < 12; ++j)
 		for(uint k(0); k < num; ++k)
 			for(uint l(0); l < num; ++l)
-				out[j][k][l] = bias[k][l]? in[j][k][l]: masked;
+				out[j][k][l] = (k < l)? in[j][k][l]: masked;
 }
 
 void

View file

@@ -197,8 +197,7 @@ ircd_gpt_attn_self(__global const struct ircd_gpt_ctrl *const ctrl,
                    __constant const struct ircd_gpt_opts *const opts,
                    __local union ircd_gpt_tokenv *const restrict out,
                    __local float self[][12],
-                   __global const struct ircd_gpt_attn_qkvv *const restrict token,
-                   __global const struct ircd_gpt_attn_mask *const restrict mask) // [1024][1024],
+                   __global const struct ircd_gpt_attn_qkvv *const restrict token)
 {
 	const uint
 		gi = get_global_id(0),
@@ -215,7 +214,8 @@ ircd_gpt_attn_self(__global const struct ircd_gpt_ctrl *const ctrl,
 	{
 		for(uint i = 0; i < wn; ++i)
 		{
-			if(!mask[wi].token[i])
+			// Left-attention mask
+			if(wi < i)
 			{
 				self[i][li] = -10000.0f;
 				continue;
@@ -288,7 +288,6 @@ ircd_gpt_coil(__global const struct ircd_gpt_ctrl *const ctrl,
               __constant const struct ircd_gpt_opts *const opts,
               __global union ircd_gpt_tokenv *const restrict accum,
               __global const struct ircd_gpt_attn_qkvv *const restrict state,
-              __global const struct ircd_gpt_attn_mask *const restrict mask, // [1024][1024],
               __global const float4 *const restrict attn_proj_bias,
               __global const float4 *const restrict attn_proj_weight,
               __global const float4 *const restrict ffnn_norm_bias,
@@ -324,8 +323,7 @@ ircd_gpt_coil(__global const struct ircd_gpt_ctrl *const ctrl,
 		opts,
 		&buf1,
 		buf.attn_self,
-		state,
-		mask
+		state
 	);
 	barrier(CLK_LOCAL_MEM_FENCE);
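In the kernel above, masked positions are not skipped; they are overwritten with -10000.0f before the softmax. The following small sketch (not repository code) shows why a large negative logit is equivalent to excluding the key: its exponential underflows to zero, so it contributes nothing to the normalized attention weights.

// Hedged sketch, not repository code: a logit of -10000.0f vanishes under
// softmax, which is why the kernel can "mask" by assignment rather than by
// branching around masked keys later in the pipeline.
#include <cmath>
#include <cstdio>

int main()
{
	const float self[4] { 1.0f, 2.0f, -10000.0f, -10000.0f }; // keys 2 and 3 masked

	float sum = 0.0f;
	for(const float v : self)
		sum += std::exp(v);                      // exp(-10000.0f) underflows to 0

	for(const float v : self)
		std::printf("%.6f ", std::exp(v) / sum); // ~0.268941 0.731059 0.000000 0.000000
	std::printf("\n");
	return 0;
}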

View file

@@ -29,8 +29,7 @@ namespace ircd::gpt::model
 	init_h_attn_attn_weight(decoder &, const string_view &, const size_t &, const json::array &),
 	init_h_attn_attn_bias(decoder &, const string_view &, const size_t &, const json::array &),
 	init_h_attn_proj_weight(decoder &, const string_view &, const size_t &, const json::array &),
-	init_h_attn_proj_bias(decoder &, const string_view &, const size_t &, const json::array &),
-	init_h_attn_bias(decoder &, const string_view &, const size_t &, const json::array &);
+	init_h_attn_proj_bias(decoder &, const string_view &, const size_t &, const json::array &);
 	static bool init_dataset(const string_view &);
 	static bool init_from_cache(const string_view &);
@@ -74,7 +73,6 @@ ircd::gpt::model::manifest_h
 	{ "h.%u.attn.c_attn.bias.json",   init_h_attn_attn_bias,   },
 	{ "h.%u.attn.c_proj.weight.json", init_h_attn_proj_weight, },
 	{ "h.%u.attn.c_proj.bias.json",   init_h_attn_proj_bias    },
-	{ "h.%u.attn.bias.json",          init_h_attn_bias,        },
 };
 decltype(ircd::gpt::model::manifest)
@@ -243,7 +241,7 @@ ircd::gpt::model::init_from_json(const string_view &cache_path,
 	// Load the transformer files by layer
 	const size_t layers {12};
 	for(size_t i(0); i < layers; ++i)
-		for(size_t j(0); j < 13; ++j)
+		for(size_t j(0); j < 12; ++j)
 			init_from_json_handle(*decoder, manifest_h[j], i);
 	const const_buffer src
@@ -645,35 +643,3 @@ ircd::gpt::model::init_h_attn_proj_bias(decoder &d,
 	always_assert(i == sizeof(d.layer[layer].attn.proj_bias) / sizeof(float));
 }
-void
-ircd::gpt::model::init_h_attn_bias(decoder &d,
-                                   const string_view &name,
-                                   const size_t &layer,
-                                   const json::array &mat)
-{
-	for(const json::array dim0 : mat)
-	{
-		for(const json::array dim1 : dim0)
-		{
-			size_t k(0);
-			for(const json::array dim2 : dim1)
-			{
-				size_t l(0);
-				for(const auto &elem : dim2)
-				{
-					always_assert(elem == "1.0" || elem == "0.0");
-					d.layer[layer].attn.bias[k][l++] = startswith(elem, '1');
-				}
-				++k;
-			}
-			always_assert
-			(
-				k == sizeof(d.layer[layer].attn.bias)
-				/ sizeof(d.layer[layer].attn.bias[0])
-			);
-		}
-	}
-}
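Dropping the h.%u.attn.bias.json manifest entry (and lowering the per-layer manifest loop bound from 13 to 12 to match) also removes a startup parsing cost. The block below is a rough back-of-the-envelope, not a measurement or repository code: each of those files held a 1024x1024 array of "1.0"/"0.0" strings, so on the order of 12.6 million JSON elements per model load no longer need to be walked.

// Hedged arithmetic sketch, not repository code: the number of JSON elements
// the loader no longer has to parse after init_h_attn_bias is removed.
#include <cstdio>

int main()
{
	const unsigned long layers = 12;               // transformer layers in the diff
	const unsigned long rows = 1024, cols = 1024;  // shape of the removed bias table

	const unsigned long skipped = layers * rows * cols;
	std::printf("elements no longer parsed: %lu\n", skipped); // 12582912
	return 0;
}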

View file

@@ -609,7 +609,6 @@ ircd::gpt::pipe::desc::layer::layer(pipe::desc &desc,
 		desc.opts,
 		desc.accum,
 		desc.state,
-		desc.model->decode->block[laynum].attn.mask,
 		desc.model->decode->block[laynum].attn.proj.bias.param,
 		desc.model->decode->block[laynum].attn.proj.weight.param,
 		desc.model->decode->block[laynum].ffnn.norm.bias.param,
@@ -1120,18 +1119,10 @@ ircd::gpt::pipe::model::attn::attn(cl::data *const master,
 ,proj
 {
 	master,
-	offset + off_t(sizeof(norm) + sizeof(attn.attn_bias) + sizeof(attn.attn_weight) + sizeof(attn.bias)),
+	offset + off_t(sizeof(norm) + sizeof(attn.attn_bias) + sizeof(attn.attn_weight)),
 	mutable_buffer{attn.proj_bias},
 	mutable_buffer{attn.proj_weight},
 }
-,mask
-{
-	master[0],
-	{
-		sizeof(attn.bias),
-		offset + off_t(sizeof(norm) + sizeof(attn.attn_bias) + sizeof(attn.attn_weight)),
-	},
-}
 {
 	always_assert
 	(
@@ -1139,7 +1130,6 @@ ircd::gpt::pipe::model::attn::attn(cl::data *const master,
 	==
 	ircd::data(const_buffer{norm.bias}) +
 	sizeof(norm) +
-	sizeof(attn.bias) +
 	sizeof(attn.attn_bias) +
 	sizeof(attn.attn_weight) +
 	ircd::size(const_buffer{attn.proj_bias})
@@ -1167,18 +1157,10 @@ ircd::gpt::pipe::model::attn::attn(cl::data *const master,
 ,proj
 {
 	master,
-	offset + off_t(sizeof(norm) + sizeof(attn.attn_bias) + sizeof(attn.attn_weight) + sizeof(attn.bias)),
+	offset + off_t(sizeof(norm) + sizeof(attn.attn_bias) + sizeof(attn.attn_weight)),
 	const_buffer{attn.proj_bias},
 	const_buffer{attn.proj_weight},
 }
-,mask
-{
-	master[0],
-	{
-		sizeof(attn.bias),
-		offset + off_t(sizeof(norm) + sizeof(attn.attn_bias) + sizeof(attn.attn_weight)),
-	},
-}
 {
 	always_assert
 	(
@@ -1186,7 +1168,6 @@ ircd::gpt::pipe::model::attn::attn(cl::data *const master,
 	==
 	ircd::data(const_buffer{norm.bias}) +
 	sizeof(norm) +
-	sizeof(attn.bias) +
 	sizeof(attn.attn_bias) +
 	sizeof(attn.attn_weight) +
 	ircd::size(const_buffer{attn.proj_bias})
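The two constructor hunks above shift the proj tensor's offset back by sizeof(attn.bias) and drop the same term from the layout assertion. A rough sketch of that bookkeeping follows; it is not repository code, the norm size is an assumption (two 768-float vectors), and sizeof(bool) == 1 is assumed, but the attn_bias/attn_weight shapes and the removed 1024x1024 bool table come from the diff.

// Hedged sketch, not repository code: how much earlier the proj tensors land
// in each layer's parameter span once the bool bias[1024][1024] member is gone.
#include <cstdio>

int main()
{
	const unsigned long norm       = 2UL * 768 * sizeof(float);     // assumed: bias + weight vectors
	const unsigned long attn_bias  = 2304UL * sizeof(float);        // from the diff
	const unsigned long attn_wght  = 768UL * 2304 * sizeof(float);  // from the diff
	const unsigned long mask_table = 1024UL * 1024 * sizeof(bool);  // removed by this commit

	std::printf("proj offset before: %lu\n", norm + attn_bias + attn_wght + mask_table);
	std::printf("proj offset after:  %lu\n", norm + attn_bias + attn_wght);
	std::printf("saved per layer:    %lu bytes (~1 MiB)\n", mask_table);
	return 0;
}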