diff --git a/include/ircd/gpt/model.h b/include/ircd/gpt/model.h index cbbd72cec..e28daa538 100644 --- a/include/ircd/gpt/model.h +++ b/include/ircd/gpt/model.h @@ -30,6 +30,7 @@ namespace ircd::gpt::model extern std::vector default_data; constexpr auto alignment {4096}; + extern conf::item cache_shared; } /// Layer normalization diff --git a/ircd/gpt.cc b/ircd/gpt.cc index b71317047..49e91c9ee 100644 --- a/ircd/gpt.cc +++ b/ircd/gpt.cc @@ -256,10 +256,15 @@ try } ,model { - std::make_unique - ( - *const_cast(gpt::model::default_model) - ) + !gpt::model::cache_shared? + std::make_unique + ( + *const_cast(gpt::model::default_model) + ): + std::make_unique + ( + *const_cast(gpt::model::default_model) + ) } ,desc { diff --git a/ircd/gpt_gpu.cl b/ircd/gpt_gpu.cl index 7290cdf20..d38b82e50 100644 --- a/ircd/gpt_gpu.cl +++ b/ircd/gpt_gpu.cl @@ -1167,7 +1167,7 @@ ircd_gpt_prop_elem(__global const struct ircd_gpt_ctrl *const ctrl, const float4 param = param_[li], - grad = ctrl->label[0].loss.mean, + grad = ctrl->target.loss.mean, alpha[2] = { 1.0f - opts->beta[0], 1.0f - opts->beta[1], }, exp_avg = ts? exp_avg_[li]: 0.0f, exp_avg_sqr = ts? exp_avg_sqr_[li]: 0.0f, @@ -1179,13 +1179,9 @@ ircd_gpt_prop_elem(__global const struct ircd_gpt_ctrl *const ctrl, delta = opts->alpha * (exp_avg_dot / denom), update = param - delta; - param_[li] = param + FLT_EPSILON; - exp_avg_[li] = exp_avg + FLT_EPSILON; - exp_avg_sqr_[li] = exp_avg_sqr + FLT_EPSILON; - - //param_[li] = update; - //exp_avg_[li] = exp_avg_dot; - //exp_avg_sqr_[li] = exp_avg_sqr_dot; + param_[li] = update; + exp_avg_[li] = exp_avg_dot; + exp_avg_sqr_[li] = exp_avg_sqr_dot; } // diff --git a/ircd/gpt_pipe.cc b/ircd/gpt_pipe.cc index a5c01ebdb..3791974e6 100644 --- a/ircd/gpt_pipe.cc +++ b/ircd/gpt_pipe.cc @@ -238,7 +238,7 @@ ircd::gpt::pipe::cycle::cycle(gpt::samp &samp) tokens, cached, true, - false, + ((false) && gpt::model::cache_shared) } ,stage {