0
0
Fork 0
mirror of https://github.com/matrix-construct/construct synced 2024-06-11 06:28:55 +02:00

ircd::gpt::gpu: Use explicit broadcast for local access.

This commit is contained in:
Jason Volk 2022-10-03 22:14:23 +00:00
parent 03e5f255c2
commit c4cceb425c

View file

@@ -742,11 +742,13 @@ ircd_gpt_lm_logsm(__global struct ircd_gpt_ctrl *const ctrl,
if(li == 0)
samax.mu = mu[li];
+ircd_simt_broadcast_flldr(mu, ln, li);
sum[li] = 0.0f;
for(uint ti = start; ti < stop; ++ti)
{
const float
-sub = logit[ti] - samax.mu,
+sub = logit[ti] - mu[li],
res = native_exp(sub);
sum[li] += res;
@@ -764,7 +766,7 @@ ircd_gpt_lm_logsm(__global struct ircd_gpt_ctrl *const ctrl,
for(uint ti = start; ti < stop; ++ti)
{
const float
-sub = logit[ti] - samax.mu,
+sub = logit[ti] - mu[li],
res = lambda[li] * native_exp(sub);
logit[ti] = res;