mirror of
https://github.com/matrix-construct/construct
synced 2024-12-26 15:33:54 +01:00
ircd::gpt::gpu: Use explicit broadcast for local access.
This commit is contained in:
parent
03e5f255c2
commit
c4cceb425c
1 changed file with 4 additions and 2 deletions
|
@@ -742,11 +742,13 @@ ircd_gpt_lm_logsm(__global struct ircd_gpt_ctrl *const ctrl,
|
|||
if(li == 0)
|
||||
samax.mu = mu[li];
|
||||
|
||||
ircd_simt_broadcast_flldr(mu, ln, li);
|
||||
|
||||
sum[li] = 0.0f;
|
||||
for(uint ti = start; ti < stop; ++ti)
|
||||
{
|
||||
const float
|
||||
sub = logit[ti] - samax.mu,
|
||||
sub = logit[ti] - mu[li],
|
||||
res = native_exp(sub);
|
||||
|
||||
sum[li] += res;
|
||||
|
@@ -764,7 +766,7 @@ ircd_gpt_lm_logsm(__global struct ircd_gpt_ctrl *const ctrl,
|
|||
for(uint ti = start; ti < stop; ++ti)
|
||||
{
|
||||
const float
|
||||
sub = logit[ti] - samax.mu,
|
||||
sub = logit[ti] - mu[li],
|
||||
res = lambda[li] * native_exp(sub);
|
||||
|
||||
logit[ti] = res;
|
||||
|
|
Loading…
Reference in a new issue