From c4cceb425c2788243f0330f2a73e4368b1ed5346 Mon Sep 17 00:00:00 2001
From: Jason Volk
Date: Mon, 3 Oct 2022 22:14:23 +0000
Subject: [PATCH] ircd::gpt::gpu: Use explicit broadcast for local access.

---
 ircd/gpt_gpu.cl | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/ircd/gpt_gpu.cl b/ircd/gpt_gpu.cl
index 4c3d0d468..95013d0c2 100644
--- a/ircd/gpt_gpu.cl
+++ b/ircd/gpt_gpu.cl
@@ -742,11 +742,13 @@ ircd_gpt_lm_logsm(__global struct ircd_gpt_ctrl *const ctrl,
 	if(li == 0)
 		samax.mu = mu[li];
 
+	ircd_simt_broadcast_flldr(mu, ln, li);
+
 	sum[li] = 0.0f;
 	for(uint ti = start; ti < stop; ++ti)
 	{
 		const float
-		sub = logit[ti] - samax.mu,
+		sub = logit[ti] - mu[li],
 		res = native_exp(sub);
 
 		sum[li] += res;
@@ -764,7 +766,7 @@ ircd_gpt_lm_logsm(__global struct ircd_gpt_ctrl *const ctrl,
 	for(uint ti = start; ti < stop; ++ti)
 	{
 		const float
-		sub = logit[ti] - samax.mu,
+		sub = logit[ti] - mu[li],
 		res = lambda[li] * native_exp(sub);
 
 		logit[ti] = res;