0
0
Fork 0
mirror of https://github.com/matrix-construct/construct synced 2024-05-19 19:33:45 +02:00

ircd::simt::norm: Barrier for overlapping input and output buffers. (gfx1011)

This commit is contained in:
Jason Volk 2022-10-20 21:12:59 +00:00
parent 7c8163fcab
commit 561be9973a
2 changed files with 5 additions and 2 deletions

View file

@@ -36,6 +36,9 @@ ircd_simt_math_norm_f4lldr(__local float4 *const out,
s = native_sqrt(out[li] + epsilon),
res = sub_mean / s;
if(out == in)
barrier(CLK_LOCAL_MEM_FENCE);
out[li] = res;
}
#endif

View file

@@ -761,7 +761,7 @@ ircd_gpt_lm_result_top(__local struct ircd_gpt_ctrl *const ctrl,
token = idx[i];
const float
samax = logsm[token] + FLT_EPSILON;
samax = logsm[token];
ctrl->top[i].token = token;
ctrl->top[i].samax = samax;
@@ -797,7 +797,7 @@ ircd_gpt_lm_result_label(__local struct ircd_gpt_ctrl *const ctrl,
token = label->logit.token;
const float
samax = logsm[token] + FLT_EPSILON,
samax = logsm[token],
loss = 0.0f - native_log(samax),
ppl = (1.0f - samax) * native_log2(opts->logits);