mirror of
https://github.com/matrix-construct/construct
synced 2024-05-19 19:33:45 +02:00
ircd::simt::norm: Barrier for overlapping input and output buffers. (gfx1011)
This commit is contained in:
parent
7c8163fcab
commit
561be9973a
|
@@ -36,6 +36,9 @@ ircd_simt_math_norm_f4lldr(__local float4 *const out,
|
|||
s = native_sqrt(out[li] + epsilon),
|
||||
res = sub_mean / s;
|
||||
|
||||
+ if(out == in)
|
||||
+ barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
out[li] = res;
|
||||
}
|
||||
#endif
|
||||
|
|
|
@@ -761,7 +761,7 @@ ircd_gpt_lm_result_top(__local struct ircd_gpt_ctrl *const ctrl,
|
|||
token = idx[i];
|
||||
|
||||
const float
|
||||
- samax = logsm[token] + FLT_EPSILON;
|
||||
+ samax = logsm[token];
|
||||
|
||||
ctrl->top[i].token = token;
|
||||
ctrl->top[i].samax = samax;
|
||||
|
@@ -797,7 +797,7 @@ ircd_gpt_lm_result_label(__local struct ircd_gpt_ctrl *const ctrl,
|
|||
token = label->logit.token;
|
||||
|
||||
const float
|
||||
- samax = logsm[token] + FLT_EPSILON,
|
||||
+ samax = logsm[token],
|
||||
loss = 0.0f - native_log(samax),
|
||||
ppl = (1.0f - samax) * native_log2(opts->logits);
|
||||
|
||||
|
|
Loading…
Reference in a new issue