[ML] Fixing categorization tokens for multi-line messages (#103007)

Co-authored-by: Kibana Machine <42973632+kibanamachine@users.noreply.github.com>
This commit is contained in:
James Gowdy 2021-06-29 10:28:51 +01:00 committed by GitHub
parent b774e37ea1
commit 824463ace5
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@@ -145,10 +145,11 @@ export function categorizationExamplesProvider({
  for (let g = 0; g < sumLengths.length; g++) {
  if (t.start_offset <= sumLengths[g] + g) {
  const offset = g > 0 ? sumLengths[g - 1] + g : 0;
+ const start = t.start_offset - offset;
  tokensPerExample[g].push({
  ...t,
- start_offset: t.start_offset - offset,
- end_offset: t.end_offset - offset,
+ start_offset: start,
+ end_offset: start + t.token.length,
  });
  break;
  }