Skip to content

Commit 824463a

Browse files
[ML] Fixing categorization tokens for multi-line messages (#103007)
Co-authored-by: Kibana Machine <42973632+kibanamachine@users.noreply.github.com>
1 parent b774e37 commit 824463a

1 file changed

Lines changed: 3 additions & 2 deletions

File tree

  • x-pack/plugins/ml/server/models/job_service/new_job/categorization

x-pack/plugins/ml/server/models/job_service/new_job/categorization/examples.ts

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -145,10 +145,11 @@ export function categorizationExamplesProvider({
       for (let g = 0; g < sumLengths.length; g++) {
         if (t.start_offset <= sumLengths[g] + g) {
           const offset = g > 0 ? sumLengths[g - 1] + g : 0;
+          const start = t.start_offset - offset;
           tokensPerExample[g].push({
             ...t,
-            start_offset: t.start_offset - offset,
-            end_offset: t.end_offset - offset,
+            start_offset: start,
+            end_offset: start + t.token.length,
           });
           break;
         }

0 commit comments

Comments
 (0)