Skip to content

Commit 0029e6f

Browse files
pgayvalletkibanamachineelasticmachine
authored
[Agent Builder] fix too_long_http_line_exception error when resolving too many indices (#259945)
## Summary Fix elastic/search-team#13563 Check the issue for more details --------- Co-authored-by: kibanamachine <42973632+kibanamachine@users.noreply.github.com> Co-authored-by: Elastic Machine <elasticmachine@users.noreply.github.com>
1 parent 8830177 commit 0029e6f

8 files changed

Lines changed: 381 additions & 33 deletions

File tree

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0; you may not use this file except in compliance with the Elastic License
5+
* 2.0.
6+
*/
7+
8+
import { batchByUrlLength } from './batch_by_url_length';
9+
10+
// Unit tests for batchByUrlLength. Lengths quoted in the comments below are the
// lengths of the comma-joined batch (names plus one comma between each pair).
describe('batchByUrlLength', () => {
11+
it('returns an empty array for empty input', () => {
12+
expect(batchByUrlLength([])).toEqual([]);
13+
});
14+
15+
it('returns a single batch when all names fit', () => {
16+
expect(batchByUrlLength(['a', 'b', 'c'])).toEqual([['a', 'b', 'c']]);
17+
});
18+
19+
it('splits into multiple batches when names exceed the limit', () => {
20+
// Each name is 10 chars. With commas: "aaaaaaaaaa,bbbbbbbbbb" = 21 chars.
21+
// With maxJoinedLength=20, no two names fit together, so every name gets its own batch.
22+
const names = ['aaaaaaaaaa', 'bbbbbbbbbb', 'cccccccccc'];
23+
const result = batchByUrlLength(names, 20);
24+
expect(result).toEqual([['aaaaaaaaaa'], ['bbbbbbbbbb'], ['cccccccccc']]);
25+
});
26+
27+
it('puts a single name that exceeds the limit into its own batch', () => {
28+
// A 5000-char name is longer than the 3000 limit; it is still returned, alone in its batch.
const longName = 'a'.repeat(5000);
29+
const result = batchByUrlLength([longName], 3000);
30+
expect(result).toEqual([[longName]]);
31+
});
32+
33+
it('handles exact boundary correctly', () => {
34+
// "aaa,bbb" = 7 chars. With limit=7, both fit in one batch (the limit is inclusive).
35+
const result = batchByUrlLength(['aaa', 'bbb'], 7);
36+
expect(result).toEqual([['aaa', 'bbb']]);
37+
});
38+
39+
it('starts a new batch when adding one more name would exceed the limit', () => {
40+
// "aaa,bbb" = 7, "aaa,bbb,c" = 9. With limit=8, 'c' goes to next batch.
41+
const result = batchByUrlLength(['aaa', 'bbb', 'c'], 8);
42+
expect(result).toEqual([['aaa', 'bbb'], ['c']]);
43+
});
44+
45+
it('handles typical datastream names with default limit', () => {
46+
// 100 names of 41 chars each (34-char prefix + 7-digit suffix).
// Joined: 100*41 + 99 commas = 4199 chars.
47+
// With default limit 3000, should produce at least 2 batches.
48+
const names = Array.from(
49+
{ length: 100 },
50+
(_, i) => `logs-elastic_agent.filebeat_input-${String(i).padStart(7, '0')}`
51+
);
52+
const batches = batchByUrlLength(names);
53+
expect(batches.length).toBeGreaterThanOrEqual(2);
54+
// Every name must appear exactly once across all batches, in input order
55+
expect(batches.flat()).toEqual(names);
56+
// Each batch's joined length must be <= 3000
57+
for (const batch of batches) {
58+
expect(batch.join(',').length).toBeLessThanOrEqual(3000);
59+
}
60+
});
61+
});
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0; you may not use this file except in compliance with the Elastic License
5+
* 2.0.
6+
*/
7+
8+
/**
 * Default upper bound, in characters, for the comma-joined length of one batch.
 *
 * NOTE(review): presumably chosen to leave headroom below Elasticsearch's
 * HTTP request-line limit once the rest of the path is added — confirm.
 */
const DEFAULT_MAX_JOINED_LENGTH = 3000;

/**
 * Splits an array of resource names into consecutive batches such that each
 * batch's comma-joined string is at most `maxJoinedLength` characters long
 * (the limit is inclusive).
 *
 * This is used to prevent `too_long_http_line_exception` errors from
 * Elasticsearch when many resource names are serialized into URL paths.
 *
 * Behavior worth knowing:
 * - Input order is preserved; every name appears in exactly one batch.
 * - A single name longer than `maxJoinedLength` is never dropped or split: it
 *   is emitted alone in its own batch, which therefore exceeds the limit.
 * - Length is measured on the raw joined string (`String.prototype.length`),
 *   not on a URL-encoded form. NOTE(review): if the caller's client
 *   percent-encodes separators, the on-the-wire length will be larger —
 *   confirm the default leaves enough headroom for that.
 *
 * @param names - Resource names to batch. Not mutated.
 * @param maxJoinedLength - Inclusive maximum length of `batch.join(',')`.
 * @returns The batches, in input order; an empty array for empty input.
 */
export const batchByUrlLength = (
  names: readonly string[],
  maxJoinedLength: number = DEFAULT_MAX_JOINED_LENGTH
): string[][] => {
  const batches: string[][] = [];
  let currentBatch: string[] = [];
  let currentLength = 0;

  for (const name of names) {
    // Appending to a non-empty batch costs one extra character for the comma.
    const addedLength = currentBatch.length === 0 ? name.length : name.length + 1;

    // Only a non-empty batch can be closed; this is what lets an oversize name
    // through as a single-element batch instead of looping forever or dropping it.
    if (currentBatch.length > 0 && currentLength + addedLength > maxJoinedLength) {
      batches.push(currentBatch);
      currentBatch = [name];
      currentLength = name.length;
    } else {
      currentBatch.push(name);
      currentLength += addedLength;
    }
  }

  // Flush the trailing batch; it is empty only when `names` was empty.
  if (currentBatch.length > 0) {
    batches.push(currentBatch);
  }

  return batches;
};

x-pack/platform/packages/shared/agent-builder/agent-builder-genai-utils/tools/utils/ccs.test.ts

Lines changed: 49 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,13 @@
66
*/
77

88
import type { ElasticsearchClient } from '@kbn/core-elasticsearch-server';
9-
import { isCcsTarget, partitionByCcs, getFieldsFromFieldCaps, getIndexFields } from './ccs';
9+
import {
10+
isCcsTarget,
11+
partitionByCcs,
12+
getFieldsFromFieldCaps,
13+
getBatchedFieldsFromFieldCaps,
14+
getIndexFields,
15+
} from './ccs';
1016
import { getIndexMappings } from './mappings';
1117

1218
describe('isCcsTarget', () => {
@@ -247,3 +253,45 @@ describe('getIndexFields', () => {
247253
expect(esClient.fieldCaps).not.toHaveBeenCalled();
248254
});
249255
});
256+
257+
// Verifies that getBatchedFieldsFromFieldCaps issues several fieldCaps requests
// when the comma-joined resource list is too long for a single one, and that
// the per-batch responses are merged back into one result keyed by resource.
describe('getBatchedFieldsFromFieldCaps', () => {
258+
it('batches requests when resource names would exceed URL length', async () => {
259+
// 100 names of 47 chars each; joined: 100*47 + 99 commas = 4799 chars (> 3000).
const resources = Array.from(
260+
{ length: 100 },
261+
(_, i) => `remote_cluster:logs-elastic_agent.input-${String(i).padStart(7, '0')}`
262+
);
263+
264+
// Mock client: splits the requested `index` string on commas and answers with
// one keyword field per requested name, named `field_<name>` and scoped to it.
const esClient = {
265+
fieldCaps: jest.fn().mockImplementation((params: any) => {
266+
const indexNames = (params.index as string).split(',');
267+
const fields: Record<string, Record<string, any>> = {};
268+
for (const name of indexNames) {
269+
fields[`field_${name}`] = {
270+
keyword: {
271+
type: 'keyword',
272+
searchable: true,
273+
aggregatable: true,
274+
indices: [name],
275+
},
276+
};
277+
}
278+
return Promise.resolve({ indices: indexNames, fields });
279+
}),
280+
} as unknown as ElasticsearchClient;
281+
282+
const result = await getBatchedFieldsFromFieldCaps({ resources, esClient });
283+
284+
// More than one request proves the resource list was split.
expect((esClient.fieldCaps as jest.Mock).mock.calls.length).toBeGreaterThan(1);
285+
286+
// Every request's joined index string must be at most 3000 chars.
for (const call of (esClient.fieldCaps as jest.Mock).mock.calls) {
287+
expect((call[0].index as string).length).toBeLessThanOrEqual(3000);
288+
}
289+
290+
// Merged result: one entry per resource, each holding exactly its own field.
expect(Object.keys(result).length).toBe(100);
291+
for (const name of resources) {
292+
expect(result[name]).toBeDefined();
293+
expect(result[name].length).toBe(1);
294+
expect(result[name][0].path).toBe(`field_${name}`);
295+
}
296+
});
297+
});

x-pack/platform/packages/shared/agent-builder/agent-builder-genai-utils/tools/utils/ccs.ts

Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import type { ElasticsearchClient } from '@kbn/core-elasticsearch-server';
1010
import type { MappingField } from './mappings';
1111
import { flattenMapping, getIndexMappings } from './mappings';
1212
import { processFieldCapsResponse, processFieldCapsResponsePerIndex } from './field_caps';
13+
import { batchByUrlLength } from './batch_by_url_length';
1314

1415
/**
1516
* Returns true if the resource name targets a remote cluster (contains ':'),
@@ -76,18 +77,32 @@ export const getBatchedFieldsFromFieldCaps = async ({
7677
return {};
7778
}
7879

79-
const fieldCapRes = await esClient.fieldCaps({
80-
index: resources.join(','),
81-
fields: ['*'],
82-
});
80+
const batches = batchByUrlLength(resources);
8381

84-
const perIndex = processFieldCapsResponsePerIndex(fieldCapRes);
82+
const batchResults = await Promise.all(
83+
batches.map(async (batch) => {
84+
const fieldCapRes = await esClient.fieldCaps({
85+
index: batch.join(','),
86+
fields: ['*'],
87+
});
88+
return processFieldCapsResponsePerIndex(fieldCapRes);
89+
})
90+
);
91+
92+
const merged: Record<string, MappingField[]> = {};
93+
for (const batchResult of batchResults) {
94+
for (const [name, fields] of Object.entries(batchResult)) {
95+
merged[name] = fields;
96+
}
97+
}
8598

86-
const result: Record<string, MappingField[]> = {};
8799
for (const name of resources) {
88-
result[name] = perIndex[name] ?? [];
100+
if (!merged[name]) {
101+
merged[name] = [];
102+
}
89103
}
90-
return result;
104+
105+
return merged;
91106
};
92107

93108
export interface IndexFieldsResult {

x-pack/platform/packages/shared/agent-builder/agent-builder-genai-utils/tools/utils/mappings/get_datastream_mappings.test.ts

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,5 +195,51 @@ describe('mappings utilities', () => {
195195

196196
expect(res.data_stream.mappings).toEqual(mappingsA);
197197
});
198+
199+
// Verifies that getDataStreamMappings splits a long datastream list across
// several transport requests and merges the per-batch mappings into one result.
it('batches requests when datastream names would exceed URL length', async () => {
200+
// 100 names of 41 chars each; joined: 100*41 + 99 commas = 4199 chars.
const datastreams = Array.from(
201+
{ length: 100 },
202+
(_, i) => `logs-elastic_agent.filebeat_input-${String(i).padStart(7, '0')}`
203+
);
204+
205+
// Builds a response with one entry per name, each carrying a single
// `field_<name>` text property nested under `_doc`.
const makeResponse = (names: string[]): GetDataStreamMappingsRes => ({
206+
data_streams: names.map((name) => ({
207+
name,
208+
effective_mappings: {
209+
_doc: {
210+
properties: { [`field_${name}`]: { type: 'text' } },
211+
},
212+
},
213+
})),
214+
});
215+
216+
// Mock transport: recovers the requested names by stripping the
// '/_data_stream/' prefix and '/_mappings' suffix from the path, then
// splitting the remainder on commas.
esClient.transport.request.mockImplementation((params: any) => {
217+
const path = params.path as string;
218+
const namesStr = path.replace('/_data_stream/', '').replace('/_mappings', '');
219+
const names = namesStr.split(',');
220+
return Promise.resolve(makeResponse(names));
221+
});
222+
223+
const result = await getDataStreamMappings({
224+
datastreams,
225+
esClient,
226+
cleanup: false,
227+
});
228+
229+
// More than one request proves the datastream list was split.
expect(esClient.transport.request.mock.calls.length).toBeGreaterThan(1);
230+
231+
// Each full request path (prefix + joined names + suffix) must stay below 4096 chars.
for (const call of esClient.transport.request.mock.calls) {
232+
const path = (call[0] as any).path as string;
233+
expect(path.length).toBeLessThan(4096);
234+
}
235+
236+
// Merged result: one entry per datastream, with the `_doc` wrapper unwrapped.
expect(Object.keys(result).length).toBe(100);
237+
for (const ds of datastreams) {
238+
expect(result[ds]).toBeDefined();
239+
expect(result[ds].mappings).toEqual({
240+
properties: { [`field_${ds}`]: { type: 'text' } },
241+
});
242+
}
243+
});
198244
});
199245
});

x-pack/platform/packages/shared/agent-builder/agent-builder-genai-utils/tools/utils/mappings/get_datastream_mappings.ts

Lines changed: 24 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import type { MappingTypeMapping } from '@elastic/elasticsearch/lib/api/types';
99
import type { ElasticsearchClient } from '@kbn/core-elasticsearch-server';
1010
import { cleanupMapping } from './cleanup_mapping';
11+
import { batchByUrlLength } from '../batch_by_url_length';
1112

1213
export interface GetDataStreamMappingEntry {
1314
mappings: MappingTypeMapping;
@@ -42,19 +43,27 @@ export const getDataStreamMappings = async ({
4243
cleanup?: boolean;
4344
esClient: ElasticsearchClient;
4445
}): Promise<GetDataStreamMappingsResults> => {
45-
const response = await esClient.transport.request<GetDataStreamMappingsRes>({
46-
path: `/_data_stream/${datastreams.join(',')}/_mappings`,
47-
method: 'GET',
48-
});
49-
50-
return response.data_streams.reduce((res, datastream) => {
51-
const mappings =
52-
'_doc' in datastream.effective_mappings
53-
? datastream.effective_mappings._doc
54-
: datastream.effective_mappings;
55-
res[datastream.name] = {
56-
mappings: cleanup ? cleanupMapping(mappings) : mappings,
57-
};
58-
return res;
59-
}, {} as GetDataStreamMappingsResults);
46+
const batches = batchByUrlLength(datastreams);
47+
48+
const batchResults = await Promise.all(
49+
batches.map(async (batch) => {
50+
const response = await esClient.transport.request<GetDataStreamMappingsRes>({
51+
path: `/_data_stream/${batch.join(',')}/_mappings`,
52+
method: 'GET',
53+
});
54+
55+
return response.data_streams.reduce((res, datastream) => {
56+
const mappings =
57+
'_doc' in datastream.effective_mappings
58+
? datastream.effective_mappings._doc
59+
: datastream.effective_mappings;
60+
res[datastream.name] = {
61+
mappings: cleanup ? cleanupMapping(mappings) : mappings,
62+
};
63+
return res;
64+
}, {} as GetDataStreamMappingsResults);
65+
})
66+
);
67+
68+
return Object.assign({}, ...batchResults);
6069
};

0 commit comments

Comments
 (0)