Skip to content

Commit 7a02984

Browse files
authored
Merge branch 'main' into feat/docs-utils/4-returns
2 parents 076b882 + 508a667 commit 7a02984

3,350 files changed

Lines changed: 178721 additions & 73457 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

.buildkite/pipelines/evals/eval_pipeline.ts

Lines changed: 49 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -66,12 +66,31 @@ function parseGithubPrLabels(raw: string): string[] {
6666
.filter(Boolean);
6767
}
6868

69+
/**
 * Maps the value of a `models:judge:<value>` label to the id used for the evaluation connector.
 *
 * Three input shapes are handled, in this order:
 *  1. `eis/<modelId>`  -> `eis-` followed by `normalizeBuildkiteKey(<modelId>)`
 *  2. any other value containing `/` -> `litellm-` followed by `normalizeBuildkiteKey(<value>)`
 *  3. anything else -> returned unchanged
 *
 * NOTE(review): `normalizeBuildkiteKey` is defined outside this excerpt; it presumably rewrites the
 * value into the same form used for the generated connector ids — confirm against its definition.
 *
 * @param raw - The judge value with the `models:judge:` prefix already removed.
 * @returns The connector id to use for the evaluation connector.
 */
function normalizeEvaluationConnectorId(raw: string): string {
70+
// Support `models:judge:eis/<modelId>` where the judge value is a model id, not a connector id.
71+
if (raw.startsWith('eis/')) { // checked first: `eis/...` values also contain `/` and would otherwise hit the generic branch below
72+
return `eis-${normalizeBuildkiteKey(raw.slice('eis/'.length))}`;
73+
}
74+
75+
// Support `models:judge:<modelGroup>` (e.g. `llm-gateway/gpt-5.2`) where the judge value is a model group.
76+
if (raw.includes('/')) {
77+
return `litellm-${normalizeBuildkiteKey(raw)}`;
78+
}
79+
80+
// Already a connector id (e.g. `litellm-*` / `eis-*`) or some other explicit id.
81+
return raw;
82+
}
83+
6984
function buildEvalsYaml({
7085
selectedSuites,
7186
modelGroups,
87+
evaluationConnectorId,
88+
includeEisModels,
7289
}: {
7390
selectedSuites: EvalsSuiteMetadataEntry[];
7491
modelGroups: string[] | undefined;
92+
evaluationConnectorId: string | undefined;
93+
includeEisModels: boolean;
7594
}): string {
7695
const suiteSteps = selectedSuites
7796
.map((suite) => {
@@ -81,17 +100,29 @@ function buildEvalsYaml({
81100
modelGroups && modelGroups.length > 0
82101
? ` EVAL_MODEL_GROUPS: '${modelGroups.join(',')}'`
83102
: null;
103+
const evaluationConnectorIdEnv = evaluationConnectorId
104+
? ` EVALUATION_CONNECTOR_ID: '${evaluationConnectorId}'`
105+
: null;
106+
const includeEisModelsEnv = includeEisModels
107+
? ` EVAL_INCLUDE_EIS_MODELS: '1'`
108+
: null;
84109
return [
85110
` - label: '${label}'`,
86111
` key: ${key}`,
87112
` command: bash .buildkite/scripts/steps/evals/run_suite.sh`,
88113
` env:`,
89114
` KBN_EVALS: '1'`,
115+
` FTR_EIS_CCM: '1'`,
90116
` EVAL_SUITE_ID: '${suite.id}'`,
91117
` EVAL_FANOUT: '1'`,
118+
...(evaluationConnectorIdEnv ? [evaluationConnectorIdEnv] : []),
119+
...(includeEisModelsEnv ? [includeEisModelsEnv] : []),
92120
...(modelGroupsEnv ? [modelGroupsEnv] : []),
93121
` timeout_in_minutes: 60`,
94122
` agents:`,
123+
` image: family/kibana-ubuntu-2404`,
124+
` imageProject: elastic-images-prod`,
125+
` provider: gcp`,
95126
` machineType: n2-standard-8`,
96127
` preemptible: true`,
97128
` retry:`,
@@ -127,23 +158,35 @@ function buildEvalsYaml({
127158
* for the matching eval suites.
128159
*/
129160
export function getEvalPipeline(githubPrLabels: string): string | null {
161+
const parsedLabels = parseGithubPrLabels(githubPrLabels);
162+
130163
// Run eval suite(s) when their GH label(s) are present (see `evals.suites.json`).
131164
const evalSuites = readEvalsSuiteMetadata();
132-
const runAllEvals = githubPrLabels.includes('evals:all');
165+
const runAllEvals = parsedLabels.includes('evals:all');
133166
const selectedEvalSuites = runAllEvals
134167
? evalSuites
135168
: evalSuites.filter((suite) => {
136169
const labels = suite.ciLabels?.length ? suite.ciLabels : [`evals:${suite.id}`];
137-
return labels.some((label) => githubPrLabels.includes(label));
170+
return labels.some((label) => parsedLabels.includes(label));
138171
});
139172
// Optional model filtering for eval fanout (models:* labels).
140173
// - No `models:*` labels => run all models returned by LiteLLM (current behavior).
141174
// - One or more `models:<model-group>` labels => only run connectors whose `defaultModel`
142175
// matches one of those model groups.
143176
// - `models:all` can be used to explicitly opt into all models (ignored if combined with specifics).
144-
const parsedLabels = parseGithubPrLabels(githubPrLabels);
177+
const rawEvaluationConnectorId = parsedLabels
178+
.find((label) => label.startsWith('models:judge:'))
179+
?.slice('models:judge:'.length)
180+
?.trim();
181+
const evaluationConnectorId = rawEvaluationConnectorId
182+
? normalizeEvaluationConnectorId(rawEvaluationConnectorId)
183+
: undefined;
184+
const includeEisModels =
185+
parsedLabels.some((label) => label === 'models:all' || label.startsWith('models:eis/')) ||
186+
!!rawEvaluationConnectorId?.startsWith('eis/') ||
187+
!!evaluationConnectorId?.startsWith('eis-');
145188
const selectedModelGroups = parsedLabels
146-
.filter((label) => label.startsWith('models:'))
189+
.filter((label) => label.startsWith('models:') && !label.startsWith('models:judge:'))
147190
.map((label) => label.slice('models:'.length))
148191
.map((value) => value.trim())
149192
.filter(Boolean)
@@ -156,5 +199,7 @@ export function getEvalPipeline(githubPrLabels: string): string | null {
156199
return buildEvalsYaml({
157200
selectedSuites: selectedEvalSuites,
158201
modelGroups: selectedModelGroups.length > 0 ? selectedModelGroups : undefined,
202+
evaluationConnectorId,
203+
includeEisModels,
159204
});
160205
}

.buildkite/pipelines/evals/llm_evals.yml

Lines changed: 34 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,18 @@ steps:
1111

1212
- wait
1313

14+
- label: 'Store Cache for build'
15+
command: .buildkite/scripts/steps/store_cache.sh
16+
timeout_in_minutes: 10
17+
id: store_cache
18+
soft_fail: true
19+
agents:
20+
image: family/kibana-ubuntu-2404
21+
imageProject: elastic-images-prod
22+
provider: gcp
23+
machineType: n2-highcpu-8
24+
diskSizeGb: 95
25+
1426
- label: '🧑‍🏭 Build Kibana Distribution'
1527
command: .buildkite/scripts/steps/build_kibana.sh
1628
agents:
@@ -31,9 +43,17 @@ steps:
3143
command: bash .buildkite/scripts/steps/evals/run_suite.sh
3244
env:
3345
KBN_EVALS: '1'
46+
FTR_EIS_CCM: '1'
3447
EVAL_SUITE_ID: 'agent-builder'
3548
EVAL_FANOUT: '1'
36-
EVAL_MODEL_GROUPS: 'all'
49+
EVAL_INCLUDE_EIS_MODELS: '1'
50+
# Weekly pipeline model allowlist:
51+
# - Default behavior (EVAL_MODEL_GROUPS=all) runs against every discovered LiteLLM + EIS model.
52+
# - That is too expensive/noisy for a weekly cadence, so we currently pin to a small EIS allowlist.
53+
#
54+
# NOTE: Use `eis/<modelId>` values (not connector ids) so we can filter purely on the discovered
55+
# EIS model ids in `target/eis_models.json`.
56+
EVAL_MODEL_GROUPS: &weekly_eis_model_groups 'eis/anthropic-claude-4.5-sonnet,eis/anthropic-claude-4.6-opus,eis/google-gemini-3.0-flash,eis/google-gemini-3.0-pro,eis/openai-gpt-5.2,eis/openai-gpt-oss-120b'
3757
timeout_in_minutes: 60
3858
agents:
3959
image: family/kibana-ubuntu-2404
@@ -51,9 +71,11 @@ steps:
5171
command: bash .buildkite/scripts/steps/evals/run_suite.sh
5272
env:
5373
KBN_EVALS: '1'
74+
FTR_EIS_CCM: '1'
5475
EVAL_SUITE_ID: 'esql-generation'
5576
EVAL_FANOUT: '1'
56-
EVAL_MODEL_GROUPS: 'all'
77+
EVAL_INCLUDE_EIS_MODELS: '1'
78+
EVAL_MODEL_GROUPS: *weekly_eis_model_groups
5779
timeout_in_minutes: 60
5880
agents:
5981
image: family/kibana-ubuntu-2404
@@ -68,13 +90,14 @@ steps:
6890

6991
- label: 'Evals: Streams'
7092
key: kbn-evals-weekly-streams
71-
skip: 'Temporarily disabled (requires prerequisite data; will be re-enabled in follow-up PR)'
7293
command: bash .buildkite/scripts/steps/evals/run_suite.sh
7394
env:
7495
KBN_EVALS: '1'
96+
FTR_EIS_CCM: '1'
7597
EVAL_SUITE_ID: 'streams'
7698
EVAL_FANOUT: '1'
77-
EVAL_MODEL_GROUPS: 'all'
99+
EVAL_INCLUDE_EIS_MODELS: '1'
100+
EVAL_MODEL_GROUPS: *weekly_eis_model_groups
78101
timeout_in_minutes: 60
79102
agents:
80103
image: family/kibana-ubuntu-2404
@@ -92,30 +115,11 @@ steps:
92115
command: bash .buildkite/scripts/steps/evals/run_suite.sh
93116
env:
94117
KBN_EVALS: '1'
118+
FTR_EIS_CCM: '1'
95119
EVAL_SUITE_ID: 'llm-tasks'
96120
EVAL_FANOUT: '1'
97-
EVAL_MODEL_GROUPS: 'all'
98-
timeout_in_minutes: 60
99-
agents:
100-
image: family/kibana-ubuntu-2404
101-
imageProject: elastic-images-prod
102-
provider: gcp
103-
machineType: n2-standard-8
104-
preemptible: true
105-
retry:
106-
automatic:
107-
- exit_status: '-1'
108-
limit: 3
109-
110-
- label: 'Evals: Observability AI Assistant'
111-
key: kbn-evals-weekly-obs-ai-assistant
112-
skip: 'Temporarily disabled (for the sake of cost reduction and low maturity of this eval suite; will be re-enabled in follow-up PR)'
113-
command: bash .buildkite/scripts/steps/evals/run_suite.sh
114-
env:
115-
KBN_EVALS: '1'
116-
EVAL_SUITE_ID: 'obs-ai-assistant'
117-
EVAL_FANOUT: '1'
118-
EVAL_MODEL_GROUPS: 'all'
121+
EVAL_INCLUDE_EIS_MODELS: '1'
122+
EVAL_MODEL_GROUPS: *weekly_eis_model_groups
119123
timeout_in_minutes: 60
120124
agents:
121125
image: family/kibana-ubuntu-2404
@@ -127,16 +131,16 @@ steps:
127131
automatic:
128132
- exit_status: '-1'
129133
limit: 3
130-
131-
- label: 'Evals: Observability AI Assistant (AI Insights)'
134+
- label: 'Evals: Observability AI Insights'
132135
key: kbn-evals-weekly-obs-ai-assistant-ai-insights
133-
skip: 'Temporarily disabled (requires prerequisite data; will be re-enabled in follow-up PR)'
134136
command: bash .buildkite/scripts/steps/evals/run_suite.sh
135137
env:
136138
KBN_EVALS: '1'
139+
FTR_EIS_CCM: '1'
137140
EVAL_SUITE_ID: 'obs-ai-assistant/ai_insights'
138141
EVAL_FANOUT: '1'
139-
EVAL_MODEL_GROUPS: 'all'
142+
EVAL_INCLUDE_EIS_MODELS: '1'
143+
EVAL_MODEL_GROUPS: *weekly_eis_model_groups
140144
timeout_in_minutes: 60
141145
agents:
142146
image: family/kibana-ubuntu-2404

.buildkite/pipelines/pull_request/exploratory_view_plugin.yml

Lines changed: 0 additions & 23 deletions
This file was deleted.

.buildkite/scout_ci_config.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ plugins:
1111
- data_views
1212
- discover_enhanced
1313
- entity_store
14+
- exploratory_view
1415
- fleet
1516
- gen_ai_settings
1617
- global_search
@@ -47,7 +48,6 @@ packages:
4748
# so they don't rerun alongside plugin/package Scout tests discovered later.
4849
- kbn-scout
4950
- kbn-scout-release-testing # Release tests will run separately as part of the release process
50-
- kbn-evals-suite-agent-builder # Evaluation suite is run in dedicated eval pipelines, but Scout config must be registered for discovery validation
5151

5252
# Define test configs to be excluded from automatic discovery & execution in CI environment (process.env.CI=true)
5353
excluded_configs:

.buildkite/scripts/common/setup_job_env.sh

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -193,10 +193,11 @@ EOF
193193
exit 1
194194
fi
195195

196-
# Sanity-check: EVALUATION_CONNECTOR_ID must match a generated connector id
197-
if [[ -n "${EVALUATION_CONNECTOR_ID:-}" ]]; then
196+
# Sanity-check: when the evaluation connector is expected to be LiteLLM-backed, it must match
197+
# a generated LiteLLM connector id. (Non-LiteLLM evaluation connectors may be injected later.)
198+
if [[ -n "${EVALUATION_CONNECTOR_ID:-}" ]] && [[ "${EVALUATION_CONNECTOR_ID}" == litellm-* ]]; then
198199
if ! node -e "const b=process.env.KIBANA_TESTING_AI_CONNECTORS||'';const s=Buffer.from(b,'base64').toString('utf8');const o=JSON.parse(s);const id=process.env.EVALUATION_CONNECTOR_ID;process.exit(Object.prototype.hasOwnProperty.call(o,id)?0:1);" ; then
199-
echo "ERROR: EVALUATION_CONNECTOR_ID ($EVALUATION_CONNECTOR_ID) is not present in generated LiteLLM connectors."
200+
echo "ERROR: EVALUATION_CONNECTOR_ID ($EVALUATION_CONNECTOR_ID) is not present in generated connectors."
200201
echo "Sample generated connector ids:"
201202
node -e "const b=process.env.KIBANA_TESTING_AI_CONNECTORS||'';const s=Buffer.from(b,'base64').toString('utf8');const o=JSON.parse(s);console.log(Object.keys(o).slice(0,20).join('\\n'));"
202203
exit 1
@@ -216,6 +217,9 @@ EOF
216217
if [[ -n "$TRACING_EXPORTERS_JSON" && "$TRACING_EXPORTERS_JSON" != "null" ]]; then
217218
export TRACING_EXPORTERS="$TRACING_EXPORTERS_JSON"
218219
fi
220+
221+
# Optional: GCS service account credentials for snapshot restoration (e.g. AI Insights)
222+
export GCS_CREDENTIALS="$(jq -c '.gcsDatasetAccessCredentials // empty' <<<"$KBN_EVALS_CONFIG_JSON")"
219223
fi
220224
}
221225

.buildkite/scripts/lifecycle/pre_build.sh

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,9 @@ fi
3030
# Annotate ingestable meta-data (prefixed with 'ingest:')
3131
if [[ "${BUILDKITE_PULL_REQUEST_BASE_BRANCH:-}" != "" ]]; then # if we're in a PR build
3232
# GITHUB_PR_DRAFT is set by our pr build trigger bot
33-
buildkite-agent meta-data set "ingest:is_draft_pr" "${GITHUB_PR_DRAFT:-}"
33+
buildkite-agent meta-data set "ingest:is_draft_pr" "${GITHUB_PR_DRAFT:-false}"
3434
# GITHUB_PR_LABELS is set by our pr build trigger bot, and is a comma-separated list of labels on the PR
35-
buildkite-agent meta-data set "ingest:pr_labels" "${GITHUB_PR_LABELS:-}"
35+
if [[ -n "${GITHUB_PR_LABELS:-}" ]]; then
36+
buildkite-agent meta-data set "ingest:pr_labels" "$GITHUB_PR_LABELS"
37+
fi
3638
fi

.buildkite/scripts/pipelines/pull_request/pipeline.ts

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,6 @@ const SKIPPABLE_PR_MATCHERS = prConfig.skip_ci_on_only_changed!.map((r) => new R
129129
) {
130130
pipeline.push(getPipeline('.buildkite/pipelines/pull_request/synthetics_plugin.yml'));
131131
pipeline.push(getPipeline('.buildkite/pipelines/pull_request/uptime_plugin.yml'));
132-
pipeline.push(getPipeline('.buildkite/pipelines/pull_request/exploratory_view_plugin.yml'));
133132
pipeline.push(getPipeline('.buildkite/pipelines/pull_request/ux_plugin_e2e.yml'));
134133
}
135134

@@ -144,10 +143,10 @@ const SKIPPABLE_PR_MATCHERS = prConfig.skip_ci_on_only_changed!.map((r) => new R
144143
/^x-pack\/platform\/plugins\/shared\/stack_connectors\/server\/connector_types\/openai/,
145144
/^x-pack\/platform\/plugins\/shared\/stack_connectors\/server\/connector_types\/inference/,
146145
];
147-
// const agentBuilderPaths = [
148-
// /^x-pack\/platform\/plugins\/shared\/agent_builder/,
149-
// /^x-pack\/platform\/packages\/shared\/agent_builder/,
150-
// ];
146+
const agentBuilderPaths = [
147+
/^x-pack\/platform\/plugins\/shared\/agent_builder/,
148+
/^x-pack\/platform\/packages\/shared\/agent_builder/,
149+
];
151150

152151
if (
153152
(await doAnyChangesMatch([...aiInfraPaths, ...aiConnectorPaths])) ||
@@ -157,9 +156,8 @@ const SKIPPABLE_PR_MATCHERS = prConfig.skip_ci_on_only_changed!.map((r) => new R
157156
pipeline.push(getPipeline('.buildkite/pipelines/pull_request/ai_infra_gen_ai.yml'));
158157
}
159158

160-
// Temporarily disable auto-trigger on file changes - smoke tests still run daily
161159
if (
162-
// (await doAnyChangesMatch([...aiInfraPaths, ...aiConnectorPaths, ...agentBuilderPaths])) ||
160+
(await doAnyChangesMatch([...aiInfraPaths, ...aiConnectorPaths, ...agentBuilderPaths])) ||
163161
GITHUB_PR_LABELS.includes('agent-builder:run-smoke-tests') ||
164162
GITHUB_PR_LABELS.includes('ci:all-gen-ai-suites') ||
165163
ALL_UI_TEST_SUITES

.buildkite/scripts/steps/api_docs/publish_api_docs.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,6 @@ git commit -m "[api-docs] Daily api_docs build"
2626

2727
git push origin "$branch"
2828

29-
prUrl=$(gh pr create --repo elastic/kibana --base main --head "$branch" --title "[api-docs] $(date +%F) Daily api_docs build" --body "Generated by $BUILDKITE_BUILD_URL" --label "release_note:skip" --label "docs")
29+
prUrl=$(gh pr create --repo elastic/kibana --base main --head "$branch" --title "[api-docs] $(date +%F) Daily api_docs build" --body "Generated by $BUILDKITE_BUILD_URL" --label "release_note:skip" --label "backport:skip" --label "docs")
3030
echo "Opened PR: $prUrl"
3131
gh pr merge --repo elastic/kibana --auto --squash "$prUrl"

.buildkite/scripts/steps/check_saved_objects.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ if is_pr; then
5353
# First, we try to obtain its SHA (or one of its ancestors)
5454
MERGE_BASE_REV="$(findExistingSnapshotSha "$GITHUB_PR_MERGE_BASE")"
5555
if [[ $? -ne 0 ]]; then
56-
echo "❌ Could not find an existing snapshot to use as a baseline. Aborting Saved Objects checks" >&2
56+
echo "❌ Could not find an existing snapshot to use as a baseline. Please rebase this PR branch onto the latest 'main' commit, then rerun CI." >&2
5757
exit 1
5858
fi
5959

.buildkite/scripts/steps/esql_grammar_sync.sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,9 @@ main () {
183183

184184
# Note: We run build commands directly instead of `yarn build:antlr4` to skip
185185
# the prebuild:antlr4 hook which uses `brew` (macOS only). CI has antlr installed.
186+
# Pin the ANTLR version to avoid the broken Sonatype Central version-lookup API
187+
# in antlr4-tools (https://github.com/antlr/antlr4-tools/issues/18).
188+
export ANTLR4_TOOLS_ANTLR_VERSION="4.13.2"
186189
cd ./src/platform/packages/shared/kbn-esql-language
187190
yarn build:antlr4:esql
188191
yarn build:antlr4:promql

0 commit comments

Comments
 (0)