elastic
diff --git a/‎.buildkite/pipelines/evals/eval_pipeline.ts‎
Lines changed: 49 additions & 4 deletions b/‎.buildkite/pipelines/evals/eval_pipeline.ts‎
Lines changed: 49 additions & 4 deletions
diff --git a/‎.buildkite/pipelines/evals/llm_evals.yml‎
Lines changed: 34 additions & 30 deletions b/‎.buildkite/pipelines/evals/llm_evals.yml‎
Lines changed: 34 additions & 30 deletions
diff --git a/‎.buildkite/pipelines/pull_request/exploratory_view_plugin.yml‎
Lines changed: 0 additions & 23 deletions b/‎.buildkite/pipelines/pull_request/exploratory_view_plugin.yml‎
Lines changed: 0 additions & 23 deletions
diff --git a/‎.buildkite/scout_ci_config.yml‎
Lines changed: 1 addition & 1 deletion b/‎.buildkite/scout_ci_config.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.buildkite/scripts/common/setup_job_env.sh‎
Lines changed: 7 additions & 3 deletions b/‎.buildkite/scripts/common/setup_job_env.sh‎
Lines changed: 7 additions & 3 deletions
diff --git a/‎.buildkite/scripts/lifecycle/pre_build.sh‎
Lines changed: 4 additions & 2 deletions b/‎.buildkite/scripts/lifecycle/pre_build.sh‎
Lines changed: 4 additions & 2 deletions
diff --git a/‎.buildkite/scripts/pipelines/pull_request/pipeline.ts‎
Lines changed: 5 additions & 7 deletions b/‎.buildkite/scripts/pipelines/pull_request/pipeline.ts‎
Lines changed: 5 additions & 7 deletions
diff --git a/‎.buildkite/scripts/steps/api_docs/publish_api_docs.sh‎
Lines changed: 1 addition & 1 deletion b/‎.buildkite/scripts/steps/api_docs/publish_api_docs.sh‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.buildkite/scripts/steps/check_saved_objects.sh‎
Lines changed: 1 addition & 1 deletion b/‎.buildkite/scripts/steps/check_saved_objects.sh‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.buildkite/scripts/steps/esql_grammar_sync.sh‎
Lines changed: 3 additions & 0 deletions b/‎.buildkite/scripts/steps/esql_grammar_sync.sh‎
Lines changed: 3 additions & 0 deletions
@@ -66,12 +66,31 @@ function parseGithubPrLabels(raw: string): string[] {
     .filter(Boolean);
 }
 
+function normalizeEvaluationConnectorId(raw: string): string {
+  // `models:judge:eis/<modelId>`: the value is a model id, not a connector id; return `eis-` + normalizeBuildkiteKey(<modelId>).
+  if (raw.startsWith('eis/')) {
+    return `eis-${normalizeBuildkiteKey(raw.slice('eis/'.length))}`;
+  }
+
+  // Any other value containing `/` is taken as a model group (e.g. `llm-gateway/gpt-5.2`); return `litellm-` + normalizeBuildkiteKey(raw).
+  if (raw.includes('/')) {
+    return `litellm-${normalizeBuildkiteKey(raw)}`;
+  }
+
+  // No `/`: treated as already being a connector id (e.g. `litellm-*` / `eis-*`) or some other explicit id; returned unchanged.
+  return raw;
+}
+
 function buildEvalsYaml({
   selectedSuites,
   modelGroups,
+  evaluationConnectorId,
+  includeEisModels,
 }: {
   selectedSuites: EvalsSuiteMetadataEntry[];
   modelGroups: string[] | undefined;
+  evaluationConnectorId: string | undefined;
+  includeEisModels: boolean;
 }): string {
   const suiteSteps = selectedSuites
     .map((suite) => {
@@ -81,17 +100,29 @@ function buildEvalsYaml({
         modelGroups && modelGroups.length > 0
           ? `          EVAL_MODEL_GROUPS: '${modelGroups.join(',')}'`
           : null;
+      const evaluationConnectorIdEnv = evaluationConnectorId
+        ? `          EVALUATION_CONNECTOR_ID: '${evaluationConnectorId}'`
+        : null;
+      const includeEisModelsEnv = includeEisModels
+        ? `          EVAL_INCLUDE_EIS_MODELS: '1'`
+        : null;
       return [
         `      - label: '${label}'`,
         `        key: ${key}`,
         `        command: bash .buildkite/scripts/steps/evals/run_suite.sh`,
         `        env:`,
         `          KBN_EVALS: '1'`,
+        `          FTR_EIS_CCM: '1'`,
         `          EVAL_SUITE_ID: '${suite.id}'`,
         `          EVAL_FANOUT: '1'`,
+        ...(evaluationConnectorIdEnv ? [evaluationConnectorIdEnv] : []),
+        ...(includeEisModelsEnv ? [includeEisModelsEnv] : []),
         ...(modelGroupsEnv ? [modelGroupsEnv] : []),
         `        timeout_in_minutes: 60`,
         `        agents:`,
+        `          image: family/kibana-ubuntu-2404`,
+        `          imageProject: elastic-images-prod`,
+        `          provider: gcp`,
         `          machineType: n2-standard-8`,
         `          preemptible: true`,
         `        retry:`,
@@ -127,23 +158,35 @@ function buildEvalsYaml({
  * for the matching eval suites.
  */
 export function getEvalPipeline(githubPrLabels: string): string | null {
+  const parsedLabels = parseGithubPrLabels(githubPrLabels);
+
   // Run eval suite(s) when their GH label(s) are present (see `evals.suites.json`).
   const evalSuites = readEvalsSuiteMetadata();
-  const runAllEvals = githubPrLabels.includes('evals:all');
+  const runAllEvals = parsedLabels.includes('evals:all');
   const selectedEvalSuites = runAllEvals
     ? evalSuites
     : evalSuites.filter((suite) => {
         const labels = suite.ciLabels?.length ? suite.ciLabels : [`evals:${suite.id}`];
-        return labels.some((label) => githubPrLabels.includes(label));
+        return labels.some((label) => parsedLabels.includes(label));
       });
   // Optional model filtering for eval fanout (models:* labels).
   // - No `models:*` labels => run all models returned by LiteLLM (current behavior).
   // - One or more `models:<model-group>` labels => only run connectors whose `defaultModel`
   //   matches one of those model groups.
   // - `models:all` can be used to explicitly opt into all models (ignored if combined with specifics).
-  const parsedLabels = parseGithubPrLabels(githubPrLabels);
+  const rawEvaluationConnectorId = parsedLabels
+    .find((label) => label.startsWith('models:judge:'))
+    ?.slice('models:judge:'.length)
+    ?.trim();
+  const evaluationConnectorId = rawEvaluationConnectorId
+    ? normalizeEvaluationConnectorId(rawEvaluationConnectorId)
+    : undefined;
+  const includeEisModels =
+    parsedLabels.some((label) => label === 'models:all' || label.startsWith('models:eis/')) ||
+    !!rawEvaluationConnectorId?.startsWith('eis/') ||
+    !!evaluationConnectorId?.startsWith('eis-');
   const selectedModelGroups = parsedLabels
-    .filter((label) => label.startsWith('models:'))
+    .filter((label) => label.startsWith('models:') && !label.startsWith('models:judge:'))
     .map((label) => label.slice('models:'.length))
     .map((value) => value.trim())
     .filter(Boolean)
@@ -156,5 +199,7 @@ export function getEvalPipeline(githubPrLabels: string): string | null {
   return buildEvalsYaml({
     selectedSuites: selectedEvalSuites,
     modelGroups: selectedModelGroups.length > 0 ? selectedModelGroups : undefined,
+    evaluationConnectorId,
+    includeEisModels,
   });
 }
@@ -11,6 +11,18 @@ steps:
 
   - wait
 
+  - label: 'Store Cache for build'
+    command: .buildkite/scripts/steps/store_cache.sh
+    timeout_in_minutes: 10
+    id: store_cache
+    soft_fail: true
+    agents:
+      image: family/kibana-ubuntu-2404
+      imageProject: elastic-images-prod
+      provider: gcp
+      machineType: n2-highcpu-8
+      diskSizeGb: 95
+
   - label: '🧑‍🏭 Build Kibana Distribution'
     command: .buildkite/scripts/steps/build_kibana.sh
     agents:
@@ -31,9 +43,17 @@ steps:
         command: bash .buildkite/scripts/steps/evals/run_suite.sh
         env:
           KBN_EVALS: '1'
+          FTR_EIS_CCM: '1'
           EVAL_SUITE_ID: 'agent-builder'
           EVAL_FANOUT: '1'
-          EVAL_MODEL_GROUPS: 'all'
+          EVAL_INCLUDE_EIS_MODELS: '1'
+          # Weekly pipeline model allowlist:
+          # - Default behavior (EVAL_MODEL_GROUPS=all) runs against every discovered LiteLLM + EIS model.
+          # - That is too expensive/noisy for a weekly cadence, so we currently pin to a small EIS allowlist.
+          #
+          # NOTE: Use `eis/<modelId>` values (not connector ids) so we can filter purely on the discovered
+          # EIS model ids in `target/eis_models.json`.
+          EVAL_MODEL_GROUPS: &weekly_eis_model_groups 'eis/anthropic-claude-4.5-sonnet,eis/anthropic-claude-4.6-opus,eis/google-gemini-3.0-flash,eis/google-gemini-3.0-pro,eis/openai-gpt-5.2,eis/openai-gpt-oss-120b'
         timeout_in_minutes: 60
         agents:
           image: family/kibana-ubuntu-2404
@@ -51,9 +71,11 @@ steps:
         command: bash .buildkite/scripts/steps/evals/run_suite.sh
         env:
           KBN_EVALS: '1'
+          FTR_EIS_CCM: '1'
           EVAL_SUITE_ID: 'esql-generation'
           EVAL_FANOUT: '1'
-          EVAL_MODEL_GROUPS: 'all'
+          EVAL_INCLUDE_EIS_MODELS: '1'
+          EVAL_MODEL_GROUPS: *weekly_eis_model_groups
         timeout_in_minutes: 60
         agents:
           image: family/kibana-ubuntu-2404
@@ -68,13 +90,14 @@ steps:
 
       - label: 'Evals: Streams'
         key: kbn-evals-weekly-streams
-        skip: 'Temporarily disabled (requires prerequisite data; will be re-enabled in follow-up PR)'
         command: bash .buildkite/scripts/steps/evals/run_suite.sh
         env:
           KBN_EVALS: '1'
+          FTR_EIS_CCM: '1'
           EVAL_SUITE_ID: 'streams'
           EVAL_FANOUT: '1'
-          EVAL_MODEL_GROUPS: 'all'
+          EVAL_INCLUDE_EIS_MODELS: '1'
+          EVAL_MODEL_GROUPS: *weekly_eis_model_groups
         timeout_in_minutes: 60
         agents:
           image: family/kibana-ubuntu-2404
@@ -92,30 +115,11 @@ steps:
         command: bash .buildkite/scripts/steps/evals/run_suite.sh
         env:
           KBN_EVALS: '1'
+          FTR_EIS_CCM: '1'
           EVAL_SUITE_ID: 'llm-tasks'
           EVAL_FANOUT: '1'
-          EVAL_MODEL_GROUPS: 'all'
-        timeout_in_minutes: 60
-        agents:
-          image: family/kibana-ubuntu-2404
-          imageProject: elastic-images-prod
-          provider: gcp
-          machineType: n2-standard-8
-          preemptible: true
-        retry:
-          automatic:
-            - exit_status: '-1'
-              limit: 3
-
-      - label: 'Evals: Observability AI Assistant'
-        key: kbn-evals-weekly-obs-ai-assistant
-        skip: 'Temporarily disabled (for the sake of cost reduction and low maturity of this eval suite; will be re-enabled in follow-up PR)'
-        command: bash .buildkite/scripts/steps/evals/run_suite.sh
-        env:
-          KBN_EVALS: '1'
-          EVAL_SUITE_ID: 'obs-ai-assistant'
-          EVAL_FANOUT: '1'
-          EVAL_MODEL_GROUPS: 'all'
+          EVAL_INCLUDE_EIS_MODELS: '1'
+          EVAL_MODEL_GROUPS: *weekly_eis_model_groups
         timeout_in_minutes: 60
         agents:
           image: family/kibana-ubuntu-2404
@@ -127,16 +131,16 @@ steps:
           automatic:
             - exit_status: '-1'
               limit: 3
-
-      - label: 'Evals: Observability AI Assistant (AI Insights)'
+      - label: 'Evals: Observability AI Insights'
         key: kbn-evals-weekly-obs-ai-assistant-ai-insights
-        skip: 'Temporarily disabled (requires prerequisite data; will be re-enabled in follow-up PR)'
         command: bash .buildkite/scripts/steps/evals/run_suite.sh
         env:
           KBN_EVALS: '1'
+          FTR_EIS_CCM: '1'
           EVAL_SUITE_ID: 'obs-ai-assistant/ai_insights'
           EVAL_FANOUT: '1'
-          EVAL_MODEL_GROUPS: 'all'
+          EVAL_INCLUDE_EIS_MODELS: '1'
+          EVAL_MODEL_GROUPS: *weekly_eis_model_groups
         timeout_in_minutes: 60
         agents:
           image: family/kibana-ubuntu-2404
 
@@ -11,6 +11,7 @@ plugins:
     - data_views
     - discover_enhanced
     - entity_store
+    - exploratory_view
     - fleet
     - gen_ai_settings
     - global_search
@@ -47,7 +48,6 @@ packages:
     # so they don't rerun alongside plugin/package Scout tests discovered later.
     - kbn-scout
     - kbn-scout-release-testing # Release tests will run separately as part of the release process
-    - kbn-evals-suite-agent-builder # Evaluation suite is run in dedicated eval pipelines, but Scout config must be registered for discovery validation
 
 # Define test configs to be excluded from automatic discovery & execution in CI environment (process.env.CI=true)
 excluded_configs:
 
@@ -193,10 +193,11 @@ EOF
       exit 1
     fi
 
-    # Sanity-check: EVALUATION_CONNECTOR_ID must match a generated connector id
-    if [[ -n "${EVALUATION_CONNECTOR_ID:-}" ]]; then
+    # Sanity-check: when the evaluation connector is expected to be LiteLLM-backed, it must match
+    # a generated LiteLLM connector id. (Non-LiteLLM evaluation connectors may be injected later.)
+    if [[ -n "${EVALUATION_CONNECTOR_ID:-}" ]] && [[ "${EVALUATION_CONNECTOR_ID}" == litellm-* ]]; then
       if ! node -e "const b=process.env.KIBANA_TESTING_AI_CONNECTORS||'';const s=Buffer.from(b,'base64').toString('utf8');const o=JSON.parse(s);const id=process.env.EVALUATION_CONNECTOR_ID;process.exit(Object.prototype.hasOwnProperty.call(o,id)?0:1);" ; then
-        echo "ERROR: EVALUATION_CONNECTOR_ID ($EVALUATION_CONNECTOR_ID) is not present in generated LiteLLM connectors."
+        echo "ERROR: EVALUATION_CONNECTOR_ID ($EVALUATION_CONNECTOR_ID) is not present in generated connectors."
         echo "Sample generated connector ids:"
         node -e "const b=process.env.KIBANA_TESTING_AI_CONNECTORS||'';const s=Buffer.from(b,'base64').toString('utf8');const o=JSON.parse(s);console.log(Object.keys(o).slice(0,20).join('\\n'));"
         exit 1
@@ -216,6 +217,9 @@ EOF
     if [[ -n "$TRACING_EXPORTERS_JSON" && "$TRACING_EXPORTERS_JSON" != "null" ]]; then
       export TRACING_EXPORTERS="$TRACING_EXPORTERS_JSON"
     fi
+
+    # Optional: GCS service account credentials for snapshot restoration (e.g. AI Insights)
+    export GCS_CREDENTIALS="$(jq -c '.gcsDatasetAccessCredentials // empty' <<<"$KBN_EVALS_CONFIG_JSON")"
   fi
 }
 
 
@@ -30,7 +30,9 @@ fi
 # Annotate ingestable meta-data (prefixed with 'ingest:')
 if [[ "${BUILDKITE_PULL_REQUEST_BASE_BRANCH:-}" != "" ]]; then # if we're in a PR build
   # GITHUB_PR_DRAFT is set by our pr build trigger bot
-  buildkite-agent meta-data set "ingest:is_draft_pr" "${GITHUB_PR_DRAFT:-}"
+  buildkite-agent meta-data set "ingest:is_draft_pr" "${GITHUB_PR_DRAFT:-false}"
   # GITHUB_PR_LABELS is set by our pr build trigger bot, and is a comma-separated list of labels on the PR
-  buildkite-agent meta-data set "ingest:pr_labels" "${GITHUB_PR_LABELS:-}"
+  if [[ -n "${GITHUB_PR_LABELS:-}" ]]; then
+    buildkite-agent meta-data set "ingest:pr_labels" "$GITHUB_PR_LABELS"
+  fi
 fi
@@ -129,7 +129,6 @@ const SKIPPABLE_PR_MATCHERS = prConfig.skip_ci_on_only_changed!.map((r) => new R
     ) {
       pipeline.push(getPipeline('.buildkite/pipelines/pull_request/synthetics_plugin.yml'));
       pipeline.push(getPipeline('.buildkite/pipelines/pull_request/uptime_plugin.yml'));
-      pipeline.push(getPipeline('.buildkite/pipelines/pull_request/exploratory_view_plugin.yml'));
       pipeline.push(getPipeline('.buildkite/pipelines/pull_request/ux_plugin_e2e.yml'));
     }
 
@@ -144,10 +143,10 @@ const SKIPPABLE_PR_MATCHERS = prConfig.skip_ci_on_only_changed!.map((r) => new R
       /^x-pack\/platform\/plugins\/shared\/stack_connectors\/server\/connector_types\/openai/,
       /^x-pack\/platform\/plugins\/shared\/stack_connectors\/server\/connector_types\/inference/,
     ];
-    // const agentBuilderPaths = [
-    //   /^x-pack\/platform\/plugins\/shared\/agent_builder/,
-    //   /^x-pack\/platform\/packages\/shared\/agent_builder/,
-    // ];
+    const agentBuilderPaths = [
+      /^x-pack\/platform\/plugins\/shared\/agent_builder/,
+      /^x-pack\/platform\/packages\/shared\/agent_builder/,
+    ];
 
     if (
       (await doAnyChangesMatch([...aiInfraPaths, ...aiConnectorPaths])) ||
@@ -157,9 +156,8 @@ const SKIPPABLE_PR_MATCHERS = prConfig.skip_ci_on_only_changed!.map((r) => new R
       pipeline.push(getPipeline('.buildkite/pipelines/pull_request/ai_infra_gen_ai.yml'));
     }
 
-    // Temporarily disable auto-trigger on file changes - smoke tests still run daily
     if (
-      // (await doAnyChangesMatch([...aiInfraPaths, ...aiConnectorPaths, ...agentBuilderPaths])) ||
+      (await doAnyChangesMatch([...aiInfraPaths, ...aiConnectorPaths, ...agentBuilderPaths])) ||
       GITHUB_PR_LABELS.includes('agent-builder:run-smoke-tests') ||
       GITHUB_PR_LABELS.includes('ci:all-gen-ai-suites') ||
       ALL_UI_TEST_SUITES
 
@@ -26,6 +26,6 @@ git commit -m "[api-docs] Daily api_docs build"
 
 git push origin "$branch"
 
-prUrl=$(gh pr create --repo elastic/kibana --base main --head "$branch" --title "[api-docs] $(date +%F) Daily api_docs build" --body "Generated by $BUILDKITE_BUILD_URL" --label "release_note:skip" --label "docs")
+prUrl=$(gh pr create --repo elastic/kibana --base main --head "$branch" --title "[api-docs] $(date +%F) Daily api_docs build" --body "Generated by $BUILDKITE_BUILD_URL" --label "release_note:skip" --label "backport:skip" --label "docs")
 echo "Opened PR: $prUrl"
 gh pr merge --repo elastic/kibana --auto --squash "$prUrl"
@@ -53,7 +53,7 @@ if is_pr; then
   # First, we try to obtain its SHA (or one of its ancestors)
   MERGE_BASE_REV="$(findExistingSnapshotSha "$GITHUB_PR_MERGE_BASE")"
   if [[ $? -ne 0 ]]; then
-    echo "❌ Could not find an existing snapshot to use as a baseline. Aborting Saved Objects checks" >&2
+    echo "❌ Could not find an existing snapshot to use as a baseline. Please rebase this PR branch onto the latest 'main' commit, then rerun CI." >&2
     exit 1
   fi
 
 
@@ -183,6 +183,9 @@ main () {
 
   # Note: We run build commands directly instead of `yarn build:antlr4` to skip
   # the prebuild:antlr4 hook which uses `brew` (macOS only). CI has antlr installed.
+  # Pin the ANTLR version to avoid the broken Sonatype Central version-lookup API
+  # in antlr4-tools (https://github.com/antlr/antlr4-tools/issues/18).
+  export ANTLR4_TOOLS_ANTLR_VERSION="4.13.2"
   cd ./src/platform/packages/shared/kbn-esql-language
   yarn build:antlr4:esql
   yarn build:antlr4:promql