Skip to content

[CI] MlDistributedFailureIT testFullClusterRestart failing #96822

@thecoop

Description

@thecoop

Build scan:
https://gradle-enterprise.elastic.co/s/kecgvgpmpwq24/tests/:x-pack:plugin:ml:internalClusterTest/org.elasticsearch.xpack.ml.integration.MlDistributedFailureIT/testFullClusterRestart

Reproduction line:

./gradlew ':x-pack:plugin:ml:internalClusterTest' --tests "org.elasticsearch.xpack.ml.integration.MlDistributedFailureIT.testFullClusterRestart" -Dtests.seed=AD23039C7FA58116 -Dtests.locale=ar-YE -Dtests.timezone=Europe/Oslo -Druntime.java=20

Applicable branches:
main

Reproduces locally?:
No

Failure history:
https://gradle-enterprise.elastic.co/scans/tests?tests.container=org.elasticsearch.xpack.ml.integration.MlDistributedFailureIT&tests.test=testFullClusterRestart

Failure excerpt:

com.carrotsearch.randomizedtesting.UncaughtExceptionError: Captured an uncaught exception in thread: Thread[id=1111, name=elasticsearch[node_t1][write][T#1], state=RUNNABLE, group=TGRP-MlDistributedFailureIT]

  at __randomizedtesting.SeedInfo.seed([AD23039C7FA58116:FC0C128D8A89C9B9]:0)

  Caused by: java.lang.AssertionError: [.ml-anomalies-shared/4MF2yJ9FQcejJY8XRQPa6Q][[.ml-anomalies-shared][0]] org.elasticsearch.index.shard.IndexShardClosedException: CurrentState[CLOSED] operation only allowed when not closed

    at __randomizedtesting.SeedInfo.seed([AD23039C7FA58116]:0)
    at org.elasticsearch.action.ActionListener.completeWith(ActionListener.java:321)
    at org.elasticsearch.index.engine.Engine.externalRefresh(Engine.java:1044)
    at org.elasticsearch.index.shard.IndexShard.externalRefresh(IndexShard.java:1236)
    at org.elasticsearch.action.support.replication.PostWriteRefresh.immediate(PostWriteRefresh.java:101)
    at org.elasticsearch.action.support.replication.PostWriteRefresh.refreshShard(PostWriteRefresh.java:65)
    at org.elasticsearch.action.support.replication.TransportWriteAction$AsyncAfterWriteAction.run(TransportWriteAction.java:505)
    at org.elasticsearch.action.support.replication.TransportWriteAction$WritePrimaryResult.runPostReplicationActions(TransportWriteAction.java:318)
    at org.elasticsearch.action.support.replication.ReplicationOperation.handlePrimaryResult(ReplicationOperation.java:173)
    at org.elasticsearch.action.ActionListener$2.onResponse(ActionListener.java:169)
    at org.elasticsearch.action.ActionListenerImplementations$MappedActionListener.onResponse(ActionListenerImplementations.java:94)
    at org.elasticsearch.action.ActionListener.completeWith(ActionListener.java:319)
    at org.elasticsearch.action.bulk.TransportShardBulkAction$2.finishRequest(TransportShardBulkAction.java:273)
    at org.elasticsearch.action.bulk.TransportShardBulkAction$2.doRun(TransportShardBulkAction.java:235)
    at org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:26)
    at org.elasticsearch.action.bulk.TransportShardBulkAction.performOnPrimary(TransportShardBulkAction.java:286)
    at org.elasticsearch.action.bulk.TransportShardBulkAction.dispatchedShardOperationOnPrimary(TransportShardBulkAction.java:137)
    at org.elasticsearch.action.bulk.TransportShardBulkAction.dispatchedShardOperationOnPrimary(TransportShardBulkAction.java:74)
    at org.elasticsearch.action.support.replication.TransportWriteAction$1.doRun(TransportWriteAction.java:215)
    at org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingAbstractRunnable.doRun(ThreadContext.java:983)
    at org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:26)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642)
    at java.lang.Thread.run(Thread.java:1623)

    Caused by: org.elasticsearch.index.shard.IndexShardClosedException: CurrentState[CLOSED] operation only allowed when not closed

      at org.elasticsearch.index.shard.IndexShard.verifyNotClosed(IndexShard.java:2190)
      at org.elasticsearch.index.shard.IndexShard.verifyNotClosed(IndexShard.java:2184)
      at org.elasticsearch.index.shard.IndexShard.getReplicationGroup(IndexShard.java:2777)
      at org.elasticsearch.action.support.replication.PostWriteRefresh$2.onResponse(PostWriteRefresh.java:70)
      at org.elasticsearch.action.support.replication.PostWriteRefresh$2.onResponse(PostWriteRefresh.java:65)
      at org.elasticsearch.action.ActionListener.completeWith(ActionListener.java:319)
      at org.elasticsearch.index.engine.Engine.externalRefresh(Engine.java:1044)
      at org.elasticsearch.index.shard.IndexShard.externalRefresh(IndexShard.java:1236)
      at org.elasticsearch.action.support.replication.PostWriteRefresh.immediate(PostWriteRefresh.java:101)
      at org.elasticsearch.action.support.replication.PostWriteRefresh.refreshShard(PostWriteRefresh.java:65)
      at org.elasticsearch.action.support.replication.TransportWriteAction$AsyncAfterWriteAction.run(TransportWriteAction.java:505)
      at org.elasticsearch.action.support.replication.TransportWriteAction$WritePrimaryResult.runPostReplicationActions(TransportWriteAction.java:318)
      at org.elasticsearch.action.support.replication.ReplicationOperation.handlePrimaryResult(ReplicationOperation.java:173)
      at org.elasticsearch.action.ActionListener$2.onResponse(ActionListener.java:169)
      at org.elasticsearch.action.ActionListenerImplementations$MappedActionListener.onResponse(ActionListenerImplementations.java:94)
      at org.elasticsearch.action.ActionListener.completeWith(ActionListener.java:319)
      at org.elasticsearch.action.bulk.TransportShardBulkAction$2.finishRequest(TransportShardBulkAction.java:273)
      at org.elasticsearch.action.bulk.TransportShardBulkAction$2.doRun(TransportShardBulkAction.java:235)
      at org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:26)
      at org.elasticsearch.action.bulk.TransportShardBulkAction.performOnPrimary(TransportShardBulkAction.java:286)
      at org.elasticsearch.action.bulk.TransportShardBulkAction.dispatchedShardOperationOnPrimary(TransportShardBulkAction.java:137)
      at org.elasticsearch.action.bulk.TransportShardBulkAction.dispatchedShardOperationOnPrimary(TransportShardBulkAction.java:74)
      at org.elasticsearch.action.support.replication.TransportWriteAction$1.doRun(TransportWriteAction.java:215)
      at org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingAbstractRunnable.doRun(ThreadContext.java:983)
      at org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:26)
      at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144)
      at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642)
      at java.lang.Thread.run(Thread.java:1623)

Metadata

Metadata

Assignees

No one assigned

    Labels

    :ml (Machine learning), >test-failure (Triaged test failures from CI), Team:ML (Meta label for the ML team)

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions