Skip to content

Commit 8901a99

Browse files
authored
Use query param instead of a system property for opting in for new cluster health response code (#79351)
The original change was implemented in #78940, bu we have decided to move from a system property to an a request parameter, so Cloud users/clients have an easier way to opt-in for the new status code. Relates #70849
1 parent 74cce57 commit 8901a99

14 files changed

Lines changed: 112 additions & 58 deletions

File tree

client/rest-high-level/src/test/java/org/elasticsearch/client/ClusterClientIT.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,13 +37,13 @@
3737
import org.elasticsearch.common.compress.CompressedXContent;
3838
import org.elasticsearch.common.settings.Settings;
3939
import org.elasticsearch.common.unit.ByteSizeUnit;
40-
import org.elasticsearch.core.TimeValue;
41-
import org.elasticsearch.xcontent.XContentType;
4240
import org.elasticsearch.common.xcontent.support.XContentMapValues;
41+
import org.elasticsearch.core.TimeValue;
4342
import org.elasticsearch.indices.recovery.RecoverySettings;
4443
import org.elasticsearch.rest.RestStatus;
4544
import org.elasticsearch.transport.RemoteClusterService;
4645
import org.elasticsearch.transport.SniffConnectionStrategy;
46+
import org.elasticsearch.xcontent.XContentType;
4747

4848
import java.io.IOException;
4949
import java.util.HashMap;
@@ -316,7 +316,7 @@ public void testClusterHealthNotFoundIndex() throws IOException {
316316
assertThat(response.getStatus(), equalTo(ClusterHealthStatus.RED));
317317
assertNoIndices(response);
318318
assertWarnings("The HTTP status code for a cluster health timeout will be changed from 408 to 200 in a " +
319-
"future version. Set the [es.cluster_health.request_timeout_200] system property to [true] to suppress this message and " +
319+
"future version. Set the [return_200_for_cluster_health_timeout] query parameter to [true] to suppress this message and " +
320320
"opt in to the future behaviour now.");
321321
}
322322

docs/reference/cluster/health.asciidoc

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,11 @@ include::{es-repo-dir}/rest-api/common-parms.asciidoc[tag=timeoutparms]
9797
provided or better, i.e. `green` > `yellow` > `red`. By default, will not
9898
wait for any status.
9999

100+
`return_200_for_cluster_health_timeout`::
101+
(Optional, Boolean) A boolean value which controls whether to return HTTP 200
102+
status code instead of HTTP 408 in case of a cluster health timeout from
103+
the server side. Defaults to false.
104+
100105
[[cluster-health-api-response-body]]
101106
==== {api-response-body-title}
102107

rest-api-spec/src/main/resources/rest-api-spec/api/cluster.health.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,10 @@
102102
"red"
103103
],
104104
"description":"Wait until cluster is in a specific state"
105+
},
106+
"return_200_for_cluster_health_timeout":{
107+
"type":"boolean",
108+
"description":"Whether to return HTTP 200 instead of 408 in case of a cluster health timeout from the server side"
105109
}
106110
}
107111
}

rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/cluster.health/20_request_timeout.yml

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,3 +35,25 @@
3535
- match: { initializing_shards: 0 }
3636
- match: { unassigned_shards: 0 }
3737
- gte: { number_of_pending_tasks: 0 }
38+
39+
---
40+
"cluster health request timeout with 200 response code":
41+
- skip:
42+
version: " - 7.99.99"
43+
reason: "return_200_for_cluster_health_timeout exists only in 8.0.0; re-enable in 7.16+ when back-ported"
44+
- do:
45+
cluster.health:
46+
timeout: 1ms
47+
wait_for_active_shards: 5
48+
return_200_for_cluster_health_timeout: true
49+
50+
- is_true: cluster_name
51+
- is_true: timed_out
52+
- gte: { number_of_nodes: 1 }
53+
- gte: { number_of_data_nodes: 1 }
54+
- match: { active_primary_shards: 0 }
55+
- match: { active_shards: 0 }
56+
- match: { relocating_shards: 0 }
57+
- match: { initializing_shards: 0 }
58+
- match: { unassigned_shards: 0 }
59+
- gte: { number_of_pending_tasks: 0 }

server/src/main/java/org/elasticsearch/action/admin/cluster/health/ClusterHealthRequest.java

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@ public class ClusterHealthRequest extends MasterNodeReadRequest<ClusterHealthReq
3535
private ActiveShardCount waitForActiveShards = ActiveShardCount.NONE;
3636
private String waitForNodes = "";
3737
private Priority waitForEvents = null;
38+
private boolean return200ForClusterHealthTimeout;
39+
3840
/**
3941
* Only used by the high-level REST Client. Controls the details level of the health information returned.
4042
* The default value is 'cluster'.
@@ -67,6 +69,9 @@ public ClusterHealthRequest(StreamInput in) throws IOException {
6769
} else {
6870
indicesOptions = IndicesOptions.lenientExpandOpen();
6971
}
72+
if (in.getVersion().onOrAfter(Version.V_8_0_0)) {
73+
return200ForClusterHealthTimeout = in.readBoolean();
74+
}
7075
}
7176

7277
@Override
@@ -97,6 +102,11 @@ public void writeTo(StreamOutput out) throws IOException {
97102
if (out.getVersion().onOrAfter(Version.V_7_2_0)) {
98103
indicesOptions.writeIndicesOptions(out);
99104
}
105+
if (out.getVersion().onOrAfter(Version.V_8_0_0)) {
106+
out.writeBoolean(return200ForClusterHealthTimeout);
107+
} else if (return200ForClusterHealthTimeout) {
108+
throw new IllegalArgumentException("Can't fix response code in a cluster involving nodes with version " + out.getVersion());
109+
}
100110
}
101111

102112
@Override
@@ -240,6 +250,18 @@ public Priority waitForEvents() {
240250
return this.waitForEvents;
241251
}
242252

253+
public boolean doesReturn200ForClusterHealthTimeout() {
254+
return return200ForClusterHealthTimeout;
255+
}
256+
257+
/**
258+
* Sets whether to return HTTP 200 status code instead of HTTP 408 in case of a
259+
* cluster health timeout from the server side.
260+
*/
261+
public void return200ForClusterHealthTimeout(boolean return200ForClusterHealthTimeout) {
262+
this.return200ForClusterHealthTimeout = return200ForClusterHealthTimeout;
263+
}
264+
243265
/**
244266
* Set the level of detail for the health information to be returned.
245267
* Only used by the high-level REST Client.

server/src/main/java/org/elasticsearch/action/admin/cluster/health/ClusterHealthResponse.java

Lines changed: 25 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -8,24 +8,25 @@
88

99
package org.elasticsearch.action.admin.cluster.health;
1010

11+
import org.elasticsearch.Version;
1112
import org.elasticsearch.action.ActionResponse;
1213
import org.elasticsearch.cluster.ClusterState;
1314
import org.elasticsearch.cluster.health.ClusterHealthStatus;
1415
import org.elasticsearch.cluster.health.ClusterIndexHealth;
1516
import org.elasticsearch.cluster.health.ClusterStateHealth;
16-
import org.elasticsearch.common.logging.DeprecationLogger;
17-
import org.elasticsearch.xcontent.ParseField;
1817
import org.elasticsearch.common.Strings;
1918
import org.elasticsearch.common.io.stream.StreamInput;
2019
import org.elasticsearch.common.io.stream.StreamOutput;
20+
import org.elasticsearch.common.logging.DeprecationLogger;
21+
import org.elasticsearch.common.xcontent.StatusToXContentObject;
2122
import org.elasticsearch.core.TimeValue;
23+
import org.elasticsearch.rest.RestStatus;
24+
import org.elasticsearch.rest.action.search.RestSearchAction;
2225
import org.elasticsearch.xcontent.ConstructingObjectParser;
2326
import org.elasticsearch.xcontent.ObjectParser;
24-
import org.elasticsearch.common.xcontent.StatusToXContentObject;
27+
import org.elasticsearch.xcontent.ParseField;
2528
import org.elasticsearch.xcontent.XContentBuilder;
2629
import org.elasticsearch.xcontent.XContentParser;
27-
import org.elasticsearch.rest.RestStatus;
28-
import org.elasticsearch.rest.action.search.RestSearchAction;
2930

3031
import java.io.IOException;
3132
import java.util.HashMap;
@@ -101,10 +102,10 @@ public class ClusterHealthResponse extends ActionResponse implements StatusToXCo
101102

102103
private static final ObjectParser.NamedObjectParser<ClusterIndexHealth, Void> INDEX_PARSER =
103104
(XContentParser parser, Void context, String index) -> ClusterIndexHealth.innerFromXContent(parser, index);
104-
private static final String ES_CLUSTER_HEALTH_REQUEST_TIMEOUT_200_KEY = "es.cluster_health.request_timeout_200";
105+
static final String ES_CLUSTER_HEALTH_REQUEST_TIMEOUT_200_KEY = "return_200_for_cluster_health_timeout";
105106
static final String CLUSTER_HEALTH_REQUEST_TIMEOUT_DEPRECATION_MSG = "The HTTP status code for a cluster health timeout " +
106107
"will be changed from 408 to 200 in a future version. Set the [" + ES_CLUSTER_HEALTH_REQUEST_TIMEOUT_200_KEY + "] " +
107-
"system property to [true] to suppress this message and opt in to the future behaviour now.";
108+
"query parameter to [true] to suppress this message and opt in to the future behaviour now.";
108109

109110
static {
110111
// ClusterStateHealth fields
@@ -137,15 +138,7 @@ public class ClusterHealthResponse extends ActionResponse implements StatusToXCo
137138
private boolean timedOut = false;
138139
private ClusterStateHealth clusterStateHealth;
139140
private ClusterHealthStatus clusterHealthStatus;
140-
private boolean esClusterHealthRequestTimeout200 = readEsClusterHealthRequestTimeout200FromProperty();
141-
142-
public ClusterHealthResponse() {
143-
}
144-
145-
/** For the testing of opting in for the 200 status code without setting a system property */
146-
ClusterHealthResponse(boolean esClusterHealthRequestTimeout200) {
147-
this.esClusterHealthRequestTimeout200 = esClusterHealthRequestTimeout200;
148-
}
141+
private boolean return200ForClusterHealthTimeout;
149142

150143
public ClusterHealthResponse(StreamInput in) throws IOException {
151144
super(in);
@@ -157,22 +150,29 @@ public ClusterHealthResponse(StreamInput in) throws IOException {
157150
numberOfInFlightFetch = in.readInt();
158151
delayedUnassignedShards= in.readInt();
159152
taskMaxWaitingTime = in.readTimeValue();
153+
if (in.getVersion().onOrAfter(Version.V_8_0_0)) {
154+
return200ForClusterHealthTimeout = in.readBoolean();
155+
}
160156
}
161157

162158
/** needed for plugins BWC */
163-
public ClusterHealthResponse(String clusterName, String[] concreteIndices, ClusterState clusterState) {
164-
this(clusterName, concreteIndices, clusterState, -1, -1, -1, TimeValue.timeValueHours(0));
159+
public ClusterHealthResponse(String clusterName, String[] concreteIndices, ClusterState clusterState,
160+
boolean return200ForServerTimeout) {
161+
this(clusterName, concreteIndices, clusterState, -1, -1, -1, TimeValue.timeValueHours(0),
162+
return200ForServerTimeout);
165163
}
166164

167165
public ClusterHealthResponse(String clusterName, String[] concreteIndices, ClusterState clusterState, int numberOfPendingTasks,
168-
int numberOfInFlightFetch, int delayedUnassignedShards, TimeValue taskMaxWaitingTime) {
166+
int numberOfInFlightFetch, int delayedUnassignedShards, TimeValue taskMaxWaitingTime,
167+
boolean return200ForServerTimeout) {
169168
this.clusterName = clusterName;
170169
this.numberOfPendingTasks = numberOfPendingTasks;
171170
this.numberOfInFlightFetch = numberOfInFlightFetch;
172171
this.delayedUnassignedShards = delayedUnassignedShards;
173172
this.taskMaxWaitingTime = taskMaxWaitingTime;
174173
this.clusterStateHealth = new ClusterStateHealth(clusterState, concreteIndices);
175174
this.clusterHealthStatus = clusterStateHealth.getStatus();
175+
this.return200ForClusterHealthTimeout = return200ForServerTimeout;
176176
}
177177

178178
/**
@@ -304,6 +304,11 @@ public void writeTo(StreamOutput out) throws IOException {
304304
out.writeInt(numberOfInFlightFetch);
305305
out.writeInt(delayedUnassignedShards);
306306
out.writeTimeValue(taskMaxWaitingTime);
307+
if (out.getVersion().onOrAfter(Version.V_8_0_0)) {
308+
out.writeBoolean(return200ForClusterHealthTimeout);
309+
} else if (return200ForClusterHealthTimeout) {
310+
throw new IllegalArgumentException("Can't fix response code in a cluster involving nodes with version " + out.getVersion());
311+
}
307312
}
308313

309314
@Override
@@ -316,7 +321,7 @@ public RestStatus status() {
316321
if (isTimedOut() == false) {
317322
return RestStatus.OK;
318323
}
319-
if (esClusterHealthRequestTimeout200) {
324+
if (return200ForClusterHealthTimeout) {
320325
return RestStatus.OK;
321326
} else {
322327
deprecationLogger.compatibleCritical("cluster_health_request_timeout", CLUSTER_HEALTH_REQUEST_TIMEOUT_DEPRECATION_MSG);
@@ -381,17 +386,4 @@ public int hashCode() {
381386
return Objects.hash(clusterName, numberOfPendingTasks, numberOfInFlightFetch, delayedUnassignedShards, taskMaxWaitingTime,
382387
timedOut, clusterStateHealth, clusterHealthStatus);
383388
}
384-
385-
private static boolean readEsClusterHealthRequestTimeout200FromProperty() {
386-
String property = System.getProperty(ES_CLUSTER_HEALTH_REQUEST_TIMEOUT_200_KEY);
387-
if (property == null) {
388-
return false;
389-
}
390-
if (Boolean.parseBoolean(property)) {
391-
return true;
392-
} else {
393-
throw new IllegalArgumentException(ES_CLUSTER_HEALTH_REQUEST_TIMEOUT_200_KEY + " can only be unset or [true] but was ["
394-
+ property + "]");
395-
}
396-
}
397389
}

server/src/main/java/org/elasticsearch/action/admin/cluster/health/TransportClusterHealthAction.java

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,8 @@
3030
import org.elasticsearch.cluster.service.ClusterService;
3131
import org.elasticsearch.common.Strings;
3232
import org.elasticsearch.common.inject.Inject;
33-
import org.elasticsearch.core.TimeValue;
3433
import org.elasticsearch.common.util.CollectionUtils;
34+
import org.elasticsearch.core.TimeValue;
3535
import org.elasticsearch.index.IndexNotFoundException;
3636
import org.elasticsearch.node.NodeClosedException;
3737
import org.elasticsearch.tasks.Task;
@@ -225,7 +225,8 @@ private enum TimeoutState {
225225

226226
private ClusterHealthResponse getResponse(final ClusterHealthRequest request, ClusterState clusterState,
227227
final int waitFor, TimeoutState timeoutState) {
228-
ClusterHealthResponse response = clusterHealth(request, clusterState, clusterService.getMasterService().numberOfPendingTasks(),
228+
ClusterHealthResponse response = clusterHealth(request, clusterState,
229+
clusterService.getMasterService().numberOfPendingTasks(),
229230
allocationService.getNumberOfInFlightFetches(), clusterService.getMasterService().getMaxTaskWaitTime());
230231
int readyCounter = prepareResponse(request, response, clusterState, indexNameExpressionResolver);
231232
boolean valid = (readyCounter == waitFor);
@@ -324,8 +325,8 @@ static int prepareResponse(final ClusterHealthRequest request, final ClusterHeal
324325
}
325326

326327

327-
private ClusterHealthResponse clusterHealth(ClusterHealthRequest request, ClusterState clusterState, int numberOfPendingTasks,
328-
int numberOfInFlightFetch, TimeValue pendingTaskTimeInQueue) {
328+
private ClusterHealthResponse clusterHealth(ClusterHealthRequest request, ClusterState clusterState,
329+
int numberOfPendingTasks, int numberOfInFlightFetch, TimeValue pendingTaskTimeInQueue) {
329330
if (logger.isTraceEnabled()) {
330331
logger.trace("Calculating health based on state version [{}]", clusterState.version());
331332
}
@@ -337,12 +338,13 @@ private ClusterHealthResponse clusterHealth(ClusterHealthRequest request, Cluste
337338
// one of the specified indices is not there - treat it as RED.
338339
ClusterHealthResponse response = new ClusterHealthResponse(clusterState.getClusterName().value(), Strings.EMPTY_ARRAY,
339340
clusterState, numberOfPendingTasks, numberOfInFlightFetch, UnassignedInfo.getNumberOfDelayedUnassigned(clusterState),
340-
pendingTaskTimeInQueue);
341+
pendingTaskTimeInQueue, request.doesReturn200ForClusterHealthTimeout());
341342
response.setStatus(ClusterHealthStatus.RED);
342343
return response;
343344
}
344345

345-
return new ClusterHealthResponse(clusterState.getClusterName().value(), concreteIndices, clusterState, numberOfPendingTasks,
346-
numberOfInFlightFetch, UnassignedInfo.getNumberOfDelayedUnassigned(clusterState), pendingTaskTimeInQueue);
346+
return new ClusterHealthResponse(clusterState.getClusterName().value(), concreteIndices, clusterState,
347+
numberOfPendingTasks, numberOfInFlightFetch, UnassignedInfo.getNumberOfDelayedUnassigned(clusterState), pendingTaskTimeInQueue,
348+
request.doesReturn200ForClusterHealthTimeout());
347349
}
348350
}

server/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestClusterHealthAction.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,9 @@ public static ClusterHealthRequest fromRequest(final RestRequest request) {
8181
if (request.param("wait_for_events") != null) {
8282
clusterHealthRequest.waitForEvents(Priority.valueOf(request.param("wait_for_events").toUpperCase(Locale.ROOT)));
8383
}
84+
clusterHealthRequest.return200ForClusterHealthTimeout(request.paramAsBoolean(
85+
"return_200_for_cluster_health_timeout",
86+
clusterHealthRequest.doesReturn200ForClusterHealthTimeout()));
8487
return clusterHealthRequest;
8588
}
8689

server/src/test/java/org/elasticsearch/action/admin/cluster/health/ClusterHealthRequestTests.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ public void testSerialize() throws Exception {
4343
assertThat(cloneRequest.waitForEvents(), equalTo(originalRequest.waitForEvents()));
4444
assertIndicesEquals(cloneRequest.indices(), originalRequest.indices());
4545
assertThat(cloneRequest.indicesOptions(), equalTo(originalRequest.indicesOptions()));
46+
assertThat(cloneRequest.doesReturn200ForClusterHealthTimeout(), equalTo(originalRequest.doesReturn200ForClusterHealthTimeout()));
4647
}
4748

4849
public void testRequestReturnsHiddenIndicesByDefault() {

server/src/test/java/org/elasticsearch/action/admin/cluster/health/ClusterHealthResponsesTests.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,10 @@
2020
import org.elasticsearch.common.io.stream.Writeable;
2121
import org.elasticsearch.common.settings.Settings;
2222
import org.elasticsearch.core.TimeValue;
23-
import org.elasticsearch.xcontent.ToXContent;
24-
import org.elasticsearch.xcontent.XContentParser;
2523
import org.elasticsearch.rest.RestStatus;
2624
import org.elasticsearch.test.AbstractSerializingTestCase;
25+
import org.elasticsearch.xcontent.ToXContent;
26+
import org.elasticsearch.xcontent.XContentParser;
2727
import org.hamcrest.Matchers;
2828

2929
import java.io.IOException;
@@ -43,7 +43,7 @@ public class ClusterHealthResponsesTests extends AbstractSerializingTestCase<Clu
4343
private final ClusterHealthRequest.Level level = randomFrom(ClusterHealthRequest.Level.values());
4444

4545
public void testIsTimeout() {
46-
ClusterHealthResponse res = new ClusterHealthResponse();
46+
ClusterHealthResponse res = new ClusterHealthResponse("", new String[]{}, ClusterState.EMPTY_STATE, false);
4747
for (int i = 0; i < 5; i++) {
4848
res.setTimedOut(randomBoolean());
4949
if (res.isTimedOut()) {
@@ -56,7 +56,7 @@ public void testIsTimeout() {
5656
}
5757

5858
public void testTimeoutReturns200IfOptedIn() {
59-
ClusterHealthResponse res = new ClusterHealthResponse(true);
59+
ClusterHealthResponse res = new ClusterHealthResponse("", new String[]{}, ClusterState.EMPTY_STATE, true);
6060
for (int i = 0; i < 5; i++) {
6161
res.setTimedOut(randomBoolean());
6262
assertEquals(RestStatus.OK, res.status());
@@ -70,7 +70,7 @@ public void testClusterHealth() throws IOException {
7070
int delayedUnassigned = randomIntBetween(0, 200);
7171
TimeValue pendingTaskInQueueTime = TimeValue.timeValueMillis(randomIntBetween(1000, 100000));
7272
ClusterHealthResponse clusterHealth = new ClusterHealthResponse("bla", new String[] {Metadata.ALL},
73-
clusterState, pendingTasks, inFlight, delayedUnassigned, pendingTaskInQueueTime);
73+
clusterState, pendingTasks, inFlight, delayedUnassigned, pendingTaskInQueueTime, false);
7474
clusterHealth = maybeSerialize(clusterHealth);
7575
assertClusterHealth(clusterHealth);
7676
assertThat(clusterHealth.getNumberOfPendingTasks(), Matchers.equalTo(pendingTasks));

0 commit comments

Comments
 (0)