Skip to content

Commit 387989d

Browse files
committed
Caching avg total bytes and avg free bytes inside ClusterInfo
Signed-off-by: RS146BIJAY <rishavsagar4b1@gmail.com>
1 parent b980b12 commit 387989d

2 files changed

Lines changed: 58 additions & 5 deletions

File tree

server/src/main/java/org/opensearch/cluster/ClusterInfo.java

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
package org.opensearch.cluster;
3434

3535
import org.opensearch.Version;
36+
import org.opensearch.cluster.routing.RoutingNode;
3637
import org.opensearch.cluster.routing.ShardRouting;
3738
import org.opensearch.common.annotation.PublicApi;
3839
import org.opensearch.core.common.io.stream.StreamInput;
@@ -68,9 +69,12 @@ public class ClusterInfo implements ToXContentFragment, Writeable {
6869
final Map<ShardRouting, String> routingToDataPath;
6970
final Map<NodeAndPath, ReservedSpace> reservedSpace;
7071
final Map<String, FileCacheStats> nodeFileCacheStats;
72+
private long avgTotalBytes;
73+
private long avgFreeByte;
7174

7275
protected ClusterInfo() {
7376
this(Map.of(), Map.of(), Map.of(), Map.of(), Map.of(), Map.of());
77+
calculateAvgFreeAndTotalBytes(mostAvailableSpaceUsage);
7478
}
7579

7680
/**
@@ -97,6 +101,7 @@ public ClusterInfo(
97101
this.routingToDataPath = routingToDataPath;
98102
this.reservedSpace = reservedSpace;
99103
this.nodeFileCacheStats = nodeFileCacheStats;
104+
calculateAvgFreeAndTotalBytes(mostAvailableSpaceUsage);
100105
}
101106

102107
public ClusterInfo(StreamInput in) throws IOException {
@@ -117,6 +122,39 @@ public ClusterInfo(StreamInput in) throws IOException {
117122
} else {
118123
this.nodeFileCacheStats = Map.of();
119124
}
125+
126+
calculateAvgFreeAndTotalBytes(mostAvailableSpaceUsage);
127+
}
128+
129+
/**
130+
* Computes and caches the average total bytes and average free bytes across
131+
* all nodes in the disk usage map (both averages are 0 when the map is empty).
132+
* @param usages Map of nodeId to DiskUsage for all known nodes
133+
*/
134+
private void calculateAvgFreeAndTotalBytes(final Map<String, DiskUsage> usages) {
135+
if (usages == null || usages.isEmpty()) {
136+
this.avgTotalBytes = 0;
137+
this.avgFreeByte = 0;
138+
return;
139+
}
140+
141+
long totalBytes = 0;
142+
long freeBytes = 0;
143+
for (DiskUsage du : usages.values()) {
144+
totalBytes += du.getTotalBytes();
145+
freeBytes += du.getFreeBytes();
146+
}
147+
148+
this.avgTotalBytes = totalBytes / usages.size();
149+
this.avgFreeByte = freeBytes / usages.size();
150+
}
151+
152+
public long getAvgFreeByte() {
153+
return avgFreeByte;
154+
}
155+
156+
public long getAvgTotalBytes() {
157+
return avgTotalBytes;
120158
}
121159

122160
@Override

server/src/main/java/org/opensearch/cluster/routing/allocation/decider/DiskThresholdDecider.java

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -140,9 +140,8 @@ public static long sizeOfRelocatingShards(
140140

141141
// Where reserved space is unavailable (e.g. stats are out-of-sync) compute a conservative estimate for initialising shards
142142
final List<ShardRouting> initializingShards = node.shardsWithState(ShardRoutingState.INITIALIZING);
143-
initializingShards.removeIf(shardRouting -> reservedSpace.containsShardId(shardRouting.shardId()));
144143
for (ShardRouting routing : initializingShards) {
145-
if (routing.relocatingNodeId() == null) {
144+
if (routing.relocatingNodeId() == null || reservedSpace.containsShardId(routing.shardId())) {
146145
// in practice the only initializing-but-not-relocating shards with a nonzero expected shard size will be ones created
147146
// by a resize (shrink/split/clone) operation which we expect to happen using hard links, so they shouldn't be taking
148147
// any additional space and can be ignored here
@@ -230,7 +229,14 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing
230229

231230
// subtractLeavingShards is passed as false here, because they still use disk space, and therefore we should be extra careful
232231
// and take the size into account
233-
final DiskUsageWithRelocations usage = getDiskUsage(node, allocation, usages, false);
232+
final DiskUsageWithRelocations usage = getDiskUsage(
233+
node,
234+
allocation,
235+
usages,
236+
clusterInfo.getAvgFreeByte(),
237+
clusterInfo.getAvgTotalBytes(),
238+
false
239+
);
234240
// First, check that the node currently over the low watermark
235241
double freeDiskPercentage = usage.getFreeDiskAsPercentage();
236242
// Cache the used disk percentage for displaying disk percentages consistent with documentation
@@ -492,7 +498,14 @@ public Decision canRemain(ShardRouting shardRouting, RoutingNode node, RoutingAl
492498

493499
// subtractLeavingShards is passed as true here, since this is only for shards remaining, we will *eventually* have enough disk
494500
// since shards are moving away. No new shards will be incoming since in canAllocate we pass false for this check.
495-
final DiskUsageWithRelocations usage = getDiskUsage(node, allocation, usages, true);
501+
final DiskUsageWithRelocations usage = getDiskUsage(
502+
node,
503+
allocation,
504+
usages,
505+
clusterInfo.getAvgFreeByte(),
506+
clusterInfo.getAvgTotalBytes(),
507+
true
508+
);
496509
final String dataPath = clusterInfo.getDataPath(shardRouting);
497510
// If this node is already above the high threshold, the shard cannot remain (get it off!)
498511
final double freeDiskPercentage = usage.getFreeDiskAsPercentage();
@@ -581,13 +594,15 @@ private DiskUsageWithRelocations getDiskUsage(
581594
RoutingNode node,
582595
RoutingAllocation allocation,
583596
final Map<String, DiskUsage> usages,
597+
final long avgFreeBytes,
598+
final long avgTotalBytes,
584599
boolean subtractLeavingShards
585600
) {
586601
DiskUsage usage = usages.get(node.nodeId());
587602
if (usage == null) {
588603
// If there is no usage, and we have other nodes in the cluster,
589604
// use the average usage for all nodes as the usage for this node
590-
usage = averageUsage(node, usages);
605+
usage = new DiskUsage(node.nodeId(), node.node().getName(), "_na_", avgTotalBytes, avgFreeBytes);
591606
if (logger.isDebugEnabled()) {
592607
logger.debug(
593608
"unable to determine disk usage for {}, defaulting to average across nodes [{} total] [{} free] [{}% free]",

0 commit comments

Comments
 (0)