Skip to content

Commit c3c7457

Browse files
Adding shards per node constraint for predictability to testClusterGreenAfterPartialRelocation (opensearch-project#2110) (opensearch-project#2265)
* Adding shards per node constraint for predictability to testClusterGreenAfterPartialRelocation
  Signed-off-by: Ankit Jain <jain.ankitk@gmail.com>
* Fixing precommit violation
  Signed-off-by: Ankit Jain <jain.ankitk@gmail.com>
* Adding assertion to ensure invariant
  Signed-off-by: Ankit Jain <jain.ankitk@gmail.com>

(cherry picked from commit 8ae0db5)
Co-authored-by: Ankit Jain <jain.ankitk@gmail.com>
1 parent da2f92b commit c3c7457

File tree

1 file changed

+23
-23
lines changed

1 file changed

+23
-23
lines changed

server/src/test/java/org/opensearch/cluster/routing/MovePrimaryFirstTests.java

Lines changed: 23 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -12,15 +12,11 @@
1212
import org.opensearch.action.admin.cluster.settings.ClusterUpdateSettingsRequest;
1313
import org.opensearch.cluster.ClusterStateListener;
1414
import org.opensearch.common.settings.Settings;
15-
import org.opensearch.common.unit.TimeValue;
1615
import org.opensearch.test.InternalTestCluster;
1716
import org.opensearch.test.OpenSearchIntegTestCase;
1817

19-
import java.util.ArrayList;
2018
import java.util.Iterator;
21-
import java.util.List;
2219
import java.util.concurrent.CountDownLatch;
23-
import java.util.stream.Stream;
2420

2521
import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked;
2622

@@ -53,14 +49,17 @@ protected void createAndIndex(String index, int replicaCount, int shardCount) {
5349
}
5450

5551
/**
56-
* Creates two nodes each in two zones and shuts down nodes in one zone
57-
* after relocating half the number of shards. Since, primaries are relocated
58-
* first, cluster should stay green as primary should have relocated
52+
* Creates two nodes each in two zones and shuts down nodes in zone1 after
53+
* relocating half the number of shards. Shards per node constraint ensures
54+
* that exactly 50% of shards relocate to nodes in zone2 giving time to shut down
55+
* nodes in zone1. Since primaries are relocated first as movePrimaryFirst is
56+
* enabled, cluster should not become red and zone2 nodes have all the primaries
5957
*/
6058
public void testClusterGreenAfterPartialRelocation() throws InterruptedException {
6159
internalCluster().startMasterOnlyNodes(1);
6260
final String z1 = "zone-1", z2 = "zone-2";
63-
final int primaryShardCount = 100;
61+
final int primaryShardCount = 6;
62+
assertTrue("Primary shard count must be even for equal distribution across two nodes", primaryShardCount % 2 == 0);
6463
final String z1n1 = startDataOnlyNode(z1);
6564
ensureGreen();
6665
createAndIndex("foo", 1, primaryShardCount);
@@ -88,40 +87,41 @@ public void testClusterGreenAfterPartialRelocation() throws InterruptedException
8887
if (event.routingTableChanged()) {
8988
final RoutingNodes routingNodes = event.state().getRoutingNodes();
9089
int startedCount = 0;
91-
List<ShardRouting> initz2n1 = new ArrayList<>(), initz2n2 = new ArrayList<>();
9290
for (Iterator<RoutingNode> it = routingNodes.iterator(); it.hasNext();) {
9391
RoutingNode routingNode = it.next();
9492
final String nodeName = routingNode.node().getName();
95-
if (nodeName.equals(z2n1)) {
93+
if (nodeName.equals(z2n1) || nodeName.equals(z2n2)) {
9694
startedCount += routingNode.numberOfShardsWithState(ShardRoutingState.STARTED);
97-
initz2n1 = routingNode.shardsWithState(ShardRoutingState.INITIALIZING);
98-
} else if (nodeName.equals(z2n2)) {
99-
startedCount += routingNode.numberOfShardsWithState(ShardRoutingState.STARTED);
100-
initz2n2 = routingNode.shardsWithState(ShardRoutingState.INITIALIZING);
10195
}
10296
}
103-
if (!Stream.concat(initz2n1.stream(), initz2n2.stream()).anyMatch(s -> s.primary())) {
104-
// All primaries are relocated before 60% of total shards are started on new nodes
105-
final int totalShardCount = primaryShardCount * 2;
106-
if (primaryShardCount <= startedCount && startedCount <= 3 * totalShardCount / 5) {
107-
primaryMoveLatch.countDown();
108-
}
97+
98+
// Count down the latch once all the primary shards have initialized on nodes in zone-2
99+
if (startedCount == primaryShardCount) {
100+
primaryMoveLatch.countDown();
109101
}
110102
}
111103
};
112104
internalCluster().clusterService().addListener(listener);
113105

114106
// Exclude zone1 nodes for allocation and await latch count down
115107
settingsRequest = new ClusterUpdateSettingsRequest();
116-
settingsRequest.persistentSettings(Settings.builder().put("cluster.routing.allocation.exclude.zone", z1));
108+
settingsRequest.persistentSettings(
109+
Settings.builder()
110+
.put("cluster.routing.allocation.exclude.zone", z1)
111+
// Total shards per node constraint is added to pause the relocation after primary shards
112+
// have relocated to allow time for node shutdown and validate yellow cluster
113+
.put("cluster.routing.allocation.total_shards_per_node", primaryShardCount / 2)
114+
);
117115
client().admin().cluster().updateSettings(settingsRequest);
118116
primaryMoveLatch.await();
119117

120-
// Shutdown both nodes in zone and ensure cluster stays green
118+
// Shutdown both nodes in zone 1 and ensure cluster does not become red
121119
try {
122120
internalCluster().stopRandomNode(InternalTestCluster.nameFilter(z1n1));
123121
internalCluster().stopRandomNode(InternalTestCluster.nameFilter(z1n2));
124122
} catch (Exception e) {}
125-
ensureGreen(TimeValue.timeValueSeconds(60));
123+
// Due to shards per node constraint cluster cannot be green
124+
// Since yellow suffices for this test, not removing shards constraint
125+
ensureYellow();
126126
}
127127
}

0 commit comments

Comments (0)