|
12 | 12 | import org.opensearch.action.admin.cluster.settings.ClusterUpdateSettingsRequest; |
13 | 13 | import org.opensearch.cluster.ClusterStateListener; |
14 | 14 | import org.opensearch.common.settings.Settings; |
15 | | -import org.opensearch.common.unit.TimeValue; |
16 | 15 | import org.opensearch.test.InternalTestCluster; |
17 | 16 | import org.opensearch.test.OpenSearchIntegTestCase; |
18 | 17 |
|
19 | | -import java.util.ArrayList; |
20 | 18 | import java.util.Iterator; |
21 | | -import java.util.List; |
22 | 19 | import java.util.concurrent.CountDownLatch; |
23 | | -import java.util.stream.Stream; |
24 | 20 |
|
25 | 21 | import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; |
26 | 22 |
|
@@ -53,14 +49,17 @@ protected void createAndIndex(String index, int replicaCount, int shardCount) { |
53 | 49 | } |
54 | 50 |
|
55 | 51 | /** |
56 | | - * Creates two nodes each in two zones and shuts down nodes in one zone |
57 | | - * after relocating half the number of shards. Since, primaries are relocated |
58 | | - * first, cluster should stay green as primary should have relocated |
| 52 | + * Creates two nodes each in two zones and shuts down nodes in zone1 after |
| 53 | + * relocating half the number of shards. Shards per node constraint ensures |
| 54 | + * that exactly 50% of shards relocate to nodes in zone2 giving time to shut down |
| 55 | + * nodes in zone1. Since primaries are relocated first because movePrimaryFirst is |
| 56 | + * enabled, the cluster should not become red, and zone2 nodes will hold all the primaries |
59 | 57 | */ |
60 | 58 | public void testClusterGreenAfterPartialRelocation() throws InterruptedException { |
61 | 59 | internalCluster().startMasterOnlyNodes(1); |
62 | 60 | final String z1 = "zone-1", z2 = "zone-2"; |
63 | | - final int primaryShardCount = 100; |
| 61 | + final int primaryShardCount = 6; |
| 62 | + assertTrue("Primary shard count must be even for equal distribution across two nodes", primaryShardCount % 2 == 0); |
64 | 63 | final String z1n1 = startDataOnlyNode(z1); |
65 | 64 | ensureGreen(); |
66 | 65 | createAndIndex("foo", 1, primaryShardCount); |
@@ -88,40 +87,41 @@ public void testClusterGreenAfterPartialRelocation() throws InterruptedException |
88 | 87 | if (event.routingTableChanged()) { |
89 | 88 | final RoutingNodes routingNodes = event.state().getRoutingNodes(); |
90 | 89 | int startedCount = 0; |
91 | | - List<ShardRouting> initz2n1 = new ArrayList<>(), initz2n2 = new ArrayList<>(); |
92 | 90 | for (Iterator<RoutingNode> it = routingNodes.iterator(); it.hasNext();) { |
93 | 91 | RoutingNode routingNode = it.next(); |
94 | 92 | final String nodeName = routingNode.node().getName(); |
95 | | - if (nodeName.equals(z2n1)) { |
| 93 | + if (nodeName.equals(z2n1) || nodeName.equals(z2n2)) { |
96 | 94 | startedCount += routingNode.numberOfShardsWithState(ShardRoutingState.STARTED); |
97 | | - initz2n1 = routingNode.shardsWithState(ShardRoutingState.INITIALIZING); |
98 | | - } else if (nodeName.equals(z2n2)) { |
99 | | - startedCount += routingNode.numberOfShardsWithState(ShardRoutingState.STARTED); |
100 | | - initz2n2 = routingNode.shardsWithState(ShardRoutingState.INITIALIZING); |
101 | 95 | } |
102 | 96 | } |
103 | | - if (!Stream.concat(initz2n1.stream(), initz2n2.stream()).anyMatch(s -> s.primary())) { |
104 | | - // All primaries are relocated before 60% of total shards are started on new nodes |
105 | | - final int totalShardCount = primaryShardCount * 2; |
106 | | - if (primaryShardCount <= startedCount && startedCount <= 3 * totalShardCount / 5) { |
107 | | - primaryMoveLatch.countDown(); |
108 | | - } |
| 97 | + |
| 98 | + // Count down the latch once all the primary shards have started on nodes in zone-2 |
| 99 | + if (startedCount == primaryShardCount) { |
| 100 | + primaryMoveLatch.countDown(); |
109 | 101 | } |
110 | 102 | } |
111 | 103 | }; |
112 | 104 | internalCluster().clusterService().addListener(listener); |
113 | 105 |
|
114 | 106 | // Exclude zone1 nodes for allocation and await latch count down |
115 | 107 | settingsRequest = new ClusterUpdateSettingsRequest(); |
116 | | - settingsRequest.persistentSettings(Settings.builder().put("cluster.routing.allocation.exclude.zone", z1)); |
| 108 | + settingsRequest.persistentSettings( |
| 109 | + Settings.builder() |
| 110 | + .put("cluster.routing.allocation.exclude.zone", z1) |
| 111 | + // Total shards per node constraint is added to pause the relocation after primary shards |
| 112 | + // have relocated to allow time for node shutdown and validate yellow cluster |
| 113 | + .put("cluster.routing.allocation.total_shards_per_node", primaryShardCount / 2) |
| 114 | + ); |
117 | 115 | client().admin().cluster().updateSettings(settingsRequest); |
118 | 116 | primaryMoveLatch.await(); |
119 | 117 |
|
120 | | - // Shutdown both nodes in zone and ensure cluster stays green |
| 118 | + // Shut down both nodes in zone 1 and ensure the cluster does not become red |
121 | 119 | try { |
122 | 120 | internalCluster().stopRandomNode(InternalTestCluster.nameFilter(z1n1)); |
123 | 121 | internalCluster().stopRandomNode(InternalTestCluster.nameFilter(z1n2)); |
124 | 122 | } catch (Exception e) {} |
125 | | - ensureGreen(TimeValue.timeValueSeconds(60)); |
| 123 | + // Due to the shards-per-node constraint, the cluster cannot become green |
| 124 | + // Since yellow suffices for this test, the shards constraint is not removed |
| 125 | + ensureYellow(); |
126 | 126 | } |
127 | 127 | } |
0 commit comments