Skip to content

Commit 15b06bc

Browse files
Merge pull request #15968 from rabbitmq/mergify/bp/v4.3.x/pr-15967
Test flake chase (Apr 8, 2026) (backport #15967)
2 parents 07c93fa + eecc9bb commit 15b06bc

9 files changed

Lines changed: 91 additions & 25 deletions

deps/rabbit/test/feature_flags_v2_SUITE.erl

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -159,10 +159,16 @@ start_slave_node(Parent, Config, Testcase, N) ->
159159
Name = list_to_atom(
160160
rabbit_misc:format("~ts-~b", [Testcase, N])),
161161
ct:pal("- Starting slave node `~ts@...`", [Name]),
162+
%% `wait_boot' is set explicitly because the 15-second default is too
163+
%% tight when many nodes are started concurrently by parallel test
164+
%% groups. On timeout, `peer:start/1' calls `erlang:exit(timeout)',
165+
%% which propagates through the linked starter to the test case and
166+
%% aborts the whole CT run.
162167
{ok, NodePid, Node} = peer:start(#{
163168
name => Name,
164169
connection => standard_io,
165-
shutdown => close
170+
shutdown => close,
171+
wait_boot => 60_000
166172
}),
167173
peer:call(NodePid, net_kernel, set_net_ticktime, [5]),
168174

deps/rabbit/test/rabbit_stream_sac_coordinator_SUITE.erl

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2063,10 +2063,15 @@ evaluate_group_ensure_monitors_test(_) ->
20632063
ok.
20642064

20652065
start_node(Name) ->
2066+
%% `wait_boot' is set explicitly because the 15-second default is too
2067+
%% tight when this suite runs alongside other parallel CT sets. On
2068+
%% timeout, `peer:start/1' calls `erlang:exit(timeout)', which would
2069+
%% crash the test case with the bare reason `timeout'.
20662070
{ok, NodePid, Node} = peer:start(#{
20672071
name => Name,
20682072
connection => standard_io,
2069-
shutdown => close
2073+
shutdown => close,
2074+
wait_boot => 60_000
20702075
}),
20712076
{NodePid, Node}.
20722077

deps/rabbitmq_cli/test/ctl/close_all_connections_command_test.exs

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -67,7 +67,12 @@ defmodule CloseAllConnectionsCommandTest do
6767
[[vhost: @vhost], [vhost: @vhost], [vhost: @vhost]] = fetch_connection_vhosts(node, nodes)
6868
opts = %{node: node, vhost: @vhost, global: false, per_connection_delay: 0, limit: 2}
6969
assert {:ok, "Closed 2 connections"} == @command.run(["test"], opts)
70-
Process.sleep(100)
70+
# Closing connections is asynchronous; poll for the expected count
71+
# instead of assuming a fixed delay is enough.
72+
await_condition(
73+
fn -> length(fetch_connection_vhosts(node, nodes)) == 1 end,
74+
5_000
75+
)
7176
assert fetch_connection_vhosts(node, nodes) == [[vhost: @vhost]]
7277
end)
7378
end

deps/rabbitmq_cli/test/ctl/close_connection_command_test.exs

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -48,7 +48,9 @@ defmodule CloseConnectionCommandTest do
4848
nodes = @helpers.nodes_in_cluster(node)
4949
[[pid: pid]] = fetch_connection_pids(node, nodes)
5050
assert :ok == @command.run([:rabbit_misc.pid_to_string(pid), "test"], %{node: node})
51-
Process.sleep(500)
51+
# Closing a connection is asynchronous; poll instead of assuming a
52+
# fixed delay is enough.
53+
await_no_client_connections(node, 5_000)
5254
assert fetch_connection_pids(node, nodes) == []
5355
end)
5456
end

deps/rabbitmq_cli/test/ctl/decode_command_test.exs

Lines changed: 13 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -109,16 +109,20 @@ defmodule DecodeCommandTest do
109109
assert {:ok, secret} ===
110110
@command.run([format_as_erlang_term(output), passphrase], context[:opts])
111111

112-
# wrong passphrase
113-
assert match?(
114-
{:error, _},
115-
@command.run([format_as_erlang_term(encrypted), "wrong/passphrase"], context[:opts])
116-
)
112+
# Wrong passphrase: decryption usually errors out, but garbage bytes
113+
# can occasionally form a valid Erlang term. Either way, the result
114+
# must not be the original secret.
115+
refute {:ok, secret} ===
116+
@command.run(
117+
[format_as_erlang_term(encrypted), "wrong/passphrase"],
118+
context[:opts]
119+
)
117120

118-
assert match?(
119-
{:error, _},
120-
@command.run([format_as_erlang_term(output), "wrong passphrase"], context[:opts])
121-
)
121+
refute {:ok, secret} ===
122+
@command.run(
123+
[format_as_erlang_term(output), "wrong passphrase"],
124+
context[:opts]
125+
)
122126
end
123127

124128
defp format_as_erlang_term(value) do

deps/rabbitmq_cli/test/ctl/list_consumers_command_test.exs

Lines changed: 20 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -84,7 +84,8 @@ defmodule ListConsumersCommandTest do
8484

8585
with_channel(@vhost, fn channel ->
8686
{:ok, _} = AMQP.Basic.consume(channel, queue_name, nil, consumer_tag: consumer_tag)
87-
:timer.sleep(100)
87+
# Consumer registration is asynchronous; wait for it to be visible.
88+
await_consumer_count(context[:opts], 1)
8889
[[consumer]] = run_command_to_list(@command, [info_keys_s, context[:opts]])
8990
assert info_keys_a == Keyword.keys(consumer)
9091
assert consumer[:consumer_tag] == consumer_tag
@@ -111,7 +112,8 @@ defmodule ListConsumersCommandTest do
111112
{:ok, tag1} = AMQP.Basic.consume(channel, queue_name1)
112113
{:ok, tag2} = AMQP.Basic.consume(channel, queue_name2)
113114
{:ok, tag3} = AMQP.Basic.consume(channel, queue_name2)
114-
:timer.sleep(100)
115+
# Consumer registration is asynchronous; wait for them to be visible.
116+
await_consumer_count(context[:opts], 3)
115117

116118
try do
117119
consumers =
@@ -149,7 +151,8 @@ defmodule ListConsumersCommandTest do
149151
{:ok, tag1} = AMQP.Basic.consume(channel, queue_name)
150152
{:ok, tag2} = AMQP.Basic.consume(channel, queue_name)
151153
{:ok, tag3} = AMQP.Basic.consume(channel, queue_name)
152-
:timer.sleep(100)
154+
# Consumer registration is asynchronous; wait for them to be visible.
155+
await_consumer_count(context[:opts], 3)
153156

154157
try do
155158
consumers =
@@ -201,7 +204,8 @@ defmodule ListConsumersCommandTest do
201204
{:ok, tag1} = AMQP.Basic.consume(channel, queue_name)
202205
{:ok, tag2} = AMQP.Basic.consume(channel, queue_name)
203206
{:ok, tag3} = AMQP.Basic.consume(channel, queue_name)
204-
:timer.sleep(100)
207+
# Consumer registration is asynchronous; wait for them to be visible.
208+
await_consumer_count(context[:opts], 3)
205209

206210
try do
207211
consumers =
@@ -226,7 +230,8 @@ defmodule ListConsumersCommandTest do
226230
)
227231

228232
AMQP.Basic.cancel(channel, tag1)
229-
:timer.sleep(100)
233+
# Consumer cancellation is asynchronous; wait for the count to drop.
234+
await_consumer_count(context[:opts], 2)
230235

231236
consumers =
232237
List.first(
@@ -300,4 +305,14 @@ defmodule ListConsumersCommandTest do
300305
]
301306
], {1, :continue}}
302307
end
308+
309+
defp await_consumer_count(opts, expected_count) do
310+
await_condition(
311+
fn ->
312+
consumers = run_command_to_list(@command, [["queue_name"], opts])
313+
Enum.reduce(consumers, 0, fn group, acc -> acc + length(group) end) == expected_count
314+
end,
315+
10_000
316+
)
317+
end
303318
end

deps/rabbitmq_cli/test/ctl/reconcile_vhosts_command_test.exs

Lines changed: 18 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -58,8 +58,14 @@ defmodule ReconcileVhostsCommandTest do
5858
node_name = context[:opts][:node]
5959
force_vhost_failure(node_name, vhost)
6060
assert :ok == @command.run([], context[:opts])
61-
:timer.sleep(1000)
62-
assert match?({:ok, _}, :rpc.call(node_name, :rabbit_vhost_sup_sup, :get_vhost_sup, [vhost]))
61+
# Reconciliation is asynchronous; wait for the vhost supervisor to be
62+
# restarted instead of assuming a fixed delay is enough.
63+
await_condition(
64+
fn ->
65+
match?({:ok, _}, :rpc.call(node_name, :rabbit_vhost_sup_sup, :get_vhost_sup, [vhost]))
66+
end,
67+
30_000
68+
)
6369
end
6470

6571
#
@@ -68,8 +74,16 @@ defmodule ReconcileVhostsCommandTest do
6874

6975
defp setup_vhosts do
7076
add_vhost(@vhost)
71-
# give the vhost a chance to fully start and initialise
72-
:timer.sleep(1000)
77+
# Wait for the vhost supervisor to be fully started.
78+
await_condition(
79+
fn ->
80+
match?(
81+
{:ok, _},
82+
:rpc.call(get_rabbit_hostname(), :rabbit_vhost_sup_sup, :get_vhost_sup, [@vhost])
83+
)
84+
end,
85+
30_000
86+
)
7387

7488
on_exit(fn ->
7589
delete_vhost(@vhost)

deps/rabbitmq_cli/test/ctl/restart_vhost_command_test.exs

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -72,8 +72,16 @@ defmodule RestartVhostCommandTest do
7272

7373
defp setup_vhosts do
7474
add_vhost(@vhost)
75-
# give the vhost a chance to fully start and initialise
76-
:timer.sleep(1000)
75+
# Wait for the vhost supervisor to be fully started.
76+
await_condition(
77+
fn ->
78+
match?(
79+
{:ok, _},
80+
:rpc.call(get_rabbit_hostname(), :rabbit_vhost_sup_sup, :get_vhost_sup, [@vhost])
81+
)
82+
end,
83+
30_000
84+
)
7785

7886
on_exit(fn ->
7987
delete_vhost(@vhost)

deps/rabbitmq_ct_helpers/src/rabbit_ct_broker_helpers.erl

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -768,14 +768,21 @@ do_start_rabbitmq_node(Config, NodeConfig, I) ->
768768
PeerEnv3
769769
end,
770770
%% Start the peer node.
771+
%%
772+
%% `wait_boot' is set explicitly because the 15-second default is too
773+
%% tight when many nodes are started concurrently by parallel test
774+
%% groups. On timeout, `peer:start/1' calls `erlang:exit(timeout)',
775+
%% which propagates through the linked starter to the test case and
776+
%% aborts the whole CT run.
771777
{ok, PeerPid, Nodename} = peer:start(#{
772778
name => Nodename1,
773779
longnames => false,
774780
host => HostName1,
775781
connection => standard_io,
776782
exec => os:find_executable("erl"),
777783
args => PeerArgs,
778-
env => PeerEnv}),
784+
env => PeerEnv,
785+
wait_boot => 60_000}),
779786
%% Redirect the PeerPid's standard output to a file.
780787
%% The standard output of the peer process is a redirect
781788
%% from the peer node. We tie the file's process to PeerPid

0 commit comments

Comments
 (0)