Skip to content

Commit 0fffce4

Browse files
authored
Merge pull request #627 from rabbitmq/handle-ra-system-down-rpc
Handle Ra system down gracefully on start_server
2 parents 26495f9 + cda2b7d commit 0fffce4

2 files changed

Lines changed: 32 additions & 4 deletions

File tree

src/ra_server_sup_sup.erl

Lines changed: 10 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -41,16 +41,17 @@
4141

4242
-spec start_server(System :: atom(), ra_server:ra_server_config()) ->
4343
supervisor:startchild_ret() |
44-
{error, not_new | system_not_started | invalid_initial_machine_version} |
45-
{badrpc, term()}.
44+
{error, not_new | system_not_started | nodedown |
45+
invalid_initial_machine_version} | {badrpc, term()}.
4646
start_server(System, #{id := NodeId,
4747
uid := UId} = Config)
4848
when is_atom(System) ->
4949
Node = ra_lib:ra_server_id_node(NodeId),
5050
rpc:call(Node, ?MODULE, start_server_rpc, [System, UId, Config]).
5151

5252
-spec restart_server(atom(), ra_server_id(), ra_server:mutable_config()) ->
53-
supervisor:startchild_ret() | {error, system_not_started} | {badrpc, term()}.
53+
supervisor:startchild_ret() | {badrpc, term()} |
54+
{error, system_not_started | nodedown}.
5455
restart_server(System, {RaName, Node}, AddConfig) ->
5556
rpc:call(Node, ?MODULE, restart_server_rpc,
5657
[System, {RaName, Node}, AddConfig]).
@@ -259,7 +260,7 @@ init([]) ->
259260

260261
start_child(Name, Config) ->
261262
Ref = make_ref(),
262-
case supervisor:start_child(Name, [Config#{reply_to => {Ref, self()}}]) of
263+
try supervisor:start_child(Name, [Config#{reply_to => {Ref, self()}}]) of
263264
{ok, Pid} ->
264265
%% we have started the process now and have to wait for reply
265266
%% that is sent after init but before state machine recovery
@@ -275,4 +276,9 @@ start_child(Name, Config) ->
275276
end;
276277
Err ->
277278
Err
279+
catch
280+
exit:{noproc, _} ->
281+
{error, system_not_started};
282+
exit:{{nodedown, _}, _} ->
283+
{error, nodedown}
278284
end.

test/ra_2_SUITE.erl

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ all_tests() ->
3636
cluster_is_deleted_with_server_down,
3737
cluster_cannot_be_deleted_in_minority,
3838
diverged_follower,
39+
start_server_noproc,
3940
server_restart_after_application_restart,
4041
restarted_server_does_not_reissue_side_effects,
4142
recover,
@@ -954,6 +955,27 @@ enq_deq_n(N, ServerId, Acc) ->
954955
true = Deq /= empty,
955956
enq_deq_n(N-1, ServerId, [Deq | Acc]).
956957

958+
start_server_noproc(Config) ->
959+
ClusterName = ?config(cluster_name, Config),
960+
PrivDir = ?config(priv_dir, Config),
961+
ServerId = ?config(server_id, Config),
962+
UId = ?config(uid, Config),
963+
Conf = conf(ClusterName, UId, ServerId, PrivDir, []),
964+
meck:new(ra_system, [passthrough]),
965+
meck:expect(ra_system, lookup_name,
966+
fun(?SYS, server_sup) ->
967+
{ok, ra_server_sup_sup_noproc_test};
968+
(Sys, Key) ->
969+
meck:passthrough([Sys, Key])
970+
end),
971+
try
972+
?assertEqual({error, system_not_started},
973+
ra:start_server(?SYS, Conf))
974+
after
975+
meck:unload(ra_system)
976+
end,
977+
ok.
978+
957979
conf(ClusterName, UId, ServerId, _, Peers) ->
958980
#{cluster_name => ClusterName,
959981
id => ServerId,

0 commit comments

Comments
 (0)