Skip to content

Commit a41aa38

Browse files
committed
Use unlang_interpret_force_result for dead home servers
This applies when there are no status checks and revive_interval is used to assume a home server has come back to life. During the period when the home server is marked as dead, the module fails immediately, so failover is efficient. A different approach is needed for dynamic home servers, since the same instruction can be used for many different home servers.
1 parent 0b58418 commit a41aa38

1 file changed

Lines changed: 26 additions & 2 deletions

File tree

  • src/modules/rlm_radius

src/modules/rlm_radius/bio.c

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ typedef struct {
4949
fr_bio_fd_info_t const *fd_info; //!< status of the FD.
5050
fr_radius_ctx_t radius_ctx; //!< for signing packets
5151
bio_limit_ports_t limit_source_ports; //!< What type of port limit is in use.
52+
bool dynamic; //!< is this a dynamic home server.
5253
} bio_handle_ctx_t;
5354

5455
typedef struct {
@@ -99,6 +100,7 @@ typedef struct {
99100
fr_time_t last_idle; //!< last time we had nothing to do
100101

101102
fr_timer_t *zombie_ev; //!< Zombie timeout.
103+
unlang_t const *instruction; //!< Instruction which triggered the start of the zombie period.
102104

103105
bool status_checking; //!< whether we're doing status checks
104106
bio_request_t *status_u; //!< for sending status check packets
@@ -1386,6 +1388,21 @@ static void zombie_timeout(fr_timer_list_t *tl, fr_time_t now, void *uctx)
13861388
return;
13871389
}
13881390

1391+
if (!h->ctx.dynamic) {
1392+
/*
1393+
* Force the instruction to immediately fail until the revive interval has expired.
1394+
*/
1395+
unlang_interpret_force_result(h->instruction, &(unlang_result_t){.rcode = RLM_MODULE_FAIL}, tl,
1396+
h->ctx.inst->revive_interval);
1397+
1398+
/*
1399+
* Mark the connection as active, so when the module forced result times out
1400+
* requests will be sent again.
1401+
*/
1402+
trunk_connection_signal_active(tconn);
1403+
return;
1404+
}
1405+
13891406
/*
13901407
* Revive the connection after a time.
13911408
*/
@@ -1421,9 +1438,10 @@ static void zombie_timeout(fr_timer_list_t *tl, fr_time_t now, void *uctx)
14211438
* - true if the connection is zombie.
14221439
* - false if the connection is not zombie.
14231440
*/
1424-
static bool check_for_zombie(fr_event_list_t *el, trunk_connection_t *tconn, fr_time_t now, fr_time_t last_sent)
1441+
static bool check_for_zombie(request_t *request, trunk_connection_t *tconn, fr_time_t now, fr_time_t last_sent)
14251442
{
14261443
bio_handle_t *h = talloc_get_type_abort(tconn->conn->h, bio_handle_t);
1444+
fr_event_list_t *el = unlang_interpret_event_list(request);
14271445

14281446
/*
14291447
* We're replicating, and don't care about the health of
@@ -1478,6 +1496,11 @@ static bool check_for_zombie(fr_event_list_t *el, trunk_connection_t *tconn, fr_
14781496
trunk_connection_signal_reconnect(tconn, CONNECTION_FAILED);
14791497
}
14801498
} else {
1499+
/*
1500+
* Capture the instruction which started the zombie period.
1501+
*/
1502+
h->instruction = unlang_interpret_instruction(request);
1503+
14811504
if (fr_timer_at(h, el->tl, &h->zombie_ev, fr_time_add(now, h->ctx.inst->zombie_period),
14821505
false, zombie_timeout, tconn) < 0) {
14831506
ERROR("Failed inserting zombie timeout for connection");
@@ -1611,7 +1634,7 @@ static void do_retry(rlm_radius_t const *inst, bio_request_t *u, request_t *requ
16111634
*/
16121635
if (!tconn || (inst->mode == RLM_RADIUS_MODE_REPLICATE)) return;
16131636

1614-
check_for_zombie(unlang_interpret_event_list(request), tconn, now, retry->start);
1637+
check_for_zombie(request, tconn, now, retry->start);
16151638
}
16161639

16171640
CC_NO_UBSAN(function) /* UBSAN: false positive - public vs private connection_t trips --fsanitize=function*/
@@ -2961,6 +2984,7 @@ static xlat_action_t xlat_radius_client(UNUSED TALLOC_CTX *ctx, UNUSED fr_dcurso
29612984
.module_name = inst->name,
29622985
.inst = inst,
29632986
.limit_source_ports = (thread->num_ports > 0) ? LIMIT_PORTS_DYNAMIC : LIMIT_PORTS_NONE,
2987+
.dynamic = true,
29642988
},
29652989
.num_ports = thread->num_ports,
29662990
};

0 commit comments

Comments
 (0)