Skip to content

Commit 511a56f

Browse files
committed
move the cleanups of request_done to a free list
so that we can call request_done() with the thread mutex unlocked
1 parent 3f4792e commit 511a56f

3 files changed

Lines changed: 140 additions & 108 deletions

File tree

src/include/dlist.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,11 @@ static inline void fr_dlist_entry_init(fr_dlist_t *entry)
4545
entry->prev = entry->next = entry;
4646
}
4747

48+
static inline bool fr_dlist_empty(fr_dlist_t *head)
49+
{
50+
return ((head->prev == head) && (head->next == head));
51+
}
52+
4853
static inline CC_HINT(nonnull) void fr_dlist_entry_unlink(fr_dlist_t *entry)
4954
{
5055
entry->prev->next = entry->next;
@@ -60,4 +65,14 @@ static inline CC_HINT(nonnull) void fr_dlist_insert_tail(fr_dlist_t *head, fr_dl
6065
head->prev = entry;
6166
}
6267

68+
static inline CC_HINT(nonnull) fr_dlist_t *fr_dlist_pop_head(fr_dlist_t *head)
69+
{
70+
fr_dlist_t *entry = head->next;
71+
72+
if (entry == head) return NULL;
73+
74+
fr_dlist_entry_unlink(entry);
75+
return entry;
76+
}
77+
6378
#endif /* RADIUS_DLIST_H */

src/include/radiusd.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ RCSIDH(radiusd_h, "$Id$")
3232
#include <freeradius-devel/conffile.h>
3333
#include <freeradius-devel/event.h>
3434
#include <freeradius-devel/connection.h>
35+
#include <freeradius-devel/dlist.h>
3536

3637
typedef struct rad_request REQUEST;
3738

@@ -321,6 +322,7 @@ struct rad_request {
321322
uint32_t num_coa_requests;//!< Counter for number of requests sent including
322323
//!< retransmits.
323324
#endif
325+
fr_dlist_t entry; //!< for the free list
324326
}; /* REQUEST typedef */
325327

326328
#define RAD_REQUEST_LVL_NONE (0) //!< No debug messages should be printed.

src/main/threads.c

Lines changed: 123 additions & 108 deletions
Original file line numberDiff line numberDiff line change
@@ -505,7 +505,7 @@ int request_enqueue(REQUEST *request)
505505

506506
/*
507507
* If there are too many packets _overall_, OR the
508-
* destinatio fifo is full, then try to delete a lower
508+
* destination fifo is full, then try to delete a lower
509509
* priority one.
510510
*/
511511
if (((thread_pool.num_queued >= thread_pool.max_queue_size) &&
@@ -547,24 +547,73 @@ int request_enqueue(REQUEST *request)
547547
return 1;
548548
}
549549

550+
/*
551+
* Free a list of requests, WITHOUT holding the thread mutex.
552+
*/
553+
static void request_list_free(fr_dlist_t *head)
554+
{
555+
fr_dlist_t *entry;
556+
557+
while ((entry = fr_dlist_pop_head(head)) != NULL) {
558+
REQUEST *request;
559+
560+
request = (REQUEST *) (((uint8_t *) entry) - offsetof(REQUEST, entry));
561+
rad_assert(request->magic == REQUEST_MAGIC);
562+
563+
/*
564+
* This entry was marked to be stopped. Acknowledge it.
565+
*
566+
* If we own the request, we delete it. Otherwise
567+
* we run the "done" callback now, which will
568+
* stop timers, remove it from the request hash,
569+
* update listener counts, etc.
570+
*
571+
* Running request_done() here means that old
572+
* requests are cleaned up immediately, which
573+
* frees up more resources for new requests. It
574+
* also means that we don't need to rely on
575+
* timers to free up the old requests, as those
576+
* timers will run much much later.
577+
*
578+
* Catching this corner case doesn't change the
579+
* normal operation of the server. Most requests
580+
* should NOT be marked "stop processing" when
581+
* they're in the queue. This situation
582+
* generally happens when the server is blocked,
583+
* due to a slow back-end database.
584+
*/
585+
request->child_state = REQUEST_DONE;
586+
if (request->master_state == REQUEST_TO_FREE) {
587+
request_free(request);
588+
} else {
589+
request_done(request, REQUEST_DONE);
590+
}
591+
}
592+
}
593+
550594
#ifndef HAVE_STDATOMIC_H
551595
/*
552596
* Try to free up requests by discarding requests of lower priority.
553597
*/
554598
static int request_fifo_discard(int priority, bool lower, time_t now)
555599
{
556-
int i, rcode;
600+
int i, rcode = 0;
557601
REQUEST *request;
602+
fr_dlist_t free_list;
603+
604+
fr_dlist_entry_init(&free_list);
558605

559606
if (lower) {
560607
for (i = NUM_FIFOS - 1; i < priority; i--) {
561608
request = fr_fifo_pop(thread_pool.fifo[i]);
562609
if (!request) continue;
563610

564611
fr_assert(request->child_state == REQUEST_QUEUED);
565-
request->child_state = REQUEST_DONE;
566-
request_done(request, FR_ACTION_DONE);
567-
return 1;
612+
613+
request->master_state = REQUEST_STOP_PROCESSING;
614+
fr_dlist_insert_tail(&free_list, &request->entry);
615+
rcode = 1;
616+
break;
568617
}
569618

570619
/*
@@ -576,47 +625,48 @@ static int request_fifo_discard(int priority, bool lower, time_t now)
576625
}
577626

578627
/*
579-
* The time stamp for proxied requests may be 5-10
580-
* seconds in the past, because the home server hasn't
581-
* responded. We therefore can't discard "old" requests,
582-
* as they have just received a reply, or they have just
583-
* timed out.
628+
* We didn't free a lower priority one, try to free one from the current priority.
629+
*
630+
* We don't free proxied requests based on time, as the time stamp for proxied requests may be
631+
* 5-10 seconds in the past, because the home server hasn't responded. We therefore can't
632+
* discard "old" requests, as they have just received a reply, or they have just timed out.
584633
*/
585-
if (priority <= RAD_LISTEN_PROXY) return 0;
634+
if (!rcode && (priority > RAD_LISTEN_PROXY)) {
635+
while (true) {
636+
request = fr_fifo_peek(thread_pool.fifo[priority]);
637+
if (!request) break;
586638

587-
/*
588-
* Peek at the first entry in the fifo. Note that there
589-
* is not always a first entry. This is because we're
590-
* called if there are too many _total_ requests.
591-
*/
592-
rcode = 0;
593639

594-
retry:
595-
request = fr_fifo_peek(thread_pool.fifo[priority]);
596-
if (!request) return rcode;
640+
/*
641+
* This request expires in the future. We can't do anything.
642+
*/
643+
if ((request->timestamp + main_config.max_request_time) > now) break;
597644

598-
/*
599-
* This request expires in the future. We can't do anything.
600-
*/
601-
if ((request->timestamp + main_config.max_request_time) > now) return rcode;
645+
request = fr_fifo_pop(thread_pool.fifo[priority]);
646+
rad_assert(request != NULL);
647+
VERIFY_REQUEST(request);
602648

603-
request = fr_fifo_pop(thread_pool.fifo[priority]);
604-
rad_assert(request != NULL);
605-
VERIFY_REQUEST(request);
649+
request->master_state = REQUEST_STOP_PROCESSING;
650+
fr_dlist_insert_tail(&free_list, &request->entry);
651+
thread_pool.num_queued--;
606652

607-
request->child_state = REQUEST_DONE;
608-
if (request->master_state == REQUEST_TO_FREE) {
609-
request_free(request);
610-
} else {
611-
request_done(request, REQUEST_DONE);
653+
/*
654+
* We might as well delete as many old requests as possible.
655+
*/
656+
rcode = 1;
657+
}
612658
}
613-
thread_pool.num_queued--;
614659

615660
/*
616-
* We might as well delete as many old requests as possible.
661+
* request_done() grabs a mutex. So we unlock this one, to avoid nested mutexes.
617662
*/
618-
rcode = 1;
619-
goto retry;
663+
if (!fr_dlist_empty(&free_list)) {
664+
pthread_mutex_unlock(&thread_pool.queue_mutex);
665+
request_list_free(&free_list);
666+
pthread_mutex_lock(&thread_pool.queue_mutex);
667+
}
668+
669+
return rcode;
620670
}
621671
#endif
622672

@@ -634,12 +684,15 @@ static int request_dequeue(REQUEST **prequest)
634684
#endif
635685
RAD_LISTEN_TYPE i;
636686
REQUEST *request = NULL;
687+
fr_dlist_t free_list;
688+
689+
fr_dlist_entry_init(&free_list);
690+
637691
reap_children();
638692

639693
rad_assert(pool_initialized == true);
640694

641695
#ifdef HAVE_STDATOMIC_H
642-
retry:
643696
for (i = 0; i < NUM_FIFOS; i++) {
644697
if (!fr_atomic_queue_pop(thread_pool.queue[i], (void **) &request)) continue;
645698

@@ -651,42 +704,9 @@ static int request_dequeue(REQUEST **prequest)
651704
break;
652705
}
653706

654-
/*
655-
* This entry was marked to be stopped. Acknowledge it.
656-
*
657-
* If we own the request, we delete it. Otherwise
658-
* we run the "done" callback now, which will
659-
* stop timers, remove it from the request hash,
660-
* update listener counts, etc.
661-
*
662-
* Running request_done() here means that old
663-
* requests are cleaned up immediately, which
664-
* frees up more resources for new requests. It
665-
* also means that we don't need to rely on
666-
* timers to free up the old requests, as those
667-
* timers will run much much later.
668-
*
669-
* Catching this corner case doesn't change the
670-
* normal operation of the server. Most requests
671-
* should NOT be marked "stop processing" when
672-
* they're in the queue. This situation
673-
* generally happens when the server is blocked,
674-
* due to a slow back-end database.
675-
*/
676-
request->child_state = REQUEST_DONE;
677-
if (request->master_state == REQUEST_TO_FREE) {
678-
request_free(request);
679-
} else {
680-
request_done(request, REQUEST_DONE);
681-
}
707+
fr_dlist_insert_tail(&free_list, &request->entry);
682708
request = NULL;
683709
}
684-
685-
/*
686-
* Popping might fail. If so, return.
687-
*/
688-
if (!request) return 0;
689-
690710
#else
691711
pthread_mutex_lock(&thread_pool.queue_mutex);
692712

@@ -735,58 +755,59 @@ static int request_dequeue(REQUEST **prequest)
735755
rad_assert(request != NULL);
736756
VERIFY_REQUEST(request);
737757

738-
request->child_state = REQUEST_DONE;
739-
if (request->master_state == REQUEST_TO_FREE) {
740-
request_free(request);
741-
} else {
742-
request_done(request, REQUEST_DONE);
743-
}
758+
fr_dlist_insert_tail(&free_list, &request->entry);
744759
thread_pool.num_queued--;
760+
request = NULL;
745761
}
746762

747763
start = 0;
748-
retry:
764+
749765
/*
750-
* Pop results from the top of the queue
766+
* Pop the first request by priority.
751767
*/
752768
for (i = start; i < NUM_FIFOS; i++) {
753769
request = fr_fifo_pop(thread_pool.fifo[i]);
754-
if (request) {
755-
VERIFY_REQUEST(request);
756-
start = i;
757-
break;
770+
if (!request) continue;
771+
772+
VERIFY_REQUEST(request);
773+
774+
rad_assert(thread_pool.num_queued > 0);
775+
thread_pool.num_queued--;
776+
777+
if (request->master_state == REQUEST_STOP_PROCESSING) {
778+
fr_dlist_insert_tail(&free_list, &request->entry);
779+
request = NULL;
780+
continue;
758781
}
759-
}
760782

761-
if (!request) {
762-
pthread_mutex_unlock(&thread_pool.queue_mutex);
763-
*prequest = NULL;
764-
return 0;
783+
start = i;
784+
break;
765785
}
766786

767-
rad_assert(thread_pool.num_queued > 0);
768-
thread_pool.num_queued--;
787+
if (request) thread_pool.active_threads++;
788+
789+
pthread_mutex_unlock(&thread_pool.queue_mutex);
769790
#endif /* HAVE_STD_ATOMIC_H */
770791

771792
*prequest = request;
772793

773-
rad_assert(*prequest != NULL);
774-
rad_assert(request->magic == REQUEST_MAGIC);
794+
/*
795+
* Clean up the free list, with all of the mutexes unlocked. request_done() will grab another
796+
* mutex to do its work, and we don't want to have nested mutexes.
797+
*/
798+
if (!fr_dlist_empty(&free_list)) {
799+
request_list_free(&free_list);
800+
}
775801

776802
/*
777-
* If the request has sat in the queue for too long,
778-
* kill it.
779-
*
780-
* The main clean-up code can't delete the request from
781-
* the queue, and therefore won't clean it up until we
782-
* have acknowledged it as "done".
803+
* We didn't find a live request in the list. Return that.
783804
*/
784-
if (request->master_state == REQUEST_STOP_PROCESSING) {
785-
request->module = "<done>";
786-
request->child_state = REQUEST_DONE;
787-
goto retry;
805+
if (!request) {
806+
return 0;
788807
}
789808

809+
rad_assert(*prequest != NULL);
810+
rad_assert(request->magic == REQUEST_MAGIC);
790811
rad_assert(request->child_state == REQUEST_QUEUED);
791812

792813
request->component = "<core>";
@@ -798,8 +819,6 @@ static int request_dequeue(REQUEST **prequest)
798819
*/
799820
#ifdef HAVE_STDATOMIC_H
800821
CAS_INCR(thread_pool.active_threads);
801-
#else
802-
thread_pool.active_threads++;
803822
#endif
804823

805824
blocked = time(NULL);
@@ -817,10 +836,6 @@ static int request_dequeue(REQUEST **prequest)
817836
blocked = 0;
818837
}
819838

820-
#ifndef HAVE_STDATOMIC_H
821-
pthread_mutex_unlock(&thread_pool.queue_mutex);
822-
#endif
823-
824839
if (blocked) {
825840
ERROR("%d requests have been waiting in the processing queue for %d seconds. Check that all databases are running properly!",
826841
num_blocked, (int) blocked);

0 commit comments

Comments
 (0)