@@ -505,7 +505,7 @@ int request_enqueue(REQUEST *request)
505505
506506 /*
507507 * If there are too many packets _overall_, OR the
508- * destinatio fifo is full, then try to delete a lower
508+ * destination fifo is full, then try to delete a lower
509509 * priority one.
510510 */
511511 if (((thread_pool .num_queued >= thread_pool .max_queue_size ) &&
@@ -547,24 +547,73 @@ int request_enqueue(REQUEST *request)
547547 return 1 ;
548548}
549549
550+ /*
551+ * Free a list of requests, WITHOUT holding the thread mutex.
552+ */
553+ static void request_list_free (fr_dlist_t * head )
554+ {
555+ fr_dlist_t * entry ;
556+
557+ while ((entry = fr_dlist_pop_head (head )) != NULL ) {
558+ REQUEST * request ;
559+
560+ request = (REQUEST * ) (((uint8_t * ) entry ) - offsetof(REQUEST , entry ));
561+ rad_assert (request -> magic == REQUEST_MAGIC );
562+
563+ /*
564+ * This entry was marked to be stopped. Acknowledge it.
565+ *
566+ * If we own the request, we delete it. Otherwise
567+ * we run the "done" callback now, which will
568+ * stop timers, remove it from the request hash,
569+ * update listener counts, etc.
570+ *
571+ * Running request_done() here means that old
572+ * requests are cleaned up immediately, which
573+ * frees up more resources for new requests. It
574+ * also means that we don't need to rely on
575+ * timers to free up the old requests, as those
576+ * timers will run much much later.
577+ *
578+ * Catching this corner case doesn't change the
579+ * normal operation of the server. Most requests
580+ * should NOT be marked "stop processing" when
581+ * they're in the queue. This situation
582+ * generally happens when the server is blocked,
583+ * due to a slow back-end database.
584+ */
585+ request -> child_state = REQUEST_DONE ;
586+ if (request -> master_state == REQUEST_TO_FREE ) {
587+ request_free (request );
588+ } else {
589+ request_done (request , REQUEST_DONE );
590+ }
591+ }
592+ }
593+
550594#ifndef HAVE_STDATOMIC_H
551595/*
552596 * Try to free up requests by discarding requests of lower priority.
553597 */
554598static int request_fifo_discard (int priority , bool lower , time_t now )
555599{
556- int i , rcode ;
600+ int i , rcode = 0 ;
557601 REQUEST * request ;
602+ fr_dlist_t free_list ;
603+
604+ fr_dlist_entry_init (& free_list );
558605
559606 if (lower ) {
560607 for (i = NUM_FIFOS - 1 ; i < priority ; i -- ) {
561608 request = fr_fifo_pop (thread_pool .fifo [i ]);
562609 if (!request ) continue ;
563610
564611 fr_assert (request -> child_state == REQUEST_QUEUED );
565- request -> child_state = REQUEST_DONE ;
566- request_done (request , FR_ACTION_DONE );
567- return 1 ;
612+
613+ request -> master_state = REQUEST_STOP_PROCESSING ;
614+ fr_dlist_insert_tail (& free_list , & request -> entry );
615+ rcode = 1 ;
616+ break ;
568617 }
569618
570619 /*
@@ -576,47 +625,48 @@ static int request_fifo_discard(int priority, bool lower, time_t now)
576625 }
577626
578627 /*
579- * The time stamp for proxied requests may be 5-10
580- * seconds in the past, because the home server hasn't
581- * responded. We therefore can 't discard "old" requests,
582- * as they have just received a reply, or they have just
583- * timed out.
628+ * We didn't free a lower priority one, try to free one from the current priority.
629+ *
630+ * We don 't free proxied requests based on time, as the time stamp for proxied requests may be
631+ * 5-10 seconds in the past, because the home server hasn't responded. We therefore can't
632+ * discard "old" requests, as they have just received a reply, or they have just timed out.
584633 */
585- if (priority <= RAD_LISTEN_PROXY ) return 0 ;
634+ if (!rcode && (priority > RAD_LISTEN_PROXY )) {
635+ while (true) {
636+ request = fr_fifo_peek (thread_pool .fifo [priority ]);
637+ if (!request ) break ;
586638
587- /*
588- * Peek at the first entry in the fifo. Note that there
589- * is not always a first entry. This is because we're
590- * called if there are too many _total_ requests.
591- */
592- rcode = 0 ;
593639
594- retry :
595- request = fr_fifo_peek (thread_pool .fifo [priority ]);
596- if (!request ) return rcode ;
640+ /*
641+ * This request expires in the future. We can't do anything.
642+ */
643+ if ((request -> timestamp + main_config .max_request_time ) > now ) break ;
597644
598- /*
599- * This request expires in the future. We can't do anything.
600- */
601- if ((request -> timestamp + main_config .max_request_time ) > now ) return rcode ;
645+ request = fr_fifo_pop (thread_pool .fifo [priority ]);
646+ rad_assert (request != NULL );
647+ VERIFY_REQUEST (request );
602648
603- request = fr_fifo_pop ( thread_pool . fifo [ priority ]) ;
604- rad_assert ( request != NULL );
605- VERIFY_REQUEST ( request ) ;
649+ request -> master_state = REQUEST_STOP_PROCESSING ;
650+ fr_dlist_insert_tail ( & free_list , & request -> entry );
651+ thread_pool . num_queued -- ;
606652
607- request -> child_state = REQUEST_DONE ;
608- if ( request -> master_state == REQUEST_TO_FREE ) {
609- request_free ( request );
610- } else {
611- request_done ( request , REQUEST_DONE );
653+ /*
654+ * We might as well delete as many old requests as possible.
655+ */
656+ rcode = 1 ;
657+ }
612658 }
613- thread_pool .num_queued -- ;
614659
615660 /*
616- * We might as well delete as many old requests as possible .
661+ * request_done() grabs a mutex. So we unlock this one, to avoid nested mutexes .
617662 */
618- rcode = 1 ;
619- goto retry ;
663+ if (!fr_dlist_empty (& free_list )) {
664+ pthread_mutex_unlock (& thread_pool .queue_mutex );
665+ request_list_free (& free_list );
666+ pthread_mutex_lock (& thread_pool .queue_mutex );
667+ }
668+
669+ return rcode ;
620670}
621671#endif
622672
@@ -634,12 +684,15 @@ static int request_dequeue(REQUEST **prequest)
634684#endif
635685 RAD_LISTEN_TYPE i ;
636686 REQUEST * request = NULL ;
687+ fr_dlist_t free_list ;
688+
689+ fr_dlist_entry_init (& free_list );
690+
637691 reap_children ();
638692
639693 rad_assert (pool_initialized == true);
640694
641695#ifdef HAVE_STDATOMIC_H
642- retry :
643696 for (i = 0 ; i < NUM_FIFOS ; i ++ ) {
644697 if (!fr_atomic_queue_pop (thread_pool .queue [i ], (void * * ) & request )) continue ;
645698
@@ -651,42 +704,9 @@ static int request_dequeue(REQUEST **prequest)
651704 break ;
652705 }
653706
654- /*
655- * This entry was marked to be stopped. Acknowledge it.
656- *
657- * If we own the request, we delete it. Otherwise
658- * we run the "done" callback now, which will
659- * stop timers, remove it from the request hash,
660- * update listener counts, etc.
661- *
662- * Running request_done() here means that old
663- * requests are cleaned up immediately, which
664- * frees up more resources for new requests. It
665- * also means that we don't need to rely on
666- * timers to free up the old requests, as those
667- * timers will run much much later.
668- *
669- * Catching this corner case doesn't change the
670- * normal operation of the server. Most requests
671- * should NOT be marked "stop processing" when
672- * they're in the queue. This situation
673- * generally happens when the server is blocked,
674- * due to a slow back-end database.
675- */
676- request -> child_state = REQUEST_DONE ;
677- if (request -> master_state == REQUEST_TO_FREE ) {
678- request_free (request );
679- } else {
680- request_done (request , REQUEST_DONE );
681- }
707+ fr_dlist_insert_tail (& free_list , & request -> entry );
682708 request = NULL ;
683709 }
684-
685- /*
686- * Popping might fail. If so, return.
687- */
688- if (!request ) return 0 ;
689-
690710#else
691711 pthread_mutex_lock (& thread_pool .queue_mutex );
692712
@@ -735,58 +755,59 @@ static int request_dequeue(REQUEST **prequest)
735755 rad_assert (request != NULL );
736756 VERIFY_REQUEST (request );
737757
738- request -> child_state = REQUEST_DONE ;
739- if (request -> master_state == REQUEST_TO_FREE ) {
740- request_free (request );
741- } else {
742- request_done (request , REQUEST_DONE );
743- }
758+ fr_dlist_insert_tail (& free_list , & request -> entry );
744759 thread_pool .num_queued -- ;
760+ request = NULL ;
745761 }
746762
747763 start = 0 ;
748- retry :
764+
749765 /*
750- * Pop results from the top of the queue
766+ * Pop the first request by priority.
751767 */
752768 for (i = start ; i < NUM_FIFOS ; i ++ ) {
753769 request = fr_fifo_pop (thread_pool .fifo [i ]);
754- if (request ) {
755- VERIFY_REQUEST (request );
756- start = i ;
757- break ;
770+ if (!request ) continue ;
771+
772+ VERIFY_REQUEST (request );
773+
774+ rad_assert (thread_pool .num_queued > 0 );
775+ thread_pool .num_queued -- ;
776+
777+ if (request -> master_state == REQUEST_STOP_PROCESSING ) {
778+ fr_dlist_insert_tail (& free_list , & request -> entry );
779+ request = NULL ;
780+ continue ;
758781 }
759- }
760782
761- if (!request ) {
762- pthread_mutex_unlock (& thread_pool .queue_mutex );
763- * prequest = NULL ;
764- return 0 ;
783+ start = i ;
784+ break ;
765785 }
766786
767- rad_assert (thread_pool .num_queued > 0 );
768- thread_pool .num_queued -- ;
787+ if (request ) thread_pool .active_threads ++ ;
788+
789+ pthread_mutex_unlock (& thread_pool .queue_mutex );
769790#endif /* HAVE_STD_ATOMIC_H */
770791
771792 * prequest = request ;
772793
773- rad_assert (* prequest != NULL );
774- rad_assert (request -> magic == REQUEST_MAGIC );
794+ /*
795+ * Clean up the free list, with all of the mutexes unlocked. request_done() will grab another
796+ * mutex to do its work, and we don't want to have nested mutexes.
797+ */
798+ if (!fr_dlist_empty (& free_list )) {
799+ request_list_free (& free_list );
800+ }
775801
776802 /*
777- * If the request has sat in the queue for too long,
778- * kill it.
779- *
780- * The main clean-up code can't delete the request from
781- * the queue, and therefore won't clean it up until we
782- * have acknowledged it as "done".
803+ * We didn't find a live request in the list. Return that.
783804 */
784- if (request -> master_state == REQUEST_STOP_PROCESSING ) {
785- request -> module = "<done>" ;
786- request -> child_state = REQUEST_DONE ;
787- goto retry ;
805+ if (!request ) {
806+ return 0 ;
788807 }
789808
809+ rad_assert (* prequest != NULL );
810+ rad_assert (request -> magic == REQUEST_MAGIC );
790811 rad_assert (request -> child_state == REQUEST_QUEUED );
791812
792813 request -> component = "<core>" ;
@@ -798,8 +819,6 @@ static int request_dequeue(REQUEST **prequest)
798819 */
799820#ifdef HAVE_STDATOMIC_H
800821 CAS_INCR (thread_pool .active_threads );
801- #else
802- thread_pool .active_threads ++ ;
803822#endif
804823
805824 blocked = time (NULL );
@@ -817,10 +836,6 @@ static int request_dequeue(REQUEST **prequest)
817836 blocked = 0 ;
818837 }
819838
820- #ifndef HAVE_STDATOMIC_H
821- pthread_mutex_unlock (& thread_pool .queue_mutex );
822- #endif
823-
824839 if (blocked ) {
825840 ERROR ("%d requests have been waiting in the processing queue for %d seconds. Check that all databases are running properly!" ,
826841 num_blocked , (int ) blocked );
0 commit comments