@@ -47,109 +47,152 @@ namespace madness {
4747 // / constant over two traversals. We are then sure
4848 // / that all tasks and AM are processed and there are no AM in
4949 // / flight.
50+ // / \post `this->world_.taskq.size()==0`
5051 void WorldGopInterface::fence_impl (std::function<void ()> epilogue,
5152 bool pause_during_epilogue,
5253 bool debug) {
5354 PROFILE_MEMBER_FUNC (WorldGopInterface);
5455 MADNESS_CHECK (not forbid_fence_);
55- unsigned long nsent_prev=0 , nrecv_prev=1 ; // invalid initial condition
56+ unsigned long nsent_prev=0 , nrecv_prev=1 ;
5657 SafeMPI::Request req0, req1;
5758 ProcessID parent, child0, child1;
5859 world_.mpi .binary_tree_info (0 , parent, child0, child1);
5960 Tag gfence_tag = world_.mpi .unique_tag ();
6061 Tag bcast_tag = world_.mpi .unique_tag ();
6162 int npass = 0 ;
6263
63- // double start = wall_time();
64-
65- if (debug)
66- madness::print (world_.rank (), " : WORLD.GOP.FENCE: entering fence loop, gfence_tag=" , gfence_tag, " bcast_tag=" , bcast_tag);
67-
68- while (1 ) {
69- uint64_t sum0[2 ]={0 ,0 }, sum1[2 ]={0 ,0 }, sum[2 ];
70- if (child0 != -1 ) req0 = world_.mpi .Irecv ((void *) &sum0, sizeof (sum0), MPI_BYTE, child0, gfence_tag);
71- if (child1 != -1 ) req1 = world_.mpi .Irecv ((void *) &sum1, sizeof (sum1), MPI_BYTE, child1, gfence_tag);
64+ // fence ensures that all ranks agree that all sent AMs (nsent) have been
65+ // processed (nrecv) and that no tasks (ntask) are running. We ensure this by
66+ // observing the global sums of these local observables, and then ensuring
67+ // that the termination conditions have been met twice and that
68+ // the global message counts did not change between the two rounds.
69+ // N.B. Epilogue and deferred cleanup can also generate messages, so
70+ // need to do another round of global synchronization after these
71+ // actions ... hence the lambda
72+
73+ auto termdet = [&]() {
74+ if (debug)
75+ madness::print (
76+ world_.rank (),
77+ " : WORLD.GOP.FENCE: entering termdet, gfence_tag=" ,
78+ gfence_tag, " bcast_tag=" , bcast_tag);
79+
80+ while (1 ) {
81+ uint64_t sum0[2 ] = {0 , 0 }, sum1[2 ] = {0 , 0 }, sum[2 ];
82+ if (child0 != -1 )
83+ req0 = world_.mpi .Irecv ((void *)&sum0, sizeof (sum0), MPI_BYTE,
84+ child0, gfence_tag);
85+ if (child1 != -1 )
86+ req1 = world_.mpi .Irecv ((void *)&sum1, sizeof (sum1), MPI_BYTE,
87+ child1, gfence_tag);
7288 world_.taskq .fence ();
73- if (child0 != -1 ) World::await (req0);
74- if (child1 != -1 ) World::await (req1);
89+ if (child0 != -1 )
90+ World::await (req0);
91+ if (child1 != -1 )
92+ World::await (req1);
7593
7694 if (debug && (child0 != -1 || child1 != -1 ))
77- madness::print (world_.rank (), " : WORLD.GOP.FENCE: npass=" , npass, " received messages from children={" , child0, " ," , child1, " } gfence_tag=" , gfence_tag);
95+ madness::print (world_.rank (),
96+ " : WORLD.GOP.FENCE: npass=" , npass,
97+ " received messages from children={" , child0,
98+ " ," , child1, " } gfence_tag=" , gfence_tag);
7899
79100 bool finished;
80101 uint64_t ntask1, nsent1, nrecv1, ntask2, nsent2, nrecv2;
81102 do {
82- world_.taskq .fence ();
103+ world_.taskq .fence ();
83104
84- // Since the number of outstanding tasks and number of AM sent/recv
85- // don't share a critical section read each twice and ensure they
86- // are unchanged to ensure that are consistent ... they don't have
87- // to be current.
105+ // Since the number of outstanding tasks and number of AM sent/recv
106+ // don't share a critical section there is no good way to obtain
107+ // their "current" values (i.e. their values at the same clock),
108+ // so read each twice and ensure they are unchanged to ensure
109+ // that they are consistent ...
88110
89- ntask1 = world_.taskq .size ();
90- nsent1 = world_.am .nsent ;
91- nrecv1 = world_.am .nrecv ;
111+ nsent1 = world_.am .nsent ; // # of sent AM
112+ nrecv1 = world_.am .nrecv ; // # of processed incoming AM
113+ ntask1 = world_.taskq .size (); // current # of tasks; N.B. this was zero after the fence above but may be non-zero now
114+ // processing each incoming AM may bump this up, so read it AFTER nrecv (albeit task completion will drop this again)
92115
93- __asm__ __volatile__ (" " : : : " memory" );
116+ __asm__ __volatile__ (" " : : : " memory" );
94117
95- ntask2 = world_.taskq . size () ;
96- nsent2 = world_.am .nsent ;
97- nrecv2 = world_.am . nrecv ;
118+ nsent2 = world_.am . nsent ;
119+ nrecv2 = world_.am .nrecv ;
120+ ntask2 = world_.taskq . size () ;
98121
99- __asm__ __volatile__ (" " : : : " memory" );
122+ __asm__ __volatile__ (" " : : : " memory" );
100123
101- finished = (ntask2== 0 ) && (ntask1== 0 ) && (nsent1==nsent2) && (nrecv1==nrecv2);
102- }
103- while (!finished);
124+ finished = (ntask2 == 0 ) && (ntask1 == 0 ) &&
125+ (nsent1 == nsent2) && (nrecv1 == nrecv2);
126+ } while (!finished);
104127
105- sum[0 ] = sum0[0 ] + sum1[0 ] + nsent2; // Must use values read above
128+ sum[0 ] =
129+ sum0[0 ] + sum1[0 ] + nsent2;
106130 sum[1 ] = sum0[1 ] + sum1[1 ] + nrecv2;
107131
108132 if (parent != -1 ) {
109- req0 = world_.mpi .Isend (&sum, sizeof (sum), MPI_BYTE, parent, gfence_tag);
110- if (debug)
111- madness::print (world_.rank (), " : WORLD.GOP.FENCE: npass=" , npass, " sent message to parent=" , parent, " gfence_tag=" , gfence_tag);
112- World::await (req0);
113- if (debug)
114- madness::print (world_.rank (), " : WORLD.GOP.FENCE: npass=" , npass, " parent=" , parent, " , confirmed receipt" );
133+ req0 = world_.mpi .Isend (&sum, sizeof (sum), MPI_BYTE, parent,
134+ gfence_tag);
135+ if (debug)
136+ madness::print (world_.rank (),
137+ " : WORLD.GOP.FENCE: npass=" , npass,
138+ " sent message to parent=" , parent,
139+ " gfence_tag=" , gfence_tag);
140+ World::await (req0);
141+ if (debug)
142+ madness::print (world_.rank (),
143+ " : WORLD.GOP.FENCE: npass=" , npass,
144+ " parent=" , parent, " , confirmed receipt" );
115145 }
116146
117- // While we are probably idle free unused communication buffers
118- // world_.am.free_managed_buffers();
119-
120- // bool dowork = (npass==0) || (ThreadPool::size()==0);
147+ // bool dowork = (npass==0) || (ThreadPool::size()==0);
121148 bool dowork = true ;
122149 broadcast (&sum, sizeof (sum), 0 , dowork, bcast_tag);
123150 ++npass;
124151
125152 if (debug)
126- madness::print (world_.rank (), " : WORLD.GOP.FENCE: npass=" , npass, " sum0=" , sum[0 ], " nsent_prev=" , nsent_prev, " sum1=" , sum[1 ], " nrecv_prev=" , nrecv_prev);
153+ madness::print (world_.rank (),
154+ " : WORLD.GOP.FENCE: npass=" , npass,
155+ " sum0=" , sum[0 ], " nsent_prev=" , nsent_prev,
156+ " sum1=" , sum[1 ], " nrecv_prev=" , nrecv_prev);
127157
128- if (sum[0 ]==sum[1 ] && sum[0 ]==nsent_prev && sum[1 ]==nrecv_prev) {
158+ if (sum[0 ] == sum[1 ] && sum[0 ] == nsent_prev &&
159+ sum[1 ] == nrecv_prev) {
129160 if (debug)
130- madness::print (world_.rank (), " : WORLD.GOP.FENCE: npass=" , npass, " exiting fence loop" );
161+ madness::print (world_.rank (),
162+ " : WORLD.GOP.FENCE: npass=" , npass,
163+ " exiting fence loop" );
131164 break ;
132165 }
133166
134- // if (wall_time() - start > 1200.0) {
135- // std::cout << rank() << " FENCE " << nsent2 << " "
136- // << nsent_prev << " " << nrecv2 << " " << nrecv_prev
137- // << " " << sum[0] << " " << sum[1] << " " << npass
138- // << " " << taskq.size() << std::endl;
139- // std::cout.flush();
140- // //myusleep(1000);
141- // MADNESS_ASSERT(0);
142- // }
167+ // if (wall_time() - start > 1200.0) {
168+ // std::cout << rank() << " FENCE " << nsent2 << " "
169+ // << nsent_prev << " " << nrecv2 << " " << nrecv_prev
170+ // << " " << sum[0] << " " << sum[1] << " " << npass
171+ // << " " << taskq.size() << std::endl;
172+ // std::cout.flush();
173+ // //myusleep(1000);
174+ // MADNESS_ASSERT(0);
175+ // }
143176
144177 nsent_prev = sum[0 ];
145178 nrecv_prev = sum[1 ];
179+ };
180+ }; // termdet
181+
182+ termdet ();
146183
147- };
148184 // execute post-fence actions
149185 MADNESS_ASSERT (pause_during_epilogue == false );
150186 epilogue ();
151187 world_.am .free_managed_buffers (); // free up communication buffers
152188 deferred_->do_cleanup ();
189+
190+ // repeat termination detection in case epilogue or cleanup produced tasks
191+ termdet ();
192+
193+ // ensure postcondition
194+ world_.taskq .fence ();
195+
153196#ifdef MADNESS_HAS_GOOGLE_PERF_TCMALLOC
154197 MallocExtension::instance ()->ReleaseFreeMemory ();
155198// print("clearing memory");
0 commit comments