Skip to content

Commit 6ab4484

Browse files
committed
fence checks postcondition that there are no tasks
for this we ensure that: - local # of tasks is zero after reading AM counts - repeat global termdet after epilogue/cleanup
1 parent ad779d0 commit 6ab4484

2 files changed

Lines changed: 101 additions & 58 deletions

File tree

src/madness/world/worldgop.cc

Lines changed: 96 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -47,109 +47,152 @@ namespace madness {
4747
/// constant over two traversals. We are then sure
4848
/// that all tasks and AM are processed and there are no AM in
4949
/// flight.
50+
/// \post `this->world_.taskq.size()==0`
5051
void WorldGopInterface::fence_impl(std::function<void()> epilogue,
5152
bool pause_during_epilogue,
5253
bool debug) {
5354
PROFILE_MEMBER_FUNC(WorldGopInterface);
5455
MADNESS_CHECK(not forbid_fence_);
55-
unsigned long nsent_prev=0, nrecv_prev=1; // invalid initial condition
56+
unsigned long nsent_prev=0, nrecv_prev=1;
5657
SafeMPI::Request req0, req1;
5758
ProcessID parent, child0, child1;
5859
world_.mpi.binary_tree_info(0, parent, child0, child1);
5960
Tag gfence_tag = world_.mpi.unique_tag();
6061
Tag bcast_tag = world_.mpi.unique_tag();
6162
int npass = 0;
6263

63-
//double start = wall_time();
64-
65-
if (debug)
66-
madness::print(world_.rank(), ": WORLD.GOP.FENCE: entering fence loop, gfence_tag=", gfence_tag, " bcast_tag=", bcast_tag);
67-
68-
while (1) {
69-
uint64_t sum0[2]={0,0}, sum1[2]={0,0}, sum[2];
70-
if (child0 != -1) req0 = world_.mpi.Irecv((void*) &sum0, sizeof(sum0), MPI_BYTE, child0, gfence_tag);
71-
if (child1 != -1) req1 = world_.mpi.Irecv((void*) &sum1, sizeof(sum1), MPI_BYTE, child1, gfence_tag);
64+
// fence ensures that all ranks agree that all sent AMs (nsent) have been
65+
// processed (nrecv) and that no tasks (ntask) are running. We ensure this by
66+
// observing the global sums of these local observables, and then ensuring
67+
// that the termination conditions have been met twice and
68+
// over two rounds of observations no messages have been sent.
69+
// N.B. Epilogue and deferred cleanup can also generate messages, so
70+
// need to do another round of global synchronization after these
71+
// actions ... hence the lambda
72+
73+
auto termdet = [&]() {
74+
if (debug)
75+
madness::print(
76+
world_.rank(),
77+
": WORLD.GOP.FENCE: entering termdet, gfence_tag=",
78+
gfence_tag, " bcast_tag=", bcast_tag);
79+
80+
while (1) {
81+
uint64_t sum0[2] = {0, 0}, sum1[2] = {0, 0}, sum[2];
82+
if (child0 != -1)
83+
req0 = world_.mpi.Irecv((void *)&sum0, sizeof(sum0), MPI_BYTE,
84+
child0, gfence_tag);
85+
if (child1 != -1)
86+
req1 = world_.mpi.Irecv((void *)&sum1, sizeof(sum1), MPI_BYTE,
87+
child1, gfence_tag);
7288
world_.taskq.fence();
73-
if (child0 != -1) World::await(req0);
74-
if (child1 != -1) World::await(req1);
89+
if (child0 != -1)
90+
World::await(req0);
91+
if (child1 != -1)
92+
World::await(req1);
7593

7694
if (debug && (child0 != -1 || child1 != -1))
77-
madness::print(world_.rank(), ": WORLD.GOP.FENCE: npass=", npass, " received messages from children={", child0, ",", child1, "} gfence_tag=", gfence_tag);
95+
madness::print(world_.rank(),
96+
": WORLD.GOP.FENCE: npass=", npass,
97+
" received messages from children={", child0,
98+
",", child1, "} gfence_tag=", gfence_tag);
7899

79100
bool finished;
80101
uint64_t ntask1, nsent1, nrecv1, ntask2, nsent2, nrecv2;
81102
do {
82-
world_.taskq.fence();
103+
world_.taskq.fence();
83104

84-
// Since the number of outstanding tasks and number of AM sent/recv
85-
// don't share a critical section read each twice and ensure they
86-
// are unchanged to ensure that are consistent ... they don't have
87-
// to be current.
105+
// Since the number of outstanding tasks and number of AM sent/recv
106+
// don't share a critical section there is no good way to obtain
107+
// their "current" values (i.e. their values at the same clock),
108+
// so read each twice and ensure they are unchanged to ensure
109+
// that they are consistent ...
88110

89-
ntask1 = world_.taskq.size();
90-
nsent1 = world_.am.nsent;
91-
nrecv1 = world_.am.nrecv;
111+
nsent1 = world_.am.nsent; // # of sent AM
112+
nrecv1 = world_.am.nrecv; // # of processed incoming AM
113+
ntask1 = world_.taskq.size(); // current # of tasks; N.B. this was zero after the fence above but may be non-zero now
114+
// processing each incoming AM may bump this up, so read it AFTER nrecv (albeit task completion will drop this again)
92115

93-
__asm__ __volatile__ (" " : : : "memory");
116+
__asm__ __volatile__(" " : : : "memory");
94117

95-
ntask2 = world_.taskq.size();
96-
nsent2 = world_.am.nsent;
97-
nrecv2 = world_.am.nrecv;
118+
nsent2 = world_.am.nsent;
119+
nrecv2 = world_.am.nrecv;
120+
ntask2 = world_.taskq.size();
98121

99-
__asm__ __volatile__ (" " : : : "memory");
122+
__asm__ __volatile__(" " : : : "memory");
100123

101-
finished = (ntask2==0) && (ntask1==0) && (nsent1==nsent2) && (nrecv1==nrecv2);
102-
}
103-
while (!finished);
124+
finished = (ntask2 == 0) && (ntask1 == 0) &&
125+
(nsent1 == nsent2) && (nrecv1 == nrecv2);
126+
} while (!finished);
104127

105-
sum[0] = sum0[0] + sum1[0] + nsent2; // Must use values read above
128+
sum[0] =
129+
sum0[0] + sum1[0] + nsent2;
106130
sum[1] = sum0[1] + sum1[1] + nrecv2;
107131

108132
if (parent != -1) {
109-
req0 = world_.mpi.Isend(&sum, sizeof(sum), MPI_BYTE, parent, gfence_tag);
110-
if (debug)
111-
madness::print(world_.rank(), ": WORLD.GOP.FENCE: npass=", npass, " sent message to parent=", parent, " gfence_tag=", gfence_tag);
112-
World::await(req0);
113-
if (debug)
114-
madness::print(world_.rank(), ": WORLD.GOP.FENCE: npass=", npass, " parent=", parent, ", confirmed receipt");
133+
req0 = world_.mpi.Isend(&sum, sizeof(sum), MPI_BYTE, parent,
134+
gfence_tag);
135+
if (debug)
136+
madness::print(world_.rank(),
137+
": WORLD.GOP.FENCE: npass=", npass,
138+
" sent message to parent=", parent,
139+
" gfence_tag=", gfence_tag);
140+
World::await(req0);
141+
if (debug)
142+
madness::print(world_.rank(),
143+
": WORLD.GOP.FENCE: npass=", npass,
144+
" parent=", parent, ", confirmed receipt");
115145
}
116146

117-
// While we are probably idle free unused communication buffers
118-
//world_.am.free_managed_buffers();
119-
120-
//bool dowork = (npass==0) || (ThreadPool::size()==0);
147+
// bool dowork = (npass==0) || (ThreadPool::size()==0);
121148
bool dowork = true;
122149
broadcast(&sum, sizeof(sum), 0, dowork, bcast_tag);
123150
++npass;
124151

125152
if (debug)
126-
madness::print(world_.rank(), ": WORLD.GOP.FENCE: npass=", npass, " sum0=", sum[0], " nsent_prev=", nsent_prev, " sum1=", sum[1], " nrecv_prev=", nrecv_prev);
153+
madness::print(world_.rank(),
154+
": WORLD.GOP.FENCE: npass=", npass,
155+
" sum0=", sum[0], " nsent_prev=", nsent_prev,
156+
" sum1=", sum[1], " nrecv_prev=", nrecv_prev);
127157

128-
if (sum[0]==sum[1] && sum[0]==nsent_prev && sum[1]==nrecv_prev) {
158+
if (sum[0] == sum[1] && sum[0] == nsent_prev &&
159+
sum[1] == nrecv_prev) {
129160
if (debug)
130-
madness::print(world_.rank(), ": WORLD.GOP.FENCE: npass=", npass, " exiting fence loop");
161+
madness::print(world_.rank(),
162+
": WORLD.GOP.FENCE: npass=", npass,
163+
" exiting fence loop");
131164
break;
132165
}
133166

134-
// if (wall_time() - start > 1200.0) {
135-
// std::cout << rank() << " FENCE " << nsent2 << " "
136-
// << nsent_prev << " " << nrecv2 << " " << nrecv_prev
137-
// << " " << sum[0] << " " << sum[1] << " " << npass
138-
// << " " << taskq.size() << std::endl;
139-
// std::cout.flush();
140-
// //myusleep(1000);
141-
// MADNESS_ASSERT(0);
142-
// }
167+
// if (wall_time() - start > 1200.0) {
168+
// std::cout << rank() << " FENCE " << nsent2 << " "
169+
// << nsent_prev << " " << nrecv2 << " " << nrecv_prev
170+
// << " " << sum[0] << " " << sum[1] << " " << npass
171+
// << " " << taskq.size() << std::endl;
172+
// std::cout.flush();
173+
// //myusleep(1000);
174+
// MADNESS_ASSERT(0);
175+
// }
143176

144177
nsent_prev = sum[0];
145178
nrecv_prev = sum[1];
179+
};
180+
}; // termdet
181+
182+
termdet();
146183

147-
};
148184
// execute post-fence actions
149185
MADNESS_ASSERT(pause_during_epilogue == false);
150186
epilogue();
151187
world_.am.free_managed_buffers(); // free up communication buffers
152188
deferred_->do_cleanup();
189+
190+
// repeat termination detection in case epilogue or cleanup produced tasks
191+
termdet();
192+
193+
// ensure postcondition
194+
world_.taskq.fence();
195+
153196
#ifdef MADNESS_HAS_GOOGLE_PERF_TCMALLOC
154197
MallocExtension::instance()->ReleaseFreeMemory();
155198
// print("clearing memory");

src/madness/world/worldgop.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -706,14 +706,14 @@ namespace madness {
706706

707707
/// Synchronizes all processes in communicator AND globally ensures no pending AM or tasks
708708

709-
/// \internal Runs Dykstra-like termination algorithm on binary tree by
710-
/// locally ensuring ntask=0 and all am sent and processed,
711-
/// and then participating in a global sum of nsent and nrecv.
712-
/// Then globally checks that nsent=nrecv and that both are
713-
/// constant over two traversals. We are then sure
709+
/// \internal Runs Dijkstra-like termination algorithm on binary tree
710+
/// which stops when global sum of # of tasks in queue (`ntask`) is
711+
/// zero and global sum of the # of sent/received AMs (`nsent`/`nrecv`)
712+
/// are equal and unchanged over two traversals. We are then sure
714713
/// that all tasks and AM are processed and there are no AM in
715714
/// flight.
716715
/// \param[in] debug set to true to print progress statistics using madness::print(); the default is false.
716+
/// \post `this->world_.taskq.size()==0`
717717
void fence(bool debug = false);
718718

719719
/// Executes an action on single (this) thread after ensuring all other work is done

0 commit comments

Comments
 (0)