@@ -91,19 +91,61 @@ namespace chunk_assignment
9191 PartialAssignment ( ChunkTable notAssigned, ChunkTable assigned );
9292 };
9393
94+ /* *
95+ * @brief Interface for a chunk distribution strategy.
96+ *
97+ * Used for implementing algorithms that read a ChunkTable as produced
98+ * by BaseRecordComponent::availableChunks() and produce as result a
99+ * ChunkTable that guides data sinks on how to load data into reading
100+ * processes.
101+ */
94102 struct Strategy
95103 {
104+ /* *
105+ * @brief Assign chunks to be loaded to reading processes.
106+ *
107+ * @param partialAssignment Two chunk tables: one holding the chunks that
108+ * are still unassigned and one holding chunks that might
109+ * have already been assigned previously.
110+ * Implementations merge the unassigned chunks into the partially assigned table.
111+ * @param in Meta information on writing processes, e.g. hostnames.
112+ * @param out Meta information on reading processes, e.g. hostnames.
113+ * @return ChunkTable A table that assigns chunks to reading processes.
114+ */
96115 virtual ChunkTable
97116 assign (
98- PartialAssignment,
117+ PartialAssignment partialAssignment ,
99118 RankMeta const & in,
100119 RankMeta const & out ) = 0 ;
101120
/* Defaulted virtual destructor, so that implementations can be destroyed
 * through a pointer to Strategy (e.g. std::unique_ptr< Strategy >). */
102121 virtual ~Strategy () = default ;
103122 };
104123
124+ /* *
125+ * @brief A chunk distribution strategy that does not guarantee a complete
126+ * distribution.
127+ *
128+ * Combine with a full Strategy using the FromPartialStrategy struct to
129+ * obtain a Strategy that works in two phases:
130+ * 1. Apply the partial strategy.
131+ * 2. Apply the full strategy to assign unassigned leftovers.
132+ *
133+ */
105134 struct PartialStrategy
106135 {
136+ /* *
137+ * @brief Assign chunks to be loaded to reading processes.
138+ *
139+ * @param partialAssignment Two chunktables, one of unassigned chunks
140+ * and one of chunks that might have already been assigned
141+ * previously.
142+ * Merge the unassigned chunks into the partially assigned table.
143+ * @param in Meta information on writing processes, e.g. hostnames.
144+ * @param out Meta information on reading processes, e.g. hostnames.
145+ * @return PartialAssignment Two chunktables, one of leftover chunks
146+ * that were not assigned and one that assigns chunks to
147+ * reading processes.
148+ */
107149 virtual PartialAssignment
108150 assign (
109151 PartialAssignment,
@@ -120,6 +162,18 @@ namespace chunk_assignment
120162 RankMeta const & rankMetaOut,
121163 Strategy & strategy );
122164
165+ /* *
166+ * @brief Combine a PartialStrategy and a Strategy to obtain a Strategy
167+ * working in two phases.
168+ *
169+ * 1. Apply the PartialStrategy to obtain a PartialAssignment.
170+ * This may be a heuristic that will not work under all circumstances,
171+ * e.g. trying to distribute chunks within the same compute node.
172+ * 2. Apply the Strategy to assign leftovers.
173+ * This guarantees correctness in case the heuristics in the first phase
174+ * were not applicable e.g. due to a suboptimal setup.
175+ *
176+ */
123177 struct FromPartialStrategy : Strategy
124178 {
125179 FromPartialStrategy (
@@ -134,12 +188,25 @@ namespace chunk_assignment
134188 std::unique_ptr< Strategy > m_secondPass;
135189 };
136190
191+ /* *
192+ * @brief Simple strategy that assigns produced chunks to reading processes
193+ * in a round-robin manner.
194+ *
195+ */
137196 struct RoundRobin : Strategy
138197 {
/* Declares assign() with the same parameter list and return type as the
 * pure virtual Strategy::assign(). */
139198 ChunkTable
140199 assign ( PartialAssignment, RankMeta const & in, RankMeta const & out );
141200 };
142201
202+ /* *
203+ * @brief Strategy that assigns chunks to be read by processes within
204+ * the same host that produced the chunk.
205+ *
206+ * The distribution strategy used within one such host can be flexibly
207+ * chosen.
208+ *
209+ */
143210 struct ByHostname : PartialStrategy
144211 {
145212 ByHostname ( std::unique_ptr< Strategy > withinNode );
@@ -152,6 +219,16 @@ namespace chunk_assignment
152219 std::unique_ptr< Strategy > m_withinNode;
153220 };
154221
222+ /* *
223+ * @brief Slice the n-dimensional dataset into hyperslabs and distribute
224+ * chunks according to them.
225+ *
226+ * This strategy only produces chunks in the returned ChunkTable for the
227+ * calling parallel process.
228+ * Incoming chunks are intersected with the hyperslab and assigned to the
229+ * current parallel process in case this intersection is non-empty.
230+ *
231+ */
155232 struct ByCuboidSlice : Strategy
156233 {
157234 ByCuboidSlice (
@@ -170,6 +247,18 @@ namespace chunk_assignment
170247 unsigned int mpi_rank, mpi_size;
171248 };
172249
250+ /* *
251+ * @brief Strategy that tries to assign chunks in a balanced manner without
252+ * arbitrarily cutting chunks.
253+ *
254+ * Idea:
255+ * Calculate the ideal amount of data to be loaded per parallel process
256+ * and cut chunks such that no chunk is larger than that ideal size.
257+ * The resulting problem is an instance of the Bin-Packing problem which
258+ * can be solved by a factor-2 approximation, meaning that a reading process
259+ * will be assigned at worst twice the ideal amount of data.
260+ *
261+ */
173262 struct BinPacking : Strategy
174263 {
175264 ChunkTable
0 commit comments