@@ -232,4 +232,159 @@ TEST(DryRunExecute, ExceptionSafety)
232232 EXPECT_FALSE (resource::get_dry_run_flag (res));
233233}
234234
235+ // ===== Independent-counting tests for dry_run_resources =====
236+
237+ TEST (DryRunResources, IndependentCounting_DefaultWorkspace)
238+ {
239+ raft::resources res;
240+
241+ dry_run_resources dry_res (res);
242+
243+ constexpr std::size_t kWsSize = 1024 ;
244+ constexpr std::size_t kGlobalSize = 2048 ;
245+
246+ auto * ws_mr = resource::get_workspace_resource (dry_res);
247+ void * ws_ptr = ws_mr->allocate (rmm::cuda_stream_view{}, kWsSize );
248+
249+ auto * dev_mr = rmm::mr::get_current_device_resource ();
250+ void * dev_ptr = dev_mr->allocate (rmm::cuda_stream_view{}, kGlobalSize );
251+
252+ auto peak = dry_res.get_bytes_peak ();
253+ EXPECT_EQ (peak.device_workspace , kWsSize );
254+ EXPECT_EQ (peak.device_global , kGlobalSize );
255+ EXPECT_EQ (peak.total (), kWsSize + kGlobalSize );
256+
257+ ws_mr->deallocate (rmm::cuda_stream_view{}, ws_ptr, kWsSize );
258+ dev_mr->deallocate (rmm::cuda_stream_view{}, dev_ptr, kGlobalSize );
259+ }
260+
261+ TEST (DryRunResources, IndependentCounting_WorkspaceSetToGlobal)
262+ {
263+ raft::resources res;
264+ resource::set_workspace_to_global_resource (res);
265+
266+ dry_run_resources dry_res (res);
267+
268+ constexpr std::size_t kWsSize = 1024 ;
269+ constexpr std::size_t kGlobalSize = 2048 ;
270+
271+ auto * ws_mr = resource::get_workspace_resource (dry_res);
272+ void * ws_ptr = ws_mr->allocate (rmm::cuda_stream_view{}, kWsSize );
273+
274+ auto * dev_mr = rmm::mr::get_current_device_resource ();
275+ void * dev_ptr = dev_mr->allocate (rmm::cuda_stream_view{}, kGlobalSize );
276+
277+ auto peak = dry_res.get_bytes_peak ();
278+ EXPECT_EQ (peak.device_workspace , kWsSize );
279+ EXPECT_EQ (peak.device_global , kGlobalSize );
280+ EXPECT_EQ (peak.total (), kWsSize + kGlobalSize );
281+
282+ ws_mr->deallocate (rmm::cuda_stream_view{}, ws_ptr, kWsSize );
283+ dev_mr->deallocate (rmm::cuda_stream_view{}, dev_ptr, kGlobalSize );
284+ }
285+
286+ // ===== Independent-counting tests for memory_stats_resources =====
287+
288+ TEST (MemoryStatsResources, IndependentCounting_DefaultWorkspace)
289+ {
290+ raft::resources res;
291+
292+ memory_stats_resources stat_res (res);
293+
294+ constexpr std::size_t kWsSize = 1024 ;
295+ constexpr std::size_t kGlobalSize = 2048 ;
296+
297+ auto * ws_mr = resource::get_workspace_resource (stat_res);
298+ void * ws_ptr = ws_mr->allocate (rmm::cuda_stream_view{}, kWsSize );
299+
300+ auto * dev_mr = rmm::mr::get_current_device_resource ();
301+ void * dev_ptr = dev_mr->allocate (rmm::cuda_stream_view{}, kGlobalSize );
302+
303+ auto peak = stat_res.get_bytes_peak ();
304+ EXPECT_EQ (peak.device_workspace , kWsSize );
305+ EXPECT_EQ (peak.device_global , kGlobalSize );
306+ EXPECT_EQ (peak.total (), kWsSize + kGlobalSize );
307+
308+ ws_mr->deallocate (rmm::cuda_stream_view{}, ws_ptr, kWsSize );
309+ dev_mr->deallocate (rmm::cuda_stream_view{}, dev_ptr, kGlobalSize );
310+ }
311+
312+ TEST (MemoryStatsResources, IndependentCounting_WorkspaceSetToGlobal)
313+ {
314+ raft::resources res;
315+ resource::set_workspace_to_global_resource (res);
316+
317+ memory_stats_resources stat_res (res);
318+
319+ constexpr std::size_t kWsSize = 1024 ;
320+ constexpr std::size_t kGlobalSize = 2048 ;
321+
322+ auto * ws_mr = resource::get_workspace_resource (stat_res);
323+ void * ws_ptr = ws_mr->allocate (rmm::cuda_stream_view{}, kWsSize );
324+
325+ auto * dev_mr = rmm::mr::get_current_device_resource ();
326+ void * dev_ptr = dev_mr->allocate (rmm::cuda_stream_view{}, kGlobalSize );
327+
328+ auto peak = stat_res.get_bytes_peak ();
329+ EXPECT_EQ (peak.device_workspace , kWsSize );
330+ EXPECT_EQ (peak.device_global , kGlobalSize );
331+ EXPECT_EQ (peak.total (), kWsSize + kGlobalSize );
332+
333+ ws_mr->deallocate (rmm::cuda_stream_view{}, ws_ptr, kWsSize );
334+ dev_mr->deallocate (rmm::cuda_stream_view{}, dev_ptr, kGlobalSize );
335+ }
336+
337+ TEST (MemoryStatsResources, IndependentCounting_PoolWorkspace)
338+ {
339+ raft::resources res;
340+ constexpr std::size_t kPoolLimit = 64UL * 1024UL * 1024UL ;
341+ resource::set_workspace_to_pool_resource (res, kPoolLimit );
342+
343+ memory_stats_resources stat_res (res);
344+
345+ constexpr std::size_t kWsSize = 1024 ;
346+ constexpr std::size_t kGlobalSize = 2048 ;
347+
348+ auto * ws_mr = resource::get_workspace_resource (stat_res);
349+ void * ws_ptr = ws_mr->allocate (rmm::cuda_stream_view{}, kWsSize );
350+
351+ auto * dev_mr = rmm::mr::get_current_device_resource ();
352+ void * dev_ptr = dev_mr->allocate (rmm::cuda_stream_view{}, kGlobalSize );
353+
354+ auto peak = stat_res.get_bytes_peak ();
355+ EXPECT_EQ (peak.device_workspace , kWsSize );
356+ EXPECT_EQ (peak.device_global , kGlobalSize );
357+ EXPECT_EQ (peak.total (), kWsSize + kGlobalSize );
358+
359+ ws_mr->deallocate (rmm::cuda_stream_view{}, ws_ptr, kWsSize );
360+ dev_mr->deallocate (rmm::cuda_stream_view{}, dev_ptr, kGlobalSize );
361+ }
362+
363+ // ===== Nested wrappers test =====
364+
365+ TEST (IndependentCounting, NestedDryRunInStats)
366+ {
367+ raft::resources res;
368+
369+ memory_stats_resources stat_res (res);
370+ dry_run_resources dry_res (stat_res);
371+
372+ constexpr std::size_t kWsSize = 1024 ;
373+ constexpr std::size_t kGlobalSize = 2048 ;
374+
375+ auto * ws_mr = resource::get_workspace_resource (dry_res);
376+ void * ws_ptr = ws_mr->allocate (rmm::cuda_stream_view{}, kWsSize );
377+
378+ auto * dev_mr = rmm::mr::get_current_device_resource ();
379+ void * dev_ptr = dev_mr->allocate (rmm::cuda_stream_view{}, kGlobalSize );
380+
381+ auto peak = dry_res.get_bytes_peak ();
382+ EXPECT_EQ (peak.device_workspace , kWsSize );
383+ EXPECT_EQ (peak.device_global , kGlobalSize );
384+ EXPECT_EQ (peak.total (), kWsSize + kGlobalSize );
385+
386+ ws_mr->deallocate (rmm::cuda_stream_view{}, ws_ptr, kWsSize );
387+ dev_mr->deallocate (rmm::cuda_stream_view{}, dev_ptr, kGlobalSize );
388+ }
389+
235390} // namespace raft::util
0 commit comments