From b24e2414b821dfa31593ad712b46d641c36d37c0 Mon Sep 17 00:00:00 2001 From: "Andrew M. Bradley" Date: Tue, 14 Jun 2022 18:05:27 -0400 Subject: [PATCH 1/5] f90->F90: By convention, F implies that the C preprocessor is run. --- tests/algorithm/CMakeLists.txt | 2 +- tests/algorithm/{lin_interp_ref.f90 => lin_interp_ref.F90} | 0 tests/io/CMakeLists.txt | 2 +- tests/io/{array_io_tests.f90 => array_io_tests.F90} | 0 4 files changed, 2 insertions(+), 2 deletions(-) rename tests/algorithm/{lin_interp_ref.f90 => lin_interp_ref.F90} (100%) rename tests/io/{array_io_tests.f90 => array_io_tests.F90} (100%) diff --git a/tests/algorithm/CMakeLists.txt b/tests/algorithm/CMakeLists.txt index 75f891f0..71687d46 100644 --- a/tests/algorithm/CMakeLists.txt +++ b/tests/algorithm/CMakeLists.txt @@ -6,7 +6,7 @@ configure_file (${CMAKE_CURRENT_SOURCE_DIR}/ekat_config.f.in # Test lin interp processes set (LIN_INTERP_SRCS lin_interp_test.cpp - lin_interp_ref.f90 + lin_interp_ref.F90 ) if (EKAT_TEST_DOUBLE_PRECISION) diff --git a/tests/algorithm/lin_interp_ref.f90 b/tests/algorithm/lin_interp_ref.F90 similarity index 100% rename from tests/algorithm/lin_interp_ref.f90 rename to tests/algorithm/lin_interp_ref.F90 diff --git a/tests/io/CMakeLists.txt b/tests/io/CMakeLists.txt index a4a0a57a..b81eecc3 100644 --- a/tests/io/CMakeLists.txt +++ b/tests/io/CMakeLists.txt @@ -3,7 +3,7 @@ include(EkatCreateUnitTest) # Test utilities (f90) set (ARRAY_IO_SRCS array_io_tests.cpp - array_io_tests.f90 + array_io_tests.F90 ) if (EKAT_TEST_DOUBLE_PRECISION) EkatCreateUnitTest(array_io${DP_POSTFIX} "${ARRAY_IO_SRCS}" diff --git a/tests/io/array_io_tests.f90 b/tests/io/array_io_tests.F90 similarity index 100% rename from tests/io/array_io_tests.f90 rename to tests/io/array_io_tests.F90 From 6b3aefe58102e7b3164f5a0ffc0a71f96119c373 Mon Sep 17 00:00:00 2001 From: "Andrew M. Bradley" Date: Tue, 14 Jun 2022 19:08:58 -0400 Subject: [PATCH 2/5] Generalize EKAT to handle HIP as well as Cuda. First draft: * Make ekat.hpp the entry point for Kokkos_Core and EkatGpuSpace. * Use EKAT_ENABLE_GPU in several spots instead of KOKKOS_ENABLE_CUDA. * Use EkatGpuSpace in several spots instead of Kokkos::Cuda. * Create some Kokkos::Experimental::HIP specializations. --- CMakeLists.txt | 10 +++++++- src/ekat/ekat.hpp | 12 +++++++++ src/ekat/ekat_config.h.in | 3 +++ src/ekat/ekat_pack.hpp | 2 +- src/ekat/ekat_pack_kokkos.hpp | 4 +-- src/ekat/ekat_scalar_traits.hpp | 3 +-- src/ekat/ekat_session.cpp | 13 +++++++--- src/ekat/kokkos/ekat_kokkos_utils.hpp | 30 +++++++++++++--------- src/ekat/util/ekat_arch.hpp | 6 ++--- src/ekat/util/ekat_math_utils.hpp | 6 ++--- src/ekat/util/ekat_test_utils.cpp | 5 ++-- src/ekat/util/ekat_tridiag.hpp | 37 +++++++++++++++++++-------- src/ekat/util/ekat_upper_bound.hpp | 6 ++--- tests/kokkos/workspace_tests.cpp | 10 ++++---- tests/pack/pack_kokkos_tests.cpp | 8 +++--- 15 files changed, 100 insertions(+), 55 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 03ff8c0c..296888f9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -98,6 +98,14 @@ set (EKAT_TPL_LIBRARIES_INTERNAL) BuildKokkos() list (APPEND EKAT_TPL_LIBRARIES_INTERNAL ${Kokkos_LIBRARIES}) +set (EKAT_ENABLE_GPU False) +if (Kokkos_ENABLE_CUDA) + set (EKAT_ENABLE_GPU True) +endif () +if (Kokkos_ENABLE_HIP) + set (EKAT_ENABLE_GPU True) +endif () + # EKAT also has some yaml parsing utility BuildYamlcpp() list (APPEND EKAT_TPL_LIBRARIES_INTERNAL ${YAMLCPP_LIBRARIES}) @@ -128,7 +136,7 @@ add_subdirectory(src/ekat) # Set some vars needed to configure test-launcher SetMpiRuntimeEnvVars() -if (Kokkos_ENABLE_CUDA) +if (EKAT_ENABLE_GPU) set (TEST_LAUNCHER_ON_GPU True) else() set (TEST_LAUNCHER_ON_GPU False) diff --git a/src/ekat/ekat.hpp b/src/ekat/ekat.hpp index a81352bc..6a0c2e2c 100644 --- a/src/ekat/ekat.hpp +++ b/src/ekat/ekat.hpp @@ -3,6 +3,8 @@ #include "ekat/ekat_config.h" +#include + /* * This header doesn't do much as of now. It includes ekat_config.h, * and declares an alias for int. @@ -18,6 +20,16 @@ static constexpr bool ekatBFB = true; static constexpr bool ekatBFB = false; #endif +#ifdef EKAT_ENABLE_GPU +# if defined KOKKOS_ENABLE_CUDA +typedef Kokkos::Cuda EkatGpuSpace; +# elif defined KOKKOS_ENABLE_HIP +typedef Kokkos::Experimental::HIP EkatGpuSpace; +# else +error "EKAT does not recognize a GPU space other than Cuda and HIP." +# endif +#endif + } // namespace ekat #endif // EKAT_HPP diff --git a/src/ekat/ekat_config.h.in b/src/ekat/ekat_config.h.in index da36c463..83b13360 100644 --- a/src/ekat/ekat_config.h.in +++ b/src/ekat/ekat_config.h.in @@ -22,4 +22,7 @@ // Decide whether ekat defaults to BFB behavior when possible/appropriate #cmakedefine EKAT_FPE +// A GPU space has been enabled in Kokkos, e.g., CUDA or HIP. +#cmakedefine EKAT_ENABLE_GPU + #endif // EKAT_CONFIG_H diff --git a/src/ekat/ekat_pack.hpp b/src/ekat/ekat_pack.hpp index dc4094c0..c55647df 100644 --- a/src/ekat/ekat_pack.hpp +++ b/src/ekat/ekat_pack.hpp @@ -4,12 +4,12 @@ //TODO // - bounds checking define +#include "ekat/ekat.hpp" #include "ekat/util/ekat_math_utils.hpp" #include "ekat/ekat_macros.hpp" #include "ekat/ekat_scalar_traits.hpp" #include "ekat/ekat_type_traits.hpp" -#include #include "spdlog/fmt/ostr.h" #include #include diff --git a/src/ekat/ekat_pack_kokkos.hpp b/src/ekat/ekat_pack_kokkos.hpp index be654de9..69abe61a 100644 --- a/src/ekat/ekat_pack_kokkos.hpp +++ b/src/ekat/ekat_pack_kokkos.hpp @@ -1,11 +1,9 @@ #ifndef EKAT_PACK_KOKKOS_HPP #define EKAT_PACK_KOKKOS_HPP +#include "ekat/ekat.hpp" #include "ekat/ekat_pack.hpp" #include "ekat/kokkos/ekat_kokkos_utils.hpp" -#include "ekat/ekat.hpp" - -#include #include #include diff --git a/src/ekat/ekat_scalar_traits.hpp b/src/ekat/ekat_scalar_traits.hpp index 54cb3f2d..57ba5562 100644 --- a/src/ekat/ekat_scalar_traits.hpp +++ b/src/ekat/ekat_scalar_traits.hpp @@ -1,10 +1,9 @@ #ifndef EKAT_SCALAR_TRAITS_HPP #define EKAT_SCALAR_TRAITS_HPP +#include "ekat/ekat.hpp" #include "ekat/ekat_assert.hpp" -#include - #include #include #include diff --git a/src/ekat/ekat_session.cpp b/src/ekat/ekat_session.cpp index e1573d19..e7f03430 100644 --- a/src/ekat/ekat_session.cpp +++ b/src/ekat/ekat_session.cpp @@ -1,9 +1,8 @@ +#include "ekat/ekat.hpp" #include "ekat/ekat_session.hpp" #include "ekat/ekat_assert.hpp" #include "ekat/util/ekat_arch.hpp" -#include - #include namespace ekat_impl { @@ -25,10 +24,16 @@ void initialize_kokkos () { // If for some reason we're running on a GPU platform, have Cuda enabled, // but are using a different execution space, this initialization is still // OK. The rank gets a GPU assigned and simply will ignore it. -#ifdef KOKKOS_ENABLE_CUDA +#ifdef EKAT_ENABLE_GPU int nd; +# if KOKKOS_ENABLE_CUDA const auto ret = cudaGetDeviceCount(&nd); - if (ret != cudaSuccess) { + const bool ok = ret == cudaSuccess; +# else + const auto ret = hipGetDeviceCount(&nd); + const bool ok = ret == hipSuccess; +# endif + if (not ok) { // It isn't a big deal if we can't get the device count. nd = 1; } diff --git a/src/ekat/kokkos/ekat_kokkos_utils.hpp b/src/ekat/kokkos/ekat_kokkos_utils.hpp index 1180744e..6f515dd7 100644 --- a/src/ekat/kokkos/ekat_kokkos_utils.hpp +++ b/src/ekat/kokkos/ekat_kokkos_utils.hpp @@ -85,10 +85,10 @@ void parallel_reduce (const TeamMember& team, } }); -#ifdef KOKKOS_ENABLE_CUDA +#ifdef EKAT_ENABLE_GPU // Broadcast result to all threads by doing sum of one thread's // non-0 value and the rest of the 0s. - Kokkos::Impl::CudaTeamMember::vector_reduce(Kokkos::Sum(local_tmp)); + TeamMember::vector_reduce(Kokkos::Sum(local_tmp)); #endif result = local_tmp; @@ -292,16 +292,22 @@ struct ExeSpaceUtils { * parallelism by having many threads per team. This is due to having more * threads than the main kernel loop has indices. */ -#ifdef KOKKOS_ENABLE_CUDA +#ifdef EKAT_ENABLE_GPU template <> -struct ExeSpaceUtils { - using TeamPolicy = Kokkos::TeamPolicy; +struct ExeSpaceUtils { + using TeamPolicy = Kokkos::TeamPolicy; using HostTeamPolicy = Kokkos::TeamPolicy; // Enable policy on Host only if UVM is enabled. template struct PolicyOnHostHelper { - static constexpr bool UseUVM = std::is_same::value; + static constexpr bool UseUVM = +#ifdef KOKKOS_ENABLE_CUDA + std::is_same::value +#else + false +#endif + ; static_assert (HD==Device || UseUVM, "Error! Cannot get a policy on Host unless Cuda UVM is enabled in Kokkos."); using type = typename std::conditional::type; }; @@ -521,14 +527,14 @@ class TeamUtils : public TeamUtilsCommonBase -class TeamUtils : public TeamUtilsCommonBase +class TeamUtils : public TeamUtilsCommonBase { - using Device = Kokkos::Device; + using Device = Kokkos::Device; using flag_type = int; // this appears to be the smallest type that correctly handles atomic operations using view_1d = typename KokkosTypes::view_1d; - using RandomGenerator = Kokkos::Random_XorShift64_Pool; + using RandomGenerator = Kokkos::Random_XorShift64_Pool; using rnd_type = typename RandomGenerator::generator_type; int _num_ws_slots; // how many workspace slots (potentially more than the num of concurrent teams due to overprovision factor) @@ -541,7 +547,7 @@ class TeamUtils : public TeamUtilsCommonBase TeamUtils(const TeamPolicy& policy, const double& overprov_factor = 1.0) : - TeamUtilsCommonBase(policy), + TeamUtilsCommonBase(policy), _num_ws_slots(this->_league_size > this->_num_teams ? (overprov_factor * this->_num_teams > this->_league_size ? this->_league_size : overprov_factor * this->_num_teams) : this->_num_teams), @@ -643,7 +649,7 @@ int strcmp(const char* first, const char* second) using std::strlen; using std::strcpy; using std::strcmp; -#endif // KOKKOS_ENABLE_CUDA +#endif // EKAT_ENABLE_GPU } // namespace impl diff --git a/src/ekat/util/ekat_arch.hpp b/src/ekat/util/ekat_arch.hpp index da2622ee..3f9082bf 100644 --- a/src/ekat/util/ekat_arch.hpp +++ b/src/ekat/util/ekat_arch.hpp @@ -1,8 +1,8 @@ #ifndef EKAT_ARCH_HPP #define EKAT_ARCH_HPP +#include "ekat/ekat.hpp" #include -#include /* * Architecture-related calls @@ -16,8 +16,8 @@ std::string ekat_config_string(); template struct OnGpu { enum : bool { value = false }; }; -#ifdef KOKKOS_ENABLE_CUDA -template <> struct OnGpu { enum : bool { value = true }; }; +#ifdef EKAT_ENABLE_GPU +template <> struct OnGpu { enum : bool { value = true }; }; #endif } // namespace ekat diff --git a/src/ekat/util/ekat_math_utils.hpp b/src/ekat/util/ekat_math_utils.hpp index 916c3909..8c6e88d3 100644 --- a/src/ekat/util/ekat_math_utils.hpp +++ b/src/ekat/util/ekat_math_utils.hpp @@ -4,9 +4,7 @@ #include "ekat/ekat_scalar_traits.hpp" #include "ekat/ekat.hpp" -#include - -#ifndef KOKKOS_ENABLE_CUDA +#ifndef EKAT_ENABLE_GPU # include # include #endif @@ -15,7 +13,7 @@ namespace ekat { namespace impl { -#ifdef KOKKOS_ENABLE_CUDA +#ifdef EKAT_ENABLE_GPU // Replacements for namespace std functions that don't run on the GPU. template KOKKOS_FORCEINLINE_FUNCTION diff --git a/src/ekat/util/ekat_test_utils.cpp b/src/ekat/util/ekat_test_utils.cpp index 86069b8e..e0a82df7 100644 --- a/src/ekat/util/ekat_test_utils.cpp +++ b/src/ekat/util/ekat_test_utils.cpp @@ -1,9 +1,8 @@ +#include "ekat/ekat.hpp" #include "ekat/util/ekat_test_utils.hpp" #include "ekat/ekat_assert.hpp" #include "ekat/util/ekat_string_utils.hpp" -#include - #include namespace ekat { @@ -17,7 +16,7 @@ int get_test_device (const int mpi_rank) // Set to -1 by default, which leaves kokkos in full control int dev_id = -1; -#ifdef KOKKOS_ENABLE_CUDA +#ifdef EKAT_ENABLE_GPU auto count_str = getenv("CTEST_RESOURCE_GROUP_COUNT"); if (count_str!=nullptr) { // If CTest is setting the CTEST_RESOURCE_GROUP_COUNT variable, diff --git a/src/ekat/util/ekat_tridiag.hpp b/src/ekat/util/ekat_tridiag.hpp index 88d6f845..9febbf5e 100644 --- a/src/ekat/util/ekat_tridiag.hpp +++ b/src/ekat/util/ekat_tridiag.hpp @@ -1,11 +1,10 @@ #ifndef EKAT_TRIDIAG_HPP #define EKAT_TRIDIAG_HPP +#include "ekat/ekat.hpp" #include "ekat/util/ekat_math_utils.hpp" #include "ekat/kokkos/ekat_kokkos_types.hpp" -#include - #include namespace ekat { @@ -116,10 +115,11 @@ int get_team_nthr (const TeamMember& team) { return team.team_size(); } -#ifdef KOKKOS_ENABLE_CUDA -KOKKOS_INLINE_FUNCTION -int get_thread_id_within_team (const Kokkos::Impl::CudaTeamMember& team) { -#ifdef __CUDA_ARCH__ +// Impl details for Nvidia and AMD GPUs. + +template KOKKOS_FORCEINLINE_FUNCTION +int get_thread_id_within_team_gpu (const TeamMember& team) { +#if defined __CUDA_ARCH__ || defined __HIP_DEVICE_COMPILE__ // Can't use team.team_rank() here because vector direction also uses physical // threads but TeamMember types don't expose that information. return blockDim.x * threadIdx.y + threadIdx.x; @@ -129,16 +129,33 @@ int get_thread_id_within_team (const Kokkos::Impl::CudaTeamMember& team) { #endif } -KOKKOS_INLINE_FUNCTION -int get_team_nthr (const Kokkos::Impl::CudaTeamMember& team) { -#ifdef __CUDA_ARCH__ +template KOKKOS_FORCEINLINE_FUNCTION +int get_team_nthr_gpu (const TeamMember& team) { +#if defined __CUDA_ARCH__ || defined __HIP_DEVICE_COMPILE__ return blockDim.x * blockDim.y; #else assert(0); return -1; #endif } -#endif + +#ifdef KOKKOS_ENABLE_CUDA +KOKKOS_FORCEINLINE_FUNCTION +int get_thread_id_within_team (const Kokkos::Impl::CudaTeamMember& team) +{ return get_thread_id_within_team_gpu(team); } +KOKKOS_FORCEINLINE_FUNCTION +int get_team_nthr (const Kokkos::Impl::CudaTeamMember& team) +{ return get_team_nthr_gpu(team); } +#endif // KOKKOS_ENABLE_CUDA + +#ifdef KOKKOS_ENABLE_HIP +KOKKOS_FORCEINLINE_FUNCTION +int get_thread_id_within_team (const Kokkos::Impl::HIPTeamMember& team) +{ return get_thread_id_within_team_gpu(team); } +KOKKOS_FORCEINLINE_FUNCTION +int get_team_nthr (const Kokkos::Impl::HIPTeamMember& team) +{ return get_team_nthr_gpu(team); } +#endif // KOKKOS_ENABLE_HIP // The caller must provide the team_barrier after this function returns before A // is accessed. diff --git a/src/ekat/util/ekat_upper_bound.hpp b/src/ekat/util/ekat_upper_bound.hpp index 28d189e0..cab550ef 100644 --- a/src/ekat/util/ekat_upper_bound.hpp +++ b/src/ekat/util/ekat_upper_bound.hpp @@ -1,9 +1,9 @@ #ifndef EKAT_UPPER_BOUND_HPP #define EKAT_UPPER_BOUND_HPP -#include +#include "ekat/ekat.hpp" -#ifndef KOKKOS_ENABLE_CUDA +#ifndef EKAT_ENABLE_GPU # include #endif @@ -47,7 +47,7 @@ const T* upper_bound_impl(const T* first, const T* last, const T& value) return first; } -#ifdef KOKKOS_ENABLE_CUDA +#ifdef EKAT_ENABLE_GPU template KOKKOS_FORCEINLINE_FUNCTION const T* upper_bound(const T* first, const T* last, const T& value) diff --git a/tests/kokkos/workspace_tests.cpp b/tests/kokkos/workspace_tests.cpp index ca1b9133..32e721a9 100644 --- a/tests/kokkos/workspace_tests.cpp +++ b/tests/kokkos/workspace_tests.cpp @@ -122,8 +122,8 @@ static void unittest_workspace() {&ws1, &ws2, &ws3, &ws4}); // Assert the memory access has not exceeded the allocation - const int dist = data_end - (ws4.data()+ws4.size()); - EKAT_KERNEL_ASSERT_MSG(dist >= 0, "Error! Local view extended past allocation"); + EKAT_KERNEL_ASSERT_MSG((data_end - (ws4.data()+ws4.size())) >= 0, + "Error! Local view extended past allocation"); ws.template release_many_contiguous<4>( {&ws1, &ws2, &ws3, &ws4}); @@ -176,8 +176,8 @@ static void unittest_workspace() {&v21, &v22, &v23, &v24}); // Assert the memory access has not exceeded the allocation - const int dist = data_end - (v14.data()+v14.size()); - EKAT_KERNEL_ASSERT_MSG(dist >= 0, "Error! Local view extended past allocation"); + EKAT_KERNEL_ASSERT_MSG((data_end - (v14.data()+v14.size())) >= 0, + "Error! Local view extended past allocation"); ws1.template release_many_contiguous<4>( {&v11, &v12, &v13, &v14}); @@ -343,7 +343,7 @@ static void unittest_workspace() team.team_barrier(); } - #ifndef KOKKOS_ENABLE_CUDA + #ifndef EKAT_ENABLE_GPU #ifdef WS_EXPENSIVE_TEST if (true) #else diff --git a/tests/pack/pack_kokkos_tests.cpp b/tests/pack/pack_kokkos_tests.cpp index 106d3065..50964e91 100644 --- a/tests/pack/pack_kokkos_tests.cpp +++ b/tests/pack/pack_kokkos_tests.cpp @@ -285,9 +285,9 @@ struct VectorT { using type = T; - static T get_value(int arg) { return static_cast(arg); } + KOKKOS_INLINE_FUNCTION static T get_value(int arg) { return static_cast(arg); } - static void modify_value(T& value, int arg) { value += static_cast(arg); } + KOKKOS_INLINE_FUNCTION static void modify_value(T& value, int arg) { value += static_cast(arg); } }; template<> @@ -295,9 +295,9 @@ struct VectorT { using type = char; - static bool get_value(int arg) { return arg%2 == 0; } + KOKKOS_INLINE_FUNCTION static bool get_value(int arg) { return arg%2 == 0; } - static void modify_value(bool& value, int arg) { + KOKKOS_INLINE_FUNCTION static void modify_value(bool& value, int arg) { bool arg_value = get_value(arg); value = (value == arg_value); } From 5fde4936cdd326fd4edb57bd0d90f8fa4a187c01 Mon Sep 17 00:00:00 2001 From: "Andrew M. Bradley" Date: Tue, 14 Jun 2022 19:29:38 -0400 Subject: [PATCH 3/5] More uses of EKAT_ENABLE_GPU. --- cmake/pkg_build/EkatBuildEkat.cmake | 3 +-- tests/CMakeLists.txt | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/cmake/pkg_build/EkatBuildEkat.cmake b/cmake/pkg_build/EkatBuildEkat.cmake index 8af1e06a..40000ab7 100644 --- a/cmake/pkg_build/EkatBuildEkat.cmake +++ b/cmake/pkg_build/EkatBuildEkat.cmake @@ -164,7 +164,7 @@ macro (setVars PREFIX SET_DEFAULTS) if (DEFINED ${PREFIX}_TEST_PACK_SIZE) set (EKAT_TEST_PACK_SIZE ${${PREFIX}_TEST_PACK_SIZE} CACHE STRING "") elseif (SET_DEFAULTS) - if (Kokkos_ENABLE_CUDA) + if (EKAT_ENABLE_GPU) set (EKAT_TEST_PACK_SIZE 1 CACHE STRING "") else () set (EKAT_TEST_PACK_SIZE 16 CACHE STRING "") @@ -191,5 +191,4 @@ macro (setVars PREFIX SET_DEFAULTS) unset (setVars_CMAKE_BUILD_TYPE_ci) unset (setVars_DEBUG_BUILD) - unset (setVars_CUDA_POS) endmacro (setVars) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 7b67b331..611f0e6a 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -35,7 +35,7 @@ option (EKAT_TEST_DOUBLE_PRECISION "Whether tests should be performed in double ### PACKS ### ####################### -if (Kokkos_ENABLE_CUDA) +if (EKAT_ENABLE_GPU) set (DEFAULT_PACK_SIZE 1) set (DEFAULT_SMALL_PACK_SIZE 1) else () From ee0d01e7dc16d29527079bf0cb8392906badd92c Mon Sep 17 00:00:00 2001 From: "Andrew M. Bradley" Date: Tue, 14 Jun 2022 20:03:58 -0400 Subject: [PATCH 4/5] More uses of EKAT_ENABLE_GPU and a BFB flag guard. --- tests/CMakeLists.txt | 2 +- tests/algorithm/tridiag_tests_correctness.cpp | 2 ++ tests/kokkos/kokkos_utils_tests.cpp | 2 +- tests/kokkos/workspace_tests.cpp | 4 ++-- 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 611f0e6a..6cb41a17 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -7,7 +7,7 @@ ### THREADING ### ####################### -if (Kokkos_ENABLE_CUDA) +if (EKAT_ENABLE_GPU) set (DEFAULT_MAX_THREADS 1) else () set (DEFAULT_MAX_THREADS 16) diff --git a/tests/algorithm/tridiag_tests_correctness.cpp b/tests/algorithm/tridiag_tests_correctness.cpp index 44a65cc5..eee64874 100644 --- a/tests/algorithm/tridiag_tests_correctness.cpp +++ b/tests/algorithm/tridiag_tests_correctness.cpp @@ -570,7 +570,9 @@ TEST_CASE("property", "tridiag") { } TEST_CASE("bfb", "tridiag") { +#ifdef EKAT_DEFAULT_BFB ekat::test::correct::run_bfb_test<1,1>(); if (EKAT_TEST_PACK_SIZE > 1) ekat::test::correct::run_bfb_test(); +#endif } diff --git a/tests/kokkos/kokkos_utils_tests.cpp b/tests/kokkos/kokkos_utils_tests.cpp index ebf9b6ed..228229a1 100644 --- a/tests/kokkos/kokkos_utils_tests.cpp +++ b/tests/kokkos/kokkos_utils_tests.cpp @@ -290,7 +290,7 @@ void test_view_reduction(const Scalar a=Scalar(0.0), const int begin=0, const in const auto results_h = Kokkos::create_mirror_view(results); int team_size = ExeSpace::concurrency(); -#ifdef KOKKOS_ENABLE_CUDA +#ifdef EKAT_ENABLE_GPU ExeSpace temp_space; auto num_sm = temp_space.impl_internal_space_instance()->m_multiProcCount; team_size /= (ekat::is_single_precision::value ? num_sm*64 : num_sm*32); diff --git a/tests/kokkos/workspace_tests.cpp b/tests/kokkos/workspace_tests.cpp index 32e721a9..cd5b6db6 100644 --- a/tests/kokkos/workspace_tests.cpp +++ b/tests/kokkos/workspace_tests.cpp @@ -492,8 +492,8 @@ TEST_CASE("workspace_manager", "[utils]") { unit_test::UnitWrap::UnitTest::unittest_workspace(); } -#ifdef KOKKOS_ENABLE_CUDA -// Force host testing on CUDA +#ifdef EKAT_ENABLE_GPU +// Force host testing when the exe space is a GPU space. TEST_CASE("workspace_manager_host", "[utils]") { unit_test::UnitWrap::UnitTest::unittest_workspace(); } From ecfbe71f12882ef111fdb04878ad6598982b314d Mon Sep 17 00:00:00 2001 From: "Andrew M. Bradley" Date: Tue, 14 Jun 2022 21:17:39 -0400 Subject: [PATCH 5/5] Fixes for Cuda. --- src/ekat/ekat_session.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/ekat/ekat_session.cpp b/src/ekat/ekat_session.cpp index e7f03430..5d31d6c0 100644 --- a/src/ekat/ekat_session.cpp +++ b/src/ekat/ekat_session.cpp @@ -26,12 +26,14 @@ void initialize_kokkos () { // OK. The rank gets a GPU assigned and simply will ignore it. #ifdef EKAT_ENABLE_GPU int nd; -# if KOKKOS_ENABLE_CUDA +# if defined KOKKOS_ENABLE_CUDA const auto ret = cudaGetDeviceCount(&nd); const bool ok = ret == cudaSuccess; -# else +# elif defined KOKKOS_ENABLE_HIP const auto ret = hipGetDeviceCount(&nd); const bool ok = ret == hipSuccess; +# else + error "No valid GPU space, yet EKAT_ENABLE_GPU is defined." # endif if (not ok) { // It isn't a big deal if we can't get the device count.