Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,14 @@ set (EKAT_TPL_LIBRARIES_INTERNAL)
BuildKokkos()
list (APPEND EKAT_TPL_LIBRARIES_INTERNAL ${Kokkos_LIBRARIES})

set (EKAT_ENABLE_GPU False)
if (Kokkos_ENABLE_CUDA)
set (EKAT_ENABLE_GPU True)
endif ()
if (Kokkos_ENABLE_HIP)
set (EKAT_ENABLE_GPU True)
endif ()

# EKAT also has some yaml parsing utility
BuildYamlcpp()
list (APPEND EKAT_TPL_LIBRARIES_INTERNAL ${YAMLCPP_LIBRARIES})
Expand Down Expand Up @@ -128,7 +136,7 @@ add_subdirectory(src/ekat)

# Set some vars needed to configure test-launcher
SetMpiRuntimeEnvVars()
if (Kokkos_ENABLE_CUDA)
if (EKAT_ENABLE_GPU)
set (TEST_LAUNCHER_ON_GPU True)
else()
set (TEST_LAUNCHER_ON_GPU False)
Expand Down
3 changes: 1 addition & 2 deletions cmake/pkg_build/EkatBuildEkat.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ macro (setVars PREFIX SET_DEFAULTS)
if (DEFINED ${PREFIX}_TEST_PACK_SIZE)
set (EKAT_TEST_PACK_SIZE ${${PREFIX}_TEST_PACK_SIZE} CACHE STRING "")
elseif (SET_DEFAULTS)
if (Kokkos_ENABLE_CUDA)
if (EKAT_ENABLE_GPU)
set (EKAT_TEST_PACK_SIZE 1 CACHE STRING "")
else ()
set (EKAT_TEST_PACK_SIZE 16 CACHE STRING "")
Expand All @@ -191,5 +191,4 @@ macro (setVars PREFIX SET_DEFAULTS)

unset (setVars_CMAKE_BUILD_TYPE_ci)
unset (setVars_DEBUG_BUILD)
unset (setVars_CUDA_POS)
endmacro (setVars)
12 changes: 12 additions & 0 deletions src/ekat/ekat.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@

#include "ekat/ekat_config.h"

#include <Kokkos_Core.hpp>

/*
* This header doesn't do much as of now. It includes ekat_config.h,
* and declares an alias for int.
Expand All @@ -18,6 +20,16 @@ static constexpr bool ekatBFB = true;
static constexpr bool ekatBFB = false;
#endif

#ifdef EKAT_ENABLE_GPU
# if defined KOKKOS_ENABLE_CUDA
typedef Kokkos::Cuda EkatGpuSpace;
# elif defined KOKKOS_ENABLE_HIP
typedef Kokkos::Experimental::HIP EkatGpuSpace;
# else
error "EKAT does not recognize a GPU space other than Cuda and HIP."
# endif
#endif

} // namespace ekat

#endif // EKAT_HPP
3 changes: 3 additions & 0 deletions src/ekat/ekat_config.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,7 @@
// Decide whether ekat defaults to BFB behavior when possible/appropriate
#cmakedefine EKAT_FPE

// A GPU space has been enabled in Kokkos, e.g., CUDA or HIP.
#cmakedefine EKAT_ENABLE_GPU

#endif // EKAT_CONFIG_H
2 changes: 1 addition & 1 deletion src/ekat/ekat_pack.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,12 @@
//TODO
// - bounds checking define

#include "ekat/ekat.hpp"
#include "ekat/util/ekat_math_utils.hpp"
#include "ekat/ekat_macros.hpp"
#include "ekat/ekat_scalar_traits.hpp"
#include "ekat/ekat_type_traits.hpp"

#include <Kokkos_Core.hpp>
#include "spdlog/fmt/ostr.h"
#include <iostream>
#include <type_traits>
Expand Down
4 changes: 1 addition & 3 deletions src/ekat/ekat_pack_kokkos.hpp
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
#ifndef EKAT_PACK_KOKKOS_HPP
#define EKAT_PACK_KOKKOS_HPP

#include "ekat/ekat.hpp"
#include "ekat/ekat_pack.hpp"
#include "ekat/kokkos/ekat_kokkos_utils.hpp"
#include "ekat/ekat.hpp"

#include <Kokkos_Core.hpp>

#include <vector>
#include <type_traits>
Expand Down
3 changes: 1 addition & 2 deletions src/ekat/ekat_scalar_traits.hpp
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
#ifndef EKAT_SCALAR_TRAITS_HPP
#define EKAT_SCALAR_TRAITS_HPP

#include "ekat/ekat.hpp"
#include "ekat/ekat_assert.hpp"

#include <Kokkos_Core.hpp>

#include <limits>
#include <climits>
#include <typeinfo>
Expand Down
15 changes: 11 additions & 4 deletions src/ekat/ekat_session.cpp
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
#include "ekat/ekat.hpp"
#include "ekat/ekat_session.hpp"
#include "ekat/ekat_assert.hpp"
#include "ekat/util/ekat_arch.hpp"

#include <Kokkos_Core.hpp>

#include <vector>

namespace ekat_impl {
Expand All @@ -25,10 +24,18 @@ void initialize_kokkos () {
// If for some reason we're running on a GPU platform, have Cuda enabled,
// but are using a different execution space, this initialization is still
// OK. The rank gets a GPU assigned and simply will ignore it.
#ifdef KOKKOS_ENABLE_CUDA
#ifdef EKAT_ENABLE_GPU
int nd;
# if defined KOKKOS_ENABLE_CUDA
const auto ret = cudaGetDeviceCount(&nd);
if (ret != cudaSuccess) {
const bool ok = ret == cudaSuccess;
# elif defined KOKKOS_ENABLE_HIP
const auto ret = hipGetDeviceCount(&nd);
const bool ok = ret == hipSuccess;
# else
error "No valid GPU space, yet EKAT_ENABLE_GPU is defined."
# endif
if (not ok) {
// It isn't a big deal if we can't get the device count.
nd = 1;
}
Expand Down
30 changes: 18 additions & 12 deletions src/ekat/kokkos/ekat_kokkos_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,10 +85,10 @@ void parallel_reduce (const TeamMember& team,
}
});

#ifdef KOKKOS_ENABLE_CUDA
#ifdef EKAT_ENABLE_GPU
// Broadcast result to all threads by doing sum of one thread's
// non-0 value and the rest of the 0s.
Kokkos::Impl::CudaTeamMember::vector_reduce(Kokkos::Sum<ValueType>(local_tmp));
TeamMember::vector_reduce(Kokkos::Sum<ValueType>(local_tmp));
#endif

result = local_tmp;
Expand Down Expand Up @@ -292,16 +292,22 @@ struct ExeSpaceUtils {
* parallelism by having many threads per team. This is due to having more
* threads than the main kernel loop has indices.
*/
#ifdef KOKKOS_ENABLE_CUDA
#ifdef EKAT_ENABLE_GPU
template <>
struct ExeSpaceUtils<Kokkos::Cuda> {
using TeamPolicy = Kokkos::TeamPolicy<Kokkos::Cuda>;
struct ExeSpaceUtils<EkatGpuSpace> {
using TeamPolicy = Kokkos::TeamPolicy<EkatGpuSpace>;
using HostTeamPolicy = Kokkos::TeamPolicy<Kokkos::Serial>;

// Enable policy on Host only if UVM is enabled.
template<HostOrDevice HD>
struct PolicyOnHostHelper {
static constexpr bool UseUVM = std::is_same<Kokkos::Cuda::memory_space,Kokkos::CudaUVMSpace>::value;
static constexpr bool UseUVM =
#ifdef KOKKOS_ENABLE_CUDA
std::is_same<Kokkos::Cuda::memory_space,Kokkos::CudaUVMSpace>::value
#else
false
#endif
;
static_assert (HD==Device || UseUVM, "Error! Cannot get a policy on Host unless Cuda UVM is enabled in Kokkos.");
using type = typename std::conditional<HD==Host,HostTeamPolicy,TeamPolicy>::type;
};
Expand Down Expand Up @@ -521,14 +527,14 @@ class TeamUtils<ValueType, Kokkos::OpenMP> : public TeamUtilsCommonBase<ValueTyp
/*
* Specialization for Cuda execution space.
*/
#ifdef KOKKOS_ENABLE_CUDA
#ifdef EKAT_ENABLE_GPU
template <typename ValueType>
class TeamUtils<ValueType,Kokkos::Cuda> : public TeamUtilsCommonBase<ValueType,Kokkos::Cuda>
class TeamUtils<ValueType,EkatGpuSpace> : public TeamUtilsCommonBase<ValueType,EkatGpuSpace>
{
using Device = Kokkos::Device<Kokkos::Cuda, typename Kokkos::Cuda::memory_space>;
using Device = Kokkos::Device<EkatGpuSpace, typename EkatGpuSpace::memory_space>;
using flag_type = int; // this appears to be the smallest type that correctly handles atomic operations
using view_1d = typename KokkosTypes<Device>::view_1d<flag_type>;
using RandomGenerator = Kokkos::Random_XorShift64_Pool<Kokkos::Cuda>;
using RandomGenerator = Kokkos::Random_XorShift64_Pool<EkatGpuSpace>;
using rnd_type = typename RandomGenerator::generator_type;

int _num_ws_slots; // how many workspace slots (potentially more than the num of concurrent teams due to overprovision factor)
Expand All @@ -541,7 +547,7 @@ class TeamUtils<ValueType,Kokkos::Cuda> : public TeamUtilsCommonBase<ValueType,K

template <typename TeamPolicy>
TeamUtils(const TeamPolicy& policy, const double& overprov_factor = 1.0) :
TeamUtilsCommonBase<ValueType,Kokkos::Cuda>(policy),
TeamUtilsCommonBase<ValueType,EkatGpuSpace>(policy),
_num_ws_slots(this->_league_size > this->_num_teams
? (overprov_factor * this->_num_teams > this->_league_size ? this->_league_size : overprov_factor * this->_num_teams)
: this->_num_teams),
Expand Down Expand Up @@ -643,7 +649,7 @@ int strcmp(const char* first, const char* second)
using std::strlen;
using std::strcpy;
using std::strcmp;
#endif // KOKKOS_ENABLE_CUDA
#endif // EKAT_ENABLE_GPU

} // namespace impl

Expand Down
6 changes: 3 additions & 3 deletions src/ekat/util/ekat_arch.hpp
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
#ifndef EKAT_ARCH_HPP
#define EKAT_ARCH_HPP

#include "ekat/ekat.hpp"
#include <string>
#include <Kokkos_Core.hpp>

/*
* Architecture-related calls
Expand All @@ -16,8 +16,8 @@ std::string ekat_config_string();

template <typename ExeSpace>
struct OnGpu { enum : bool { value = false }; };
#ifdef KOKKOS_ENABLE_CUDA
template <> struct OnGpu<Kokkos::Cuda> { enum : bool { value = true }; };
#ifdef EKAT_ENABLE_GPU
template <> struct OnGpu<EkatGpuSpace> { enum : bool { value = true }; };
#endif

} // namespace ekat
Expand Down
6 changes: 2 additions & 4 deletions src/ekat/util/ekat_math_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,7 @@
#include "ekat/ekat_scalar_traits.hpp"
#include "ekat/ekat.hpp"

#include <Kokkos_Core.hpp>

#ifndef KOKKOS_ENABLE_CUDA
#ifndef EKAT_ENABLE_GPU
# include <cmath>
# include <algorithm>
#endif
Expand All @@ -15,7 +13,7 @@ namespace ekat {

namespace impl {

#ifdef KOKKOS_ENABLE_CUDA
#ifdef EKAT_ENABLE_GPU
// Replacements for namespace std functions that don't run on the GPU.
template <typename T>
KOKKOS_FORCEINLINE_FUNCTION
Expand Down
5 changes: 2 additions & 3 deletions src/ekat/util/ekat_test_utils.cpp
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
#include "ekat/ekat.hpp"
#include "ekat/util/ekat_test_utils.hpp"
#include "ekat/ekat_assert.hpp"
#include "ekat/util/ekat_string_utils.hpp"

#include <Kokkos_Core.hpp>

#include <cstdlib>

namespace ekat {
Expand All @@ -17,7 +16,7 @@ int get_test_device (const int mpi_rank)
// Set to -1 by default, which leaves kokkos in full control
int dev_id = -1;

#ifdef KOKKOS_ENABLE_CUDA
#ifdef EKAT_ENABLE_GPU
auto count_str = getenv("CTEST_RESOURCE_GROUP_COUNT");
if (count_str!=nullptr) {
// If CTest is setting the CTEST_RESOURCE_GROUP_COUNT variable,
Expand Down
37 changes: 27 additions & 10 deletions src/ekat/util/ekat_tridiag.hpp
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
#ifndef EKAT_TRIDIAG_HPP
#define EKAT_TRIDIAG_HPP

#include "ekat/ekat.hpp"
#include "ekat/util/ekat_math_utils.hpp"
#include "ekat/kokkos/ekat_kokkos_types.hpp"

#include <Kokkos_Core.hpp>

#include <cassert>

namespace ekat {
Expand Down Expand Up @@ -116,10 +115,11 @@ int get_team_nthr (const TeamMember& team) {
return team.team_size();
}

#ifdef KOKKOS_ENABLE_CUDA
KOKKOS_INLINE_FUNCTION
int get_thread_id_within_team (const Kokkos::Impl::CudaTeamMember& team) {
#ifdef __CUDA_ARCH__
// Impl details for Nvidia and AMD GPUs.

template <typename TeamMember> KOKKOS_FORCEINLINE_FUNCTION
int get_thread_id_within_team_gpu (const TeamMember& team) {
#if defined __CUDA_ARCH__ || defined __HIP_DEVICE_COMPILE__
// Can't use team.team_rank() here because vector direction also uses physical
// threads but TeamMember types don't expose that information.
return blockDim.x * threadIdx.y + threadIdx.x;
Expand All @@ -129,16 +129,33 @@ int get_thread_id_within_team (const Kokkos::Impl::CudaTeamMember& team) {
#endif
}

KOKKOS_INLINE_FUNCTION
int get_team_nthr (const Kokkos::Impl::CudaTeamMember& team) {
#ifdef __CUDA_ARCH__
template <typename TeamMember> KOKKOS_FORCEINLINE_FUNCTION
int get_team_nthr_gpu (const TeamMember& team) {
#if defined __CUDA_ARCH__ || defined __HIP_DEVICE_COMPILE__
return blockDim.x * blockDim.y;
#else
assert(0);
return -1;
#endif
}
#endif

#ifdef KOKKOS_ENABLE_CUDA
KOKKOS_FORCEINLINE_FUNCTION
int get_thread_id_within_team (const Kokkos::Impl::CudaTeamMember& team)
{ return get_thread_id_within_team_gpu(team); }
KOKKOS_FORCEINLINE_FUNCTION
int get_team_nthr (const Kokkos::Impl::CudaTeamMember& team)
{ return get_team_nthr_gpu(team); }
#endif // KOKKOS_ENABLE_CUDA

#ifdef KOKKOS_ENABLE_HIP
KOKKOS_FORCEINLINE_FUNCTION
int get_thread_id_within_team (const Kokkos::Impl::HIPTeamMember& team)
{ return get_thread_id_within_team_gpu(team); }
KOKKOS_FORCEINLINE_FUNCTION
int get_team_nthr (const Kokkos::Impl::HIPTeamMember& team)
{ return get_team_nthr_gpu(team); }
#endif // KOKKOS_ENABLE_HIP

// The caller must provide the team_barrier after this function returns before A
// is accessed.
Expand Down
6 changes: 3 additions & 3 deletions src/ekat/util/ekat_upper_bound.hpp
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
#ifndef EKAT_UPPER_BOUND_HPP
#define EKAT_UPPER_BOUND_HPP

#include <Kokkos_Core.hpp>
#include "ekat/ekat.hpp"

#ifndef KOKKOS_ENABLE_CUDA
#ifndef EKAT_ENABLE_GPU
# include <algorithm>
#endif

Expand Down Expand Up @@ -47,7 +47,7 @@ const T* upper_bound_impl(const T* first, const T* last, const T& value)
return first;
}

#ifdef KOKKOS_ENABLE_CUDA
#ifdef EKAT_ENABLE_GPU
template<class T>
KOKKOS_FORCEINLINE_FUNCTION
const T* upper_bound(const T* first, const T* last, const T& value)
Expand Down
4 changes: 2 additions & 2 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
### THREADING ###
#######################

if (Kokkos_ENABLE_CUDA)
if (EKAT_ENABLE_GPU)
set (DEFAULT_MAX_THREADS 1)
else ()
set (DEFAULT_MAX_THREADS 16)
Expand Down Expand Up @@ -35,7 +35,7 @@ option (EKAT_TEST_DOUBLE_PRECISION "Whether tests should be performed in double
### PACKS ###
#######################

if (Kokkos_ENABLE_CUDA)
if (EKAT_ENABLE_GPU)
set (DEFAULT_PACK_SIZE 1)
set (DEFAULT_SMALL_PACK_SIZE 1)
else ()
Expand Down
2 changes: 1 addition & 1 deletion tests/algorithm/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ configure_file (${CMAKE_CURRENT_SOURCE_DIR}/ekat_config.f.in
# Test lin interp processes
set (LIN_INTERP_SRCS
lin_interp_test.cpp
lin_interp_ref.f90
lin_interp_ref.F90
)

if (EKAT_TEST_DOUBLE_PRECISION)
Expand Down
File renamed without changes.
Loading