Skip to content
16 changes: 10 additions & 6 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -95,14 +95,18 @@ option (EKAT_DISABLE_TPL_WARNINGS "Whether we should suppress warnings when comp
set (EKAT_TPL_LIBRARIES_INTERNAL)

# A good chunk of EKAT is a utility layer over kokkos
BuildKokkos()
list (APPEND EKAT_TPL_LIBRARIES_INTERNAL ${Kokkos_LIBRARIES})
# Note: for SYCL/HIP backend, bleeding edge of kokkos/develop might be needed to test some functionality
# Kokkos_ROOT can point to custom installation directory
if (Kokkos_ROOT)
find_package(Kokkos REQUIRED)
Comment thread
bartgol marked this conversation as resolved.
set(Kokkos_LIBRARIES Kokkos::kokkos)
else()
BuildKokkos()
endif()
list (APPEND EKAT_TPL_LIBRARIES_INTERNAL ${Kokkos_LIBRARIES})

set (EKAT_ENABLE_GPU False)
if (Kokkos_ENABLE_CUDA)
set (EKAT_ENABLE_GPU True)
endif ()
if (Kokkos_ENABLE_HIP)
if (Kokkos_ENABLE_CUDA OR Kokkos_ENABLE_HIP OR Kokkos_ENABLE_SYCL)
set (EKAT_ENABLE_GPU True)
endif ()

Expand Down
6 changes: 5 additions & 1 deletion cmake/EkatSetCompilerFlags.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ macro (SetWarningFlags)
set (FFLAGS -Wall)
set (CXXFLAGS -Wall)
endif()

SetFlags(FFLAGS ${FFLAGS} CFLAGS -Wall CXXFLAGS ${CXXFLAGS})
endmacro()

Expand Down Expand Up @@ -217,6 +217,8 @@ macro (SetReleaseFlags)
string (APPEND CMAKE_CXX_FLAGS_RELEASE " -fopenmp-simd")
elseif (CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
string (APPEND CMAKE_CXX_FLAGS_RELEASE " -qopenmp-simd")
elseif (CMAKE_CXX_COMPILER_ID STREQUAL "IntelLLVM")
string (APPEND CMAKE_CXX_FLAGS_RELEASE " -fsycl -fsycl-unnamed-lambda -sycl-std=2020 -qopenmp-simd -Wsycl-strict -fsycl-device-code-split=per_kernel")
endif()
endif ()
endif ()
Expand Down Expand Up @@ -275,6 +277,8 @@ macro (SetDebugFlags)
string (APPEND CMAKE_CXX_FLAGS_DEBUG " -fopenmp-simd")
elseif (CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
string (APPEND CMAKE_CXX_FLAGS_DEBUG " -qopenmp-simd")
elseif (CMAKE_CXX_COMPILER_ID STREQUAL "IntelLLVM")
string (APPEND CMAKE_CXX_FLAGS_DEBUG " -qopenmp-simd -fsycl -fsycl-unnamed-lambda -sycl-std=2020 -Wsycl-strict -fno-sycl-dead-args-optimization")
endif()
endif ()
endif ()
Expand Down
3 changes: 3 additions & 0 deletions cmake/machine-files/jlse-gen9.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Load gen9 arch with SYCL backend for kokkos
set(CMAKE_CXX_STANDARD 17)
include (${CMAKE_CURRENT_LIST_DIR}/kokkos/intel-gen9.cmake)
2 changes: 2 additions & 0 deletions cmake/machine-files/jlse-xehp.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Load XEHP arch with SYCL backend for kokkos
include (${CMAKE_CURRENT_LIST_DIR}/kokkos/intel-xehp.cmake)
5 changes: 5 additions & 0 deletions cmake/machine-files/kokkos/intel-gen9.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
include (${CMAKE_CURRENT_LIST_DIR}/generic.cmake)

option(Kokkos_ARCH_INTEL_GEN9 "" ON)
set(Kokkos_ENABLE_SYCL TRUE CACHE BOOL "")
set(Kokkos_ENABLE_DEPRECATED_CODE_3 FALSE CACHE BOOL "")
5 changes: 5 additions & 0 deletions cmake/machine-files/kokkos/intel-xehp.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
include (${CMAKE_CURRENT_LIST_DIR}/generic.cmake)

option(Kokkos_ARCH_INTEL_XEHP "" ON)
set(Kokkos_ENABLE_SYCL TRUE CACHE BOOL "")
set(Kokkos_ENABLE_DEPRECATED_CODE_3 FALSE CACHE BOOL "")
4 changes: 3 additions & 1 deletion src/ekat/ekat.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,10 @@ static constexpr bool ekatBFB = false;
typedef Kokkos::Cuda EkatGpuSpace;
# elif defined KOKKOS_ENABLE_HIP
typedef Kokkos::Experimental::HIP EkatGpuSpace;
# elif defined KOKKOS_ENABLE_SYCL
typedef Kokkos::Experimental::SYCL EkatGpuSpace;
# else
error "EKAT does not recognize a GPU space other than Cuda and HIP."
error "EKAT does not recognize a GPU space other than Cuda, HIP and SYCL".
# endif
#endif

Expand Down
24 changes: 18 additions & 6 deletions src/ekat/ekat_assert.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,25 @@
} \
} while(0)

#define IMPL_KERNEL_THROW(condition, msg) \
do { \
if ( ! (condition) ) { \
printf("KERNEL CHECK FAILED:\n %s\n %s\n",#condition,msg); \
Kokkos::abort(""); \
} \
// SYCL cannot printf like the other backends quite yet
#ifdef __SYCL_DEVICE_ONLY__
#define IMPL_KERNEL_THROW(condition, msg) \
do { \
if ( ! (condition) ) { \
const __attribute__((opencl_constant)) char format[] = "KERNEL CHECK FAILED:\n %s %s\n"; \
sycl::ext::oneapi::experimental::printf(format,#condition,msg); \
Kokkos::abort(""); \
} \
} while (0)
#else
#define IMPL_KERNEL_THROW(condition, msg) \
do { \
if ( ! (condition) ) { \
printf("KERNEL CHECK FAILED:\n %s\n %s\n",#condition,msg); \
Kokkos::abort(""); \
} \
} while (0)
#endif

#ifndef NDEBUG
#define EKAT_ASSERT(condition) IMPL_THROW(condition, "", std::logic_error)
Expand Down
2 changes: 1 addition & 1 deletion src/ekat/ekat_config.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
// Decide whether ekat defaults to BFB behavior when possible/appropriate
#cmakedefine EKAT_FPE

// A GPU space has been enabled in Kokkos, e.g., CUDA or HIP.
// A GPU space has been enabled in Kokkos, e.g., CUDA or HIP OR SYCL.
#cmakedefine EKAT_ENABLE_GPU

#endif // EKAT_CONFIG_H
12 changes: 12 additions & 0 deletions src/ekat/ekat_session.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,18 @@ void initialize_kokkos () {
# elif defined KOKKOS_ENABLE_HIP
const auto ret = hipGetDeviceCount(&nd);
const bool ok = ret == hipSuccess;
# elif defined KOKKOS_ENABLE_SYCL
nd = 0;
auto gpu_devs = sycl::device::get_devices(sycl::info::device_type::gpu);
Comment thread
bartgol marked this conversation as resolved.
for (auto &dev : gpu_devs) {
if (dev.get_info<sycl::info::device::partition_max_sub_devices>() > 0) {
auto subDevs = dev.create_sub_devices<sycl::info::partition_property::partition_by_affinity_domain>(sycl::info::partition_affinity_domain::numa);
nd += subDevs.size();
} else {
nd++;
}
}
const bool ok = true;
# else
error "No valid GPU space, yet EKAT_ENABLE_GPU is defined."
# endif
Expand Down
4 changes: 2 additions & 2 deletions src/ekat/kokkos/ekat_kokkos_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ void parallel_reduce (const TeamMember& team,
#ifdef EKAT_ENABLE_GPU
// Broadcast result to all threads by doing sum of one thread's
// non-0 value and the rest of the 0s.
TeamMember::vector_reduce(Kokkos::Sum<ValueType>(local_tmp));
team.vector_reduce(Kokkos::Sum<ValueType>(local_tmp));
#endif

result = local_tmp;
Expand Down Expand Up @@ -525,7 +525,7 @@ class TeamUtils<ValueType, Kokkos::OpenMP> : public TeamUtilsCommonBase<ValueTyp
#endif

/*
* Specialization for Cuda execution space.
* Specialization for CUDA, HIP and SYCL execution space.
*/
#ifdef EKAT_ENABLE_GPU
template <typename ValueType>
Expand Down
4 changes: 4 additions & 0 deletions src/ekat/util/ekat_arch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ std::string ekat_config_string () {
<< " compiler id: " <<
#if defined __INTEL_COMPILER
"Intel\n"
#elif defined __INTEL_LLVM_COMPILER
"IntelLLVM\n"
#elif defined __HIPCC__
"AMD Clang\n"
#elif defined __GNUG__
"GCC\n"
#else
Expand Down
16 changes: 16 additions & 0 deletions src/ekat/util/ekat_tridiag.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,10 @@ int get_thread_id_within_team_gpu (const TeamMember& team) {
// Can't use team.team_rank() here because vector direction also uses physical
// threads but TeamMember types don't expose that information.
return blockDim.x * threadIdx.y + threadIdx.x;
#elif defined(__SYCL_DEVICE_ONLY__)
auto item = team.item();
return static_cast<int>(item.get_local_range(1) * item.get_local_id(0)
+ item.get_local_id(1));
#else
assert(0);
return -1;
Expand All @@ -133,6 +137,9 @@ template <typename TeamMember> KOKKOS_FORCEINLINE_FUNCTION
int get_team_nthr_gpu (const TeamMember& team) {
#if defined __CUDA_ARCH__ || defined __HIP_DEVICE_COMPILE__
return blockDim.x * blockDim.y;
#elif defined __SYCL_DEVICE_ONLY__
auto item = team.item();
return static_cast<int>(item.get_local_range(0) * item.get_local_range(1));
#else
assert(0);
return -1;
Expand All @@ -157,6 +164,15 @@ int get_team_nthr (const Kokkos::Impl::HIPTeamMember& team)
{ return get_team_nthr_gpu(team); }
#endif // KOKKOS_ENABLE_HIP

#ifdef KOKKOS_ENABLE_SYCL
KOKKOS_FORCEINLINE_FUNCTION
int get_thread_id_within_team (const Kokkos::Impl::SYCLTeamMember& team)
{ return get_thread_id_within_team_gpu(team); }
KOKKOS_FORCEINLINE_FUNCTION
int get_team_nthr (const Kokkos::Impl::SYCLTeamMember& team)
{ return get_team_nthr_gpu(team); }
#endif // KOKKOS_ENABLE_SYCL

// The caller must provide the team_barrier after this function returns before A
// is accessed.
template <typename TeamMember, typename TridiagDiag>
Expand Down
8 changes: 6 additions & 2 deletions tests/kokkos/kokkos_utils_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,7 @@ void test_view_reduction(const Scalar a=Scalar(0.0), const int begin=0, const in
using Device = ekat::DefaultDevice;
using MemberType = typename ekat::KokkosTypes<Device>::MemberType;
using ExeSpace = typename ekat::KokkosTypes<Device>::ExeSpace;

using PackType = ekat::Pack<Scalar, VectorSize>;
using ViewType = Kokkos::View<PackType*,ExeSpace>;

Expand All @@ -267,7 +267,7 @@ void test_view_reduction(const Scalar a=Scalar(0.0), const int begin=0, const in
Scalar serial_result = Scalar(a);
ViewType data("data", view_length);
const auto data_h = Kokkos::create_mirror_view(data);
auto raw = data_h.data();
auto raw = data_h.data();
for (int k = 0; k < view_length; ++k) {
for (int p = 0; p < VectorSize; ++p) {
const int scalar_index = k*VectorSize+p;
Expand All @@ -292,7 +292,11 @@ void test_view_reduction(const Scalar a=Scalar(0.0), const int begin=0, const in
int team_size = ExeSpace::concurrency();
#ifdef EKAT_ENABLE_GPU
ExeSpace temp_space;
#ifdef KOKKOS_ENABLE_SYCL
auto num_sm = temp_space.impl_internal_space_instance()->m_queue->get_device().get_info<sycl::info::device::max_compute_units>();
#else
auto num_sm = temp_space.impl_internal_space_instance()->m_multiProcCount;
#endif
team_size /= (ekat::is_single_precision<Real>::value ? num_sm*64 : num_sm*32);
#endif

Expand Down