Skip to content
Merged
Show file tree
Hide file tree
Changes from 24 commits
Commits
Show all changes
65 commits
Select commit Hold shift + click to select a range
3c1b671
tweaks for spock
Aug 19, 2021
7604fc4
cache file
Aug 19, 2021
c6422dc
fix build of unit tests without pio
Sep 1, 2021
b3e0af2
added define for hip build
Sep 2, 2021
684c633
more fixes, intermediate
Sep 4, 2021
24f3dc4
intermediate
Sep 22, 2021
3d3cd6d
revert nonsgnificant changes
Nov 10, 2021
dfee17b
revert nonsgnificant changes
Nov 10, 2021
3523d5d
reverting
Nov 10, 2021
0a2d96d
clean up
Nov 10, 2021
5460f98
clean up
Nov 10, 2021
ec3308a
reverting
Nov 10, 2021
9093346
fix ifdef, add prnts for the last pair
Nov 10, 2021
ad20860
mods for cmake file, comment out printing pairs
Nov 10, 2021
2f173cf
set the last pair for hip manuall
Nov 10, 2021
b97eb94
add flags
Nov 11, 2021
bf6f042
newer environment
Feb 5, 2022
6689abd
update kokkos build made with a new env
Apr 6, 2022
cfb8dfd
minor, 1 comment on LB, commenting out pow call that wasnt implemented
Apr 6, 2022
cced5d9
add crusher cmake
Jun 13, 2022
53d3e71
Merge 'origin/master' into oksanaguba/homme/spock
Jun 13, 2022
41f0c42
fix bug from merging
Jun 21, 2022
62db3fc
clean comments, remove if-statement in preqx push logic
oksanaguba Jun 22, 2022
1c042ab
switch to c++17
oksanaguba Jun 22, 2022
076805f
remove unused cmake var enable_cuda
oksanaguba Jun 22, 2022
d1cc873
change if HIP to ifdef HIP
oksanaguba Jun 22, 2022
ecf447f
fix cprnc path when not using cprnc_dir
oksanaguba Jun 23, 2022
ba358e3
summit bfb file for tests
Jun 27, 2022
f6e28c8
minor fix in summit cmake
Jun 28, 2022
91b396a
theta no-compose build
Jun 28, 2022
026a9f1
remove opt flag for 1 remap file
Jul 1, 2022
f26c0f4
Merge 'origin/master' into oksanaguba/homme/spock
Jul 8, 2022
5f41bba
fix to the team size in diagnostics, not verified on crusher yet
Jul 12, 2022
b4a1e39
diagn for repro_sum, not verified on crusher
Jul 12, 2022
babc296
fixing in vs inout issue for nstep_c
Jul 15, 2022
811933a
remove noopt flag from 1 file
Jul 19, 2022
ccb4f03
use inout for some vars
Jul 19, 2022
90ea3f2
start on organizing gpu build vars
Jul 20, 2022
24ca04c
remove duplicated code and fix ifdef
Jul 20, 2022
3a4b951
replace Hommexx_Cuda with HommexxGPU
Jul 20, 2022
0b01499
clean up and fixes, to be tested
Jul 20, 2022
cc33ef0
fixing bfb; confirmed for fhs1 test
Jul 21, 2022
a4ab895
Hommexx: Fix some CMake issues.
ambrad Aug 8, 2022
f0bf95e
Hommexx/SL: Clean up use of HOMME_ENABLE_COMPOSE.
ambrad Aug 8, 2022
1deaa79
Hommexx/SL: Generalize Cuda to GPU spaces.
ambrad Aug 8, 2022
632c46a
Hommexx: Update DIRK solver's tridiag solver to handle HIP.
ambrad Aug 8, 2022
8105e7a
Hommexx: Fix a warning in VR.
ambrad Aug 8, 2022
6c4cf89
Hommexx: Make more link lines use link_to_kokkos.
ambrad Aug 8, 2022
44dade3
Homme: Slightly relax a tolerance in a unit test.
ambrad Aug 8, 2022
62fc2e3
Homme/SL: Slightly relax a tolerance in a unit test.
ambrad Aug 9, 2022
b304124
Hommexx/SL: Fix a variable for lambda capture in two spots.
ambrad Aug 9, 2022
a0af755
Hommexx: Get standalone Homme to build its own Kokkos.
ambrad Aug 9, 2022
d3adb96
Hommexx: CUDA_ARCH equivalent in GllFvRemap.
ambrad Aug 23, 2022
406a01e
fix preqx build
oksanaguba Sep 13, 2022
28711ac
remove string args from prim_state calls
Sep 13, 2022
885b3bc
disable code around energy vars that are not used in cxx version
Sep 13, 2022
e7b7eb5
remove string args from glob_norms calls
Sep 13, 2022
d0cef39
a comment
Sep 19, 2022
0c0bc40
cleaning team size logic
Sep 19, 2022
e3bd20a
clean 1 line for HOMMEXX_ENABLE_GPU
Sep 19, 2022
eff19f5
address comments
Sep 20, 2022
055872c
Merge branch 'origin/master' into oksanaguba/homme/spock
Sep 20, 2022
4802901
fix linking for ut after merge
oksanaguba Sep 20, 2022
b3f04b5
fix homme standalone build not via cime
Sep 28, 2022
9522305
Hommexx/SL: (Re)fix a variable for lambda capture in two spots.
ambrad Oct 8, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 11 additions & 3 deletions components/homme/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,9 @@ IF (${HOMME_USE_KOKKOS})
SET (HOMMEXX_EXEC_SPACE "Default" CACHE STRING "Select the kokkos exec space")
Comment thread
oksanaguba marked this conversation as resolved.

STRING (TOUPPER ${HOMMEXX_EXEC_SPACE} HOMMEXX_EXEC_SPACE_UPPER)
IF (${HOMMEXX_EXEC_SPACE_UPPER} STREQUAL "CUDA")
IF (${HOMMEXX_EXEC_SPACE_UPPER} STREQUAL "HIP")
SET (HOMMEXX_HIP_SPACE ON)
ELSEIF (${HOMMEXX_EXEC_SPACE_UPPER} STREQUAL "CUDA")
SET (HOMMEXX_CUDA_SPACE ON)
ELSEIF (${HOMMEXX_EXEC_SPACE_UPPER} STREQUAL "OPENMP")
SET (HOMMEXX_OPENMP_SPACE ON)
Expand Down Expand Up @@ -276,14 +278,20 @@ MESSAGE(STATUS "Linker Flags = ${CMAKE_EXE_LINKER_FLAGS}")

IF (${HOMME_USE_KOKKOS})

IF (CUDA_BUILD)
if(HIP)
add_definitions(-DHIP_BUILD)
#does not seem to be needed because kokkos::default will be picked up
#SET (HOMMEXX_HIP_SPACE ON)
endif()

IF (CUDA_BUILD OR HIP)
SET (DEFAULT_VECTOR_SIZE 1)
ELSE ()
SET (DEFAULT_VECTOR_SIZE 8)
ENDIF()

SET (HOMMEXX_VECTOR_SIZE ${DEFAULT_VECTOR_SIZE} CACHE STRING
"If AVX or Cuda don't take priority, use this software vector size.")
"If AVX or Cuda or HIP don't take priority, use this software vector size.")

IF (CMAKE_BUILD_TYPE_UPPER MATCHES "DEBUG" OR CMAKE_BUILD_TYPE_UPPER MATCHES "RELWITHDEBINFO")
SET (HOMMEXX_DEBUG ON)
Expand Down
8 changes: 4 additions & 4 deletions components/homme/cmake/SetCompilerFlags.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -67,11 +67,11 @@ IF (${HOMME_USE_CXX})
SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g")

INCLUDE(CheckCXXCompilerFlag)
CHECK_CXX_COMPILER_FLAG("-std=c++14" CXX14_SUPPORTED)
IF (${CXX14_SUPPORTED})
SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14")
CHECK_CXX_COMPILER_FLAG("-std=c++17" CXX17_SUPPORTED)
IF (${CXX17_SUPPORTED})

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FYI, some CMake versions may issue a warning here, depending on policy settings. Unless old policy settings are used, you should get a warning like

if given arguments: "TRUE" An argument named "TRUE" appears in a conditional statement.

The clean cmake syntax is to use if (VAR) rather than if (${VAR}).

SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17")

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are we sure C++17 is actually needed? I had the impression that it was thought Crusher requires it but then it turns out it does not.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now I do not remember why it is here. Do you prefer 14?

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For cleanliness we should not mandate more than is technically required. I believe for a while weaver could not handle C++17, but that may have changed now.

ELSEIF (${HOMME_USE_KOKKOS})
MESSAGE (FATAL_ERROR "Kokkos needs C++14, but the C++ compiler does not support it.")
MESSAGE (FATAL_ERROR "Kokkos needs C++17, but the C++ compiler does not support it.")
ENDIF ()
CHECK_CXX_COMPILER_FLAG("-cxxlib" CXXLIB_SUPPORTED)
IF (CXXLIB_SUPPORTED)
Expand Down
64 changes: 64 additions & 0 deletions components/homme/cmake/machineFiles/crusher-gpumpi.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Initial-cache settings for building HOMME on Crusher (GPU + MPI).
# Intended to be loaded with `cmake -C <this file> <path to components/homme>`.
#
# NOTE(review): the example commands below still reference `bsub` and
# summit.cmake; confirm the equivalents for this machine.
#interactive job
#bsub -W 2:00 -nnodes 1 -P cli115 -Is /bin/bash


#cmake -C ~/acme-fork-lb/components/homme/cmake/machineFiles/summit.cmake -DHOMMEXX_MPI_ON_DEVICE=FALSE ~/acme-fork-lb/components/homme/

#SET (HOMMEXX_MPI_ON_DEVICE FALSE CACHE BOOL "")
SET (HOMMEXX_CUDA_MAX_WARP_PER_TEAM "16" CACHE STRING "")

# Library roots are taken from the environment. They are directories, so the
# cache type is PATH (FILEPATH is meant for single files).
SET (NETCDF_DIR "$ENV{OLCF_NETCDF_FORTRAN_ROOT}" CACHE PATH "")
SET (HDF5_DIR "$ENV{OLCF_HDF5_ROOT}" CACHE PATH "")

SET(BUILD_HOMME_WITHOUT_PIOLIBRARY TRUE CACHE BOOL "")

SET(HOMME_FIND_BLASLAPACK TRUE CACHE BOOL "")

# WITH_PNETCDF is an on/off switch, so it is cached as BOOL rather than FILEPATH.
SET(WITH_PNETCDF FALSE CACHE BOOL "")

SET(USE_QUEUING FALSE CACHE BOOL "")

# NOTE(review): ENABLE_CUDA may not be read anywhere in the build (only
# KOKKOS_ENABLE_CUDA appears to be); confirm and remove if unused.
SET(ENABLE_CUDA FALSE CACHE BOOL "")

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I do not see variable ENABLE_CUDA used anywhere, only KOKKOS_ENABLE_CUDA is used. I will remove it from all machine files.


# Build both Kokkos dycore targets; COMPOSE is disabled for this machine.
SET(BUILD_HOMME_PREQX_KOKKOS TRUE CACHE BOOL "")
SET(BUILD_HOMME_THETA_KOKKOS TRUE CACHE BOOL "")
SET(HOMME_ENABLE_COMPOSE FALSE CACHE BOOL "")

#SET (HOMMEXX_BFB_TESTING TRUE CACHE BOOL "")

SET(USE_TRILINOS OFF CACHE BOOL "")

# Selects the HIP code path in the HOMME CMake logic.
SET(HIP TRUE CACHE BOOL "")
#SET(Kokkos_ENABLE_OPENMP OFF CACHE BOOL "")
#SET(Kokkos_ENABLE_CUDA OFF CACHE BOOL "")
#SET(Kokkos_ENABLE_CUDA_LAMBDA OFF CACHE BOOL "")
#SET(Kokkos_ARCH_VEGA908 ON CACHE BOOL "")
#SET(Kokkos_ENABLE_EXPLICIT_INSTANTIATION OFF CACHE BOOL "")

SET(CMAKE_C_COMPILER "cc" CACHE STRING "")
SET(CMAKE_Fortran_COMPILER "ftn" CACHE STRING "")
SET(CMAKE_CXX_COMPILER "hipcc" CACHE STRING "")
#SET(CMAKE_CXX_COMPILER "/ccs/home/onguba/kokkos/bin/nvcc_wrapper" CACHE STRING "")
# NOTE(review): this points at a pre-built Kokkos in a user home directory;
# other users will need to override it.
SET(E3SM_KOKKOS_PATH "/ccs/home/onguba/kokkos-crusher-june2022/bld-hipcc" CACHE STRING "")

# NOTE: hard-coding the MPICH install location (and its version) is a stop-gap,
# not the proper way to locate MPI.
SET(MPICH_DIR "/opt/cray/pe/mpich/8.1.12/ofi/crayclang/10.0" CACHE STRING "")

# MPI include/link flags that are injected into every ADD_*_FLAGS variable below.
SET(Extrae_LIBRARY "-I${MPICH_DIR}/include -L${MPICH_DIR}/lib -lmpi -L/opt/cray/pe/mpich/8.1.12/gtl/lib -lmpi_gtl_hsa" CACHE STRING "")

SET(ADD_Fortran_FLAGS "-O3 -DNDEBUG ${Extrae_LIBRARY} -I${E3SM_KOKKOS_PATH}/include -L${E3SM_KOKKOS_PATH}/lib64" CACHE STRING "")
SET(ADD_C_FLAGS "-O3 -DNDEBUG ${Extrae_LIBRARY} -I${E3SM_KOKKOS_PATH}/include -L${E3SM_KOKKOS_PATH}/lib64" CACHE STRING "")
# NOTE(review): -std=c++14 is set explicitly here; confirm it agrees with the
# -std flag chosen by the shared compiler-flags logic.
SET(ADD_CXX_FLAGS "-std=c++14 -O3 -DNDEBUG --amdgpu-target=gfx90a -fno-gpu-rdc ${Extrae_LIBRARY} -I${E3SM_KOKKOS_PATH}/include -L${E3SM_KOKKOS_PATH}/lib64" CACHE STRING "")
SET(ADD_LINKER_FLAGS "-O3 -DNDEBUG ${Extrae_LIBRARY} -I${E3SM_KOKKOS_PATH}/include -L${E3SM_KOKKOS_PATH}/lib64" CACHE STRING "")


# All OpenMP threading options are turned off for this build.
set (ENABLE_OPENMP OFF CACHE BOOL "")
set (ENABLE_COLUMN_OPENMP OFF CACHE BOOL "")
set (ENABLE_HORIZ_OPENMP OFF CACHE BOOL "")

set (HOMME_TESTING_PROFILE "dev" CACHE STRING "")

set (USE_NUM_PROCS 4 CACHE STRING "")

#set (OPT_FLAGS "-mcpu=power9 -mtune=power9" CACHE STRING "")
# Launcher options are plain text, not a file, so the cache type is STRING.
SET (USE_MPI_OPTIONS "--bind-to core" CACHE STRING "")
64 changes: 64 additions & 0 deletions components/homme/cmake/machineFiles/spock-gpumpi.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Initial-cache settings for building HOMME on Spock (GPU + MPI).
# Intended to be loaded with `cmake -C <this file> <path to components/homme>`.
#
# NOTE(review): the example commands below still reference `bsub` and
# summit.cmake; confirm the equivalents for this machine.
#interactive job
#bsub -W 2:00 -nnodes 1 -P cli115 -Is /bin/bash


#cmake -C ~/acme-fork-lb/components/homme/cmake/machineFiles/summit.cmake -DHOMMEXX_MPI_ON_DEVICE=FALSE ~/acme-fork-lb/components/homme/

#SET (HOMMEXX_MPI_ON_DEVICE FALSE CACHE BOOL "")
SET (HOMMEXX_CUDA_MAX_WARP_PER_TEAM "16" CACHE STRING "")

# Library roots are taken from the environment. They are directories, so the
# cache type is PATH (FILEPATH is meant for single files).
SET (NETCDF_DIR "$ENV{OLCF_NETCDF_FORTRAN_ROOT}" CACHE PATH "")
SET (HDF5_DIR "$ENV{OLCF_HDF5_ROOT}" CACHE PATH "")

SET(BUILD_HOMME_WITHOUT_PIOLIBRARY TRUE CACHE BOOL "")

SET(HOMME_FIND_BLASLAPACK TRUE CACHE BOOL "")

# WITH_PNETCDF is an on/off switch, so it is cached as BOOL rather than FILEPATH.
SET(WITH_PNETCDF FALSE CACHE BOOL "")

SET(USE_QUEUING FALSE CACHE BOOL "")

# NOTE(review): ENABLE_CUDA may not be read anywhere in the build (only
# KOKKOS_ENABLE_CUDA appears to be); confirm and remove if unused.
SET(ENABLE_CUDA FALSE CACHE BOOL "")

# Build both Kokkos dycore targets; COMPOSE is disabled for this machine.
SET(BUILD_HOMME_PREQX_KOKKOS TRUE CACHE BOOL "")
SET(BUILD_HOMME_THETA_KOKKOS TRUE CACHE BOOL "")
SET(HOMME_ENABLE_COMPOSE FALSE CACHE BOOL "")

#SET (HOMMEXX_BFB_TESTING TRUE CACHE BOOL "")

SET(USE_TRILINOS OFF CACHE BOOL "")

# Selects the HIP code path in the HOMME CMake logic.
SET(HIP TRUE CACHE BOOL "")
#SET(Kokkos_ENABLE_OPENMP OFF CACHE BOOL "")
#SET(Kokkos_ENABLE_CUDA OFF CACHE BOOL "")
#SET(Kokkos_ENABLE_CUDA_LAMBDA OFF CACHE BOOL "")
#SET(Kokkos_ARCH_VEGA908 ON CACHE BOOL "")
#SET(Kokkos_ENABLE_EXPLICIT_INSTANTIATION OFF CACHE BOOL "")

SET(CMAKE_C_COMPILER "cc" CACHE STRING "")
SET(CMAKE_Fortran_COMPILER "ftn" CACHE STRING "")
SET(CMAKE_CXX_COMPILER "hipcc" CACHE STRING "")
#SET(CMAKE_CXX_COMPILER "/ccs/home/onguba/kokkos/bin/nvcc_wrapper" CACHE STRING "")
# NOTE(review): this points at a pre-built Kokkos in a user home directory;
# other users will need to override it.
SET(E3SM_KOKKOS_PATH "/ccs/home/onguba/kokkos-example-spock-hipcc2/bld-hipcc" CACHE STRING "")

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I had removed dependence on onguba-based things in the crusher file. spock is I think irrelevant at this point, so I don't mind this line. But we could also consider not including the spock file in this PR. Up to you, though.


# NOTE: hard-coding the MPICH install location (and its version) is a stop-gap,
# not the proper way to locate MPI.
SET(MPICH_DIR "/opt/cray/pe/mpich/8.1.12/ofi/crayclang/10.0" CACHE STRING "")

# MPI include/link flags that are injected into every ADD_*_FLAGS variable below.
SET(Extrae_LIBRARY "-I${MPICH_DIR}/include -L${MPICH_DIR}/lib -lmpi -L/opt/cray/pe/mpich/8.1.12/gtl/lib -lmpi_gtl_hsa" CACHE STRING "")

# E3SM_KOKKOS_PATH is expected to be set earlier in this file.
SET(ADD_Fortran_FLAGS "-O3 -DNDEBUG ${Extrae_LIBRARY} -I${E3SM_KOKKOS_PATH}/include -L${E3SM_KOKKOS_PATH}/lib64" CACHE STRING "")
SET(ADD_C_FLAGS "-O3 -DNDEBUG ${Extrae_LIBRARY} -I${E3SM_KOKKOS_PATH}/include -L${E3SM_KOKKOS_PATH}/lib64" CACHE STRING "")
SET(ADD_CXX_FLAGS "-O3 -DNDEBUG --amdgpu-target=gfx908 -fno-gpu-rdc ${Extrae_LIBRARY} -I${E3SM_KOKKOS_PATH}/include -L${E3SM_KOKKOS_PATH}/lib64" CACHE STRING "")
SET(ADD_LINKER_FLAGS "-O3 -DNDEBUG ${Extrae_LIBRARY} -I${E3SM_KOKKOS_PATH}/include -L${E3SM_KOKKOS_PATH}/lib64" CACHE STRING "")


# All OpenMP threading options are turned off for this build.
set (ENABLE_OPENMP OFF CACHE BOOL "")
set (ENABLE_COLUMN_OPENMP OFF CACHE BOOL "")
set (ENABLE_HORIZ_OPENMP OFF CACHE BOOL "")

set (HOMME_TESTING_PROFILE "dev" CACHE STRING "")

set (USE_NUM_PROCS 4 CACHE STRING "")

#set (OPT_FLAGS "-mcpu=power9 -mtune=power9" CACHE STRING "")
# Launcher options are plain text, not a file, so the cache type is STRING.
SET (USE_MPI_OPTIONS "--bind-to core" CACHE STRING "")
11 changes: 6 additions & 5 deletions components/homme/cmake/machineFiles/summit-gpumpi.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@
#bsub -W 2:00 -nnodes 1 -P cli115 -Is /bin/bash


#cmake -C ~/acme-fork-lb/components/homme/cmake/machineFiles/summit.cmake -DHOMMEXX_MPI_ON_DEVICE=FALSE ~/acme-fork-lb/components/homme/

#SET (HOMMEXX_MPI_ON_DEVICE FALSE CACHE BOOL "")
SET (HOMMEXX_CUDA_MAX_WARP_PER_TEAM "16" CACHE STRING "")

SET (NETCDF_DIR $ENV{OLCF_NETCDF_FORTRAN_ROOT} CACHE FILEPATH "")
SET (HDF5_DIR $ENV{OLCF_HDF5_ROOT} CACHE FILEPATH "")
#SET (NETCDF_DIR $ENV{OLCF_NETCDF_FORTRAN_ROOT} CACHE FILEPATH "")
#SET (NetCDF_Fortran_PATH "/sw/summit/spack-envs/base/opt/linux-rhel8-ppc64le/gcc-7.5.0/netcdf-fortran-4.4.5-e2hkh7w3253wz5uubjxbbvh56a7xjl7n" CACHE STRING "")
#SET(NetCDF_C_LIBRARY "/sw/summit/spack-envs/base/opt/linux-rhel8-ppc64le/gcc-7.5.0/netcdf-c-4.8.0-pwi4jbrnwv4lrrjxdu5czbos5uvvjgvr/lib" CACHE STRING "")
#SET(NetCDF_C_INCLUDE_DIR "/sw/summit/spack-envs/base/opt/linux-rhel8-ppc64le/gcc-7.5.0/netcdf-c-4.8.0-pwi4jbrnwv4lrrjxdu5czbos5uvvjgvr/include" CACHE STRING "")

SET(BUILD_HOMME_WITHOUT_PIOLIBRARY TRUE CACHE BOOL "")

Expand All @@ -22,6 +22,7 @@ SET(ENABLE_CUDA FALSE CACHE BOOL "")

SET(BUILD_HOMME_PREQX_KOKKOS TRUE CACHE BOOL "")
SET(BUILD_HOMME_THETA_KOKKOS TRUE CACHE BOOL "")
#SET(HOMME_ENABLE_COMPOSE TRUE CACHE BOOL "")
SET(HOMME_ENABLE_COMPOSE FALSE CACHE BOOL "")

#SET (HOMMEXX_BFB_TESTING TRUE CACHE BOOL "")
Expand All @@ -36,7 +37,7 @@ SET(Kokkos_ENABLE_EXPLICIT_INSTANTIATION OFF CACHE BOOL "")

SET(CMAKE_C_COMPILER "mpicc" CACHE STRING "")
SET(CMAKE_Fortran_COMPILER "mpifort" CACHE STRING "")
SET(CMAKE_CXX_COMPILER "/ccs/home/onguba/kokkos/bin/nvcc_wrapper" CACHE STRING "")
SET(CMAKE_CXX_COMPILER "/ccs/home/onguba/acme-MASTER-GB/externals/kokkos/bin/nvcc_wrapper" CACHE STRING "")

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

clean this too


set (ENABLE_OPENMP OFF CACHE BOOL "")
set (ENABLE_COLUMN_OPENMP OFF CACHE BOOL "")
Expand Down
4 changes: 2 additions & 2 deletions components/homme/src/share/cxx/Config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
# endif
#endif

#if ! defined HOMMEXX_CUDA_SPACE && ! defined HOMMEXX_OPENMP_SPACE && ! defined HOMMEXX_THREADS_SPACE && ! defined HOMMEXX_SERIAL_SPACE
#if ! defined HOMMEXX_CUDA_SPACE && ! defined HOMMEXX_OPENMP_SPACE && ! defined HOMMEXX_THREADS_SPACE && ! defined HOMMEXX_SERIAL_SPACE && ! defined HOMMEXX_HIP_SPACE
# define HOMMEXX_DEFAULT_SPACE
#endif

Expand All @@ -31,7 +31,7 @@

#include <Kokkos_Core.hpp>

#ifdef KOKKOS_ENABLE_CUDA
#if defined(KOKKOS_ENABLE_CUDA) || (HIP_BUILD)
# ifndef HOMMEXX_CUDA_MIN_WARP_PER_TEAM
# define HOMMEXX_CUDA_MIN_WARP_PER_TEAM 8
# endif
Expand Down
2 changes: 1 addition & 1 deletion components/homme/src/share/cxx/Dimensions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
namespace Homme {

// Until whenever CUDA supports constexpr properly
#ifdef CUDA_BUILD
#if defined(CUDA_BUILD) || (HIP_BUILD)

#ifdef CAM
#define QSIZE_D PCNST
Expand Down
19 changes: 18 additions & 1 deletion components/homme/src/share/cxx/EulerStepFunctorImpl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,21 @@ struct SerialLimiter<Kokkos::Cuda> {
};
#endif

#if HIP_BUILD
// HIP specialization of SerialLimiter: run() does no limiting work and
// unconditionally aborts, because it should not be called on GPU.
template <>
struct SerialLimiter<Kokkos::Experimental::HIP> {
  // The arguments are accepted only so the signature matches what callers
  // pass; none of them is read, hence they are left unnamed.
  template <int limiter_option,
            typename ArrayGll,
            typename ArrayGllLvl,
            typename Array2Lvl,
            typename Array2GllLvl>
  KOKKOS_INLINE_FUNCTION
  static void run (const ArrayGll&     /* sphweights */,
                   const ArrayGllLvl&  /* idpmass */,
                   const Array2Lvl&    /* iqlim */,
                   const ArrayGllLvl&  /* iptens */,
                   const Array2GllLvl& /* irwrk */)
  {
    Kokkos::abort("SerialLimiter::run: Should not be called on GPU.");
  }
};
#endif


class EulerStepFunctorImpl {
struct EulerStepData {
EulerStepData ()
Expand Down Expand Up @@ -423,7 +438,9 @@ class EulerStepFunctorImpl {
ExecSpace::impl_static_fence();
m_kernel_will_run_limiters = true;
Kokkos::parallel_for(
Homme::get_default_team_policy<ExecSpace, AALTracerPhase>(
//to play with launch bounds
//Homme::get_default_team_policy<ExecSpace, AALTracerPhase, Kokkos::LaunchBounds<128,1> >(
Homme::get_default_team_policy<ExecSpace, AALTracerPhase >(
m_geometry.num_elems() * m_data.qsize, m_tpref),
*this);
ExecSpace::impl_static_fence();
Expand Down
43 changes: 43 additions & 0 deletions components/homme/src/share/cxx/ExecSpaceDefs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@
# include <cuda.h>
#endif

#if HIP_BUILD
#include <hip/hip_runtime.h>
#endif

namespace Homme {

// Since we're initializing from inside a Fortran code and don't have access to
Expand Down Expand Up @@ -49,6 +53,22 @@ void initialize_kokkos () {
str.back() = 0;
args.push_back(const_cast<char*>(str.data()));
#endif
#if HIP_BUILD
int nd;
const auto ret = hipGetDeviceCount(&nd);
if (ret != hipSuccess) {
// It isn't a big deal if we can't get the device count.
nd = 1;
}
std::stringstream ss;

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will remove duplicated code here.

ss << "--kokkos-num-devices=" << nd;
const auto key = ss.str();
std::vector<char> str(key.size()+1);
std::copy(key.begin(), key.end(), str.begin());
str.back() = 0;
args.push_back(const_cast<char*>(str.data()));
#endif


const char* silence = "--kokkos-disable-warnings";
args.push_back(const_cast<char*>(silence));
Expand Down Expand Up @@ -103,6 +123,7 @@ team_num_threads_vectors_for_gpu (
assert(num_warps_total >= max_num_warps);
assert(tp.max_threads_usable >= 1 && tp.max_vectors_usable >= 1);

#if !HIP_BUILD
int num_warps;
if (tp.prefer_larger_team) {
const int num_warps_usable =
Expand Down Expand Up @@ -136,15 +157,27 @@ team_num_threads_vectors_for_gpu (
const int num_threads = ( (tp.max_threads_usable > num_device_threads) ?
num_device_threads :
tp.max_threads_usable );

//printf("tp.prefer_threads: %4d %4d \n",num_threads, prevpow2(num_device_threads / num_threads));

return std::make_pair( num_threads,
prevpow2(num_device_threads / num_threads) );
} else {
const int num_vectors = prevpow2( (tp.max_vectors_usable > num_device_threads) ?
num_device_threads :
tp.max_vectors_usable );

//printf("NOT tp. prefer_threads: %4d %4d \n",num_device_threads / num_vectors,
// num_vectors);

return std::make_pair( num_device_threads / num_vectors,
num_vectors );
}
#else
//manual override for HIP
return std::make_pair( 16,4 );

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this line meant to go in?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. Last time I looked, I could not figure out what the warp-related quantities are for MIxxx GPUs, so it is not clear what pair the logic above would produce. This pair was chosen based on performance runs.

#endif

}

} // namespace Parallel
Expand Down Expand Up @@ -172,12 +205,22 @@ team_num_threads_vectors (const int num_parallel_iterations,
# else
const int max_num_warps = HOMMEXX_CUDA_MAX_WARP_PER_TEAM; //Kokkos::Impl::cuda_internal_maximum_grid_count();
# endif

#elif HIP_BUILD

//use 64 wavefronts per CU and 120 CUs
const int num_warps_device = 120*64; // no such thing Kokkos::Impl::hip_internal_maximum_warp_count();
const int max_num_warps = 40; //cores per CU, SM ///HOMMEXX_CUDA_MAX_WARP_PER_TEAM;
const int num_threads_warp = Kokkos::Experimental::Impl::HIPTraits::WarpSize;

#else

// I want thread-distribution rules to be unit-testable even when Cuda is
// off. Thus, make up a P100-like machine:
const int num_warps_device = 1792;
const int num_threads_warp = 32;
const int max_num_warps = 16;

#endif

return Parallel::team_num_threads_vectors_for_gpu(
Expand Down
Loading