-
Notifications
You must be signed in to change notification settings - Fork 466
homme HIP changes #5039
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
homme HIP changes #5039
Changes from 24 commits
3c1b671
7604fc4
c6422dc
b3e0af2
684c633
24f3dc4
3d3cd6d
dfee17b
3523d5d
0a2d96d
5460f98
ec3308a
9093346
ad20860
2f173cf
b97eb94
bf6f042
6689abd
cfb8dfd
cced5d9
53d3e71
41f0c42
62db3fc
1c042ab
076805f
d1cc873
ecf447f
ba358e3
f6e28c8
91b396a
026a9f1
f26c0f4
5f41bba
b4a1e39
babc296
811933a
ccb4f03
90ea3f2
24ca04c
3a4b951
0b01499
cc33ef0
a4ab895
f0bf95e
1deaa79
632c46a
8105e7a
6c4cf89
44dade3
62fc2e3
b304124
a0af755
d3adb96
406a01e
28711ac
885b3bc
e7b7eb5
d0cef39
0c0bc40
e3bd20a
eff19f5
055872c
4802901
b3f04b5
9522305
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -67,11 +67,11 @@ IF (${HOMME_USE_CXX}) | |
| SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g") | ||
|
|
||
| INCLUDE(CheckCXXCompilerFlag) | ||
| CHECK_CXX_COMPILER_FLAG("-std=c++14" CXX14_SUPPORTED) | ||
| IF (${CXX14_SUPPORTED}) | ||
| SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14") | ||
| CHECK_CXX_COMPILER_FLAG("-std=c++17" CXX17_SUPPORTED) | ||
| IF (${CXX17_SUPPORTED}) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. FYI, some CMake versions might issue a warning here, depending on policy settings. Unless old policy settings are used, you should get a warning like [warning text lost in extraction]. The clean CMake syntax is to use the bare variable name, without the `${...}` dereference, e.g. `IF (CXX17_SUPPORTED)`. |
||
| SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17") | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Are we sure C++17 is actually needed? My impression was that Crusher was thought to require it, but it then turned out that it does not.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Now I do not remember why it is here. Do you prefer 14?
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For cleanliness we should not mandate more than is technically required. I believe for a while weaver could not handle C++17, but that may have changed now. |
||
| ELSEIF (${HOMME_USE_KOKKOS}) | ||
| MESSAGE (FATAL_ERROR "Kokkos needs C++14, but the C++ compiler does not support it.") | ||
| MESSAGE (FATAL_ERROR "Kokkos needs C++17, but the C++ compiler does not support it.") | ||
| ENDIF () | ||
| CHECK_CXX_COMPILER_FLAG("-cxxlib" CXXLIB_SUPPORTED) | ||
| IF (CXXLIB_SUPPORTED) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,64 @@ | ||
| #interactive job | ||
| #bsub -W 2:00 -nnodes 1 -P cli115 -Is /bin/bash | ||
|
|
||
|
|
||
| #cmake -C ~/acme-fork-lb/components/homme/cmake/machineFiles/summit.cmake -DHOMMEXX_MPI_ON_DEVICE=FALSE ~/acme-fork-lb/components/homme/ | ||
|
|
||
| #SET (HOMMEXX_MPI_ON_DEVICE FALSE CACHE BOOL "") | ||
| SET (HOMMEXX_CUDA_MAX_WARP_PER_TEAM "16" CACHE STRING "") | ||
|
|
||
| SET (NETCDF_DIR $ENV{OLCF_NETCDF_FORTRAN_ROOT} CACHE FILEPATH "") | ||
| SET (HDF5_DIR $ENV{OLCF_HDF5_ROOT} CACHE FILEPATH "") | ||
|
|
||
| SET(BUILD_HOMME_WITHOUT_PIOLIBRARY TRUE CACHE BOOL "") | ||
|
|
||
| SET(HOMME_FIND_BLASLAPACK TRUE CACHE BOOL "") | ||
|
|
||
| SET(WITH_PNETCDF FALSE CACHE FILEPATH "") | ||
|
|
||
| SET(USE_QUEUING FALSE CACHE BOOL "") | ||
|
|
||
| SET(ENABLE_CUDA FALSE CACHE BOOL "") | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I do not see the variable ENABLE_CUDA used anywhere; only KOKKOS_ENABLE_CUDA is used. I will remove it from all machine files. |
||
|
|
||
| SET(BUILD_HOMME_PREQX_KOKKOS TRUE CACHE BOOL "") | ||
| SET(BUILD_HOMME_THETA_KOKKOS TRUE CACHE BOOL "") | ||
| SET(HOMME_ENABLE_COMPOSE FALSE CACHE BOOL "") | ||
|
|
||
| #SET (HOMMEXX_BFB_TESTING TRUE CACHE BOOL "") | ||
|
|
||
| SET(USE_TRILINOS OFF CACHE BOOL "") | ||
|
|
||
| SET(HIP TRUE CACHE BOOL "") | ||
| #SET(Kokkos_ENABLE_OPENMP OFF CACHE BOOL "") | ||
| #SET(Kokkos_ENABLE_CUDA OFF CACHE BOOL "") | ||
| #SET(Kokkos_ENABLE_CUDA_LAMBDA OFF CACHE BOOL "") | ||
| #SET(Kokkos_ARCH_VEGA908 ON CACHE BOOL "") | ||
| #SET(Kokkos_ENABLE_EXPLICIT_INSTANTIATION OFF CACHE BOOL "") | ||
|
|
||
| SET(CMAKE_C_COMPILER "cc" CACHE STRING "") | ||
| SET(CMAKE_Fortran_COMPILER "ftn" CACHE STRING "") | ||
| SET(CMAKE_CXX_COMPILER "hipcc" CACHE STRING "") | ||
| #SET(CMAKE_CXX_COMPILER "/ccs/home/onguba/kokkos/bin/nvcc_wrapper" CACHE STRING "") | ||
| SET(E3SM_KOKKOS_PATH "/ccs/home/onguba/kokkos-crusher-june2022/bld-hipcc" CACHE STRING "") | ||
|
|
||
| #not the proper way!!! | ||
| SET(MPICH_DIR "/opt/cray/pe/mpich/8.1.12/ofi/crayclang/10.0" CACHE STRING "") | ||
|
|
||
| SET(Extrae_LIBRARY "-I${MPICH_DIR}/include -L${MPICH_DIR}/lib -lmpi -L/opt/cray/pe/mpich/8.1.12/gtl/lib -lmpi_gtl_hsa" CACHE STRING "") | ||
|
|
||
| SET(ADD_Fortran_FLAGS "-O3 -DNDEBUG ${Extrae_LIBRARY} -I${E3SM_KOKKOS_PATH}/include -L${E3SM_KOKKOS_PATH}/lib64" CACHE STRING "") | ||
| SET(ADD_C_FLAGS "-O3 -DNDEBUG ${Extrae_LIBRARY} -I${E3SM_KOKKOS_PATH}/include -L${E3SM_KOKKOS_PATH}/lib64" CACHE STRING "") | ||
| SET(ADD_CXX_FLAGS "-std=c++14 -O3 -DNDEBUG --amdgpu-target=gfx90a -fno-gpu-rdc ${Extrae_LIBRARY} -I${E3SM_KOKKOS_PATH}/include -L${E3SM_KOKKOS_PATH}/lib64" CACHE STRING "") | ||
| SET(ADD_LINKER_FLAGS "-O3 -DNDEBUG ${Extrae_LIBRARY} -I${E3SM_KOKKOS_PATH}/include -L${E3SM_KOKKOS_PATH}/lib64" CACHE STRING "") | ||
|
|
||
|
|
||
| set (ENABLE_OPENMP OFF CACHE BOOL "") | ||
| set (ENABLE_COLUMN_OPENMP OFF CACHE BOOL "") | ||
| set (ENABLE_HORIZ_OPENMP OFF CACHE BOOL "") | ||
|
|
||
| set (HOMME_TESTING_PROFILE "dev" CACHE STRING "") | ||
|
|
||
| set (USE_NUM_PROCS 4 CACHE STRING "") | ||
|
|
||
| #set (OPT_FLAGS "-mcpu=power9 -mtune=power9" CACHE STRING "") | ||
| SET (USE_MPI_OPTIONS "--bind-to core" CACHE FILEPATH "") | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,64 @@ | ||
| #interactive job | ||
| #bsub -W 2:00 -nnodes 1 -P cli115 -Is /bin/bash | ||
|
|
||
|
|
||
| #cmake -C ~/acme-fork-lb/components/homme/cmake/machineFiles/summit.cmake -DHOMMEXX_MPI_ON_DEVICE=FALSE ~/acme-fork-lb/components/homme/ | ||
|
|
||
| #SET (HOMMEXX_MPI_ON_DEVICE FALSE CACHE BOOL "") | ||
| SET (HOMMEXX_CUDA_MAX_WARP_PER_TEAM "16" CACHE STRING "") | ||
|
|
||
| SET (NETCDF_DIR $ENV{OLCF_NETCDF_FORTRAN_ROOT} CACHE FILEPATH "") | ||
| SET (HDF5_DIR $ENV{OLCF_HDF5_ROOT} CACHE FILEPATH "") | ||
|
|
||
| SET(BUILD_HOMME_WITHOUT_PIOLIBRARY TRUE CACHE BOOL "") | ||
|
|
||
| SET(HOMME_FIND_BLASLAPACK TRUE CACHE BOOL "") | ||
|
|
||
| SET(WITH_PNETCDF FALSE CACHE FILEPATH "") | ||
|
|
||
| SET(USE_QUEUING FALSE CACHE BOOL "") | ||
|
|
||
| SET(ENABLE_CUDA FALSE CACHE BOOL "") | ||
|
|
||
| SET(BUILD_HOMME_PREQX_KOKKOS TRUE CACHE BOOL "") | ||
| SET(BUILD_HOMME_THETA_KOKKOS TRUE CACHE BOOL "") | ||
| SET(HOMME_ENABLE_COMPOSE FALSE CACHE BOOL "") | ||
|
|
||
| #SET (HOMMEXX_BFB_TESTING TRUE CACHE BOOL "") | ||
|
|
||
| SET(USE_TRILINOS OFF CACHE BOOL "") | ||
|
|
||
| SET(HIP TRUE CACHE BOOL "") | ||
| #SET(Kokkos_ENABLE_OPENMP OFF CACHE BOOL "") | ||
| #SET(Kokkos_ENABLE_CUDA OFF CACHE BOOL "") | ||
| #SET(Kokkos_ENABLE_CUDA_LAMBDA OFF CACHE BOOL "") | ||
| #SET(Kokkos_ARCH_VEGA908 ON CACHE BOOL "") | ||
| #SET(Kokkos_ENABLE_EXPLICIT_INSTANTIATION OFF CACHE BOOL "") | ||
|
|
||
| SET(CMAKE_C_COMPILER "cc" CACHE STRING "") | ||
| SET(CMAKE_Fortran_COMPILER "ftn" CACHE STRING "") | ||
| SET(CMAKE_CXX_COMPILER "hipcc" CACHE STRING "") | ||
| #SET(CMAKE_CXX_COMPILER "/ccs/home/onguba/kokkos/bin/nvcc_wrapper" CACHE STRING "") | ||
| SET(E3SM_KOKKOS_PATH "/ccs/home/onguba/kokkos-example-spock-hipcc2/bld-hipcc" CACHE STRING "") | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I had removed dependence on |
||
|
|
||
| #not the proper way!!! | ||
| SET(MPICH_DIR "/opt/cray/pe/mpich/8.1.12/ofi/crayclang/10.0" CACHE STRING "") | ||
|
|
||
| SET(Extrae_LIBRARY "-I${MPICH_DIR}/include -L${MPICH_DIR}/lib -lmpi -L/opt/cray/pe/mpich/8.1.12/gtl/lib -lmpi_gtl_hsa" CACHE STRING "") | ||
|
|
||
| SET(ADD_Fortran_FLAGS "-O3 -DNDEBUG ${Extrae_LIBRARY} -I${E3SM_KOKKOS_PATH}/include -L${E3SM_KOKKOS_PATH}/lib64" CACHE STRING "") | ||
| SET(ADD_C_FLAGS "-O3 -DNDEBUG ${Extrae_LIBRARY} -I${E3SM_KOKKOS_PATH}/include -L${E3SM_KOKKOS_PATH}/lib64" CACHE STRING "") | ||
| SET(ADD_CXX_FLAGS "-O3 -DNDEBUG --amdgpu-target=gfx908 -fno-gpu-rdc ${Extrae_LIBRARY} -I${E3SM_KOKKOS_PATH}/include -L${E3SM_KOKKOS_PATH}/lib64" CACHE STRING "") | ||
| SET(ADD_LINKER_FLAGS "-O3 -DNDEBUG ${Extrae_LIBRARY} -I${E3SM_KOKKOS_PATH}/include -L${E3SM_KOKKOS_PATH}/lib64" CACHE STRING "") | ||
|
|
||
|
|
||
| set (ENABLE_OPENMP OFF CACHE BOOL "") | ||
| set (ENABLE_COLUMN_OPENMP OFF CACHE BOOL "") | ||
| set (ENABLE_HORIZ_OPENMP OFF CACHE BOOL "") | ||
|
|
||
| set (HOMME_TESTING_PROFILE "dev" CACHE STRING "") | ||
|
|
||
| set (USE_NUM_PROCS 4 CACHE STRING "") | ||
|
|
||
| #set (OPT_FLAGS "-mcpu=power9 -mtune=power9" CACHE STRING "") | ||
| SET (USE_MPI_OPTIONS "--bind-to core" CACHE FILEPATH "") | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2,13 +2,13 @@ | |
| #bsub -W 2:00 -nnodes 1 -P cli115 -Is /bin/bash | ||
|
|
||
|
|
||
| #cmake -C ~/acme-fork-lb/components/homme/cmake/machineFiles/summit.cmake -DHOMMEXX_MPI_ON_DEVICE=FALSE ~/acme-fork-lb/components/homme/ | ||
|
|
||
| #SET (HOMMEXX_MPI_ON_DEVICE FALSE CACHE BOOL "") | ||
| SET (HOMMEXX_CUDA_MAX_WARP_PER_TEAM "16" CACHE STRING "") | ||
|
|
||
| SET (NETCDF_DIR $ENV{OLCF_NETCDF_FORTRAN_ROOT} CACHE FILEPATH "") | ||
| SET (HDF5_DIR $ENV{OLCF_HDF5_ROOT} CACHE FILEPATH "") | ||
| #SET (NETCDF_DIR $ENV{OLCF_NETCDF_FORTRAN_ROOT} CACHE FILEPATH "") | ||
| #SET (NetCDF_Fortran_PATH "/sw/summit/spack-envs/base/opt/linux-rhel8-ppc64le/gcc-7.5.0/netcdf-fortran-4.4.5-e2hkh7w3253wz5uubjxbbvh56a7xjl7n" CACHE STRING "") | ||
| #SET(NetCDF_C_LIBRARY "/sw/summit/spack-envs/base/opt/linux-rhel8-ppc64le/gcc-7.5.0/netcdf-c-4.8.0-pwi4jbrnwv4lrrjxdu5czbos5uvvjgvr/lib" CACHE STRING "") | ||
| #SET(NetCDF_C_INCLUDE_DIR "/sw/summit/spack-envs/base/opt/linux-rhel8-ppc64le/gcc-7.5.0/netcdf-c-4.8.0-pwi4jbrnwv4lrrjxdu5czbos5uvvjgvr/include" CACHE STRING "") | ||
|
|
||
| SET(BUILD_HOMME_WITHOUT_PIOLIBRARY TRUE CACHE BOOL "") | ||
|
|
||
|
|
@@ -22,6 +22,7 @@ SET(ENABLE_CUDA FALSE CACHE BOOL "") | |
|
|
||
| SET(BUILD_HOMME_PREQX_KOKKOS TRUE CACHE BOOL "") | ||
| SET(BUILD_HOMME_THETA_KOKKOS TRUE CACHE BOOL "") | ||
| #SET(HOMME_ENABLE_COMPOSE TRUE CACHE BOOL "") | ||
| SET(HOMME_ENABLE_COMPOSE FALSE CACHE BOOL "") | ||
|
|
||
| #SET (HOMMEXX_BFB_TESTING TRUE CACHE BOOL "") | ||
|
|
@@ -36,7 +37,7 @@ SET(Kokkos_ENABLE_EXPLICIT_INSTANTIATION OFF CACHE BOOL "") | |
|
|
||
| SET(CMAKE_C_COMPILER "mpicc" CACHE STRING "") | ||
| SET(CMAKE_Fortran_COMPILER "mpifort" CACHE STRING "") | ||
| SET(CMAKE_CXX_COMPILER "/ccs/home/onguba/kokkos/bin/nvcc_wrapper" CACHE STRING "") | ||
| SET(CMAKE_CXX_COMPILER "/ccs/home/onguba/acme-MASTER-GB/externals/kokkos/bin/nvcc_wrapper" CACHE STRING "") | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Clean this up too. |
||
|
|
||
| set (ENABLE_OPENMP OFF CACHE BOOL "") | ||
| set (ENABLE_COLUMN_OPENMP OFF CACHE BOOL "") | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,6 +17,10 @@ | |
| # include <cuda.h> | ||
| #endif | ||
|
|
||
| #if HIP_BUILD | ||
| #include <hip/hip_runtime.h> | ||
| #endif | ||
|
|
||
| namespace Homme { | ||
|
|
||
| // Since we're initializing from inside a Fortran code and don't have access to | ||
|
|
@@ -49,6 +53,22 @@ void initialize_kokkos () { | |
| str.back() = 0; | ||
| args.push_back(const_cast<char*>(str.data())); | ||
| #endif | ||
| #if HIP_BUILD | ||
| int nd; | ||
| const auto ret = hipGetDeviceCount(&nd); | ||
| if (ret != hipSuccess) { | ||
| // It isn't a big deal if we can't get the device count. | ||
| nd = 1; | ||
| } | ||
| std::stringstream ss; | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I will remove the duplicated code here. |
||
| ss << "--kokkos-num-devices=" << nd; | ||
| const auto key = ss.str(); | ||
| std::vector<char> str(key.size()+1); | ||
| std::copy(key.begin(), key.end(), str.begin()); | ||
| str.back() = 0; | ||
| args.push_back(const_cast<char*>(str.data())); | ||
| #endif | ||
|
|
||
|
|
||
| const char* silence = "--kokkos-disable-warnings"; | ||
| args.push_back(const_cast<char*>(silence)); | ||
|
|
@@ -103,6 +123,7 @@ team_num_threads_vectors_for_gpu ( | |
| assert(num_warps_total >= max_num_warps); | ||
| assert(tp.max_threads_usable >= 1 && tp.max_vectors_usable >= 1); | ||
|
|
||
| #if !HIP_BUILD | ||
| int num_warps; | ||
| if (tp.prefer_larger_team) { | ||
| const int num_warps_usable = | ||
|
|
@@ -136,15 +157,27 @@ team_num_threads_vectors_for_gpu ( | |
| const int num_threads = ( (tp.max_threads_usable > num_device_threads) ? | ||
| num_device_threads : | ||
| tp.max_threads_usable ); | ||
|
|
||
| //printf("tp.prefer_threads: %4d %4d \n",num_threads, prevpow2(num_device_threads / num_threads)); | ||
|
|
||
| return std::make_pair( num_threads, | ||
| prevpow2(num_device_threads / num_threads) ); | ||
| } else { | ||
| const int num_vectors = prevpow2( (tp.max_vectors_usable > num_device_threads) ? | ||
| num_device_threads : | ||
| tp.max_vectors_usable ); | ||
|
|
||
| //printf("NOT tp. prefer_threads: %4d %4d \n",num_device_threads / num_vectors, | ||
| // num_vectors); | ||
|
|
||
| return std::make_pair( num_device_threads / num_vectors, | ||
| num_vectors ); | ||
| } | ||
| #else | ||
| //manual override for HIP | ||
| return std::make_pair( 16,4 ); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this line meant to go in?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes. The last time I looked, I could not figure out what warps, etc. correspond to on MIxxx GPUs, so it is not clear what the logic above would produce for the pair. This pair was chosen based on performance runs. |
||
| #endif | ||
|
|
||
| } | ||
|
|
||
| } // namespace Parallel | ||
|
|
@@ -172,12 +205,22 @@ team_num_threads_vectors (const int num_parallel_iterations, | |
| # else | ||
| const int max_num_warps = HOMMEXX_CUDA_MAX_WARP_PER_TEAM; //Kokkos::Impl::cuda_internal_maximum_grid_count(); | ||
| # endif | ||
|
|
||
| #elif HIP_BUILD | ||
|
|
||
| //use 64 wavefronts per CU and 120 CUs | ||
| const int num_warps_device = 120*64; // no such thing Kokkos::Impl::hip_internal_maximum_warp_count(); | ||
| const int max_num_warps = 40; //cores per CU, SM ///HOMMEXX_CUDA_MAX_WARP_PER_TEAM; | ||
| const int num_threads_warp = Kokkos::Experimental::Impl::HIPTraits::WarpSize; | ||
|
|
||
| #else | ||
|
|
||
| // I want thread-distribution rules to be unit-testable even when Cuda is | ||
| // off. Thus, make up a P100-like machine: | ||
| const int num_warps_device = 1792; | ||
| const int num_threads_warp = 32; | ||
| const int max_num_warps = 16; | ||
|
|
||
| #endif | ||
|
|
||
| return Parallel::team_num_threads_vectors_for_gpu( | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.