@@ -540,11 +540,9 @@ if(NOT INTERN_BUILD_MOBILE AND NOT BUILD_LITE_INTERPRETER)
540540 ${TORCH_SRC_DIR} /csrc/utils/byte_order.cpp
541541 )
542542
543- if (USE_DISTRIBUTED)
544- append_filelist ("libtorch_distributed_base_sources" TORCH_SRCS )
545- if (NOT WIN32 )
546- append_filelist ("libtorch_distributed_extra_sources" TORCH_SRCS )
547- endif ()
543+ append_filelist ("libtorch_distributed_base_sources" TORCH_SRCS )
544+ if (NOT WIN32 )
545+ append_filelist ("libtorch_distributed_extra_sources" TORCH_SRCS )
548546 endif ()
549547endif ()
550548
@@ -573,32 +571,30 @@ if(USE_CUDA)
573571 list (APPEND Caffe2_GPU_SRCS
574572 ${TORCH_SRC_DIR} /csrc/cuda/nccl.cpp)
575573 endif ()
576- if (USE_DISTRIBUTED)
577- append_filelist ("libtorch_cuda_distributed_base_sources" Caffe2_GPU_SRCS )
578- if (NOT WIN32 )
579- append_filelist ("libtorch_cuda_distributed_extra_sources" Caffe2_GPU_SRCS )
580- set_source_files_properties (
581- ${TORCH_SRC_DIR} /csrc/distributed/c10d/ProcessGroupNCCL.cpp
582- ${TORCH_SRC_DIR} /csrc/distributed/c10d/cuda/utils.cpp
583- ${TORCH_SRC_DIR} /csrc/distributed/c10d/intra_node_comm.cpp
584- ${TORCH_SRC_DIR} /csrc/distributed/c10d/symm_mem/CudaDMAConnectivity.cpp
585- ${TORCH_SRC_DIR} /csrc/distributed/c10d/symm_mem/CUDASymmetricMemory.cu
586- ${TORCH_SRC_DIR} /csrc/distributed/c10d/symm_mem/CUDASymmetricMemoryOps.cu
587- ${TORCH_SRC_DIR} /csrc/distributed/c10d/symm_mem/CUDASymmetricMemoryUtils.cpp
588- ${TORCH_SRC_DIR} /csrc/distributed/c10d/symm_mem/NCCLSymmetricMemory.cu
589- ${TORCH_SRC_DIR} /csrc/distributed/c10d/symm_mem/cuda_mem_pool.cpp
590- PROPERTIES COMPILE_FLAGS "-DPYTORCH_C10_DRIVER_API_SUPPORTED=1"
591- )
592- endif ()
574+ append_filelist ("libtorch_cuda_distributed_base_sources" Caffe2_GPU_SRCS )
575+ if (NOT WIN32 )
576+ append_filelist ("libtorch_cuda_distributed_extra_sources" Caffe2_GPU_SRCS )
577+ set_source_files_properties (
578+ ${TORCH_SRC_DIR} /csrc/distributed/c10d/ProcessGroupNCCL.cpp
579+ ${TORCH_SRC_DIR} /csrc/distributed/c10d/cuda/utils.cpp
580+ ${TORCH_SRC_DIR} /csrc/distributed/c10d/intra_node_comm.cpp
581+ ${TORCH_SRC_DIR} /csrc/distributed/c10d/symm_mem/CudaDMAConnectivity.cpp
582+ ${TORCH_SRC_DIR} /csrc/distributed/c10d/symm_mem/CUDASymmetricMemory.cu
583+ ${TORCH_SRC_DIR} /csrc/distributed/c10d/symm_mem/CUDASymmetricMemoryOps.cu
584+ ${TORCH_SRC_DIR} /csrc/distributed/c10d/symm_mem/CUDASymmetricMemoryUtils.cpp
585+ ${TORCH_SRC_DIR} /csrc/distributed/c10d/symm_mem/NCCLSymmetricMemory.cu
586+ ${TORCH_SRC_DIR} /csrc/distributed/c10d/symm_mem/cuda_mem_pool.cpp
587+ PROPERTIES COMPILE_FLAGS "-DPYTORCH_C10_DRIVER_API_SUPPORTED=1"
588+ )
589+ endif ()
593590
594- set (ASYNC_MM_FILE "${TORCH_SRC_DIR} /csrc/distributed/c10d/cuda/AsyncMM.cu" )
595- # Disable the warning to make cutlass warp-specialized cooperative kernel build for gcc-9
596- if (CMAKE_COMPILER_IS_GNUCXX)
597- set_source_files_properties (${ASYNC_MM_FILE} PROPERTIES COMPILE_FLAGS "-Wno-unused-but-set-variable" )
598- endif ()
599- if (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.0 AND CUDA_NVCC_FLAGS MATCHES ".*compute_90.*" )
600- set_source_files_properties (${ASYNC_MM_FILE} PROPERTIES COMPILE_FLAGS "-gencode arch=compute_90a,code=sm_90a" )
601- endif ()
591+ set (ASYNC_MM_FILE "${TORCH_SRC_DIR} /csrc/distributed/c10d/cuda/AsyncMM.cu" )
592+ # Disable the warning to make cutlass warp-specialized cooperative kernel build for gcc-9
593+ if (CMAKE_COMPILER_IS_GNUCXX)
594+ set_source_files_properties (${ASYNC_MM_FILE} PROPERTIES COMPILE_FLAGS "-Wno-unused-but-set-variable" )
595+ endif ()
596+ if (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.0 AND CUDA_NVCC_FLAGS MATCHES ".*compute_90.*" )
597+ set_source_files_properties (${ASYNC_MM_FILE} PROPERTIES COMPILE_FLAGS "-gencode arch=compute_90a,code=sm_90a" )
602598 endif ()
603599 set_source_files_properties (
604600 ${TORCH_ROOT} /aten/src/ATen/cuda/detail/LazyNVRTC.cpp
@@ -631,11 +627,9 @@ if(USE_ROCM)
631627 list (APPEND Caffe2_HIP_SRCS
632628 ${TORCH_SRC_DIR} /csrc/cuda/nccl.cpp)
633629 endif ()
634- if (USE_DISTRIBUTED)
635- append_filelist ("libtorch_cuda_distributed_base_sources" Caffe2_HIP_SRCS )
636- if (NOT WIN32 )
637- append_filelist ("libtorch_cuda_distributed_extra_sources" Caffe2_HIP_SRCS )
638- endif ()
630+ append_filelist ("libtorch_cuda_distributed_base_sources" Caffe2_HIP_SRCS )
631+ if (NOT WIN32 )
632+ append_filelist ("libtorch_cuda_distributed_extra_sources" Caffe2_HIP_SRCS )
639633 endif ()
640634 # caffe2_nvrtc's stubs to driver APIs are useful for HIP.
641635 # See NOTE [ ATen NVRTC Stub and HIP ]
@@ -1356,12 +1350,10 @@ if(BUILD_TEST)
13561350 add_subdirectory (${TORCH_ROOT} /test/cpp/jit ${CMAKE_BINARY_DIR} /test_jit )
13571351 add_subdirectory (${TORCH_ROOT} /test/cpp/nativert ${CMAKE_BINARY_DIR} /test_nativert )
13581352 add_subdirectory (${TORCH_ROOT} /test/inductor ${CMAKE_BINARY_DIR} /test_inductor )
1359- if (USE_DISTRIBUTED)
1360- add_subdirectory (${TORCH_ROOT} /test/cpp/c10d ${CMAKE_BINARY_DIR} /test_cpp_c10d )
1361- if (NOT WIN32 )
1362- add_subdirectory (${TORCH_ROOT} /test/cpp/dist_autograd ${CMAKE_BINARY_DIR} /dist_autograd )
1363- add_subdirectory (${TORCH_ROOT} /test/cpp/rpc ${CMAKE_BINARY_DIR} /test_cpp_rpc )
1364- endif ()
1353+ add_subdirectory (${TORCH_ROOT} /test/cpp/c10d ${CMAKE_BINARY_DIR} /test_cpp_c10d )
1354+ if (NOT WIN32 )
1355+ add_subdirectory (${TORCH_ROOT} /test/cpp/dist_autograd ${CMAKE_BINARY_DIR} /dist_autograd )
1356+ add_subdirectory (${TORCH_ROOT} /test/cpp/rpc ${CMAKE_BINARY_DIR} /test_cpp_rpc )
13651357 endif ()
13661358 if (NOT NO_API)
13671359 add_subdirectory (${TORCH_ROOT} /test/cpp/api ${CMAKE_BINARY_DIR} /test_api )
@@ -1466,47 +1458,41 @@ if(BUILD_LITE_INTERPRETER)
14661458 endif ()
14671459endif ()
14681460
1469-
1470- # Pass USE_DISTRIBUTED to torch_cpu, as some codes in jit/pickler.cpp and
1471- # jit/unpickler.cpp need to be compiled only when USE_DISTRIBUTED is set
1472- if (USE_DISTRIBUTED)
1473- target_compile_definitions (torch_cpu PUBLIC USE_DISTRIBUTED )
1474- if (USE_GLOO AND USE_C10D_GLOO)
1475- target_compile_definitions (torch_cpu PUBLIC USE_C10D_GLOO )
1476- endif ()
1477- if (USE_UCC AND USE_C10D_UCC)
1478- target_compile_definitions (torch_cpu PUBLIC USE_C10D_UCC )
1479- if (USE_CUDA)
1480- target_compile_definitions (torch_cuda PUBLIC USE_C10D_UCC )
1481- endif ()
1482- endif ()
1483- if (USE_NCCL AND USE_C10D_NCCL)
1484- if (USE_ROCM)
1485- target_compile_definitions (torch_hip PUBLIC USE_C10D_NCCL )
1486- else ()
1487- target_compile_definitions (torch_cuda PUBLIC USE_C10D_NCCL )
1488- endif ()
1489- endif ()
1490- if (USE_MPI AND USE_C10D_MPI)
1491- if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "GNU" )
1492- set_source_files_properties (
1493- "${TORCH_SRC_DIR} /csrc/distributed/c10d/ProcessGroupMPI.cpp"
1494- PROPERTIES COMPILE_FLAGS -Wno-deprecated-declarations )
1495- endif ()
1496- target_compile_definitions (torch_cpu PUBLIC USE_C10D_MPI )
1497- endif ()
1498- # Pass USE_RPC in order to reduce use of
1499- # #if defined(USE_DISTRIBUTED) && !defined(_WIN32)
1500- # need to be removed when RPC is supported
1501- if (NOT WIN32 )
1502- target_compile_definitions (torch_cpu PUBLIC USE_RPC )
1461+ if (USE_GLOO AND USE_C10D_GLOO)
1462+ target_compile_definitions (torch_cpu PUBLIC USE_C10D_GLOO )
1463+ endif ()
1464+ if (USE_UCC AND USE_C10D_UCC)
1465+ target_compile_definitions (torch_cpu PUBLIC USE_C10D_UCC )
1466+ if (USE_CUDA)
1467+ target_compile_definitions (torch_cuda PUBLIC USE_C10D_UCC )
15031468 endif ()
1504- # Pass USE_TENSORPIPE to torch_cpu as some parts of rpc/utils.cpp
1505- # can only be compiled with USE_TENSORPIPE is set.
1506- if (USE_TENSORPIPE)
1507- target_compile_definitions (torch_cpu PUBLIC USE_TENSORPIPE )
1469+ endif ()
1470+ if (USE_NCCL AND USE_C10D_NCCL)
1471+ if (USE_ROCM)
1472+ target_compile_definitions (torch_hip PUBLIC USE_C10D_NCCL )
1473+ else ()
1474+ target_compile_definitions (torch_cuda PUBLIC USE_C10D_NCCL )
15081475 endif ()
15091476endif ()
1477+ if (USE_MPI AND USE_C10D_MPI)
1478+ if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "GNU" )
1479+ set_source_files_properties (
1480+ "${TORCH_SRC_DIR} /csrc/distributed/c10d/ProcessGroupMPI.cpp"
1481+ PROPERTIES COMPILE_FLAGS -Wno-deprecated-declarations )
1482+ endif ()
1483+ target_compile_definitions (torch_cpu PUBLIC USE_C10D_MPI )
1484+ endif ()
1485+ # Define USE_RPC so that sources can test it instead of
1486+ # #if defined(USE_DISTRIBUTED) && !defined(_WIN32)
1487+ # This needs to be removed once RPC is supported on WIN32.
1488+ if (NOT WIN32 )
1489+ target_compile_definitions (torch_cpu PUBLIC USE_RPC )
1490+ endif ()
1491+ # Pass USE_TENSORPIPE to torch_cpu as some parts of rpc/utils.cpp
1492+ # can only be compiled when USE_TENSORPIPE is set.
1493+ if (USE_TENSORPIPE)
1494+ target_compile_definitions (torch_cpu PUBLIC USE_TENSORPIPE )
1495+ endif ()
15101496
15111497if (NOT INTERN_BUILD_MOBILE)
15121498 if (${CAFFE2_LINK_LOCAL_PROTOBUF} )
0 commit comments