Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,11 @@ toolchain = {'name': 'GCC', 'version': '10.3.0'}

source_urls = ['https://github.com/NVIDIA/cuda-samples/archive/']
sources = ['v%(version)s.tar.gz']
checksums = ['2bee5f7c89347259aaab75aa6df6e10375059bdbbaf04cc7936f5db7d54fa3ac']
patches = ['cuda-samples-11.3_multiple-sms.patch']
checksums = [
{'v11.3.tar.gz': '2bee5f7c89347259aaab75aa6df6e10375059bdbbaf04cc7936f5db7d54fa3ac'},
{'cuda-samples-11.3_multiple-sms.patch': 'b31613f4160456f0d0abf82999c7fb7eee781f0efadc8b9bbb5a02ef0f37e21d'},
]

dependencies = [
('CUDA', '11.3.1', '', SYSTEM),
Expand All @@ -32,7 +36,7 @@ local_filters += "Samples/simpleVulkan/Makefile "
local_filters += "Samples/simpleVulkanMMAP/Makefile "
local_filters += "Samples/streamOrderedAllocationIPC/Makefile "
local_filters += "Samples/vulkanImageCUDA/Makefile"
buildopts = "HOST_COMPILER=g++ FILTER_OUT='%s'" % local_filters
buildopts = "HOST_COMPILER=g++ SMS='%%(cuda_cc_space_sep_no_period)s' FILTER_OUT='%s'" % local_filters

files_to_copy = [
(['bin/%s/linux/release/*' % ARCH], 'bin'),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,11 @@ toolchain = {'name': 'GCC', 'version': '11.3.0'}

source_urls = ['https://github.com/NVIDIA/cuda-samples/archive/']
sources = ['v%(version)s.tar.gz']
checksums = ['75b858bcf9e534eaa0f129c418e661b83872d743de218df8a5278cc429f9ea98']
patches = ['cuda-samples-11.6_multiple-sms.patch']
checksums = [
{'v11.6.tar.gz': '75b858bcf9e534eaa0f129c418e661b83872d743de218df8a5278cc429f9ea98'},
{'cuda-samples-11.6_multiple-sms.patch': '8849e4882d797d155d6ebb71377fa1409205361776ade8da699452a4ecb94a0a'},
]

dependencies = [
('CUDA', '11.7.0', '', SYSTEM),
Expand All @@ -33,7 +37,7 @@ local_filters += "Samples/simpleVulkanMMAP/Makefile "
local_filters += "Samples/streamOrderedAllocationIPC/Makefile "
local_filters += "Samples/vulkanImageCUDA/Makefile"

buildopts = "HOST_COMPILER=g++ FILTER_OUT='%s'" % local_filters
buildopts = "HOST_COMPILER=g++ SMS='%%(cuda_cc_space_sep_no_period)s' FILTER_OUT='%s'" % local_filters

files_to_copy = [
(['bin/%s/linux/release/*' % ARCH], 'bin'),
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# EasyBuild easyconfig: NVIDIA CUDA samples 11.8, built with the GCC 11.3.0 toolchain.
easyblock = 'MakeCp'

name = 'CUDA-Samples'
version = '11.8'
# %(cudaver)s is an EasyBuild template placeholder (presumably filled from the CUDA dependency below).
versionsuffix = '-CUDA-%(cudaver)s'

homepage = 'https://github.com/NVIDIA/cuda-samples'
description = "Samples for CUDA Developers which demonstrates features in CUDA Toolkit"

toolchain = {'name': 'GCC', 'version': '11.3.0'}

# Sources are the GitHub archive tarball for the tag 'v<version>'.
source_urls = ['https://github.com/NVIDIA/cuda-samples/archive/']
sources = ['v%(version)s.tar.gz']
# NOTE(review): the patch file is named after 11.6 but is applied to the 11.8 sources here.
patches = ['cuda-samples-11.6_multiple-sms.patch']
# SHA256 checksums keyed by file name, one entry per source and per patch.
checksums = [
{'v11.8.tar.gz': '1bc02c0ca42a323f3c7a05b5682eae703681a91e95b135bfe81f848b2d6a2c51'},
{'cuda-samples-11.6_multiple-sms.patch': '8849e4882d797d155d6ebb71377fa1409205361776ade8da699452a4ecb94a0a'},
]

# NOTE(review): samples 11.8 are built against CUDA 11.7.0 - confirm this pairing is intended.
dependencies = [
('CUDA', '11.7.0', '', SYSTEM),
]

# Remove the bin/win64 directory (pre-built Windows DLLs) before the build command runs.
prebuildopts = "rm -r bin/win64 && "

# Sample Makefiles excluded from the build because they require extensive dependencies.
local_filtered_makefiles = [
    "Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/Makefile",
    "Samples/4_CUDA_Libraries/boxFilterNPP/Makefile",
    "Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/Makefile",
    "Samples/4_CUDA_Libraries/cudaNvSci/Makefile",
    "Samples/4_CUDA_Libraries/cudaNvSciNvMedia/Makefile",
    "Samples/5_Domain_Specific/simpleGL/Makefile",
    "Samples/3_CUDA_Features/warpAggregatedAtomicsCG/Makefile",
    "Samples/5_Domain_Specific/simpleVulkan/Makefile",
    "Samples/5_Domain_Specific/simpleVulkanMMAP/Makefile",
    "Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/Makefile",
    "Samples/5_Domain_Specific/vulkanImageCUDA/Makefile",
    "Samples/6_Performance/LargeKernelParameter/Makefile",
]
# Space-separated list; the trailing space after the last entry is kept on purpose.
local_filters = " ".join(local_filtered_makefiles) + " "

# Arguments handed to make:
# - HOST_COMPILER: host C++ compiler
# - SMS: left as the %(cuda_cc_space_sep_no_period)s placeholder for EasyBuild's template resolution
# - FILTER_OUT: the Makefiles listed above
# NOTE(review): the multiple-sms patch appears to pick its PTX target with make's $(sort), which orders
# words lexically - confirm the result if a three-digit compute capability is ever passed in SMS.
buildopts = " ".join([
    "HOST_COMPILER=g++",
    "SMS='%(cuda_cc_space_sep_no_period)s'",
    "FILTER_OUT='%s'" % local_filters,
])

# Everything make placed in the release directory for this architecture goes into bin/,
# and the licence file is copied alongside.
local_release_glob = 'bin/%s/linux/release/*' % ARCH
files_to_copy = [
    ([local_release_glob], 'bin'),
    'LICENSE',
]

# A handful of binaries whose presence is taken as evidence of a successful build.
local_binaries = ['deviceQuery', 'matrixMul', 'bandwidthTest', 'cudaOpenMP']

# The sanity check is limited to file paths: running the binaries may fail when the
# compatibility libraries needed to pair this CUDA version with the installed
# NVIDIA drivers are missing.
sanity_check_paths = {
    'files': ['bin/' + local_name for local_name in local_binaries],
    'dirs': [],
}

moduleclass = 'lang'
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,11 @@ toolchain = {'name': 'GCC', 'version': '12.3.0'}

source_urls = ['https://github.com/NVIDIA/cuda-samples/archive/']
sources = ['v%(version)s.tar.gz']
checksums = ['f758160645b366d79c2638d8dfd389f01029b8d179ab0c11726b9ef58aecebd9']
patches = ['cuda-samples-11.6_multiple-sms.patch']
checksums = [
{'v12.1.tar.gz': 'f758160645b366d79c2638d8dfd389f01029b8d179ab0c11726b9ef58aecebd9'},
{'cuda-samples-11.6_multiple-sms.patch': '8849e4882d797d155d6ebb71377fa1409205361776ade8da699452a4ecb94a0a'},
]

dependencies = [
('CUDA', '12.1.1', '', SYSTEM),
Expand Down Expand Up @@ -58,7 +62,7 @@ if ARCH == 'aarch64':
local_filters += "Samples/3_CUDA_Features/cdpQuadtree/Makefile "
local_filters += "Samples/3_CUDA_Features/cdpAdvancedQuicksort/Makefile "

buildopts = "HOST_COMPILER=g++ FILTER_OUT='%s'" % local_filters
buildopts = "HOST_COMPILER=g++ SMS='%%(cuda_cc_space_sep_no_period)s' FILTER_OUT='%s'" % local_filters

# Remove libraries in the bin dir after a successful 'make'
buildopts += " && rm bin/*/linux/release/lib*.so.*"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# EasyBuild easyconfig: NVIDIA CUDA samples 12.2, built with the GCC 11.3.0 toolchain.
easyblock = 'MakeCp'

name = 'CUDA-Samples'
version = '12.2'
# %(cudaver)s is an EasyBuild template placeholder (presumably filled from the CUDA dependency below).
versionsuffix = '-CUDA-%(cudaver)s'

homepage = 'https://github.com/NVIDIA/cuda-samples'
description = "Samples for CUDA Developers which demonstrates features in CUDA Toolkit"

toolchain = {'name': 'GCC', 'version': '11.3.0'}

# Sources are the GitHub archive tarball for the tag 'v<version>'.
source_urls = ['https://github.com/NVIDIA/cuda-samples/archive/']
sources = ['v%(version)s.tar.gz']
# NOTE(review): the patch file is named after 11.6 but is applied to the 12.2 sources here.
patches = ['cuda-samples-11.6_multiple-sms.patch']
# SHA256 checksums keyed by file name, one entry per source and per patch.
checksums = [
{'v12.2.tar.gz': '1823cfe28e97a9230107aa72b231f78952c0f178b71a920f036d360518480bdc'},
{'cuda-samples-11.6_multiple-sms.patch': '8849e4882d797d155d6ebb71377fa1409205361776ade8da699452a4ecb94a0a'},
]

# Needed at build time only; nothing in this file invokes CMake directly.
builddependencies = [
('CMake', '3.24.3'),
]

dependencies = [
('CUDA', '12.2.0', '', SYSTEM),
]

# Remove the bin/win64 directory (pre-built Windows DLLs) before the build command runs.
prebuildopts = "rm -r bin/win64 && "

# Sample Makefiles excluded from the build because they require extensive dependencies.
local_filtered_makefiles = [
    "Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/Makefile",
    "Samples/4_CUDA_Libraries/boxFilterNPP/Makefile",
    "Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/Makefile",
    "Samples/4_CUDA_Libraries/cudaNvSci/Makefile",
    "Samples/4_CUDA_Libraries/cudaNvSciNvMedia/Makefile",
    "Samples/5_Domain_Specific/simpleGL/Makefile",
    "Samples/3_CUDA_Features/warpAggregatedAtomicsCG/Makefile",
    "Samples/5_Domain_Specific/simpleVulkan/Makefile",
    "Samples/5_Domain_Specific/simpleVulkanMMAP/Makefile",
    "Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/Makefile",
    "Samples/5_Domain_Specific/vulkanImageCUDA/Makefile",
    "Samples/6_Performance/LargeKernelParameter/Makefile",
]
# Space-separated list; the trailing space after the last entry is kept on purpose.
local_filters = " ".join(local_filtered_makefiles) + " "

# Arguments handed to make:
# - HOST_COMPILER: host C++ compiler
# - SMS: left as the %(cuda_cc_space_sep_no_period)s placeholder for EasyBuild's template resolution
# - FILTER_OUT: the Makefiles listed above
# NOTE(review): the multiple-sms patch appears to pick its PTX target with make's $(sort), which orders
# words lexically - confirm the result if a three-digit compute capability is ever passed in SMS.
buildopts = " ".join([
    "HOST_COMPILER=g++",
    "SMS='%(cuda_cc_space_sep_no_period)s'",
    "FILTER_OUT='%s'" % local_filters,
])

# Everything make placed in the release directory for this architecture goes into bin/,
# and the licence file is copied alongside.
local_release_glob = 'bin/%s/linux/release/*' % ARCH
files_to_copy = [
    ([local_release_glob], 'bin'),
    'LICENSE',
]

# A handful of binaries whose presence is taken as evidence of a successful build.
local_binaries = ['deviceQuery', 'matrixMul', 'bandwidthTest', 'cudaOpenMP']

# The sanity check is limited to file paths: running the binaries may fail when the
# compatibility libraries needed to pair this CUDA version with the installed
# NVIDIA drivers are missing.
sanity_check_paths = {
    'files': ['bin/' + local_name for local_name in local_binaries],
    'dirs': [],
}

moduleclass = 'lang'
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Fixes "nvcc fatal: Option '--ptx (-ptx)' is not allowed when compiling for multiple GPU architectures"
# fatal compilation issue when building for multiple SM architectures
# More info, see https://github.com/NVIDIA/cuda-samples/issues/289

# Author: Caspar van Leeuwen

diff -Nru cuda-samples-11.3.orig/Samples/memMapIPCDrv/Makefile cuda-samples-11.3/Samples/memMapIPCDrv/Makefile
--- cuda-samples-11.3.orig/Samples/memMapIPCDrv/Makefile 2024-07-29 13:17:10.330743000 +0200
+++ cuda-samples-11.3/Samples/memMapIPCDrv/Makefile 2024-07-29 13:19:13.158507504 +0200
@@ -321,6 +321,12 @@
ifneq ($(HIGHEST_SM),)
GENCODE_FLAGS += -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM)
endif
+
+# Generate the explicit PTX file for the lowest SM architecture in $(SMS), so it works on all SMS listed there
+LOWEST_SM := $(firstword $(sort $(SMS)))
+ifneq ($(LOWEST_SM),)
+GENCODE_FLAGS_LOWEST_SM += -gencode arch=compute_$(LOWEST_SM),code=compute_$(LOWEST_SM)
+endif
endif

ifeq ($(TARGET_OS),darwin)
@@ -401,7 +407,7 @@
endif

$(PTX_FILE): memMapIpc_kernel.cu
- $(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -ptx $<
+ $(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS_LOWEST_SM) -o $@ -ptx $<
$(EXEC) mkdir -p data
$(EXEC) cp -f $@ ./data
$(EXEC) mkdir -p ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Fixes "nvcc fatal: Option '--ptx (-ptx)' is not allowed when compiling for multiple GPU architectures"
# fatal compilation issue when building for multiple SM architectures
# More info, see https://github.com/NVIDIA/cuda-samples/issues/289

# Author: Caspar van Leeuwen

diff -Nru cuda-samples-12.2.orig/Samples/3_CUDA_Features/memMapIPCDrv/Makefile cuda-samples-12.2/Samples/3_CUDA_Features/memMapIPCDrv/Makefile
--- cuda-samples-12.2.orig/Samples/3_CUDA_Features/memMapIPCDrv/Makefile 2024-07-29 12:14:28.538848000 +0200
+++ cuda-samples-12.2/Samples/3_CUDA_Features/memMapIPCDrv/Makefile 2024-07-29 13:02:45.134261829 +0200
@@ -313,6 +313,12 @@
ifneq ($(HIGHEST_SM),)
GENCODE_FLAGS += -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM)
endif
+
+# Generate the explicit PTX file for the lowest SM architecture in $(SMS), so it works on all SMS listed there
+LOWEST_SM := $(firstword $(sort $(SMS)))
+ifneq ($(LOWEST_SM),)
+GENCODE_FLAGS_LOWEST_SM += -gencode arch=compute_$(LOWEST_SM),code=compute_$(LOWEST_SM)
+endif
endif

ifeq ($(TARGET_OS),darwin)
@@ -394,7 +400,7 @@
endif

$(PTX_FILE): memMapIpc_kernel.cu
- $(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -ptx $<
+ $(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS_LOWEST_SM) -o $@ -ptx $<
$(EXEC) mkdir -p data
$(EXEC) cp -f $@ ./data
$(EXEC) mkdir -p ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
diff -Nru cuda-samples-12.2.orig/Samples/3_CUDA_Features/ptxjit/Makefile cuda-samples-12.2/Samples/3_CUDA_Features/ptxjit/Makefile
--- cuda-samples-12.2.orig/Samples/3_CUDA_Features/ptxjit/Makefile 2024-07-29 12:14:28.546771000 +0200
+++ cuda-samples-12.2/Samples/3_CUDA_Features/ptxjit/Makefile 2024-07-29 13:02:38.741961008 +0200
@@ -307,6 +307,12 @@
ifneq ($(HIGHEST_SM),)
GENCODE_FLAGS += -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM)
endif
+
+# Generate the explicit PTX file for the lowest SM architecture in $(SMS), so it works on all SMS listed there
+LOWEST_SM := $(firstword $(sort $(SMS)))
+ifneq ($(LOWEST_SM),)
+GENCODE_FLAGS_LOWEST_SM += -gencode arch=compute_$(LOWEST_SM),code=compute_$(LOWEST_SM)
+endif
endif

ifeq ($(TARGET_OS),darwin)
@@ -390,7 +396,7 @@
endif

$(PTX_FILE): ptxjit_kernel.cu
- $(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -ptx $<
+ $(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS_LOWEST_SM) -o $@ -ptx $<
$(EXEC) mkdir -p data
$(EXEC) cp -f $@ ./data
$(EXEC) mkdir -p ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)