Skip to content

Commit 6895879

Browse files
authored
Merge pull request #515 from SPolton/fix-cuda-13
Fix CUDA 13.x Compatibility in PBRT-v4
2 parents 49c38c2 + 1231d35 commit 6895879

4 files changed

Lines changed: 30 additions & 2 deletions

File tree

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,5 +4,5 @@
44
src/build
55
.DS_Store
66
.ipynb_checkpoints/
7-
build/
7+
*build*/
88
.cache/

src/pbrt/gpu/memory.cpp

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -61,8 +61,16 @@ void CUDATrackedMemoryResource::PrefetchToGPU() const {
6161
LOG_VERBOSE("Prefetching %d allocations to GPU memory", allocations.size());
6262
size_t bytes = 0;
6363
for (auto iter : allocations) {
64+
#if CUDART_VERSION >= 13000
65+
cudaMemLocation location = {};
66+
location.type = cudaMemLocationTypeDevice;
67+
location.id = deviceIndex;
68+
CUDA_CHECK(
69+
cudaMemPrefetchAsync(iter.first, iter.second, location, 0 /* stream */));
70+
#else
6471
CUDA_CHECK(
6572
cudaMemPrefetchAsync(iter.first, iter.second, deviceIndex, 0 /* stream */));
73+
#endif
6674
bytes += iter.second;
6775
}
6876
CUDA_CHECK(cudaDeviceSynchronize());

src/pbrt/gpu/util.cpp

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -48,11 +48,19 @@ void GPUInit() {
4848
CUDA_CHECK(cudaGetDeviceProperties(&deviceProperties, i));
4949
CHECK(deviceProperties.canMapHostMemory);
5050

51+
#if CUDART_VERSION >= 13000
52+
int clockRateKHz = 0;
53+
// Check the query like every other CUDA runtime call here: an ignored failure
// would leave clockRateKHz at 0 and report a bogus "0 MHz" clock in the log.
CUDA_CHECK(cudaDeviceGetAttribute(&clockRateKHz, cudaDevAttrClockRate, i));
54+
float clockRate = clockRateKHz;
55+
#else
56+
float clockRate = deviceProperties.clockRate;
57+
#endif
58+
5159
std::string deviceString = StringPrintf(
5260
"CUDA device %d (%s) with %f MiB, %d SMs running at %f MHz "
5361
"with shader model %d.%d",
5462
i, deviceProperties.name, deviceProperties.totalGlobalMem / (1024. * 1024.),
55-
deviceProperties.multiProcessorCount, deviceProperties.clockRate / 1000.,
63+
deviceProperties.multiProcessorCount, clockRate / 1000.,
5664
deviceProperties.major, deviceProperties.minor);
5765
LOG_VERBOSE("%s", deviceString);
5866
devices += deviceString + "\n";

src/pbrt/wavefront/integrator.cpp

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -618,10 +618,22 @@ void WavefrontPathIntegrator::PrefetchGPUAllocations() {
618618
// performance. (This makes it possible to use the values of things
619619
// like WavefrontPathIntegrator::haveSubsurface to conditionally launch
620620
// kernels according to what's in the scene...)
621+
#if CUDART_VERSION >= 13000
622+
cudaMemLocation location = {};
623+
location.type = cudaMemLocationTypeDevice;
624+
location.id = 0; // For ReadMostly: device ID is ignored
625+
626+
CUDA_CHECK(cudaMemAdvise(this, sizeof(*this), cudaMemAdviseSetReadMostly,
627+
location));
628+
location.id = deviceIndex;
629+
CUDA_CHECK(cudaMemAdvise(this, sizeof(*this), cudaMemAdviseSetPreferredLocation,
630+
location));
631+
#else
621632
CUDA_CHECK(cudaMemAdvise(this, sizeof(*this), cudaMemAdviseSetReadMostly,
622633
/* ignored argument */ 0));
623634
CUDA_CHECK(cudaMemAdvise(this, sizeof(*this), cudaMemAdviseSetPreferredLocation,
624635
deviceIndex));
636+
#endif
625637

626638
// Copy all of the scene data structures over to GPU memory. This
627639
// ensures that there isn't a big performance hitch for the first batch

0 commit comments

Comments
 (0)