Merge pull request #515 from SPolton/fix-cuda-13

mmp · web-flow · commit 689587986d4d · 2025-10-22T12:22:44.000-07:00
Fix CUDA 13.x Compatibility in PBRT-v4
diff --git a/.gitignore b/.gitignore
@@ -4,5 +4,5 @@
 src/build
 .DS_Store
 .ipynb_checkpoints/
-build/
+*build*/
 .cache/
diff --git a/src/pbrt/gpu/memory.cpp b/src/pbrt/gpu/memory.cpp
@@ -61,8 +61,16 @@ void CUDATrackedMemoryResource::PrefetchToGPU() const {
     LOG_VERBOSE("Prefetching %d allocations to GPU memory", allocations.size());
     size_t bytes = 0;
     for (auto iter : allocations) {
+    #if CUDART_VERSION >= 13000  // prefetch takes (ptr, bytes, location, flags, stream)
+        cudaMemLocation location = {};
+        location.type = cudaMemLocationTypeDevice;
+        location.id = deviceIndex;
+        CUDA_CHECK(cudaMemPrefetchAsync(iter.first, iter.second, location,
+                                        0 /* flags */, 0 /* stream */));
+    #else
         CUDA_CHECK(
             cudaMemPrefetchAsync(iter.first, iter.second, deviceIndex, 0 /* stream */));
+    #endif
         bytes += iter.second;
     }
     CUDA_CHECK(cudaDeviceSynchronize());
diff --git a/src/pbrt/gpu/util.cpp b/src/pbrt/gpu/util.cpp
@@ -48,11 +48,19 @@ void GPUInit() {
         CUDA_CHECK(cudaGetDeviceProperties(&deviceProperties, i));
         CHECK(deviceProperties.canMapHostMemory);
 
+    #if CUDART_VERSION >= 13000  // query the clock via the device attribute API
+        int clockRateKHz = 0;  // divided by 1000 below to report MHz
+        CUDA_CHECK(cudaDeviceGetAttribute(&clockRateKHz, cudaDevAttrClockRate, i));
+        float clockRate = clockRateKHz;
+    #else
+        float clockRate = deviceProperties.clockRate;
+    #endif
+
         std::string deviceString = StringPrintf(
             "CUDA device %d (%s) with %f MiB, %d SMs running at %f MHz "
             "with shader model %d.%d",
             i, deviceProperties.name, deviceProperties.totalGlobalMem / (1024. * 1024.),
-            deviceProperties.multiProcessorCount, deviceProperties.clockRate / 1000.,
+            deviceProperties.multiProcessorCount, clockRate / 1000.,
             deviceProperties.major, deviceProperties.minor);
         LOG_VERBOSE("%s", deviceString);
         devices += deviceString + "\n";
diff --git a/src/pbrt/wavefront/integrator.cpp b/src/pbrt/wavefront/integrator.cpp
@@ -618,10 +618,22 @@ void WavefrontPathIntegrator::PrefetchGPUAllocations() {
         // performance. (This makes it possible to use the values of things
         // like WavefrontPathIntegrator::haveSubsurface to conditionally launch
         // kernels according to what's in the scene...)
+    #if CUDART_VERSION >= 13000  // this branch passes cudaMemAdvise a cudaMemLocation
+        cudaMemLocation location = {};
+        location.type = cudaMemLocationTypeDevice;
+        location.id = 0; // ReadMostly ignores the device id (same 0 as the #else branch)
+
+        CUDA_CHECK(cudaMemAdvise(this, sizeof(*this), cudaMemAdviseSetReadMostly,
+                                 location));
+        location.id = deviceIndex;  // preferred location is the device at deviceIndex
+        CUDA_CHECK(cudaMemAdvise(this, sizeof(*this), cudaMemAdviseSetPreferredLocation,
+                                 location));
+    #else  // older runtimes: the last argument is a plain int device ordinal
         CUDA_CHECK(cudaMemAdvise(this, sizeof(*this), cudaMemAdviseSetReadMostly,
                                  /* ignored argument */ 0));
         CUDA_CHECK(cudaMemAdvise(this, sizeof(*this), cudaMemAdviseSetPreferredLocation,
                                  deviceIndex));
+    #endif
 
         // Copy all of the scene data structures over to GPU memory.  This
         // ensures that there isn't a big performance hitch for the first batch