99#include " ../geometry/vector.hpp"
1010
1111namespace kiwigl {
12- __device__ __host__ void cudaRotate (
13- Vector3D* vertex, double roll, double pitch, double yaw) {
12+ __device__ __host__ void cudaRotate (Vector3D* vertex, double roll, double pitch, double yaw) {
1413 // Roll (rotation around X-axis)
1514 double cosR = cos (roll);
1615 double sinR = sin (roll);
@@ -36,8 +35,7 @@ __device__ __host__ void cudaRotate(
3635 vertex->y = y * cosY + x * sinY;
3736}
3837
39- __device__ __host__ void cudaTranslate3D (
40- Vector3D* vertex, double x, double y, double z) {
38+ __device__ __host__ void cudaTranslate3D (Vector3D* vertex, double x, double y, double z) {
4139 vertex->x += x;
4240 vertex->y += y;
4341 vertex->z += z;
@@ -48,22 +46,19 @@ __device__ __host__ void cudaTranslate2D(Vector2D* vertex, double x, double y) {
4846 vertex->y += y;
4947}
5048
// Scales a vertex in place: each component of *vertex is multiplied by the
// matching factor (x, y, z). Compiled for both host and device callers.
__device__ __host__ void cudaScale(Vector3D* vertex, double x, double y, double z) {
  Vector3D& target = *vertex;
  target.x *= x;
  target.y *= y;
  target.z *= z;
}
5754
// Writes the 2D projection of *vertex into *projectedVertex: the x and y
// components are each multiplied by FOV and then divided by the vertex's z.
// Compiled for both host and device callers.
// NOTE(review): there is no guard for vertex->z == 0; the division is
// performed as-is, so callers presumably keep geometry away from z == 0 --
// confirm against the kernel that calls this.
__device__ __host__ void cudaProject(const Vector3D* vertex, Vector2D* projectedVertex) {
  const Vector3D& source = *vertex;
  Vector2D& result = *projectedVertex;
  result.x = (source.x * FOV) / source.z;
  result.y = (source.y * FOV) / source.z;
}
6359
64- __global__ void transformVerticesKernel (Face* faces, Vector3D* vertices,
65- Triangle* projectedTriangles, Vector3D rotation, Vector3D camera, int width,
66- int height, int numFaces) {
60+ __global__ void transformVerticesKernel (Face* faces, Vector3D* vertices, Triangle* projectedTriangles,
61+ Vector3D rotation, Vector3D camera, int width, int height, int numFaces) {
6762 int idx = blockIdx .x * blockDim .x + threadIdx .x ;
6863 if (idx >= numFaces) return ;
6964
@@ -92,28 +87,23 @@ __global__ void transformVerticesKernel(Face* faces, Vector3D* vertices,
9287
// Allocates the three device buffers used by the CUDA path:
//   d_faces               - one Face per entry of mesh.faces
//   d_vertices            - one Vector3D per entry of mesh.vertices
//   d_projectedTriangles  - one Triangle per entry of mesh.faces
// Any allocation failure is reported on stderr and terminates the process
// with EXIT_FAILURE.
void Display::InitalizeCuda() {
  // Reports a failed runtime call and exits; `line` is the caller's __LINE__.
  auto requireSuccess = [](cudaError_t status, int line) {
    if (status == cudaSuccess) {
      return;
    }
    fprintf(stderr, "%s in %s at line %d\n", cudaGetErrorString(status), __FILE__, line);
    exit(EXIT_FAILURE);
  };

  // Buffer sizes in bytes, derived from the host-side mesh containers.
  const auto faceBytes = mesh.faces.size() * sizeof(Face);
  const auto vertexBytes = mesh.vertices.size() * sizeof(Vector3D);
  const auto triangleBytes = mesh.faces.size() * sizeof(Triangle);

  // Allocate memory on the device
  requireSuccess(cudaMalloc((void**)&d_faces, faceBytes), __LINE__);
  requireSuccess(cudaMalloc((void**)&d_vertices, vertexBytes), __LINE__);
  requireSuccess(cudaMalloc((void**)&d_projectedTriangles, triangleBytes), __LINE__);
}
106+
117107void Display::FreeCuda () {
118108 if (d_faces != nullptr ) {
119109 cudaFree (d_faces);
@@ -128,34 +118,30 @@ void Display::FreeCuda() {
128118 d_projectedTriangles = nullptr ;
129119 }
130120}
121+
// Uploads the mesh to the device, runs transformVerticesKernel with one
// thread per face, and copies the projected triangles back to the host.
//
// Parameters:
//   width, height - forwarded unchanged to transformVerticesKernel.
//
// Every CUDA runtime call is checked; a failure is reported on stderr and
// terminates the process with EXIT_FAILURE. An empty mesh is a no-op: a grid
// of zero blocks is an invalid launch configuration, so nothing is launched.
//
// NOTE(review): assumes the d_* buffers were allocated for the current mesh
// sizes and that projectedTriangles holds at least mesh.faces.size()
// elements -- confirm against the callers.
void Display::LaunchCuda(int width, int height) {
  // Reports a failed runtime call and exits; `line` is the caller's __LINE__.
  auto requireSuccess = [](cudaError_t status, int line) {
    if (status == cudaSuccess) {
      return;
    }
    fprintf(stderr, "%s in %s at line %d\n", cudaGetErrorString(status), __FILE__, line);
    exit(EXIT_FAILURE);
  };

  const auto faceCount = mesh.faces.size();
  if (faceCount == 0) {
    // Nothing to transform; launching with zero blocks would fail.
    return;
  }

  // Copy faces and vertices to device
  requireSuccess(cudaMemcpy(d_faces, mesh.faces.data(), faceCount * sizeof(Face), cudaMemcpyHostToDevice),
                 __LINE__);
  requireSuccess(cudaMemcpy(d_vertices, mesh.vertices.data(), mesh.vertices.size() * sizeof(Vector3D),
                            cudaMemcpyHostToDevice),
                 __LINE__);

  // Launch kernel: ceil-divide so the last partial block is still covered.
  dim3 threadsPerBlock(256, 1, 1);
  dim3 blocksPerGrid(static_cast<unsigned int>((faceCount + threadsPerBlock.x - 1) / threadsPerBlock.x), 1, 1);
  transformVerticesKernel<<<blocksPerGrid, threadsPerBlock>>>(d_faces, d_vertices, d_projectedTriangles, rotation,
                                                              camera, width, height, static_cast<int>(faceCount));

  // Launch-configuration errors are only visible through cudaGetLastError(),
  // so check right after the launch rather than after later calls.
  requireSuccess(cudaGetLastError(), __LINE__);

  // Synchronize to ensure the kernel has finished; faults raised while the
  // kernel was executing are returned here.
  requireSuccess(cudaDeviceSynchronize(), __LINE__);

  // Copy projected triangles back to host
  requireSuccess(cudaMemcpy(projectedTriangles.data(), d_projectedTriangles, faceCount * sizeof(Triangle),
                            cudaMemcpyDeviceToHost),
                 __LINE__);
}
161- } // namespace kiwigl
147+ } // namespace kiwigl
0 commit comments