Skip to content
Open
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ jobs:
DISTRO: [
{ "tag": "jammy_gtsam4.3a0", "base_image": "ubuntu:jammy", "suffix": "gcc" },
{ "tag": "jammy_gtsam4.3a0", "base_image": "ubuntu:jammy", "suffix": "llvm" },
{ "tag": "jammy_cuda12.2_gtsam4.3a0", "base_image": "nvidia/cuda:12.2.2-devel-ubuntu22.04", "suffix": "gcc.cuda" },
{ "tag": "jammy_cuda12.5_gtsam4.3a0", "base_image": "nvidia/cuda:12.5.1-devel-ubuntu22.04", "suffix": "gcc.cuda" },
{ "tag": "jammy_cuda13.1_gtsam4.3a0", "base_image": "nvidia/cuda:13.1.0-devel-ubuntu22.04", "suffix": "gcc.cuda" },
{ "tag": "noble_gtsam4.3a0", "base_image": "ubuntu:noble", "suffix": "gcc" },
Expand Down
9 changes: 9 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -251,12 +251,20 @@ if(BUILD_WITH_CUDA)
src/gtsam_points/cuda/nonlinear_factor_set_gpu_create.cpp
src/gtsam_points/cuda/stream_roundrobin.cu
src/gtsam_points/cuda/stream_temp_buffer_roundrobin.cu
# ann
src/gtsam_points/ann/kdtree_gpu.cpp
src/gtsam_points/ann/kdtree_gpu.cu
# types
src/gtsam_points/types/point_cloud.cu
src/gtsam_points/types/point_cloud_gpu.cu
src/gtsam_points/types/gaussian_voxelmap_gpu.cu
src/gtsam_points/types/gaussian_voxelmap_gpu_funcs.cu
# factors
src/gtsam_points/factors/integrated_gicp_derivatives.cu
src/gtsam_points/factors/integrated_gicp_derivatives_inliers.cu
src/gtsam_points/factors/integrated_gicp_derivatives_linearize.cu
src/gtsam_points/factors/integrated_gicp_derivatives_compute.cu
src/gtsam_points/factors/integrated_gicp_factor_gpu.cpp
src/gtsam_points/factors/integrated_vgicp_derivatives.cu
src/gtsam_points/factors/integrated_vgicp_derivatives_inliers.cu
src/gtsam_points/factors/integrated_vgicp_derivatives_compute.cu
Expand All @@ -267,6 +275,7 @@ if(BUILD_WITH_CUDA)
)
target_include_directories(gtsam_points_cuda PUBLIC
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
$<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}/include>
$<INSTALL_INTERFACE:include>
)
target_link_libraries(gtsam_points_cuda
Expand Down
66 changes: 66 additions & 0 deletions include/gtsam_points/ann/kdtree_gpu.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2025 Kenji Koide (k.koide@aist.go.jp)
#pragma once

#include <limits>
#include <cstdint>
#include <gtsam_points/types/point_cloud.hpp>
#include <gtsam_points/ann/small_kdtree.hpp>

struct CUstream_st;

namespace gtsam_points {

struct KdTreeNodeGPU {
NodeIndexType left = INVALID_NODE; ///< Left child node index.
NodeIndexType right = INVALID_NODE; ///< Right child node index.

union {
struct Leaf {
NodeIndexType first; ///< First point index in the leaf node.
NodeIndexType last; ///< Last point index in the leaf node.
} lr; ///< Leaf node.
struct NonLeaf {
NodeIndexType axis; ///< Projection axis.
float thresh; ///< Threshold value.
} sub; ///< Non-leaf node.
} node_type;
};

class KdTreeGPU {
public:
using Ptr = std::shared_ptr<KdTreeGPU>;
using ConstPtr = std::shared_ptr<const KdTreeGPU>;

KdTreeGPU(const PointCloud::ConstPtr& points, CUstream_st* stream = nullptr);
~KdTreeGPU();

void nearest_neighbor_search(
const Eigen::Vector3f* queries,
size_t num_queries,
std::uint32_t* nn_indices,
float* nn_sq_dists,
CUstream_st* stream = nullptr);

void nearest_neighbor_search_cpu(
const Eigen::Vector3f* h_queries,
size_t num_queries,
std::uint32_t* h_nn_indices,
float* h_nn_sq_dists,
CUstream_st* stream = nullptr);

/// @brief Get the GPU pointer to the point indices
const std::uint32_t* get_indices() const { return indices; }

/// @brief Get the GPU pointer to the KdTree nodes
const KdTreeNodeGPU* get_nodes() const { return nodes; }

private:
PointCloud::ConstPtr points;
size_t num_indices;
size_t num_nodes;
std::uint32_t* indices;
KdTreeNodeGPU* nodes;
};

} // namespace gtsam_points
23 changes: 23 additions & 0 deletions include/gtsam_points/cuda/kernels/correspondence.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2025 Kenji Koide (k.koide@aist.go.jp)

#pragma once

#ifdef __CUDACC__
#define GTSAM_POINTS_HOST_DEVICE __host__ __device__
#else
#define GTSAM_POINTS_HOST_DEVICE
#endif

namespace gtsam_points {

/// @brief A pair of source and target point indices representing a correspondence.
struct Correspondence {
GTSAM_POINTS_HOST_DEVICE Correspondence() : source_idx(-1), target_idx(-1) {}
GTSAM_POINTS_HOST_DEVICE Correspondence(int source_idx, int target_idx) : source_idx(source_idx), target_idx(target_idx) {}

int source_idx;
int target_idx;
};

} // namespace gtsam_points
134 changes: 134 additions & 0 deletions include/gtsam_points/cuda/kernels/gicp_derivatives.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2025 Kenji Koide (k.koide@aist.go.jp)

#pragma once

#include <Eigen/Core>
#include <thrust/device_vector.h>

#include <gtsam_points/cuda/kernels/correspondence.hpp>
#include <gtsam_points/cuda/kernels/pose.cuh>
#include <gtsam_points/cuda/kernels/linearized_system.cuh>

namespace gtsam_points {

struct gicp_derivatives_kernel {
gicp_derivatives_kernel(
const Eigen::Isometry3f* linearization_point_ptr,
const Eigen::Vector3f* target_means,
const Eigen::Matrix3f* target_covs,
const Eigen::Vector3f* source_means,
const Eigen::Matrix3f* source_covs)
: linearization_point_ptr(linearization_point_ptr),
target_means_ptr(target_means),
target_covs_ptr(target_covs),
source_means_ptr(source_means),
source_covs_ptr(source_covs) {}

__device__ LinearizedSystem6 operator()(const Correspondence& source_target_correspondence) const {
const int source_idx = source_target_correspondence.source_idx;
const int target_idx = source_target_correspondence.target_idx;
if (source_idx < 0 || target_idx < 0) {
return LinearizedSystem6::zero();
}

const Eigen::Isometry3f& x = *linearization_point_ptr;
const Eigen::Matrix3f R = x.linear();
const Eigen::Vector3f t = x.translation();

const Eigen::Vector3f mean_A = source_means_ptr[source_idx];
const Eigen::Matrix3f cov_A = source_covs_ptr[source_idx];
const Eigen::Vector3f transed_mean_A = R * mean_A + t;

const Eigen::Vector3f mean_B = target_means_ptr[target_idx];
const Eigen::Matrix3f cov_B = target_covs_ptr[target_idx];

const Eigen::Matrix3f RCR = (R * cov_A * R.transpose());
const Eigen::Matrix3f RCR_inv = (cov_B + RCR).inverse();
Eigen::Vector3f error = mean_B - transed_mean_A;

Eigen::Matrix<float, 3, 6> J_target;
J_target.block<3, 3>(0, 0) = -skew_symmetric(transed_mean_A);
J_target.block<3, 3>(0, 3) = Eigen::Matrix3f::Identity();

Eigen::Matrix<float, 3, 6> J_source;
J_source.block<3, 3>(0, 0) = R * skew_symmetric(mean_A);
J_source.block<3, 3>(0, 3) = -R;

Eigen::Matrix<float, 6, 3> J_target_RCR_inv = J_target.transpose() * RCR_inv;
Eigen::Matrix<float, 6, 3> J_source_RCR_inv = J_source.transpose() * RCR_inv;

LinearizedSystem6 linearized;
linearized.num_inliers = 1;
linearized.error = error.transpose() * RCR_inv * error;
linearized.H_target = J_target_RCR_inv * J_target;
linearized.H_source = J_source_RCR_inv * J_source;
linearized.H_target_source = J_target_RCR_inv * J_source;
linearized.b_target = J_target_RCR_inv * error;
linearized.b_source = J_source_RCR_inv * error;

return linearized;
}

const Eigen::Isometry3f* linearization_point_ptr;

const Eigen::Vector3f* target_means_ptr;
const Eigen::Matrix3f* target_covs_ptr;

const Eigen::Vector3f* source_means_ptr;
const Eigen::Matrix3f* source_covs_ptr;
};

struct gicp_error_kernel {
gicp_error_kernel(
const Eigen::Isometry3f* linearization_point_ptr,
const Eigen::Isometry3f* evaluation_point_ptr,
const Eigen::Vector3f* target_means,
const Eigen::Matrix3f* target_covs,
const Eigen::Vector3f* source_means,
const Eigen::Matrix3f* source_covs)
: linearization_point_ptr(linearization_point_ptr),
evaluation_point_ptr(evaluation_point_ptr),
target_means_ptr(target_means),
target_covs_ptr(target_covs),
source_means_ptr(source_means),
source_covs_ptr(source_covs) {}

__device__ float operator()(const Correspondence& source_target_correspondence) const {
const int source_idx = source_target_correspondence.source_idx;
const int target_idx = source_target_correspondence.target_idx;
if (source_idx < 0 || target_idx < 0) {
return 0.0f;
}

const Eigen::Isometry3f& xl = *linearization_point_ptr;
const Eigen::Matrix3f Rl = xl.linear();

const Eigen::Isometry3f& xe = *evaluation_point_ptr;
const Eigen::Matrix3f Re = xe.linear();
const Eigen::Vector3f te = xe.translation();

const Eigen::Vector3f mean_A = source_means_ptr[source_idx];
const Eigen::Matrix3f cov_A = source_covs_ptr[source_idx];
const Eigen::Vector3f transed_mean_A = Re * mean_A + te;

const Eigen::Vector3f mean_B = target_means_ptr[target_idx];
const Eigen::Matrix3f cov_B = target_covs_ptr[target_idx];

const Eigen::Matrix3f RCR = (Rl * cov_A * Rl.transpose());
const Eigen::Matrix3f RCR_inv = (cov_B + RCR).inverse();
Eigen::Vector3f error = mean_B - transed_mean_A;

return error.transpose() * RCR_inv * error;
}

const Eigen::Isometry3f* linearization_point_ptr;
const Eigen::Isometry3f* evaluation_point_ptr;

const Eigen::Vector3f* target_means_ptr;
const Eigen::Matrix3f* target_covs_ptr;
const Eigen::Vector3f* source_means_ptr;
const Eigen::Matrix3f* source_covs_ptr;
};

} // namespace gtsam_points
72 changes: 72 additions & 0 deletions include/gtsam_points/cuda/kernels/kdtree.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2025 Kenji Koide (k.koide@aist.go.jp)
Comment thread
koide3 marked this conversation as resolved.
#include <gtsam_points/ann/kdtree_gpu.hpp>

namespace gtsam_points {

struct kdtree_nearest_neighbor_search_kernel {
public:
static constexpr int MAX_STACK_SIZE = 20;

__device__ thrust::pair<NodeIndexType, float> operator()(const Eigen::Vector3f query) const {
thrust::pair<NodeIndexType, float> result = {INVALID_NODE, std::numeric_limits<float>::max()};

int stack_size = 1;
thrust::pair<int, float> search_stack[MAX_STACK_SIZE] = {{0, 0.0f}};

while (stack_size > 0) {
const auto [node_index, sq_dist] = search_stack[--stack_size];
if (sq_dist > result.second) {
continue;
}

const KdTreeNodeGPU node = nodes[node_index];

// Leaf node
if (node.left == INVALID_NODE) {
for (NodeIndexType i = node.node_type.lr.first; i < node.node_type.lr.last; i++) {
const NodeIndexType pt_index = indices[i];
const float sq_dist = (points[pt_index] - query).squaredNorm();
if (sq_dist < result.second) {
result = {pt_index, sq_dist};
}
}
continue;
}

const float val = query[node.node_type.sub.axis];
const float diff = val - node.node_type.sub.thresh;
const float cut_sq_dist = diff * diff;

int best_child;
int other_child;

if (diff < 0.0f) {
best_child = node.left;
other_child = node.right;
} else {
best_child = node.right;
other_child = node.left;
}

if (stack_size > MAX_STACK_SIZE - 2) {
printf("kdtree stack overflow!!");
} else if (cut_sq_dist < result.second) {
search_stack[stack_size].first = other_child;
search_stack[stack_size++].second = cut_sq_dist;
}

search_stack[stack_size].first = best_child;
search_stack[stack_size++].second = 0.0f;
Comment on lines +53 to +61
Copy link

Copilot AI Feb 16, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Stack overflow handling is incomplete. When stack_size exceeds MAX_STACK_SIZE - 2, a warning is printed but execution continues, potentially writing beyond array bounds on lines 59-60. After printing the warning, the function should either return early with an error result or skip adding to the stack to prevent buffer overflow.

Copilot uses AI. Check for mistakes.
}

return result;
}

public:
const Eigen::Vector3f* __restrict__ points;
const std::uint32_t* __restrict__ indices;
const KdTreeNodeGPU* __restrict__ nodes;
};

} // namespace gtsam_points
Loading
Loading