-
Notifications
You must be signed in to change notification settings - Fork 78
kdtree_cuda #86
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
kdtree_cuda #86
Changes from 11 commits
e2826fc
56925ca
eef66d8
6e251c8
9c50910
87c594f
2cb203c
95407ca
8e45c43
d74f3df
4cfd41f
649f5cb
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,66 @@ | ||
| // SPDX-License-Identifier: MIT | ||
| // Copyright (c) 2025 Kenji Koide (k.koide@aist.go.jp) | ||
| #pragma once | ||
|
|
||
| #include <limits> | ||
| #include <cstdint> | ||
| #include <gtsam_points/types/point_cloud.hpp> | ||
| #include <gtsam_points/ann/small_kdtree.hpp> | ||
|
|
||
| struct CUstream_st; | ||
|
|
||
| namespace gtsam_points { | ||
|
|
||
| struct KdTreeNodeGPU { | ||
| NodeIndexType left = INVALID_NODE; ///< Left child node index. | ||
| NodeIndexType right = INVALID_NODE; ///< Right child node index. | ||
|
|
||
| union { | ||
| struct Leaf { | ||
| NodeIndexType first; ///< First point index in the leaf node. | ||
| NodeIndexType last; ///< Last point index in the leaf node. | ||
| } lr; ///< Leaf node. | ||
| struct NonLeaf { | ||
| NodeIndexType axis; ///< Projection axis. | ||
| float thresh; ///< Threshold value. | ||
| } sub; ///< Non-leaf node. | ||
| } node_type; | ||
| }; | ||
|
|
||
| class KdTreeGPU { | ||
| public: | ||
| using Ptr = std::shared_ptr<KdTreeGPU>; | ||
| using ConstPtr = std::shared_ptr<const KdTreeGPU>; | ||
|
|
||
| KdTreeGPU(const PointCloud::ConstPtr& points, CUstream_st* stream = nullptr); | ||
| ~KdTreeGPU(); | ||
|
|
||
| void nearest_neighbor_search( | ||
| const Eigen::Vector3f* queries, | ||
| size_t num_queries, | ||
| std::uint32_t* nn_indices, | ||
| float* nn_sq_dists, | ||
| CUstream_st* stream = nullptr); | ||
|
|
||
| void nearest_neighbor_search_cpu( | ||
| const Eigen::Vector3f* h_queries, | ||
| size_t num_queries, | ||
| std::uint32_t* h_nn_indices, | ||
| float* h_nn_sq_dists, | ||
| CUstream_st* stream = nullptr); | ||
|
|
||
| /// @brief Get the GPU pointer to the point indices | ||
| const std::uint32_t* get_indices() const { return indices; } | ||
|
|
||
| /// @brief Get the GPU pointer to the KdTree nodes | ||
| const KdTreeNodeGPU* get_nodes() const { return nodes; } | ||
|
|
||
| private: | ||
| PointCloud::ConstPtr points; | ||
| size_t num_indices; | ||
| size_t num_nodes; | ||
| std::uint32_t* indices; | ||
| KdTreeNodeGPU* nodes; | ||
| }; | ||
|
|
||
| } // namespace gtsam_points |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,23 @@ | ||
| // SPDX-License-Identifier: MIT | ||
| // Copyright (c) 2025 Kenji Koide (k.koide@aist.go.jp) | ||
|
|
||
| #pragma once | ||
|
|
||
| #ifdef __CUDACC__ | ||
| #define GTSAM_POINTS_HOST_DEVICE __host__ __device__ | ||
| #else | ||
| #define GTSAM_POINTS_HOST_DEVICE | ||
| #endif | ||
|
|
||
| namespace gtsam_points { | ||
|
|
||
| /// @brief A pair of source and target point indices representing a correspondence. | ||
| struct Correspondence { | ||
| GTSAM_POINTS_HOST_DEVICE Correspondence() : source_idx(-1), target_idx(-1) {} | ||
| GTSAM_POINTS_HOST_DEVICE Correspondence(int source_idx, int target_idx) : source_idx(source_idx), target_idx(target_idx) {} | ||
|
|
||
| int source_idx; | ||
| int target_idx; | ||
| }; | ||
|
|
||
| } // namespace gtsam_points |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,134 @@ | ||
| // SPDX-License-Identifier: MIT | ||
| // Copyright (c) 2025 Kenji Koide (k.koide@aist.go.jp) | ||
|
|
||
| #pragma once | ||
|
|
||
| #include <Eigen/Core> | ||
| #include <thrust/device_vector.h> | ||
|
|
||
| #include <gtsam_points/cuda/kernels/correspondence.hpp> | ||
| #include <gtsam_points/cuda/kernels/pose.cuh> | ||
| #include <gtsam_points/cuda/kernels/linearized_system.cuh> | ||
|
|
||
| namespace gtsam_points { | ||
|
|
||
| struct gicp_derivatives_kernel { | ||
| gicp_derivatives_kernel( | ||
| const Eigen::Isometry3f* linearization_point_ptr, | ||
| const Eigen::Vector3f* target_means, | ||
| const Eigen::Matrix3f* target_covs, | ||
| const Eigen::Vector3f* source_means, | ||
| const Eigen::Matrix3f* source_covs) | ||
| : linearization_point_ptr(linearization_point_ptr), | ||
| target_means_ptr(target_means), | ||
| target_covs_ptr(target_covs), | ||
| source_means_ptr(source_means), | ||
| source_covs_ptr(source_covs) {} | ||
|
|
||
| __device__ LinearizedSystem6 operator()(const Correspondence& source_target_correspondence) const { | ||
| const int source_idx = source_target_correspondence.source_idx; | ||
| const int target_idx = source_target_correspondence.target_idx; | ||
| if (source_idx < 0 || target_idx < 0) { | ||
| return LinearizedSystem6::zero(); | ||
| } | ||
|
|
||
| const Eigen::Isometry3f& x = *linearization_point_ptr; | ||
| const Eigen::Matrix3f R = x.linear(); | ||
| const Eigen::Vector3f t = x.translation(); | ||
|
|
||
| const Eigen::Vector3f mean_A = source_means_ptr[source_idx]; | ||
| const Eigen::Matrix3f cov_A = source_covs_ptr[source_idx]; | ||
| const Eigen::Vector3f transed_mean_A = R * mean_A + t; | ||
|
|
||
| const Eigen::Vector3f mean_B = target_means_ptr[target_idx]; | ||
| const Eigen::Matrix3f cov_B = target_covs_ptr[target_idx]; | ||
|
|
||
| const Eigen::Matrix3f RCR = (R * cov_A * R.transpose()); | ||
| const Eigen::Matrix3f RCR_inv = (cov_B + RCR).inverse(); | ||
| Eigen::Vector3f error = mean_B - transed_mean_A; | ||
|
|
||
| Eigen::Matrix<float, 3, 6> J_target; | ||
| J_target.block<3, 3>(0, 0) = -skew_symmetric(transed_mean_A); | ||
| J_target.block<3, 3>(0, 3) = Eigen::Matrix3f::Identity(); | ||
|
|
||
| Eigen::Matrix<float, 3, 6> J_source; | ||
| J_source.block<3, 3>(0, 0) = R * skew_symmetric(mean_A); | ||
| J_source.block<3, 3>(0, 3) = -R; | ||
|
|
||
| Eigen::Matrix<float, 6, 3> J_target_RCR_inv = J_target.transpose() * RCR_inv; | ||
| Eigen::Matrix<float, 6, 3> J_source_RCR_inv = J_source.transpose() * RCR_inv; | ||
|
|
||
| LinearizedSystem6 linearized; | ||
| linearized.num_inliers = 1; | ||
| linearized.error = error.transpose() * RCR_inv * error; | ||
| linearized.H_target = J_target_RCR_inv * J_target; | ||
| linearized.H_source = J_source_RCR_inv * J_source; | ||
| linearized.H_target_source = J_target_RCR_inv * J_source; | ||
| linearized.b_target = J_target_RCR_inv * error; | ||
| linearized.b_source = J_source_RCR_inv * error; | ||
|
|
||
| return linearized; | ||
| } | ||
|
|
||
| const Eigen::Isometry3f* linearization_point_ptr; | ||
|
|
||
| const Eigen::Vector3f* target_means_ptr; | ||
| const Eigen::Matrix3f* target_covs_ptr; | ||
|
|
||
| const Eigen::Vector3f* source_means_ptr; | ||
| const Eigen::Matrix3f* source_covs_ptr; | ||
| }; | ||
|
|
||
| struct gicp_error_kernel { | ||
| gicp_error_kernel( | ||
| const Eigen::Isometry3f* linearization_point_ptr, | ||
| const Eigen::Isometry3f* evaluation_point_ptr, | ||
| const Eigen::Vector3f* target_means, | ||
| const Eigen::Matrix3f* target_covs, | ||
| const Eigen::Vector3f* source_means, | ||
| const Eigen::Matrix3f* source_covs) | ||
| : linearization_point_ptr(linearization_point_ptr), | ||
| evaluation_point_ptr(evaluation_point_ptr), | ||
| target_means_ptr(target_means), | ||
| target_covs_ptr(target_covs), | ||
| source_means_ptr(source_means), | ||
| source_covs_ptr(source_covs) {} | ||
|
|
||
| __device__ float operator()(const Correspondence& source_target_correspondence) const { | ||
| const int source_idx = source_target_correspondence.source_idx; | ||
| const int target_idx = source_target_correspondence.target_idx; | ||
| if (source_idx < 0 || target_idx < 0) { | ||
| return 0.0f; | ||
| } | ||
|
|
||
| const Eigen::Isometry3f& xl = *linearization_point_ptr; | ||
| const Eigen::Matrix3f Rl = xl.linear(); | ||
|
|
||
| const Eigen::Isometry3f& xe = *evaluation_point_ptr; | ||
| const Eigen::Matrix3f Re = xe.linear(); | ||
| const Eigen::Vector3f te = xe.translation(); | ||
|
|
||
| const Eigen::Vector3f mean_A = source_means_ptr[source_idx]; | ||
| const Eigen::Matrix3f cov_A = source_covs_ptr[source_idx]; | ||
| const Eigen::Vector3f transed_mean_A = Re * mean_A + te; | ||
|
|
||
| const Eigen::Vector3f mean_B = target_means_ptr[target_idx]; | ||
| const Eigen::Matrix3f cov_B = target_covs_ptr[target_idx]; | ||
|
|
||
| const Eigen::Matrix3f RCR = (Rl * cov_A * Rl.transpose()); | ||
| const Eigen::Matrix3f RCR_inv = (cov_B + RCR).inverse(); | ||
| Eigen::Vector3f error = mean_B - transed_mean_A; | ||
|
|
||
| return error.transpose() * RCR_inv * error; | ||
| } | ||
|
|
||
| const Eigen::Isometry3f* linearization_point_ptr; | ||
| const Eigen::Isometry3f* evaluation_point_ptr; | ||
|
|
||
| const Eigen::Vector3f* target_means_ptr; | ||
| const Eigen::Matrix3f* target_covs_ptr; | ||
| const Eigen::Vector3f* source_means_ptr; | ||
| const Eigen::Matrix3f* source_covs_ptr; | ||
| }; | ||
|
|
||
| } // namespace gtsam_points |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,72 @@ | ||
| // SPDX-License-Identifier: MIT | ||
| // Copyright (c) 2025 Kenji Koide (k.koide@aist.go.jp) | ||
| #include <gtsam_points/ann/kdtree_gpu.hpp> | ||
|
|
||
| namespace gtsam_points { | ||
|
|
||
| struct kdtree_nearest_neighbor_search_kernel { | ||
| public: | ||
| static constexpr int MAX_STACK_SIZE = 20; | ||
|
|
||
| __device__ thrust::pair<NodeIndexType, float> operator()(const Eigen::Vector3f query) const { | ||
| thrust::pair<NodeIndexType, float> result = {INVALID_NODE, std::numeric_limits<float>::max()}; | ||
|
|
||
| int stack_size = 1; | ||
| thrust::pair<int, float> search_stack[MAX_STACK_SIZE] = {{0, 0.0f}}; | ||
|
|
||
| while (stack_size > 0) { | ||
| const auto [node_index, sq_dist] = search_stack[--stack_size]; | ||
| if (sq_dist > result.second) { | ||
| continue; | ||
| } | ||
|
|
||
| const KdTreeNodeGPU node = nodes[node_index]; | ||
|
|
||
| // Leaf node | ||
| if (node.left == INVALID_NODE) { | ||
| for (NodeIndexType i = node.node_type.lr.first; i < node.node_type.lr.last; i++) { | ||
| const NodeIndexType pt_index = indices[i]; | ||
| const float sq_dist = (points[pt_index] - query).squaredNorm(); | ||
| if (sq_dist < result.second) { | ||
| result = {pt_index, sq_dist}; | ||
| } | ||
| } | ||
| continue; | ||
| } | ||
|
|
||
| const float val = query[node.node_type.sub.axis]; | ||
| const float diff = val - node.node_type.sub.thresh; | ||
| const float cut_sq_dist = diff * diff; | ||
|
|
||
| int best_child; | ||
| int other_child; | ||
|
|
||
| if (diff < 0.0f) { | ||
| best_child = node.left; | ||
| other_child = node.right; | ||
| } else { | ||
| best_child = node.right; | ||
| other_child = node.left; | ||
| } | ||
|
|
||
| if (stack_size > MAX_STACK_SIZE - 2) { | ||
| printf("kdtree stack overflow!!"); | ||
| } else if (cut_sq_dist < result.second) { | ||
| search_stack[stack_size].first = other_child; | ||
| search_stack[stack_size++].second = cut_sq_dist; | ||
| } | ||
|
|
||
| search_stack[stack_size].first = best_child; | ||
| search_stack[stack_size++].second = 0.0f; | ||
|
Comment on lines
+53
to
+61
|
||
| } | ||
|
|
||
| return result; | ||
| } | ||
|
|
||
| public: | ||
| const Eigen::Vector3f* __restrict__ points; | ||
| const std::uint32_t* __restrict__ indices; | ||
| const KdTreeNodeGPU* __restrict__ nodes; | ||
| }; | ||
|
|
||
| } // namespace gtsam_points | ||
Uh oh!
There was an error while loading. Please reload this page.