Skip to content

Commit 50069ad

Browse files
committed
Fix and improve NDT parallelization
- Added the switch to select different search methods to computeHessian() as well (would not make sense to use different search methods in computeDerivatives() and computeHessian()) - Added overloads to functions computePointDerivatives(), updateDerivatives(), and updateHessian() to be able to use local variables instead of the global variables point_jacobian_ and point_hessian_ - Change the parallelized region in computeDerivatives() by using the new function overloads, and by introducing a custom reduction (goal: compatibility with OpenMP 2.0). Synchronization overhead between threads is minimal, with only two critical regions at the end to accumulate, score, gradient, and hessian - Tested with two larger point clouds, and there is indeed a nice speed-up when increasing the number of threads (however not a perfect 1-to-1 speedup because there are still other functions in NDT that are not parallelized)
1 parent 6c0acc8 commit 50069ad

2 files changed

Lines changed: 224 additions & 80 deletions

File tree

registration/include/pcl/registration/impl/ndt.hpp

Lines changed: 158 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -221,53 +221,78 @@ NormalDistributionsTransform<PointSource, PointTarget, Scalar>::computeDerivativ
221221
std::vector<TargetGridLeafConstPtr> neighborhood;
222222
std::vector<float> distances;
223223

224-
#pragma omp parallel for num_threads(threads_) schedule(dynamic, 32) \
225-
shared(score, score_gradient, hessian) firstprivate(neighborhood, distances)
226-
// Update gradient and hessian for each point, line 17 in Algorithm 2 [Magnusson 2009]
227-
for (std::ptrdiff_t idx = 0; idx < static_cast<std::ptrdiff_t>(input_->size());
228-
idx++) {
229-
// Transformed Point
230-
const auto& x_trans_pt = trans_cloud[idx];
224+
Eigen::Matrix<double, 3, 6> point_jacobian = Eigen::Matrix<double, 3, 6>::Zero();
225+
point_jacobian_.block<3, 3>(0, 0).setIdentity();
226+
Eigen::Matrix<double, 18, 6> point_hessian = Eigen::Matrix<double, 18, 6>::Zero();
227+
228+
#pragma omp parallel num_threads(threads_) default(none) shared(score_gradient, hessian, trans_cloud, compute_hessian) firstprivate(neighborhood, distances, point_jacobian, point_hessian), reduction(+: score)
229+
{
230+
// workaround for a user-defined reduction on score_gradient and hessian, that still
231+
// works with OpenMP 2.0
232+
Eigen::Matrix<double, 6, 1> score_gradient_private =
233+
Eigen::Matrix<double, 6, 1>::Zero();
234+
Eigen::Matrix<double, 6, 6> hessian_private = Eigen::Matrix<double, 6, 6>::Zero();
235+
#pragma omp for schedule(dynamic, 32)
236+
// Update gradient and hessian for each point, line 17 in Algorithm 2 [Magnusson
237+
// 2009]
238+
for (std::ptrdiff_t idx = 0; idx < static_cast<std::ptrdiff_t>(input_->size());
239+
idx++) {
240+
// Transformed Point
241+
const auto& x_trans_pt = trans_cloud[idx];
242+
243+
// Find neighbors with different search method
244+
neighborhood.clear();
245+
switch (search_method_) {
246+
case NeighborSearchMethod::RADIUS:
247+
// Radius search has been experimentally faster than direct neighbor checking.
248+
target_cells_.radiusSearch(x_trans_pt, resolution_, neighborhood, distances);
249+
break;
250+
case NeighborSearchMethod::DIRECT26:
251+
target_cells_.getNeighborhoodAtPoint(x_trans_pt, neighborhood);
252+
break;
253+
case NeighborSearchMethod::DIRECT7:
254+
target_cells_.getFaceNeighborsAtPoint(x_trans_pt, neighborhood);
255+
break;
256+
case NeighborSearchMethod::DIRECT1:
257+
target_cells_.getVoxelAtPoint(x_trans_pt, neighborhood);
258+
break;
259+
}
231260

232-
// Find neighbors with different search method
233-
neighborhood.clear();
234-
switch (search_method_) {
235-
case NeighborSearchMethod::KDTREE:
236-
// Radius search has been experimentally faster than direct neighbor checking.
237-
target_cells_.radiusSearch(x_trans_pt, resolution_, neighborhood, distances);
238-
break;
239-
case NeighborSearchMethod::DIRECT26:
240-
target_cells_.getNeighborhoodAtPoint(x_trans_pt, neighborhood);
241-
break;
242-
case NeighborSearchMethod::DIRECT7:
243-
target_cells_.getFaceNeighborsAtPoint(x_trans_pt, neighborhood);
244-
break;
245-
case NeighborSearchMethod::DIRECT1:
246-
target_cells_.getVoxelAtPoint(x_trans_pt, neighborhood);
247-
break;
261+
// Original Point
262+
const Eigen::Vector3d x = (*input_)[idx].getVector3fMap().template cast<double>();
263+
const Eigen::Vector3d x_trans_position =
264+
x_trans_pt.getVector3fMap().template cast<double>();
265+
266+
for (const auto& cell : neighborhood) {
267+
// Denorm point, x_k' in Equations 6.12 and 6.13 [Magnusson 2009]
268+
const Eigen::Vector3d x_trans = x_trans_position - cell->getMean();
269+
// Inverse Covariance of Occupied Voxel
270+
// Uses precomputed covariance for speed.
271+
const Eigen::Matrix3d c_inv = cell->getInverseCov();
272+
273+
// Compute derivative of transform function w.r.t. transform vector, J_E and H_E
274+
// in Equations 6.18 and 6.20 [Magnusson 2009]
275+
computePointDerivatives(
276+
x, point_jacobian, compute_hessian ? &point_hessian : nullptr);
277+
// Update score, gradient and hessian, lines 19-21 in Algorithm 2, according to
278+
// Equations 6.10, 6.12 and 6.13, respectively [Magnusson 2009]
279+
score += updateDerivatives(score_gradient_private,
280+
hessian_private,
281+
x_trans,
282+
c_inv,
283+
point_jacobian,
284+
compute_hessian ? &point_hessian : nullptr);
285+
}
248286
}
249-
250-
// Original Point
251-
const Eigen::Vector3d x = (*input_)[idx].getVector3fMap().template cast<double>();
252-
const Eigen::Vector3d x_trans_position =
253-
x_trans_pt.getVector3fMap().template cast<double>();
254-
255-
for (const auto& cell : neighborhood) {
256-
// Denorm point, x_k' in Equations 6.12 and 6.13 [Magnusson 2009]
257-
const Eigen::Vector3d x_trans = x_trans_position - cell->getMean();
258-
// Inverse Covariance of Occupied Voxel
259-
// Uses precomputed covariance for speed.
260-
const Eigen::Matrix3d c_inv = cell->getInverseCov();
261-
262-
// Compute derivative of transform function w.r.t. transform vector, J_E and H_E
263-
// in Equations 6.18 and 6.20 [Magnusson 2009]
264-
computePointDerivatives(x);
265-
// Update score, gradient and hessian, lines 19-21 in Algorithm 2, according to
266-
// Equations 6.10, 6.12 and 6.13, respectively [Magnusson 2009]
267-
score +=
268-
updateDerivatives(score_gradient, hessian, x_trans, c_inv, compute_hessian);
287+
#pragma omp critical(accum_score_gradient)
288+
{
289+
score_gradient += score_gradient_private;
269290
}
270-
}
291+
#pragma omp critical(accum_hessian)
292+
{
293+
hessian += hessian_private;
294+
}
295+
} // end of omp parallel
271296
return score;
272297
}
273298

@@ -359,23 +384,25 @@ NormalDistributionsTransform<PointSource, PointTarget, Scalar>::computeAngleDeri
359384
template <typename PointSource, typename PointTarget, typename Scalar>
360385
void
361386
NormalDistributionsTransform<PointSource, PointTarget, Scalar>::computePointDerivatives(
362-
const Eigen::Vector3d& x, bool compute_hessian)
387+
const Eigen::Vector3d& x,
388+
Eigen::Matrix<double, 3, 6>& point_jacobian,
389+
Eigen::Matrix<double, 18, 6>* point_hessian) const
363390
{
364391
// Calculate first derivative of Transformation Equation 6.17 w.r.t. transform vector.
365392
// Derivative w.r.t. ith element of transform vector corresponds to column i,
366393
// Equation 6.18 and 6.19 [Magnusson 2009]
367394
Eigen::Matrix<double, 8, 1> point_angular_jacobian =
368395
angular_jacobian_ * Eigen::Vector4d(x[0], x[1], x[2], 0.0);
369-
point_jacobian_(1, 3) = point_angular_jacobian[0];
370-
point_jacobian_(2, 3) = point_angular_jacobian[1];
371-
point_jacobian_(0, 4) = point_angular_jacobian[2];
372-
point_jacobian_(1, 4) = point_angular_jacobian[3];
373-
point_jacobian_(2, 4) = point_angular_jacobian[4];
374-
point_jacobian_(0, 5) = point_angular_jacobian[5];
375-
point_jacobian_(1, 5) = point_angular_jacobian[6];
376-
point_jacobian_(2, 5) = point_angular_jacobian[7];
377-
378-
if (compute_hessian) {
396+
point_jacobian(1, 3) = point_angular_jacobian[0];
397+
point_jacobian(2, 3) = point_angular_jacobian[1];
398+
point_jacobian(0, 4) = point_angular_jacobian[2];
399+
point_jacobian(1, 4) = point_angular_jacobian[3];
400+
point_jacobian(2, 4) = point_angular_jacobian[4];
401+
point_jacobian(0, 5) = point_angular_jacobian[5];
402+
point_jacobian(1, 5) = point_angular_jacobian[6];
403+
point_jacobian(2, 5) = point_angular_jacobian[7];
404+
405+
if (point_hessian) {
379406
Eigen::Matrix<double, 15, 1> point_angular_hessian =
380407
angular_hessian_ * Eigen::Vector4d(x[0], x[1], x[2], 0.0);
381408

@@ -390,26 +417,36 @@ NormalDistributionsTransform<PointSource, PointTarget, Scalar>::computePointDeri
390417
// Calculate second derivative of Transformation Equation 6.17 w.r.t. transform
391418
// vector. Derivative w.r.t. ith and jth elements of transform vector corresponds to
392419
// the 3x1 block matrix starting at (3i,j), Equation 6.20 and 6.21 [Magnusson 2009]
393-
point_hessian_.block<3, 1>(9, 3) = a;
394-
point_hessian_.block<3, 1>(12, 3) = b;
395-
point_hessian_.block<3, 1>(15, 3) = c;
396-
point_hessian_.block<3, 1>(9, 4) = b;
397-
point_hessian_.block<3, 1>(12, 4) = d;
398-
point_hessian_.block<3, 1>(15, 4) = e;
399-
point_hessian_.block<3, 1>(9, 5) = c;
400-
point_hessian_.block<3, 1>(12, 5) = e;
401-
point_hessian_.block<3, 1>(15, 5) = f;
420+
point_hessian->block<3, 1>(9, 3) = a;
421+
point_hessian->block<3, 1>(12, 3) = b;
422+
point_hessian->block<3, 1>(15, 3) = c;
423+
point_hessian->block<3, 1>(9, 4) = b;
424+
point_hessian->block<3, 1>(12, 4) = d;
425+
point_hessian->block<3, 1>(15, 4) = e;
426+
point_hessian->block<3, 1>(9, 5) = c;
427+
point_hessian->block<3, 1>(12, 5) = e;
428+
point_hessian->block<3, 1>(15, 5) = f;
402429
}
403430
}
404431

432+
template <typename PointSource, typename PointTarget, typename Scalar>
433+
void
434+
NormalDistributionsTransform<PointSource, PointTarget, Scalar>::computePointDerivatives(
435+
const Eigen::Vector3d& x, bool compute_hessian)
436+
{
437+
computePointDerivatives(
438+
x, point_jacobian_, compute_hessian ? &point_hessian_ : nullptr);
439+
}
440+
405441
template <typename PointSource, typename PointTarget, typename Scalar>
406442
double
407443
NormalDistributionsTransform<PointSource, PointTarget, Scalar>::updateDerivatives(
408444
Eigen::Matrix<double, 6, 1>& score_gradient,
409445
Eigen::Matrix<double, 6, 6>& hessian,
410446
const Eigen::Vector3d& x_trans,
411447
const Eigen::Matrix3d& c_inv,
412-
bool compute_hessian) const
448+
const Eigen::Matrix<double, 3, 6>& point_jacobian,
449+
const Eigen::Matrix<double, 18, 6>* point_hessian) const
413450
{
414451
// e^(-d_2/2 * (x_k - mu_k)^T Sigma_k^-1 (x_k - mu_k)) Equation 6.9 [Magnusson 2009]
415452
double e_x_cov_x = std::exp(-gauss_d2_ * x_trans.dot(c_inv * x_trans) / 2);
@@ -430,26 +467,43 @@ NormalDistributionsTransform<PointSource, PointTarget, Scalar>::updateDerivative
430467
for (int i = 0; i < 6; i++) {
431468
// Sigma_k^-1 d(T(x,p))/dpi, Reusable portion of Equation 6.12 and 6.13 [Magnusson
432469
// 2009]
433-
const Eigen::Vector3d cov_dxd_pi = c_inv * point_jacobian_.col(i);
470+
const Eigen::Vector3d cov_dxd_pi = c_inv * point_jacobian.col(i);
434471

435472
// Update gradient, Equation 6.12 [Magnusson 2009]
436473
score_gradient(i) += x_trans.dot(cov_dxd_pi) * e_x_cov_x;
437474

438-
if (compute_hessian) {
475+
if (point_hessian) {
439476
for (Eigen::Index j = 0; j < hessian.cols(); j++) {
440477
// Update hessian, Equation 6.13 [Magnusson 2009]
441478
hessian(i, j) +=
442479
e_x_cov_x * (-gauss_d2_ * x_trans.dot(cov_dxd_pi) *
443-
x_trans.dot(c_inv * point_jacobian_.col(j)) +
444-
x_trans.dot(c_inv * point_hessian_.block<3, 1>(3 * i, j)) +
445-
point_jacobian_.col(j).dot(cov_dxd_pi));
480+
x_trans.dot(c_inv * point_jacobian.col(j)) +
481+
x_trans.dot(c_inv * point_hessian->block<3, 1>(3 * i, j)) +
482+
point_jacobian.col(j).dot(cov_dxd_pi));
446483
}
447484
}
448485
}
449486

450487
return score_inc;
451488
}
452489

490+
template <typename PointSource, typename PointTarget, typename Scalar>
491+
double
492+
NormalDistributionsTransform<PointSource, PointTarget, Scalar>::updateDerivatives(
493+
Eigen::Matrix<double, 6, 1>& score_gradient,
494+
Eigen::Matrix<double, 6, 6>& hessian,
495+
const Eigen::Vector3d& x_trans,
496+
const Eigen::Matrix3d& c_inv,
497+
bool compute_hessian) const
498+
{
499+
return updateDerivatives(score_gradient,
500+
hessian,
501+
x_trans,
502+
c_inv,
503+
point_jacobian_,
504+
compute_hessian ? &point_hessian_ : nullptr);
505+
}
506+
453507
template <typename PointSource, typename PointTarget, typename Scalar>
454508
void
455509
NormalDistributionsTransform<PointSource, PointTarget, Scalar>::computeHessian(
@@ -464,11 +518,25 @@ NormalDistributionsTransform<PointSource, PointTarget, Scalar>::computeHessian(
464518
// Transformed Point
465519
const auto& x_trans_pt = trans_cloud[idx];
466520

467-
// Find neighbors (Radius search has been experimentally faster than direct neighbor
468-
// checking.
469521
std::vector<TargetGridLeafConstPtr> neighborhood;
470522
std::vector<float> distances;
471-
target_cells_.radiusSearch(x_trans_pt, resolution_, neighborhood, distances);
523+
524+
// Find neighbors with different search methods
525+
switch (search_method_) {
526+
case NeighborSearchMethod::RADIUS:
527+
// Radius search has been experimentally faster than direct neighbor checking.
528+
target_cells_.radiusSearch(x_trans_pt, resolution_, neighborhood, distances);
529+
break;
530+
case NeighborSearchMethod::DIRECT26:
531+
target_cells_.getNeighborhoodAtPoint(x_trans_pt, neighborhood);
532+
break;
533+
case NeighborSearchMethod::DIRECT7:
534+
target_cells_.getFaceNeighborsAtPoint(x_trans_pt, neighborhood);
535+
break;
536+
case NeighborSearchMethod::DIRECT1:
537+
target_cells_.getVoxelAtPoint(x_trans_pt, neighborhood);
538+
break;
539+
}
472540

473541
for (const auto& cell : neighborhood) {
474542
// Original Point
@@ -497,7 +565,9 @@ void
497565
NormalDistributionsTransform<PointSource, PointTarget, Scalar>::updateHessian(
498566
Eigen::Matrix<double, 6, 6>& hessian,
499567
const Eigen::Vector3d& x_trans,
500-
const Eigen::Matrix3d& c_inv) const
568+
const Eigen::Matrix3d& c_inv,
569+
const Eigen::Matrix<double, 3, 6>& point_jacobian,
570+
const Eigen::Matrix<double, 18, 6>& point_hessian) const
501571
{
502572
// e^(-d_2/2 * (x_k - mu_k)^T Sigma_k^-1 (x_k - mu_k)) Equation 6.9 [Magnusson 2009]
503573
double e_x_cov_x =
@@ -514,19 +584,29 @@ NormalDistributionsTransform<PointSource, PointTarget, Scalar>::updateHessian(
514584
for (int i = 0; i < 6; i++) {
515585
// Sigma_k^-1 d(T(x,p))/dpi, Reusable portion of Equation 6.12 and 6.13 [Magnusson
516586
// 2009]
517-
const Eigen::Vector3d cov_dxd_pi = c_inv * point_jacobian_.col(i);
587+
const Eigen::Vector3d cov_dxd_pi = c_inv * point_jacobian.col(i);
518588

519589
for (Eigen::Index j = 0; j < hessian.cols(); j++) {
520590
// Update hessian, Equation 6.13 [Magnusson 2009]
521591
hessian(i, j) +=
522592
e_x_cov_x * (-gauss_d2_ * x_trans.dot(cov_dxd_pi) *
523-
x_trans.dot(c_inv * point_jacobian_.col(j)) +
524-
x_trans.dot(c_inv * point_hessian_.block<3, 1>(3 * i, j)) +
525-
point_jacobian_.col(j).dot(cov_dxd_pi));
593+
x_trans.dot(c_inv * point_jacobian.col(j)) +
594+
x_trans.dot(c_inv * point_hessian.block<3, 1>(3 * i, j)) +
595+
point_jacobian.col(j).dot(cov_dxd_pi));
526596
}
527597
}
528598
}
529599

600+
template <typename PointSource, typename PointTarget, typename Scalar>
601+
void
602+
NormalDistributionsTransform<PointSource, PointTarget, Scalar>::updateHessian(
603+
Eigen::Matrix<double, 6, 6>& hessian,
604+
const Eigen::Vector3d& x_trans,
605+
const Eigen::Matrix3d& c_inv) const
606+
{
607+
updateHessian(hessian, x_trans, c_inv, point_jacobian_, point_hessian_);
608+
}
609+
530610
template <typename PointSource, typename PointTarget, typename Scalar>
531611
bool
532612
NormalDistributionsTransform<PointSource, PointTarget, Scalar>::updateIntervalMT(

0 commit comments

Comments
 (0)