Merge pull request #417 from chibai/master

christian-rauch · web-flow · commit 31b29af3cd59 · 2026-01-08T21:11:33.000+01:00
add python wrap for estimate_tag_pose
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -173,7 +173,7 @@ if(BUILD_PYTHON_WRAPPER AND Python3_Development_FOUND AND Python3_NumPy_FOUND)
 
     include(CMake/vtkEncodeString.cmake)
 
-    foreach(X IN ITEMS detect py_type)
+    foreach(X IN ITEMS detect py_type estimate_tag_pose)
         vtk_encode_string(
             INPUT ${CMAKE_CURRENT_SOURCE_DIR}/apriltag_${X}.docstring
             NAME apriltag_${X}_docstring
@@ -182,6 +182,7 @@ if(BUILD_PYTHON_WRAPPER AND Python3_Development_FOUND AND Python3_NumPy_FOUND)
     add_custom_target(apriltag_py_docstrings DEPENDS
         ${PROJECT_BINARY_DIR}/apriltag_detect_docstring.h
         ${PROJECT_BINARY_DIR}/apriltag_py_type_docstring.h
+        ${PROJECT_BINARY_DIR}/apriltag_estimate_tag_pose_docstring.h
     )
 
     # set the SOABI manually since renaming the library via OUTPUT_NAME does not work on MSVC
diff --git a/apriltag_detect.docstring b/apriltag_detect.docstring
@@ -60,3 +60,8 @@ a tuple containing the detections. Each detection is a dict with keys:
   of detection accuracy only for very small tags-- not effective for larger tags
   (where we could have sampled anywhere within a bit cell and still gotten a
   good detection.)
+
+- homography: A 3x3 homography matrix that describes the projection from an
+  "ideal" tag (with corners at (-1,1), (1,1), (1,-1), and (-1,-1)) to pixels
+  in the image. This matrix can be used to map points from the tag's coordinate
+  system to the image coordinate system, and is useful for pose estimation.
diff --git a/apriltag_estimate_tag_pose.docstring b/apriltag_estimate_tag_pose.docstring
@@ -0,0 +1,70 @@
+estimate_tag_pose(detection, tagsize, fx, fy, cx, cy) -> dict
+
+SYNOPSIS
+
+    import cv2
+    import numpy as np
+    from apriltag import apriltag
+
+    imagepath = '/tmp/tst.jpg'
+    image     = cv2.imread(imagepath, cv2.IMREAD_GRAYSCALE)
+    detector = apriltag("tag36h11")
+
+    detections = detector.detect(image)
+    if detections:
+        # Estimate pose for the first detected tag
+        # tagsize is the physical size of the tag in meters
+        # fx, fy are focal lengths in pixels
+        # cx, cy are principal point coordinates in pixels
+        pose = detector.estimate_tag_pose(detections[0],
+                                          0.16,      # tagsize: 16cm tag
+                                          600, 600,  # fx, fy: focal lengths
+                                          320, 240)  # cx, cy: principal point
+        print("Rotation matrix R:\n", pose['R'])
+        print("Translation vector t:", pose['t'])
+        print("Reprojection error:", pose['error'])
+
+DESCRIPTION
+
+The estimate_tag_pose() method estimates the 6-DOF pose (position and orientation)
+of a detected AprilTag in 3D space. This method requires the detection result from
+the detect() method, the physical size of the tag, and camera intrinsic parameters.
+
+The pose estimation uses the homography matrix from the detection result to
+compute the transformation from the tag's coordinate system to the camera's
+coordinate system.
+
+ARGUMENTS
+
+- detection: A dictionary returned by the detect() method. It must contain all
+  of the keys 'id', 'hamming', 'margin', 'center', 'lb-rb-rt-lt' and
+  'homography' (the 3x3 homography matrix).
+
+- tagsize: The physical side length of the AprilTag in meters. This is the
+  real-world size of the tag, which is needed to compute the scale of the pose.
+
+- fx: Focal length in the x direction in pixels. This is a camera intrinsic
+  parameter that describes how the camera projects 3D points to 2D image space.
+
+- fy: Focal length in the y direction in pixels. This is a camera intrinsic
+  parameter that describes how the camera projects 3D points to 2D image space.
+
+- cx: Principal point x coordinate in pixels. This is the x coordinate of the
+  optical center of the camera in the image.
+
+- cy: Principal point y coordinate in pixels. This is the y coordinate of the
+  optical center of the camera in the image.
+
+RETURNED VALUE
+
+Returns a dictionary containing:
+
+- 'R': 3x3 rotation matrix as a numpy array that represents the orientation
+  of the tag in the camera coordinate system.
+
+- 't': 3x1 translation vector as a numpy array (in meters) that represents the
+  position of the tag in the camera coordinate system.
+
+- 'error': The object-space error after the iteration process, i.e. the sum of
+  squared errors between the observed and the estimated points in object
+  space. A lower value indicates a better pose estimate.
diff --git a/apriltag_pywrap.c b/apriltag_pywrap.c
@@ -10,6 +10,7 @@
 #include <signal.h>
 
 #include "apriltag.h"
+#include "apriltag_pose.h"
 #include "tag36h10.h"
 #include "tag36h11.h"
 #include "tag25h9.h"
@@ -227,6 +228,7 @@ static PyObject* apriltag_detect(apriltag_py_t* self,
     PyObject*      result           = NULL;
     PyArrayObject* xy_c             = NULL;
     PyArrayObject* xy_lb_rb_rt_lt   = NULL;
+    PyArrayObject* homography       = NULL;
     PyArrayObject* image            = NULL;
     PyObject*      detections_tuple = NULL;
 
@@ -311,13 +313,32 @@ static PyObject* apriltag_detect(apriltag_py_t* self,
             *(double*)PyArray_GETPTR2(xy_lb_rb_rt_lt, j, 1) = det->p[j][1];
         }
 
+        // Add homography matrix (3x3)
+        homography = (PyArrayObject*)PyArray_SimpleNew(2, ((npy_intp[]){3,3}), NPY_FLOAT64);
+        if(homography == NULL)
+        {
+            Py_DECREF(xy_c);
+            Py_DECREF(xy_lb_rb_rt_lt);
+            PyErr_SetString(PyExc_RuntimeError, "Could not allocate homography array");
+            goto done;
+        }
+
+        for(int j=0; j<3; j++)
+        {
+            for(int k=0; k<3; k++)
+            {
+                *(double*)PyArray_GETPTR2(homography, j, k) = MATD_EL(det->H, j, k);
+            }
+        }
+
         PyTuple_SET_ITEM(detections_tuple, i,
-                         Py_BuildValue("{s:i,s:f,s:i,s:N,s:N}",
+                         Py_BuildValue("{s:i,s:f,s:i,s:N,s:N,s:N}",
                                        "hamming", det->hamming,
                                        "margin",  det->decision_margin,
                                        "id",      det->id,
                                        "center",  xy_c,
-                                       "lb-rb-rt-lt", xy_lb_rb_rt_lt));
+                                       "lb-rb-rt-lt", xy_lb_rb_rt_lt,
+                                       "homography", homography));
         xy_c           = NULL;
         xy_lb_rb_rt_lt = NULL;
     }
@@ -338,12 +359,214 @@ static PyObject* apriltag_detect(apriltag_py_t* self,
     return result;
 }
 
+/* Convert one detection-dict entry into a C-contiguous float64 array and
+ * verify its shape. `ndim` must be 1 or 2; `cols` is ignored when it is 1.
+ * Returns a new reference, or NULL with a Python exception set. */
+static PyArrayObject* detection_field_as_array(PyObject* field,
+                                               const char* name,
+                                               int ndim,
+                                               npy_intp rows,
+                                               npy_intp cols)
+{
+    PyArrayObject* arr =
+        (PyArrayObject*)PyArray_FROM_OTF(field, NPY_FLOAT64, NPY_ARRAY_IN_ARRAY);
+    if(arr == NULL)
+        return NULL;
+
+    // NDIM is tested first so PyArray_DIM is never asked for a missing axis
+    if(PyArray_NDIM(arr) != ndim ||
+       PyArray_DIM(arr, 0) != rows ||
+       (ndim == 2 && PyArray_DIM(arr, 1) != cols))
+    {
+        PyErr_Format(PyExc_ValueError,
+                     "Detection field '%s' has an unexpected shape", name);
+        Py_DECREF(arr);
+        return NULL;
+    }
+
+    return arr;
+}
+
+/* Python method estimate_tag_pose(detection, tagsize, fx, fy, cx, cy).
+ *
+ * Rebuilds an apriltag_detection_t from a detection dict, runs
+ * estimate_tag_pose() on it and returns a new dict with the keys 'R'
+ * (3x3 array), 't' (3x1 array) and 'error' (float).
+ *
+ * The 'center', 'lb-rb-rt-lt' and 'homography' entries are converted to
+ * float64 arrays and shape-checked before they are read, so malformed input
+ * raises a Python exception instead of reading out of bounds.
+ * Returns NULL with a Python exception set on failure. */
+static PyObject* apriltag_estimate_tag_pose(apriltag_py_t* self,
+                                             PyObject* args)
+{
+    PyObject*      result           = NULL;
+    PyObject*      detection_dict   = NULL;
+    PyArrayObject* center_array     = NULL;
+    PyArrayObject* corners_array    = NULL;
+    PyArrayObject* homography_array = NULL;
+    PyArrayObject* R_array          = NULL;
+    PyArrayObject* t_array          = NULL;
+    matd_t*        H_matrix         = NULL;
+    double tagsize, fx, fy, cx, cy;
+    double error;
+
+    apriltag_detection_t      det;
+    apriltag_detection_info_t info;
+    apriltag_pose_t           pose;
+
+    // The cleanup path frees pose.R / pose.t when they are non-NULL, and it
+    // can now be reached before estimate_tag_pose() has filled them in
+    pose.R = NULL;
+    pose.t = NULL;
+
+    if(!PyArg_ParseTuple(args, "Oddddd",
+                         &detection_dict,
+                         &tagsize,
+                         &fx, &fy, &cx, &cy))
+        return NULL;
+
+    if(!PyDict_Check(detection_dict))
+    {
+        PyErr_SetString(PyExc_TypeError, "First argument must be a detection dictionary");
+        return NULL;
+    }
+
+    // Look up the detection fields (borrowed references, NULL when absent)
+    PyObject* py_id = PyDict_GetItemString(detection_dict, "id");
+    PyObject* py_hamming = PyDict_GetItemString(detection_dict, "hamming");
+    PyObject* py_margin = PyDict_GetItemString(detection_dict, "margin");
+    PyObject* py_center = PyDict_GetItemString(detection_dict, "center");
+    PyObject* py_corners = PyDict_GetItemString(detection_dict, "lb-rb-rt-lt");
+    PyObject* py_homography = PyDict_GetItemString(detection_dict, "homography");
+
+    if(!py_id || !py_hamming || !py_margin || !py_center || !py_corners || !py_homography)
+    {
+        PyErr_SetString(PyExc_ValueError,
+            "Detection dictionary is missing required fields. "
+            "Make sure you're using a detection from the updated detect() method that includes 'homography'.");
+        return NULL;
+    }
+
+    // Scalar fields: bail out at the first failed conversion so that no
+    // further Python API call is made with an exception pending
+    det.family = self->tf;
+    det.id = PyLong_AsLong(py_id);
+    if(PyErr_Occurred())
+        return NULL;
+    det.hamming = PyLong_AsLong(py_hamming);
+    if(PyErr_Occurred())
+        return NULL;
+    det.decision_margin = PyFloat_AsDouble(py_margin);
+    if(PyErr_Occurred())
+        return NULL;
+
+    // Center: 2 values
+    center_array = detection_field_as_array(py_center, "center", 1, 2, 0);
+    if(center_array == NULL)
+        goto cleanup;
+    det.c[0] = *(double*)PyArray_GETPTR1(center_array, 0);
+    det.c[1] = *(double*)PyArray_GETPTR1(center_array, 1);
+
+    // Corners: 4x2
+    corners_array = detection_field_as_array(py_corners, "lb-rb-rt-lt", 2, 4, 2);
+    if(corners_array == NULL)
+        goto cleanup;
+    for(int i = 0; i < 4; i++)
+    {
+        det.p[i][0] = *(double*)PyArray_GETPTR2(corners_array, i, 0);
+        det.p[i][1] = *(double*)PyArray_GETPTR2(corners_array, i, 1);
+    }
+
+    // Homography: 3x3, copied into a matd_t owned by this function
+    homography_array = detection_field_as_array(py_homography, "homography", 2, 3, 3);
+    if(homography_array == NULL)
+        goto cleanup;
+
+    H_matrix = matd_create(3, 3);
+    if(!H_matrix)
+    {
+        PyErr_SetString(PyExc_RuntimeError, "Could not allocate homography matrix");
+        goto cleanup;
+    }
+
+    for(int i = 0; i < 3; i++)
+    {
+        for(int j = 0; j < 3; j++)
+        {
+            MATD_EL(H_matrix, i, j) = *(double*)PyArray_GETPTR2(homography_array, i, j);
+        }
+    }
+    det.H = H_matrix;
+
+    // Setup detection info
+    info.det = &det;
+    info.tagsize = tagsize;
+    info.fx = fx;
+    info.fy = fy;
+    info.cx = cx;
+    info.cy = cy;
+
+    // Estimate pose
+    error = estimate_tag_pose(&info, &pose);
+
+    // Create numpy arrays for R and t
+    R_array = (PyArrayObject*)PyArray_SimpleNew(2, ((npy_intp[]){3, 3}), NPY_FLOAT64);
+    t_array = (PyArrayObject*)PyArray_SimpleNew(2, ((npy_intp[]){3, 1}), NPY_FLOAT64);
+
+    if(!R_array || !t_array)
+    {
+        PyErr_SetString(PyExc_RuntimeError, "Could not allocate output arrays");
+        goto cleanup;
+    }
+
+    // Copy rotation matrix
+    for(int i = 0; i < 3; i++)
+    {
+        for(int j = 0; j < 3; j++)
+        {
+            *(double*)PyArray_GETPTR2(R_array, i, j) = MATD_EL(pose.R, i, j);
+        }
+    }
+
+    // Copy translation vector
+    for(int i = 0; i < 3; i++)
+    {
+        *(double*)PyArray_GETPTR2(t_array, i, 0) = MATD_EL(pose.t, i, 0);
+    }
+
+    // "N" hands the array references over to the new dict
+    result = Py_BuildValue("{s:N,s:N,s:d}",
+                          "R", R_array,
+                          "t", t_array,
+                          "error", error);
+    R_array = NULL;
+    t_array = NULL;
+
+cleanup:
+    if(H_matrix)
+        matd_destroy(H_matrix);
+    if(pose.R)
+        matd_destroy(pose.R);
+    if(pose.t)
+        matd_destroy(pose.t);
+    Py_XDECREF(center_array);
+    Py_XDECREF(corners_array);
+    Py_XDECREF(homography_array);
+    Py_XDECREF(R_array);
+    Py_XDECREF(t_array);
+
+    return result;
+}
+
 
 #include "apriltag_detect_docstring.h"
 #include "apriltag_py_type_docstring.h"
+#include "apriltag_estimate_tag_pose_docstring.h"
 
 static PyMethodDef apriltag_methods[] =
     { PYMETHODDEF_ENTRY(apriltag_, detect, METH_VARARGS),
+      PYMETHODDEF_ENTRY(apriltag_, estimate_tag_pose, METH_VARARGS),
       {NULL, NULL, 0, NULL}
     };
 
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
@@ -3,6 +3,10 @@ add_library(getline OBJECT getline.c)
 add_executable(test_detection test_detection.c)
 target_link_libraries(test_detection ${PROJECT_NAME} getline)
 
+# Pose estimation test executable; linked like test_detection against the ${PROJECT_NAME} target and getline
+add_executable(test_tag_pose_estimation test_tag_pose_estimation.c)
+target_link_libraries(test_tag_pose_estimation ${PROJECT_NAME} getline)
+
 # test images with true detection
 set(TEST_IMAGE_NAMES
     "33369213973_9d9bb4cc96_c"
@@ -16,3 +20,13 @@ foreach(IMG IN LISTS TEST_IMAGE_NAMES)
              WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
     )
 endforeach()
+
+# Register one pose-estimation CTest case per entry in TEST_IMAGE_NAMES
+foreach(POSE_IMG IN LISTS TEST_IMAGE_NAMES)
+    add_test(
+        NAME test_tag_pose_estimation_${POSE_IMG}
+        COMMAND $<TARGET_FILE:test_tag_pose_estimation> data/${POSE_IMG}.jpg data/${POSE_IMG}.txt
+        WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+    )
+endforeach()
+
diff --git a/test/test_tag_pose_estimation.c b/test/test_tag_pose_estimation.c
diff --git a/test/test_tag_pose_estimation.py b/test/test_tag_pose_estimation.py