Skip to content

Commit 74d1285

Browse files
raimbekovm, zy1git, Zhitao Yu
authored
Fix CXCYWH to XYXY conversion for integer bounding boxes (#9322)
Co-authored-by: zy1git <zycoding1@gmail.com> Co-authored-by: Zhitao Yu <zhitao@fb.com>
1 parent 6940e19 commit 74d1285

2 files changed

Lines changed: 37 additions & 4 deletions

File tree

test/test_transforms_v2.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4412,6 +4412,27 @@ def test_errors(self):
44124412
input_tv_tensor, old_format=input_tv_tensor.format, new_format=input_tv_tensor.format
44134413
)
44144414

4415+
def test_cxcywh_to_xyxy_odd_dimensions(self):
4416+
# Non-regression test for https://github.com/pytorch/vision/issues/8887
4417+
# Integer bounding boxes with odd width/height produced incorrect results
4418+
# due to integer division rounding issues (ceil instead of truncation).
4419+
bounding_boxes = tv_tensors.BoundingBoxes(
4420+
[[5, 6, 10, 13]],
4421+
format=tv_tensors.BoundingBoxFormat.CXCYWH,
4422+
canvas_size=(17, 11),
4423+
dtype=torch.int64,
4424+
)
4425+
4426+
actual = F.convert_bounding_box_format(bounding_boxes, new_format=tv_tensors.BoundingBoxFormat.XYXY)
4427+
expected = tv_tensors.BoundingBoxes(
4428+
[[0, 0, 10, 12]],
4429+
format=tv_tensors.BoundingBoxFormat.XYXY,
4430+
canvas_size=(17, 11),
4431+
)
4432+
4433+
assert (actual >= 0).all()
4434+
torch.testing.assert_close(actual, expected)
4435+
44154436

44164437
class TestResizedCrop:
44174438
INPUT_SIZE = (17, 11)

torchvision/transforms/v2/functional/_meta.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -177,17 +177,29 @@ def _xyxy_to_xywh(xyxy: torch.Tensor, inplace: bool) -> torch.Tensor:
177177

178178

179179
def _cxcywh_to_xyxy(cxcywh: torch.Tensor, inplace: bool) -> torch.Tensor:
180-
if not inplace:
180+
# For integer tensors, use float arithmetic to match the behavior of
181+
# `torchvision.ops._box_convert._box_cxcywh_to_xyxy`.
182+
original = cxcywh
183+
dtype = cxcywh.dtype
184+
need_cast = not cxcywh.is_floating_point()
185+
186+
if need_cast:
187+
cxcywh = cxcywh.float()
188+
elif not inplace:
181189
cxcywh = cxcywh.clone()
182190

183-
# Trick to do fast division by 2 and ceil, without casting. It produces the same result as
184-
# `torchvision.ops._box_convert._box_cxcywh_to_xyxy`.
185-
half_wh = cxcywh[..., 2:].div(-2, rounding_mode=None if cxcywh.is_floating_point() else "floor").abs_()
191+
half_wh = cxcywh[..., 2:] / 2
186192
# (cx - width / 2) = x1, same for y1
187193
cxcywh[..., :2].sub_(half_wh)
188194
# (x1 + width) = x2, same for y2
189195
cxcywh[..., 2:].add_(cxcywh[..., :2])
190196

197+
if need_cast:
198+
cxcywh = cxcywh.to(dtype)
199+
if inplace:
200+
original[:] = cxcywh
201+
return original
202+
191203
return cxcywh
192204

193205

0 commit comments

Comments
 (0)