[API Compatibility] align smooth_l1_loss by adding it to compat module -part (#79277)

Manfredss · web-flow · commit 886db26b5740 · 2026-06-12T18:56:02.000+08:00
* align smooth_l1_loss by adding it to compat module

* remove additional comments

* add coverage
diff --git a/python/paddle/compat/nn/__init__.py b/python/paddle/compat/nn/__init__.py
@@ -15,6 +15,7 @@
 from __future__ import annotations
 
 import collections
+import warnings
 from itertools import repeat
 from math import sqrt
 from typing import TYPE_CHECKING
@@ -48,6 +49,7 @@
     'AvgPool2d',
     'AvgPool3d',
     'MultiheadAttention',
+    'SmoothL1Loss',
 ]
 
 
@@ -745,6 +747,96 @@ def extra_repr(self) -> str:
         return f"dim={self.dim}"
 
 
+class SmoothL1Loss(nn.Layer):
+    r"""
+
+    PyTorch compatible version of :ref:`api_paddle_nn_SmoothL1Loss`, aligned with
+    ``torch.nn.SmoothL1Loss``. The per-element loss is
+
+    .. math::
+
+        z_i = \left\{\begin{array}{rcl}
+            0.5 (x_i - y_i)^2 / \beta & & \text{if } |x_i - y_i| < \beta \\
+            |x_i - y_i| - 0.5 \beta & & \text{otherwise}
+        \end{array} \right.
+
+    which is Paddle's Huber loss divided by ``beta``, whereas the default
+    ``is_huber=True`` of :ref:`api_paddle_nn_SmoothL1Loss` gives the raw Huber loss.
+
+    Parameters:
+        size_average (bool|None, optional): Deprecated (see ``reduction``). When
+            ``size_average`` or ``reduce`` is not ``None``, the pair replaces
+            ``reduction`` and a ``DeprecationWarning`` is issued. Default is ``None``.
+        reduce (bool|None, optional): Deprecated (see ``reduction``). Default is ``None``.
+        reduction (str, optional): Indicate how to calculate the loss, the candidates
+            are ``'none'`` | ``'mean'`` | ``'sum'``. Default is ``'mean'``.
+        beta (float, optional): Non-negative threshold at which to change between L1
+            and L2 loss. When ``beta == 0`` the loss degrades to the L1 loss, matching
+            PyTorch. Default is ``1.0``.
+
+    Call Parameters:
+        input (Tensor): Input tensor, the data type is float32 or float64.
+        target (Tensor): Label tensor with the same shape as ``input``.
+
+    Returns:
+        Tensor, The tensor storing the smooth L1 loss of ``input`` and ``target``.
+
+    Examples:
+        .. code-block:: pycon
+
+            >>> import paddle
+
+            >>> input = paddle.to_tensor([[0.5, 1.5], [2.0, 0.0]], dtype='float32')
+            >>> target = paddle.to_tensor([[1.0, 1.0], [1.0, 0.5]], dtype='float32')
+            >>> loss = paddle.compat.nn.SmoothL1Loss(beta=1.0)
+            >>> output = loss(input, target)
+            >>> print(output)
+            Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
+                   0.21875000)
+    """
+
+    __constants__ = ["reduction", "beta"]
+    reduction: str
+    beta: float
+
+    @ForbidKeywordsDecorator(
+        illegal_keys={"delta", "is_huber", "name", "label"},
+        func_name="paddle.compat.nn.SmoothL1Loss",
+        correct_name="paddle.nn.SmoothL1Loss",
+    )
+    def __init__(
+        self,
+        size_average: bool | None = None,
+        reduce: bool | None = None,
+        reduction: str = 'mean',
+        beta: float = 1.0,
+    ) -> None:
+        super().__init__()
+        if size_average is not None or reduce is not None:
+            # Like torch, judge the legacy flags by truthiness (None counts as
+            # True) so numpy.bool_(False) or 0 is not mistaken for True.
+            averaged = size_average is None or bool(size_average)
+            reduced = reduce is None or bool(reduce)
+            reduction = ('mean' if averaged else 'sum') if reduced else 'none'
+            warnings.warn(
+                "'size_average' and 'reduce' args of 'SmoothL1Loss' will be "
+                f"deprecated, please use reduction='{reduction}' instead.",
+                DeprecationWarning,
+                stacklevel=2,
+            )
+        self.reduction = reduction
+        self.beta = beta
+
+    def forward(self, input: Tensor, target: Tensor) -> Tensor:
+        # NOTE(review): ``__wrapped__`` is presumably the undecorated function - confirm.
+        return functional.smooth_l1_loss.__wrapped__(
+            input, target, reduction=self.reduction, beta=self.beta
+        )
+
+    def extra_repr(self) -> str:
+        return f"reduction={self.reduction}, beta={self.beta}"
+
+
 AvgPool1d = AvgPool1D
 AvgPool2d = AvgPool2D
 AvgPool3d = AvgPool3D
diff --git a/python/paddle/compat/nn/functional/__init__.py b/python/paddle/compat/nn/functional/__init__.py
@@ -14,6 +14,7 @@
 
 from __future__ import annotations
 
+import warnings
 from typing import TYPE_CHECKING, Literal
 
 import paddle
@@ -39,6 +40,7 @@
     _PaddingTensorMode: TypeAlias = Literal[
         "zeros", "constant", "reflect", "replicate", "circular"
     ]
+    _ReduceMode: TypeAlias = Literal["mean", "sum", "none"]
 
 
 __all__ = [
@@ -48,6 +50,7 @@
     'linear',
     'scaled_dot_product_attention',
     'unfold',
+    'smooth_l1_loss',
 ]
 
 
@@ -380,3 +383,90 @@ def to_list_if_necessary(x):
         paddings=to_list_if_necessary(padding),
         dilations=to_list_if_necessary(dilation),
     )
+
+
+@ForbidKeywordsDecorator(
+    illegal_keys={"label", "delta", "is_huber", "name"},
+    func_name="paddle.compat.nn.functional.smooth_l1_loss",
+    correct_name="paddle.nn.functional.smooth_l1_loss",
+)
+def smooth_l1_loss(
+    input: Tensor,
+    target: Tensor,
+    size_average: bool | None = None,
+    reduce: bool | None = None,
+    reduction: _ReduceMode = 'mean',
+    beta: float = 1.0,
+) -> Tensor:
+    r"""
+
+    PyTorch compatible version of :ref:`api_paddle_nn_functional_smooth_l1_loss`,
+    aligned with ``torch.nn.functional.smooth_l1_loss``. The per-element loss is:
+
+    .. math::
+
+        z_i = \left\{\begin{array}{rcl}
+            0.5 (x_i - y_i)^2 / \beta & & \text{if } |x_i - y_i| < \beta \\
+            |x_i - y_i| - 0.5 \beta & & \text{otherwise}
+        \end{array} \right.
+
+    This equals Paddle's Huber loss divided by ``beta`` (i.e. ``is_huber=False`` with
+    ``delta=beta``), whereas the default ``is_huber=True`` of
+    :ref:`api_paddle_nn_functional_smooth_l1_loss` returns the raw Huber loss.
+
+    Args:
+        input (Tensor): Input tensor, the data type is float32 or float64.
+        target (Tensor): Label tensor with the same shape as ``input``.
+        size_average (bool|None, optional): Deprecated (see ``reduction``). When
+            ``size_average`` or ``reduce`` is not ``None``, the pair replaces
+            ``reduction`` and a ``DeprecationWarning`` is issued. Default is ``None``.
+        reduce (bool|None, optional): Deprecated (see ``reduction``). Default is ``None``.
+        reduction (str, optional): Indicate how to calculate the loss, the candidates
+            are ``'none'`` | ``'mean'`` | ``'sum'``. Default is ``'mean'``.
+        beta (float, optional): Specifies the threshold at which to change between L1
+            and L2 loss. The value must be non-negative. When ``beta == 0`` the loss
+            degrades to the L1 loss, matching PyTorch. Default is ``1.0``.
+
+    Returns:
+        Tensor, The tensor storing the smooth L1 loss of ``input`` and ``target``.
+
+    Raises:
+        ValueError: If ``beta`` is negative.
+
+    Examples:
+        .. code-block:: pycon
+
+            >>> import paddle
+
+            >>> input = paddle.to_tensor([[0.5, 1.5], [2.0, 0.0]], dtype='float32')
+            >>> target = paddle.to_tensor([[1.0, 1.0], [1.0, 0.5]], dtype='float32')
+            >>> output = paddle.compat.nn.functional.smooth_l1_loss(input, target, beta=1.0)
+            >>> print(output)
+            Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
+                   0.21875000)
+    """
+    # Translate PyTorch's deprecated size_average / reduce into reduction.
+    if size_average is not None or reduce is not None:
+        # Like torch, judge the legacy flags by truthiness (None counts as True)
+        # so numpy.bool_(False) or 0 is not mistaken for True.
+        averaged = size_average is None or bool(size_average)
+        reduced = reduce is None or bool(reduce)
+        reduction = ('mean' if averaged else 'sum') if reduced else 'none'
+        warnings.warn(
+            "'size_average' and 'reduce' args of 'smooth_l1_loss' will be "
+            f"deprecated, please use reduction='{reduction}' instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+
+    if beta < 0:
+        raise ValueError(
+            f"smooth_l1_loss does not accept negative beta, but got beta={beta}."
+        )
+
+    if beta == 0:  # quadratic branch divides by beta, so use plain L1
+        return paddle.nn.functional.l1_loss(input, target, reduction=reduction)
+
+    return paddle.nn.functional.smooth_l1_loss(
+        input, target, reduction=reduction, delta=beta, is_huber=False
+    )
diff --git a/test/legacy_test/test_compat_smooth_l1_loss.py b/test/legacy_test/test_compat_smooth_l1_loss.py