
Add GaussianNLLLoss API. #50843


Merged · 32 commits · Apr 13, 2023
Changes from 1 commit
bec2c4a
Add GaussianNLLLoss API.
Atlantisming Feb 23, 2023
13d3880
Change `rtol` and `atol`. Check `var` in dynamic graph
Atlantisming Feb 26, 2023
5079f74
Merge branch 'PaddlePaddle:develop' into GsNLLLoss_branch
Atlantisming Feb 27, 2023
75d858c
remove assertTrue
Atlantisming Feb 27, 2023
0110b07
update unittest
Atlantisming Feb 28, 2023
380faeb
update unittest for ci-coverage. add broadcast with same dim.
Atlantisming Mar 2, 2023
8c66074
Supply static err print.
Atlantisming Mar 3, 2023
335c7a8
Repair note and example.
Atlantisming Mar 3, 2023
7ac1556
Split unittest.
Atlantisming Mar 6, 2023
dd50740
empty commit.
Atlantisming Mar 9, 2023
5da754f
for standard commit.
Atlantisming Mar 10, 2023
0784a34
for standard commit.
Atlantisming Mar 10, 2023
86ba005
Add int dynamic graph test.
Atlantisming Mar 11, 2023
90b5616
Repair parameter names.
Atlantisming Mar 14, 2023
f11f97a
Repair unittest parameter names.
Atlantisming Mar 16, 2023
5fa70b8
Repair unittest parameter names
Atlantisming Mar 16, 2023
3601625
Repair unittest parameter names
Atlantisming Mar 17, 2023
8fd7e30
Repair unittest parameter names
Atlantisming Mar 17, 2023
16f21aa
Merge remote-tracking branch 'origin/GsNLLLoss_branch' into GsNLLLoss…
Atlantisming Mar 20, 2023
2cb2432
add square in code-block
Atlantisming Mar 24, 2023
e2d74a5
fit few notes.
Atlantisming Mar 24, 2023
2854f3d
fit few notes.
Atlantisming Mar 30, 2023
a547070
fit few notes.
Atlantisming Mar 31, 2023
2dc4a7b
fit few notes.
Atlantisming Apr 4, 2023
d8b7316
add few interpretations.
Atlantisming Apr 7, 2023
1d99e85
add few interpretations.
Atlantisming Apr 7, 2023
9c0e135
add few interpretations.
Atlantisming Apr 10, 2023
bb2b36e
fix import.
Atlantisming Apr 11, 2023
c36fd88
fix space.
Atlantisming Apr 11, 2023
70960a1
empty commit for ci.
Atlantisming Apr 11, 2023
1b8a851
Merge remote-tracking branch 'origin/GsNLLLoss_branch' into GsNLLLoss…
Atlantisming Apr 12, 2023
4c299ac
Merge branch 'PaddlePaddle:develop' into GsNLLLoss_branch
Atlantisming Apr 12, 2023
166 changes: 166 additions & 0 deletions python/paddle/fluid/tests/unittests/test_gaussian_nll_loss.py
@@ -0,0 +1,166 @@
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest

import numpy as np

import paddle
import paddle.fluid.core as core
import paddle.nn.functional as F

np.random.seed(10)


def ref_gaussian_nll_loss(
input, target, var, full=False, eps=1e-6, reduction='none'
):
if var.shape != input.shape:
if input.shape[:-1] == var.shape:
var = np.expand_dims(var, -1)
elif input.shape[:-1] == var.shape[:-1] and var.shape[-1] == 1:
pass
else:
raise ValueError("var is of incorrect size")
if reduction != 'none' and reduction != 'mean' and reduction != 'sum':
raise ValueError(reduction + " is not valid")

if np.any(var < 0):
raise ValueError("var has negative entry/entries")

var = var.copy()
var = np.clip(var, a_min=eps, a_max=None)

loss = 0.5 * (np.log(var) + (input - target) ** 2 / var)
if full:
loss += 0.5 * np.log(2 * np.pi)

if reduction == 'none':
return loss
elif reduction == 'sum':
return [np.sum(loss)]
elif reduction == 'mean':
return [np.mean(loss)]


class TestGaussianNLLLossAPI(unittest.TestCase):
GGBond8488 (Contributor) — Mar 6, 2023:
Everything else looks fine now. Please split the different scenarios of this unit test into separate test cases (put them into separate classes), so that a failing case can be located directly later on.

Atlantisming (Author):
Done.
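A minimal sketch of the suggested split (the base class and names here are illustrative, not necessarily the ones ultimately committed): each scenario gets its own TestCase, so a CI failure names the offending configuration directly.

import unittest

import numpy as np


class GaussianNLLLossCaseBase(unittest.TestCase):
    # Shared fixture; subclasses pin one scenario via dtype/shape.
    dtype = np.float32
    shape = [10, 2]

    def setUp(self):
        self.input_np = np.random.random(self.shape).astype(self.dtype)
        self.target_np = np.random.random(self.shape).astype(self.dtype)
        self.var_np = np.ones(self.shape).astype(self.dtype)


class TestGaussianNLLLossFloat64(GaussianNLLLossCaseBase):
    dtype = np.float64

    def test_dynamic_case(self):
        pass  # exercise only the float64 scenario here


class TestGaussianNLLLossBroadcast(GaussianNLLLossCaseBase):
    shape = [10, 2, 3]

    def setUp(self):
        super().setUp()
        # var uses the broadcast shape with the last dimension dropped
        self.var_np = np.ones(self.shape[:-1]).astype(self.dtype)

    def test_dynamic_case(self):
        pass  # exercise only the broadcast scenario here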

# test paddle.nn.functional.gaussian_nll_loss and paddle.nn.GaussianNLLLoss

def setUp(self, type=None):
self.shape = [10, 2]
if type == 'float64':
self.input_np = np.random.random(self.shape).astype(np.float64)
self.target_np = np.random.random(self.shape).astype(np.float64)
self.var_np = np.ones(self.shape).astype(np.float64)
elif type == 'broadcast':
self.shape = [10, 2, 3]
self.broadcast_shape = [10, 2]
self.input_np = np.random.random(self.shape).astype(np.float32)
self.target_np = np.random.random(self.shape).astype(np.float32)
self.var_np = np.ones(self.broadcast_shape).astype(np.float32)
else:
self.input_np = np.random.random(self.shape).astype(np.float32)
self.target_np = np.random.random(self.shape).astype(np.float32)
self.var_np = np.ones(self.shape).astype(np.float32)

self.place = (
paddle.CUDAPlace(0)
if core.is_compiled_with_cuda()
else paddle.CPUPlace()
)

def test_dynamic_case(self, type=None, full=False, reduction='none'):
self.setUp(type)
out_ref = ref_gaussian_nll_loss(
self.input_np,
self.target_np,
self.var_np,
full=full,
reduction=reduction,
)
paddle.disable_static(self.place)

input_x = paddle.to_tensor(self.input_np)
target = paddle.to_tensor(self.target_np)
var = paddle.to_tensor(self.var_np)
out1 = F.gaussian_nll_loss(
input_x, target, var, full=full, reduction=reduction
)
gaussian_nll_loss = paddle.nn.GaussianNLLLoss(full, reduction=reduction)
out2 = gaussian_nll_loss(input_x, target, var)

for r in [out1, out2]:
self.assertEqual(
np.allclose(out_ref, r.numpy(), rtol=1e-8, atol=1e-7), True
)
paddle.enable_static()

def test_static_case(self, type=None, full=False, reduction='none'):
self.setUp(type)
out_ref = ref_gaussian_nll_loss(
self.input_np,
self.target_np,
self.var_np,
full=full,
reduction=reduction,
)
paddle.enable_static()
with paddle.static.program_guard(paddle.static.Program()):
if type == 'float64':
input_x = paddle.static.data('Input_x', self.shape, type)
target = paddle.static.data('Target', self.shape, type)
var = paddle.static.data('Var', self.shape, type)
elif type == 'broadcast':
input_x = paddle.static.data('Input_x', self.shape)
target = paddle.static.data('Target', self.shape)
var = paddle.static.data('Var', self.broadcast_shape)
else:
input_x = paddle.static.data('Input_x', self.shape, 'float32')
target = paddle.static.data('Target', self.shape, 'float32')
var = paddle.static.data('Var', self.shape, 'float32')
out1 = F.gaussian_nll_loss(
input_x, target, var, full=full, reduction=reduction
)
gaussian_nll_loss = paddle.nn.GaussianNLLLoss(
full, reduction=reduction
)
out2 = gaussian_nll_loss(input_x, target, var)

exe = paddle.static.Executor(self.place)
res = exe.run(
feed={
'Input_x': self.input_np,
'Target': self.target_np,
'Var': self.var_np,
},
fetch_list=[out1, out2],
)
for r in res:
self.assertEqual(
np.allclose(out_ref, r, rtol=1e-8, atol=1e-7), True
)

def test_api(self):
self.test_dynamic_case('float64')
self.test_dynamic_case('broadcast')
self.test_dynamic_case()
self.test_dynamic_case(full=True, reduction='mean')
self.test_static_case(full=True, reduction='mean')
self.test_static_case()
self.test_static_case('broadcast')
self.test_static_case('float64')


if __name__ == "__main__":
unittest.main()
3 changes: 3 additions & 0 deletions python/paddle/nn/__init__.py
@@ -114,6 +114,8 @@
from .layer.loss import TripletMarginWithDistanceLoss
from .layer.loss import TripletMarginLoss
from .layer.loss import SoftMarginLoss
from .layer.loss import GaussianNLLLoss

from .layer.norm import BatchNorm # noqa: F401
from .layer.norm import SyncBatchNorm # noqa: F401
from .layer.norm import GroupNorm # noqa: F401
@@ -332,4 +334,5 @@ def weight_norm(*args):
'TripletMarginWithDistanceLoss',
'TripletMarginLoss',
'SoftMarginLoss',
'GaussianNLLLoss',
]
3 changes: 3 additions & 0 deletions python/paddle/nn/functional/__init__.py
@@ -98,6 +98,8 @@
from .loss import triplet_margin_with_distance_loss
from .loss import triplet_margin_loss
from .loss import soft_margin_loss
from .loss import gaussian_nll_loss

from .norm import batch_norm # noqa: F401
from .norm import instance_norm # noqa: F401
from .norm import layer_norm # noqa: F401
@@ -246,4 +248,5 @@
'triplet_margin_loss',
'multi_margin_loss',
'soft_margin_loss',
'gaussian_nll_loss',
]
135 changes: 135 additions & 0 deletions python/paddle/nn/functional/loss.py
@@ -13,6 +13,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import math

# TODO: define loss functions of neural network
import paddle
import paddle.fluid as fluid
@@ -3884,3 +3886,136 @@ def soft_margin_loss(input, label, reduction='mean', name=None):
return paddle.mean(out, name=name)
else:
return out


def gaussian_nll_loss(
input, target, var, full=False, eps=1e-6, reduction='mean', name=None
Contributor:
The new API development and submission process includes API design and naming specifications. target, var, and eps are not recommended; it is recommended to rename them to label, variance, and epsilon to keep consistent with other APIs.

Contributor:
This also needs to be modified in the RFC.

Atlantisming (Author):
Thanks, noted.
):
Contributor:
Align the docstring descriptions with the API parameters (var, eps).

Atlantisming (Author):
Sorry, I don't quite understand this part.

Contributor:
See #50843 (comment).
r"""Gaussian negative log likelihood loss.

The targets are treated as samples from Gaussian distributions with
expectations and variances predicted by the neural network. For a
``target`` tensor modelled as having a Gaussian distribution with a tensor
of expectations ``input`` and a tensor of positive variances ``var``, the loss is:

.. math::
\text{loss} = \frac{1}{2}\left(\log\left(\text{max}\left(\text{var},
\ \text{eps}\right)\right) + \frac{\left(\text{input} - \text{target}\right)^2}
{\text{max}\left(\text{var}, \ \text{eps}\right)}\right) + \text{const.}

where :attr:`eps` is used for stability. By default, the constant term of
the loss function is omitted unless :attr:`full` is ``True``. If ``var`` is not the same
size as ``input`` (due to a homoscedastic assumption), it must either have a final dimension
of 1 or have one fewer dimension (with all other sizes being the same) for correct broadcasting.

Args:
input(Tensor): input tensor, expectation of the Gaussian distribution, available dtype is float32, float64.
target(Tensor): target tensor, sample from the Gaussian distribution, available dtype is float32, float64.
var(Tensor): tensor of positive variance(s), one for each of the expectations
in the input (heteroscedastic), or a single one (homoscedastic), available dtype is float32, float64.
Contributor:
Left-align this line with the previous one; otherwise it will be parsed incorrectly.
full (bool, optional): include the constant term in the loss
calculation. Default: ``False``.
eps (float, optional): value used to clamp ``var`` (see note below), for
stability. Default: 1e-6.
reduction (str, optional): specifies the reduction to apply to the
output:``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction
will be applied, ``'mean'``: the output is the average of all batch
member losses, ``'sum'``: the output is the sum of all batch member
losses. Default: ``'mean'``.
name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`.

Shape:
- Input: :math:`(N, *)` or :math:`(*)` where :math:`*` means any number of additional
dimensions
- Target: :math:`(N, *)` or :math:`(*)`, same shape as the input, or same shape as the input
but with one dimension equal to 1 (to allow for broadcasting)
- Var: :math:`(N, *)` or :math:`(*)`, same shape as the input, or same shape as the input but
with one dimension equal to 1, or same shape as the input but with one fewer
dimension (to allow for broadcasting)
- Output: scalar if :attr:`reduction` is ``'mean'`` (default) or
``'sum'``. If :attr:`reduction` is ``'none'``, then :math:`(N, *)`, same
shape as the input

Examples::
.. code-block:: python
Contributor:
Leave a blank line after the code-block directive; otherwise parsing will fail.
import paddle
import paddle.nn.functional as F

input = paddle.randn([5, 2], dtype=paddle.float32)
target = paddle.randn([5, 2], dtype=paddle.float32)
var = paddle.ones([5, 2], dtype=paddle.float32)

loss = F.multi_label_soft_margin_loss(input, target, var, reduction='none')
print(loss)

loss = F.multi_label_soft_margin_loss(input, target, var, reduction='mean')
Contributor:
The example code is wrong.

Atlantisming (Author):
Sorry, I'm still not familiar with the git workflow — the earlier example got lost. I'll add it back now.

Atlantisming (Author):
Done.
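The corrected snippet presumably targets gaussian_nll_loss itself instead of multi_label_soft_margin_loss; a sketch consistent with the signature defined above:

import paddle
import paddle.nn.functional as F

input = paddle.randn([5, 2], dtype=paddle.float32)
target = paddle.randn([5, 2], dtype=paddle.float32)
var = paddle.ones([5, 2], dtype=paddle.float32)

# Per-element losses, shape [5, 2]
loss = F.gaussian_nll_loss(input, target, var, reduction='none')
print(loss)

# Scalar average over all elements
loss = F.gaussian_nll_loss(input, target, var, reduction='mean')
print(loss)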

print(loss)


Note:
The clamping of ``var`` is ignored with respect to autograd, and so the
gradients are unaffected by it.
"""

# Check var shape
# If var.shape == input.shape, the case is heteroscedastic and no further checks are needed.
# Otherwise:
if var.shape != input.shape:
# If var is one dimension short of input, but the shapes match otherwise, then this is a homoscedastic case.
# e.g. input.shape = (10, 2, 3), var.shape = (10, 2)
# -> unsqueeze var so that var.shape = (10, 2, 1)
# this is done so that broadcasting can happen in the loss calculation
if input.shape[:-1] == var.shape:
var = paddle.unsqueeze(var, -1)
# This checks if the shapes match up to the final dimension, and the final dimension of var is of shape 1.
# This is also a homoscedastic case.
# e.g. input.shape = (10, 2, 3), var.shape = (10, 2, 1)
elif (
input.shape[:-1] == var.shape[:-1] and var.shape[-1] == 1
): # Homoscedastic case
pass
# If none of the above pass, then the shape of var is incorrect.
else:
raise ValueError("var is of incorrect shape")

# Check validity of reduction mode
if reduction != 'none' and reduction != 'mean' and reduction != 'sum':
raise ValueError(reduction + " is not valid")

# Entries of var must be non-negative
# print(paddle.any(var < 0))
Contributor:
Delete this commented-out debug line.

# if paddle.any(var < 0):
Atlantisming (Author):
In static graph mode, the check on var here returns a LoDTensor.

Contributor:
That is a fairly old concept, but it should not affect this check.

Atlantisming (Author):
Maybe I just don't understand static graphs well enough — in static graph mode, could paddle.any(var < 0) be outputting node information? When I test in static graph mode, this check enters the branch and raises an Error, while the same code passes the test in dynamic graph mode.

Atlantisming (Author) — Feb 24, 2023:
The code that reproduces the error:

if paddle.any(var < 0):
    print('var', var)
    print(paddle.any(var < 0))
    raise ValueError("var has negative entry/entries")

The output:

var var Var : LOD_TENSOR.shape(10, 2).dtype(float32).stop_gradient(True)
E.E
var any_1.tmp_0 : LOD_TENSOR.shape(1,).dtype(bool).stop_gradient(False)
var var Var : LOD_TENSOR.shape(10, 2).dtype(float32).stop_gradient(True)
var any_3.tmp_0 : LOD_TENSOR.shape(1,).dtype(bool).stop_gradient(False)

Atlantisming (Author) — Feb 27, 2023:
> That is a fairly old concept, but it should not affect this check.

I tried the cond() function and found that during graph construction both the true_func() and false_func() passed to cond() are called, raising the errors inside them.
I then looked for other loss functions that use raise ValueError: triplet_margin_with_distance_loss in python\paddle\nn\functional\loss.py (line 3526) checks parameter values inside a node, so as you said it should not affect the check here.
However, checking with Print() shows that the data inside the node is correct:

print(paddle.any(var < 0))
var_res = paddle.static.Print(paddle.any(var < 0))
# if paddle.any(var < 0):
#     raise ValueError("var has negative entry/entries")
================================================
Variable: any_1.tmp_0
  - lod: {}
  - place: Place(cpu)
  - shape: [1]
  - layout: NCHW
  - dtype: bool
  - data: [0]
Variable: any_3.tmp_0
  - lod: {}
  - place: Place(cpu)
  - shape: [1]
  - layout: NCHW
  - dtype: bool
  - data: [0]

Process finished with exit code 0
var any_0.tmp_0 : LOD_TENSOR.shape(1,).dtype(bool).stop_gradient(False)
var any_2.tmp_0 : LOD_TENSOR.shape(1,).dtype(bool).stop_gradient(False)
I0227 16:33:39.522938 20040 interpretercore.cc:273] New Executor is Running.

Ran 1 test in 0.204s

OK

Without the check, the test passes normally. With the check added, execution still enters the branch and raises an error:

Error
Traceback (most recent call last):
  File "D:\PyWorkspace\Paddle\python\paddle\fluid\tests\unittests\test_gaussian_nll_loss.py", line 130, in test_static_case
    out1,var_res = F.gaussian_nll_loss(
  File "D:\Anaconda\envs\paddle_devcpu\lib\site-packages\paddle\nn\functional\loss.py", line 4003, in gaussian_nll_loss
    raise ValueError("var has negative entry/entries")
ValueError: var has negative entry/entries

Atlantisming (Author):
If the statement is changed to

if not paddle.all(var > 0):
    raise ValueError("var has negative entry/entries")

it also passes the test... but might it still be checking the graph node rather than the data?

GGBond8488 (Contributor) — Feb 27, 2023:
Your judgment is correct: in static graph mode there is no way to get var's data during the graph-construction phase, so this check fails under static graphs. There are two possible solutions:

  1. Add a C++-level kernel that implements the computation and the data check; kernels run during the computation phase, where the actual data is available.
  2. Use the Assert OP from https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/static/nn/control_flow.py#L43 to perform the numeric check and report the failure.
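A hedged sketch of option 2, assuming Assert is importable from the linked module with a (cond, data, summarize) signature: the negativity check becomes an op in the program, so it runs at execution time, when the data actually exists.

import paddle
from paddle.static.nn.control_flow import Assert

paddle.enable_static()
with paddle.static.program_guard(paddle.static.Program()):
    var = paddle.static.data('Var', [10, 2], 'float32')
    # Inserted into the program here, evaluated only when the executor runs.
    Assert(paddle.all(var >= 0), data=[var], summarize=20)

The trade-off is that the failure then surfaces as an executor-time error rather than a Python ValueError at call time.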

Contributor:
(Quoting the previous reply.) With cond here, control flow builds the graph for every branch, which is why both true_fn and false_fn end up raising. Print() is likewise an op that takes part in graph construction; it only executes and prints during the computation phase.
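A small sketch of that behavior (a hypothetical minimal program): both branch callables are invoked while the program is being built, so a bare Python raise inside either one fires during construction, never at run time.

import paddle

paddle.enable_static()
with paddle.static.program_guard(paddle.static.Program()):
    x = paddle.static.data('X', [1], 'float32')

    def true_fn():
        # Called at graph-construction time, so this raises immediately,
        # no matter what the executor would later feed for X.
        raise ValueError("raised while building the program")

    def false_fn():
        return x

    out = paddle.static.nn.cond(x[0] > 0, true_fn, false_fn)  # raises here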

# raise ValueError("var has negative entry/entries")

if not in_dygraph_mode():
check_variable_and_dtype(
Contributor:
Type checks are needed in both static and dynamic graph modes. Also, please confirm: are int32 and int64 really unsupported here?

Atlantisming (Author):
That is because paddle.square only supports float32 and float64. After seeing that other loss functions do the same, I used only these two dtypes.

Atlantisming (Author):
Looking at other APIs, check_variable_and_dtype seems to be used to check nodes and dtypes in static graph mode; a conditional branch checking dtypes has already been added for dynamic graph mode.

Contributor:
Hmm — try to write code that is unified across static and dynamic graphs, branching only where truly graph-specific. check_variable_and_dtype automatically skips dynamic graph mode, and the dynamic-graph dtype check should not be needed either: execution will raise on that line automatically.

Atlantisming (Author):
The parameter names have been changed, and check_variable_and_dtype has been moved out of the static-graph branch. The dynamic-graph check has not been removed, though, because the error raised when the log function checks the dtype during loss computation does not pass the unit test.
input, 'Input', ['float32', 'float64'], 'gaussian_nll_loss'
)
check_variable_and_dtype(
target,
'Target',
['float32', 'float64'],
'gaussian_nll_loss',
)
check_variable_and_dtype(
var,
'Var',
['float32', 'float64'],
'gaussian_nll_loss',
)

# Clamp for stability
var = var.clone()
with paddle.no_grad():
var = paddle.clip(var, min=eps)
# Calculate the loss
loss = 0.5 * (paddle.log(var) + paddle.square(input - target) / var)
if full:
loss += 0.5 * math.log(2 * math.pi)

if reduction == 'mean':
return loss.mean()
elif reduction == 'sum':
return loss.sum()
else:
return loss
2 changes: 2 additions & 0 deletions python/paddle/nn/layer/__init__.py
@@ -84,6 +84,8 @@
from .loss import TripletMarginLoss
from .loss import SoftMarginLoss
from .loss import MultiMarginLoss
from .loss import GaussianNLLLoss

from .norm import BatchNorm1D # noqa: F401
from .norm import BatchNorm2D # noqa: F401
from .norm import BatchNorm3D # noqa: F401