mirror of https://github.com/davisking/dlib.git
loss multibinary log (#2141)
* add loss_multilabel_log
* add alias template for loss_multilabel_log
* add missing assert
* increment truth iterator
* rename loss to loss_multibinary_log
* rename loss to loss_multibinary_log
* explicitly capture dims in lambda
This commit is contained in:
parent d7ca478b79
commit dd06c1169b
118  dlib/dnn/loss.h
@@ -5,6 +5,7 @@
#include "loss_abstract.h"
#include "core.h"
#include "utilities.h"
#include "../matrix.h"
#include "../cuda/tensor_tools.h"
#include "../geometry.h"
@@ -784,6 +785,123 @@ namespace dlib
    inline bool operator== (const loss_multimulticlass_log_::classifier_output& lhs, const std::string& rhs)
    { return rhs == static_cast<const std::string&>(lhs); }

// ----------------------------------------------------------------------------------------

    class loss_multibinary_log_
    {
    public:
        typedef std::vector<float> training_label_type;
        typedef std::vector<float> output_label_type;

        template <
            typename SUB_TYPE,
            typename label_iterator
            >
        void to_label (
            const tensor& input_tensor,
            const SUB_TYPE& sub,
            label_iterator iter
        ) const
        {
            const tensor& output_tensor = sub.get_output();
            DLIB_CASSERT(sub.sample_expansion_factor() == 1);
            DLIB_CASSERT(output_tensor.nr() == 1 && output_tensor.nc() == 1);
            DLIB_CASSERT(input_tensor.num_samples() == output_tensor.num_samples());

            // Note that output_tensor.k() should match the number of labels.

            const float* out_data = output_tensor.host();
            for (long i = 0; i < output_tensor.num_samples(); ++i)
            {
                output_label_type predictions(output_tensor.k(), 0);
                for (long k = 0; k < output_tensor.k(); ++k)
                {
                    predictions[k] = out_data[i * output_tensor.k() + k];
                }
                *iter++ = std::move(predictions);
            }
        }

        template <
            typename const_label_iterator,
            typename SUBNET
            >
        double compute_loss_value_and_gradient (
            const tensor& input_tensor,
            const_label_iterator truth,
            SUBNET& sub
        ) const
        {
            const tensor& output_tensor = sub.get_output();
            tensor& grad = sub.get_gradient_input();

            DLIB_CASSERT(sub.sample_expansion_factor() == 1);
            DLIB_CASSERT(input_tensor.num_samples() != 0);
            DLIB_CASSERT(input_tensor.num_samples() % sub.sample_expansion_factor() == 0);
            DLIB_CASSERT(input_tensor.num_samples() == grad.num_samples());
            DLIB_CASSERT(input_tensor.num_samples() == output_tensor.num_samples());
            DLIB_CASSERT(output_tensor.nr() == 1 && output_tensor.nc() == 1);
            DLIB_CASSERT(grad.nr() == 1 && grad.nc() == 1);

            tt::sigmoid(grad, output_tensor);

            // The loss we output is the average loss over the mini-batch.
            const double scale = 1.0 / output_tensor.num_samples();
            double loss = 0;
            float* g = grad.host();
            const float* out_data = output_tensor.host();
            for (long i = 0; i < output_tensor.num_samples(); ++i, ++truth)
            {
                for (long k = 0; k < output_tensor.k(); ++k)
                {
                    const float y = (*truth)[k];
                    DLIB_CASSERT(y != 0, "y: " << y);
                    const size_t idx = i * output_tensor.k() + k;
                    if (y > 0)
                    {
                        const float temp = log1pexp(-out_data[idx]);
                        loss += y * scale * temp;
                        g[idx] = y * scale * (g[idx] - 1);
                    }
                    else
                    {
                        const float temp = -(-out_data[idx] - log1pexp(-out_data[idx]));
                        loss += -y * scale * temp;
                        g[idx] = -y * scale * g[idx];
                    }
                }
            }
            return loss;
        }

        friend void serialize(const loss_multibinary_log_&, std::ostream& out)
        {
            serialize("loss_multibinary_log_", out);
        }

        friend void deserialize(loss_multibinary_log_&, std::istream& in)
        {
            std::string version;
            deserialize(version, in);
            if (version != "loss_multibinary_log_")
                throw serialization_error("Unexpected version found while deserializing dlib::loss_multibinary_log_.");
        }

        friend std::ostream& operator<<(std::ostream& out, const loss_multibinary_log_& )
        {
            out << "loss_multibinary_log";
            return out;
        }

        friend void to_xml(const loss_multibinary_log_& /*item*/, std::ostream& out)
        {
            out << "<loss_multibinary_log/>";
        }
    };

    template <typename SUBNET>
    using loss_multibinary_log = add_loss_layer<loss_multibinary_log_, SUBNET>;

// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
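As a reading aid (a minimal sketch, not part of this commit), the loop above is per-element binary cross-entropy on a logit: for a logit z and a signed, weighted label y (nominally +1 or -1), the loss contribution is |y| * log(1 + exp(-sign(y) * z)), and the gradient with respect to z is |y| * (sigmoid(z) - 1) for positive labels and |y| * sigmoid(z) for negative ones; the layer then multiplies everything by scale = 1/num_samples to average over the mini-batch. The standalone C++ sketch below (local helper names, no dlib dependency) spells that out for a single element:

    #include <cmath>
    #include <iostream>

    int main()
    {
        // Local re-implementations of the helpers used by the layer above.
        auto log1pexp = [](double x) { return std::log1p(std::exp(x)); };
        auto sigmoid  = [](double x) { return 1.0 / (1.0 + std::exp(-x)); };

        const double z = 0.7;   // network output (logit) for one category of one sample
        const double y = -1.0;  // label: negative class with weight 1

        double loss = 0, grad = 0;
        if (y > 0)
        {
            loss = y * log1pexp(-z);        // y * log(1 + exp(-z))
            grad = y * (sigmoid(z) - 1.0);  // matches g[idx] = y * (sigmoid(z) - 1)
        }
        else
        {
            // z + log1pexp(-z) == log1pexp(z), which is the form written in the layer above.
            loss = -y * log1pexp(z);
            grad = -y * sigmoid(z);         // matches g[idx] = -y * sigmoid(z)
        }
        // The layer additionally multiplies loss and gradient by 1.0 / num_samples.
        std::cout << "loss = " << loss << ", grad = " << grad << "\n";
        return 0;
    }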
@@ -693,6 +693,89 @@ namespace dlib
    inline bool operator== (const loss_multimulticlass_log_::classifier_output& lhs, const std::string& rhs)
    { return rhs == static_cast<const std::string&>(lhs); }

// ----------------------------------------------------------------------------------------

    class loss_multibinary_log_
    {
        /*!
            WHAT THIS OBJECT REPRESENTS
                This object implements the loss layer interface defined above by
                EXAMPLE_LOSS_LAYER_.  In particular, it implements a collection of
                binary classifiers using the log loss, which is appropriate for
                binary classification problems where each sample can belong to zero
                or more categories.  Therefore, when using this loss there are two
                possible classes of labels: positive (> 0) and negative (< 0).
                The absolute value of the label represents its weight.  Putting a
                larger weight on a sample increases the importance of getting its
                prediction correct during training.  A good rule of thumb is to use
                weights with absolute value 1 unless you have a very unbalanced
                training dataset; in that case, give a larger weight to the class
                with fewer training examples.

                This loss will cause the network to produce outputs > 0 when
                predicting a member of the positive classes and values < 0 otherwise.

                To be more specific, this object contains a sigmoid layer followed
                by a cross-entropy layer.

                An example will make its use clear.  Suppose you want to make a
                classifier for cats and dogs, but what happens if both appear in one
                image?  Or neither?  This layer allows you to handle those cases by
                using the following labels:
                    - std::vector<float> dog_label  = {1.f, -1.f};
                    - std::vector<float> cat_label  = {-1.f, 1.f};
                    - std::vector<float> both_label = {1.f, 1.f};
                    - std::vector<float> none_label = {-1.f, -1.f};
        !*/
    public:
        typedef std::vector<float> training_label_type;
        typedef std::vector<float> output_label_type;

        template <
            typename SUB_TYPE,
            typename label_iterator
            >
        void to_label (
            const tensor& input_tensor,
            const SUB_TYPE& sub,
            label_iterator iter
        ) const;
        /*!
            This function has the same interface as EXAMPLE_LOSS_LAYER_::to_label() except
            it has the additional calling requirements that:
                - sub.get_output().nr() == 1
                - sub.get_output().nc() == 1
                - sub.get_output().num_samples() == input_tensor.num_samples()
                - sub.sample_expansion_factor() == 1
            and the output labels are the raw scores for each classified object.  If a
            score is > 0 then the classifier is predicting the +1 class for that
            category, otherwise it is predicting the -1 class.
        !*/

        template <
            typename const_label_iterator,
            typename SUBNET
            >
        double compute_loss_value_and_gradient (
            const tensor& input_tensor,
            const_label_iterator truth,
            SUBNET& sub
        ) const;
        /*!
            This function has the same interface as EXAMPLE_LOSS_LAYER_::compute_loss_value_and_gradient()
            except it has the additional calling requirements that:
                - sub.get_output().nr() == 1
                - sub.get_output().nc() == 1
                - sub.get_output().num_samples() == input_tensor.num_samples()
                - sub.sample_expansion_factor() == 1
                - all values pointed to by truth are std::vectors of non-zero elements.
                  Nominally they should be +1 or -1, each indicating the desired class label.
        !*/
    };

    template <typename SUBNET>
    using loss_multibinary_log = add_loss_layer<loss_multibinary_log_, SUBNET>;

// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
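To make the cat/dog example in the documentation above concrete, here is a minimal usage sketch (not from this commit; the network layout, feature dimensionality, and synthetic data are invented for illustration) showing how loss_multibinary_log is typically wired into a network and trained with dnn_trainer, mirroring the test added further down:

    #include <dlib/dnn.h>
    #include <iostream>
    #include <vector>

    using namespace dlib;

    // Hypothetical toy network: 2 outputs, one score per binary category (dog, cat).
    using net_type = loss_multibinary_log<fc<2, relu<fc<16, input<matrix<float, 0, 1>>>>>>;

    int main()
    {
        // Labels follow the convention documented above: one +1/-1 entry per category.
        const std::vector<float> dog_label = {1.f, -1.f};
        const std::vector<float> cat_label = {-1.f, 1.f};

        // Tiny synthetic dataset of 3-dimensional feature vectors, purely for illustration.
        std::vector<matrix<float, 0, 1>> samples;
        std::vector<std::vector<float>> labels;
        for (int i = 0; i < 64; ++i)
        {
            matrix<float, 0, 1> x = matrix_cast<float>(randm(3, 1));
            samples.push_back(x);
            labels.push_back(i % 2 == 0 ? dog_label : cat_label);
        }

        net_type net;
        dnn_trainer<net_type> trainer(net);
        trainer.set_learning_rate(0.1);
        trainer.set_min_learning_rate(1e-3);
        trainer.set_mini_batch_size(32);
        trainer.train(samples, labels);

        // Predictions are raw scores; a score > 0 means that category is predicted present.
        const auto predictions = net(samples);
        std::cout << "first sample scores: " << predictions[0][0] << ", " << predictions[0][1] << "\n";
        return 0;
    }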
@@ -3348,6 +3348,69 @@ namespace
        }
    }

// ----------------------------------------------------------------------------------------

    void test_loss_multibinary_log()
    {
        print_spinner();
        dlib::rand rnd;

        const long dims = 3;
        const std::vector<float> empty_label(2, -1.f);
        std::vector<matrix<float, 0, 1>> samples;
        std::vector<std::vector<float>> labels(128, empty_label);

        for (size_t i = 0; i < labels.size(); ++i)
        {
            matrix<float, 0, 1> x = matrix_cast<float>(randm(dims, 1)) * rnd.get_double_in_range(1, 9);
            const auto norm = sqrt(sum(squared(x)));
            if (norm < 3)
            {
                labels[i][0] = 1.f;
            }
            else if (3 <= norm && norm < 6)
            {
                labels[i][0] = 1.f;
                labels[i][1] = 1.f;
            }
            else
            {
                labels[i][1] = 1.f;
            }
            samples.push_back(std::move(x));
        }

        using net_type = loss_multibinary_log<fc<2, relu<bn_fc<fc<10, input<matrix<float, 0, 1>>>>>>>;
        net_type net;

        auto compute_error = [&net, &samples, &labels, dims]()
        {
            const auto preds = net(samples);
            double num_wrong = 0;
            for (size_t i = 0; i < labels.size(); ++i)
            {
                for (size_t j = 0; j < labels[i].size(); ++j)
                {
                    if ((labels[i][j] == 1 && preds[i][j] < 0) ||
                        (labels[i][j] == -1 && preds[i][j] > 0))
                        ++num_wrong;
                }
            }
            return num_wrong / labels.size() / dims;
        };

        dnn_trainer<net_type> trainer(net);
        const auto error_before = compute_error();
        trainer.set_learning_rate(0.1);
        trainer.set_iterations_without_progress_threshold(10);
        trainer.set_mini_batch_size(128);
        trainer.set_min_learning_rate(1e-3);
        trainer.train(samples, labels);
        const auto error_after = compute_error();

        DLIB_TEST_MSG(error_after < error_before && error_after == 0, "multibinary_log error increased after training");
    }

// ----------------------------------------------------------------------------------------

    void test_tensor_resize_bilinear(long samps, long k, long nr, long nc, long onr, long onc)
@@ -3771,6 +3834,7 @@ namespace
        test_loss_multiclass_per_pixel_with_noise_and_pixels_to_ignore();
        test_loss_multiclass_per_pixel_weighted();
        test_loss_multiclass_log_weighted();
        test_loss_multibinary_log();
        test_serialization();
        test_loss_dot();
        test_loss_multimulticlass_log();