Refactored the Platt scaling code a little. Now there is a function,
learn_platt_scaling(), that allows you to directly call the Platt scaling code
without supplying a trainer object.
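In other words, a calibrated sigmoid can now be fit to any set of scores and
+1/-1 labels without going through train_probabilistic_decision_function(). A
minimal sketch of the new calling pattern (the scores/labels vectors and
some_score are placeholders, not code from this commit):

    // scores[i] is some classifier's output for sample i; labels[i] is +1 or -1.
    std::pair<double,double> params = dlib::learn_platt_scaling(scores, labels);
    double prob = dlib::platt_score(params, some_score);  // calibrated probability in [0,1]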
Davis King 2013-01-17 20:22:44 -05:00
parent 03ec260cb3
commit a2ae46cd03
2 changed files with 114 additions and 38 deletions


@@ -557,6 +557,77 @@ namespace dlib
         };
     }

+// ----------------------------------------------------------------------------------------
+
+    inline double platt_score (
+        const std::pair<double,double>& params,
+        const double score
+    )
+    {
+        return 1/(1 + std::exp(params.first*score + params.second));
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    template <typename T, typename alloc>
+    std::pair<T,T> learn_platt_scaling (
+        const std::vector<T,alloc>& scores,
+        const std::vector<T,alloc>& labels
+    )
+    {
+        // make sure requires clause is not broken
+        DLIB_ASSERT(is_binary_classification_problem(scores,labels) == true,
+            "\t std::pair<T,T> learn_platt_scaling()"
+            << "\n\t invalid inputs were given to this function"
+            << "\n\t scores.size(): " << scores.size()
+            << "\n\t labels.size(): " << labels.size()
+            << "\n\t is_binary_classification_problem(scores,labels): " << is_binary_classification_problem(scores,labels)
+        );
+
+        const T prior0 = sum(mat(labels)>0);
+        const T prior1 = sum(mat(labels)<0);
+        const T hi_target = (prior1+1)/(prior1+2);
+        const T lo_target = 1.0/(prior0+2);
+
+        std::vector<T,alloc> target;
+        for (unsigned long i = 0; i < labels.size(); ++i)
+        {
+            // if this was a positive example
+            if (labels[i] == +1.0)
+            {
+                target.push_back(hi_target);
+            }
+            else if (labels[i] == -1.0)
+            {
+                target.push_back(lo_target);
+            }
+            else
+            {
+                throw dlib::error("invalid input labels to the learn_platt_scaling() function.");
+            }
+        }
+
+        // Now find the maximum likelihood parameters of the sigmoid.
+        prob_impl::objective<std::vector<T,alloc> > obj(scores, target);
+        prob_impl::der<std::vector<T,alloc> > obj_der(scores, target);
+        prob_impl::hessian<std::vector<T,alloc> > obj_hessian(scores, target);
+
+        matrix<double,2,1> val;
+        val = 0;
+        find_min(newton_search_strategy(obj_hessian),
+                 objective_delta_stop_strategy(),
+                 obj,
+                 obj_der,
+                 val,
+                 0);
+
+        const double A = val(0);
+        const double B = val(1);
+        return std::make_pair(A,B);
+    }
+
 // ----------------------------------------------------------------------------------------

     template <
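For reference, the Newton search above fits Platt's sigmoid by maximum
likelihood; equivalently, it minimizes the cross-entropy objective from Platt's
paper (a standard statement of the algorithm, not something added here):

    p_i    = 1/(1 + exp(A*s_i + B))
    F(A,B) = -sum_i [ t_i*log(p_i) + (1 - t_i)*log(1 - p_i) ]

where the s_i are the entries of scores and the t_i are the hi_target/lo_target
values built above; prob_impl::objective, der, and hessian supply F, its
gradient, and its Hessian to find_min().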
@@ -617,19 +688,11 @@ namespace dlib
         x_train.resize(num_pos_train_samples + num_neg_train_samples);
         y_train.resize(num_pos_train_samples + num_neg_train_samples);

-        typedef std::vector<scalar_type > dvector;
-
-        dvector out;
-        dvector target;
+        std::vector<scalar_type> out, out_label;

         long pos_idx = 0;
         long neg_idx = 0;

-        const scalar_type prior0 = num_pos_test_samples*folds;
-        const scalar_type prior1 = num_neg_test_samples*folds;
-        const scalar_type hi_target = (prior1+1)/(prior1+2);
-        const scalar_type lo_target = 1.0/(prior0+2);
-
         for (long i = 0; i < folds; ++i)
         {
             long cur = 0;
@@ -695,40 +758,15 @@ namespace dlib
             for (unsigned long i = 0; i < x_test.size(); ++i)
             {
                 out.push_back(d(x_test[i]));
-                // if this was a positive example
-                if (y_test[i] == +1.0)
-                {
-                    target.push_back(hi_target);
-                }
-                else if (y_test[i] == -1.0)
-                {
-                    target.push_back(lo_target);
-                }
-                else
-                {
-                    throw dlib::error("invalid input labels to the train_probabilistic_decision_function() function");
-                }
+                out_label.push_back(y_test[i]);
             }

         } // for (long i = 0; i < folds; ++i)

         // Now find the maximum likelihood parameters of the sigmoid.
+        std::pair<scalar_type,scalar_type> params = learn_platt_scaling(out, out_label);
+
-        prob_impl::objective<dvector> obj(out, target);
-        prob_impl::der<dvector> obj_der(out, target);
-        prob_impl::hessian<dvector> obj_hessian(out, target);
-
-        matrix<double,2,1> val;
-        val = 0;
-        find_min(newton_search_strategy(obj_hessian),
-                 objective_delta_stop_strategy(),
-                 obj,
-                 obj_der,
-                 val,
-                 0);
-
-        const double A = val(0);
-        const double B = val(1);
+        const double A = params.first;
+        const double B = params.second;

         return probabilistic_function<typename trainer_type::trained_function_type>( A, B, trainer.train(x,y) );
     }
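The probabilistic_function constructed on the last line stores A and B alongside
the freshly trained decision function; evaluating it applies the same sigmoid as
platt_score() to the decision function's raw output. A sketch of what a call
amounts to (pf and some_sample are hypothetical):

    // pf was returned by train_probabilistic_decision_function(...).
    // pf(some_sample) == 1/(1 + std::exp(A*f(some_sample) + B)), where f is the
    // underlying decision function, i.e. the probability of the +1 class.
    double p = pf(some_sample);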


@@ -130,6 +130,43 @@ namespace dlib
 // ----------------------------------------------------------------------------------------
 // ----------------------------------------------------------------------------------------
 // ----------------------------------------------------------------------------------------

+    inline double platt_score (
+        const std::pair<double,double>& params,
+        const double score
+    );
+    /*!
+        ensures
+            - returns 1/(1 + std::exp(params.first*score + params.second))
+    !*/
+
+// ----------------------------------------------------------------------------------------
+
+    template <typename T, typename alloc>
+    std::pair<T,T> learn_platt_scaling (
+        const std::vector<T,alloc>& scores,
+        const std::vector<T,alloc>& labels
+    );
+    /*!
+        requires
+            - T should be either float, double, or long double
+            - is_binary_classification_problem(scores,labels) == true
+        ensures
+            - This function learns to map scalar values into well calibrated probabilities
+              using Platt scaling.  In particular, it returns a params object such that:
+                - for all valid i:
+                    - platt_score(params,scores[i]) == the scaled version of the scalar value
+                      scores[i].  That is, the output is a number between 0 and 1.
+            - This function is an implementation of the algorithm described in the following
+              papers:
+                Probabilistic Outputs for Support Vector Machines and Comparisons to
+                Regularized Likelihood Methods by John C. Platt.  March 26, 1999
+                A Note on Platt's Probabilistic Outputs for Support Vector Machines
+                by Hsuan-Tien Lin, Chih-Jen Lin, and Ruby C. Weng
+    !*/
+
+// ----------------------------------------------------------------------------------------

     template <
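A minimal end-to-end sketch against this interface (the scores below are
synthetic numbers made up for illustration):

    #include <dlib/svm.h>
    #include <iostream>

    int main()
    {
        // Decision-function outputs paired with their true +1/-1 labels.
        std::vector<double> scores, labels;
        scores.push_back(-2.1);  labels.push_back(-1);
        scores.push_back(-0.7);  labels.push_back(-1);
        scores.push_back( 0.4);  labels.push_back(+1);
        scores.push_back( 1.8);  labels.push_back(+1);

        // Fit the sigmoid, then map any new score into a probability.
        const std::pair<double,double> params = dlib::learn_platt_scaling(scores, labels);
        std::cout << dlib::platt_score(params, 0.9) << std::endl;  // a value in [0,1]
    }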
@@ -156,6 +193,7 @@ namespace dlib
             - The parameters of the probability model are estimated by performing k-fold
               cross validation.
             - The number of folds used is given by the folds argument.
+            - This function is implemented using learn_platt_scaling()
         throws
             - any exceptions thrown by trainer.train()
            - std::bad_alloc
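For comparison, the trainer-driven path documented above, sketched with an
arbitrary kernel and trainer (svm_nu_trainer, the RBF kernel, and the synthetic
data are illustrative stand-ins, not part of this commit):

    #include <dlib/svm.h>

    int main()
    {
        typedef dlib::matrix<double,2,1> sample_type;
        typedef dlib::radial_basis_kernel<sample_type> kernel_type;

        // A tiny synthetic binary problem: one cluster per class.
        std::vector<sample_type> samples;
        std::vector<double> labels;
        for (int i = 0; i < 10; ++i)
        {
            sample_type samp;
            samp = +1 + 0.1*i, +1;  samples.push_back(samp);  labels.push_back(+1);
            samp = -1 - 0.1*i, -1;  samples.push_back(samp);  labels.push_back(-1);
        }

        dlib::svm_nu_trainer<kernel_type> trainer;
        trainer.set_kernel(kernel_type(0.1));

        // 3-fold cross validation produces the scores that drive the Platt fit.
        dlib::probabilistic_function<dlib::decision_function<kernel_type> > pf =
            dlib::train_probabilistic_decision_function(trainer, samples, labels, 3);
        // pf(x) is now the calibrated probability that x belongs to the +1 class.
        return 0;
    }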