Made find_max_global() automatically apply a log-scale transform to variables

that obviously need it.
2024-11-01 10:14:53 +08:00 · 2017-11-24 21:44:09 -05:00 · 2017-11-24 21:44:09 -05:00 · fc6cce9f89
commit fc6cce9f89
parent 99621934ff
2 changed files with 67 additions and 3 deletions
--- a/dlib/global_optimization/find_max_global.h
+++ b/dlib/global_optimization/find_max_global.h
@ -117,21 +117,55 @@ template <typename T> static auto go(T&& f, const matrix<double, 0, 1>& a) -> de
        >
    std::pair<size_t,function_evaluation> find_max_global (
        std::vector<funct>& functions,
-        const std::vector<function_spec>& specs,
+        std::vector<function_spec> specs,
        const max_function_calls num,
        const std::chrono::nanoseconds max_runtime = FOREVER,
        double solver_epsilon = 1e-11
    ) 
    {
+        // Decide which parameters should be searched on a log scale.  Basically, it's
+        // common for machine learning models to have parameters that should be searched on
+        // a log scale (e.g. SVM C).  These parameters are usually identifiable because
+        // they have bounds like [1e-5 1e10], that is, they span a very large range of
+        // magnitudes from really small to really big.  So here we are going to check for
+        // that, and if we find parameters with that kind of bound constraint we will
+        // transform them to a log scale automatically.
+        std::vector<std::vector<bool>> log_scale(specs.size());
+        for (size_t i = 0; i < specs.size(); ++i)
+        {
+            for (long j = 0; j < specs[i].lower.size(); ++j)
+            {
+                if (!specs[i].is_integer_variable[j] && specs[i].lower(j) > 0 && specs[i].upper(j)/specs[i].lower(j) > 1000)
+                {
+                    log_scale[i].push_back(true);
+                    specs[i].lower(j) = std::log(specs[i].lower(j));
+                    specs[i].upper(j) = std::log(specs[i].upper(j));
+                }
+                else
+                {
+                    log_scale[i].push_back(false);
+                }
+            }
+        }
+
        global_function_search opt(specs);
        opt.set_solver_epsilon(solver_epsilon);

        const auto time_to_stop = std::chrono::steady_clock::now() + max_runtime;

+        // Now run the main solver loop.
        for (size_t i = 0; i < num.max_calls && std::chrono::steady_clock::now() < time_to_stop; ++i)
        {
            auto next = opt.get_next_x();
-            double y = call_function_and_expand_args(functions[next.function_idx()], next.x());
+            matrix<double,0,1> x = next.x();
+            // Undo any log-scaling that was applied to the variables before we pass them
+            // to the functions being optimized.
+            for (long j = 0; j < x.size(); ++j)
+            {
+                if (log_scale[next.function_idx()][j])
+                    x(j) = std::exp(x(j));
+            }
+            double y = call_function_and_expand_args(functions[next.function_idx()], x);
            next.set(y);
        }

@ -140,6 +174,12 @@ template <typename T> static auto go(T&& f, const matrix<double, 0, 1>& a) -> de
        double y;
        size_t function_idx;
        opt.get_best_function_eval(x,y,function_idx);
+        // Undo any log-scaling that was applied to the variables before we output them. 
+        for (long j = 0; j < x.size(); ++j)
+        {
+            if (log_scale[function_idx][j])
+                x(j) = std::exp(x(j));
+        }
        return std::make_pair(function_idx, function_evaluation(x,std::move(y)));
    }

@ -160,7 +200,7 @@ template <typename T> static auto go(T&& f, const matrix<double, 0, 1>& a) -> de
    {
        std::vector<funct> functions(1,std::move(f));
        std::vector<function_spec> specs(1, function_spec(bound1, bound2, is_integer_variable));
-        return find_max_global(functions, specs, num, max_runtime, solver_epsilon).second;
+        return find_max_global(functions, std::move(specs), num, max_runtime, solver_epsilon).second;
    }

 // ----------------------------------------------------------------------------------------
--- a/dlib/global_optimization/find_max_global_abstract.h
+++ b/dlib/global_optimization/find_max_global_abstract.h
@ -118,6 +118,18 @@ namespace dlib
            - find_max_global() runs until one of the following is true:
                - The total number of calls to the provided functions is == num.max_calls
                - More than max_runtime time has elapsed since the start of this function.
+            - Any variables that satisfy the following conditions are optimized on a log-scale:
+                - The lower bound on the variable is > 0
+                - The ratio of the upper bound to lower bound is > 1000
+                - The variable is not an integer variable
+              We do this because it's common to optimize machine learning models that have
+              parameters with bounds in a range such as [1e-5, 1e10] (e.g. the SVM C
+              parameter) and it's much more appropriate to optimize these kinds of
+              variables on a log scale.  So we transform them by applying std::log() to
+              them and then undo the transform via std::exp() before invoking the function
+              being optimized.  Therefore, this transformation is invisible to the
+              user-supplied functions.  In most cases, it improves the efficiency of the
+              optimizer.
    !*/

 // ----------------------------------------------------------------------------------------
@ -170,6 +182,18 @@ namespace dlib
            - find_max_global() runs until one of the following is true:
                - The total number of calls to f() is == num.max_calls
                - More than max_runtime time has elapsed since the start of this function.
+            - Any variables that satisfy the following conditions are optimized on a log-scale:
+                - The lower bound on the variable is > 0
+                - The ratio of the upper bound to lower bound is > 1000
+                - The variable is not an integer variable
+              We do this because it's common to optimize machine learning models that have
+              parameters with bounds in a range such as [1e-5, 1e10] (e.g. the SVM C
+              parameter) and it's much more appropriate to optimize these kinds of
+              variables on a log scale.  So we transform them by applying std::log() to
+              them and then undo the transform via std::exp() before invoking the function
+              being optimized.  Therefore, this transformation is invisible to the
+              user-supplied functions.  In most cases, it improves the efficiency of the
+              optimizer.
    !*/

 // ----------------------------------------------------------------------------------------