Added another overload of poly_min_extrap() and also improved the speed of backtracking_line_search() by making it use 3rd-degree polynomial interpolation after the first step. Also made it more robust to alpha inputs with improper signs.
2024-11-01 10:14:53 +08:00 · 2013-09-23 22:53:08 -04:00 · 2013-09-23 22:53:08 -04:00 · 8199ae1a48
commit 8199ae1a48
parent 53c908e72c
2 changed files with 100 additions and 7 deletions
--- a/dlib/optimization/optimization_line_search.h
+++ b/dlib/optimization/optimization_line_search.h
@ -183,6 +183,57 @@ namespace dlib
        return put_in_range(0,1,alpha);
    }

+// ----------------------------------------------------------------------------------------
+
+    inline double poly_min_extrap (
+        double f0,
+        double d0,
+        double x1,
+        double f_x1,
+        double x2,
+        double f_x2
+    )
+    {
+        DLIB_ASSERT(0 < x1 && x1 < x2,"Invalid inputs were given to this function");
+
+        // Fit the cubic c(x) = a*x^3 + b*x^2 + d0*x + f0 through (x1,f_x1) and (x2,f_x2)
+        // and return its local minimizer clamped to [0,x2].  The formulas are the cubic
+        // interpolation equations on page 58 of Numerical Optimization by Nocedal and
+        // Wright, second edition.
+        const double x1_sq = x1*x1;
+        const double x2_sq = x2*x2;
+
+        // Without a usable denominator a and b can't be solved for, so just guess.
+        const double denom = x2_sq*x1_sq*(x1-x2);
+        if (denom == 0)
+            return x1/2.0;
+
+        matrix<double,2,2> lhs;
+        lhs = x2_sq,     -x1_sq,
+             -x2_sq*x2,  x1_sq*x1;
+
+        // How far each sample deviates from the tangent line at x==0.
+        matrix<double,2,1> rhs;
+        rhs = f_x1 - f0 - d0*x1,
+              f_x2 - f0 - d0*x2;
+
+        matrix<double,2,1> coeffs;
+        coeffs = lhs*rhs/denom;
+        const double a = coeffs(0);
+        const double b = coeffs(1);
+
+        // c'(x) = 3*a*x^2 + 2*b*x + d0, so this is a quarter of its discriminant.
+        const double disc = b*b - 3*a*d0;
+        if (disc < 0 || a == 0)
+        {
+            // The closed form below is unusable here (no real root, or a division by
+            // zero), so fall back to whichever of 0 and x2 has the smaller value.
+            return (f0 < f_x2) ? 0 : x2;
+        }
+
+        return put_in_range(0, x2, (-b + std::sqrt(disc))/(3*a));
+    }
+
 // ----------------------------------------------------------------------------------------

    inline double lagrange_poly_min_extrap (
@ -447,11 +498,17 @@ namespace dlib
            << "\n\t max_iter: " << max_iter 
        );

-        // If the gradient is telling us we need to search backwards then that is what we
-        // will do.
-        if (d0 > 0 && alpha > 0)
+        // make sure alpha is going in the right direction.  That is, it should be opposite
+        // the direction of the gradient.
+        if ((d0 > 0 && alpha > 0) ||
+            (d0 < 0 && alpha < 0))
+        {
            alpha *= -1;
+        }

+        bool have_prev_alpha = false;
+        double prev_alpha = 0;
+        double prev_val = 0;
        unsigned long iter = 0;
        while (true)
        {
@ -466,12 +523,26 @@ namespace dlib
                // Interpolate a new alpha.  We also make sure the step by which we
                // reduce alpha is not super small.
                double step;
-                if (d0 < 0)
-                    step = put_in_range(0.1,0.9, poly_min_extrap(f0, d0, val));
+                if (!have_prev_alpha)
+                {
+                    if (d0 < 0)
+                        step = alpha*put_in_range(0.1,0.9, poly_min_extrap(f0, d0, val));
+                    else
+                        step = alpha*put_in_range(0.1,0.9, poly_min_extrap(f0, -d0, val));
+                    have_prev_alpha = true;
+                }
                else
-                    step = put_in_range(0.1,0.9, poly_min_extrap(f0, -d0, val));
+                {
+                    if (d0 < 0)
+                        step = put_in_range(0.1*alpha,0.9*alpha, poly_min_extrap(f0, d0, alpha, val, prev_alpha, prev_val));
+                    else
+                        step = put_in_range(0.1*alpha,0.9*alpha, -poly_min_extrap(f0, -d0, -alpha, val, -prev_alpha, prev_val));
+                }

-                alpha *= step;
+                prev_alpha = alpha;
+                prev_val = val;
+
+                alpha = step;
            }
        }
    }
--- a/dlib/optimization/optimization_line_search_abstract.h
+++ b/dlib/optimization/optimization_line_search_abstract.h
@ -119,6 +119,28 @@ namespace dlib
            - returns the point in the range [0,1] that minimizes the polynomial c(x) 
    !*/

+// ----------------------------------------------------------------------------------------
+
+    inline double poly_min_extrap (
+        double f0,
+        double d0,
+        double x1,
+        double f_x1,
+        double x2,
+        double f_x2
+    )
+    /*!
+        requires
+            - 0 < x1 < x2
+        ensures
+            - let f(x) be a 3rd degree polynomial such that:
+                - f(0) == f0
+                - derivative of f(x) at x==0 is d0
+                - f(x1) == f_x1
+                - f(x2) == f_x2
+            - returns the point in the range [0,x2] that minimizes the polynomial f(x)
+            - if the minimizer of f(x) can't be computed (e.g. the derivative of f(x)
+              has no real root or the x^3 coefficient of f(x) is 0) then a heuristic
+              guess in the range [0,x2] is returned instead.
+    !*/
+
 // ----------------------------------------------------------------------------------------

    inline double lagrange_poly_min_extrap (