Added solve_trust_region_subproblem_bounded()

2024-11-01 10:14:53 +08:00 · 2017-11-12 15:16:25 -05:00 · 2017-11-12 15:16:25 -05:00 · 61b6c1ff78
commit 61b6c1ff78
parent 1e90fc6dbd
3 changed files with 236 additions and 0 deletions
--- a/dlib/optimization/optimization_trust_region.h
+++ b/dlib/optimization/optimization_trust_region.h
@ -225,6 +225,168 @@ namespace dlib
        return max_iter+1;
    }

+// ----------------------------------------------------------------------------------------
+
+    namespace impl
+    {
+        template <
+            typename EXP1,
+            typename EXP2,
+            typename EXP3
+            >
+        bool bounds_violated (
+            const matrix_exp<EXP1>& v,
+            const matrix_exp<EXP2>& l,
+            const matrix_exp<EXP3>& u
+        )
+        {
+            DLIB_ASSERT(v.nr() == l.nr() && v.nr() == u.nr());
+            DLIB_ASSERT(v.nc() == l.nc() && v.nc() == u.nc());
+            for (long r = 0; r < v.nr(); ++r)
+            {
+                for (long c = 0; c < v.nc(); c++)
+                {
+                    if (!(l(r,c) <= v(r,c) && v(r,c) <= u(r,c)))
+                        return true;
+                }
+            }
+            return false;
+        }
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename EXP1,
+        typename EXP2,
+        typename T, long NR, long NC, typename MM, typename L,
+        typename EXP3
+        >
+    void solve_trust_region_subproblem_bounded ( 
+        const matrix_exp<EXP1>& B_,
+        const matrix_exp<EXP2>& g_,
+        const typename EXP1::type radius_,
+        matrix<T,NR,NC,MM,L>& p_,
+        double eps,
+        unsigned long max_iter,
+        const matrix_exp<EXP3>& lower_,
+        const matrix_exp<EXP3>& upper_
+    )
+    {
+        // make sure requires clause is not broken
+        DLIB_ASSERT(B_.nr() == B_.nc() && is_col_vector(g_) && g_.size() == B_.nr(),
+            "\t unsigned long solve_trust_region_subproblem_bounded()"
+            << "\n\t invalid arguments were given to this function"
+            << "\n\t B_.nr():            " << B_.nr()
+            << "\n\t B_.nc():            " << B_.nc()
+            << "\n\t is_col_vector(g_):  " << is_col_vector(g_) 
+            << "\n\t g_.size():          " << g_.size() 
+            );
+        DLIB_ASSERT(radius_ > 0 && eps > 0 && max_iter > 0,
+            "\t unsigned long solve_trust_region_subproblem_bounded()"
+            << "\n\t invalid arguments were given to this function"
+            << "\n\t radius_:   " << radius_
+            << "\n\t eps:      " << eps 
+            << "\n\t max_iter: " << max_iter 
+            );
+        DLIB_ASSERT(is_col_vector(lower_) && lower_.size() == g_.size());
+        DLIB_ASSERT(is_col_vector(upper_) && upper_.size() == g_.size());
+        DLIB_ASSERT(max(upper_-lower_) >= 0);
+        // make sure the problem is feasible.  That is, there should be a point inside the
+        // lower and upper bounds that has a norm <= radius_
+        DLIB_ASSERT(length(clamp(zeros_matrix(lower_),lower_,upper_)) <= radius_, 
+            "The lower and upper bounds are incompatible with the radius since there is no point within the bounds with a norm less than the radius.");
+
+        // We are going to solve this by greedily finding the most violated bound constraint,
+        // locking that variable to its constrained value, removing it from the problem,
+        // and then resolving.  We do that until no more constraint violations are present.
+
+        solve_trust_region_subproblem(B_,g_,radius_,p_,eps,max_iter);
+
+
+        // just stop here if all the bounds are satisfied.
+        if (!impl::bounds_violated(p_, lower_, upper_))
+            return;
+
+        matrix<double> B = matrix_cast<double>(B_);
+        matrix<double,0,1> g = matrix_cast<double>(g_);
+        double radius = radius_;
+        matrix<double,0,1> p = matrix_cast<double>(p_);
+        matrix<double,0,1> lower = matrix_cast<double>(lower_);
+        matrix<double,0,1> upper = matrix_cast<double>(upper_);
+
+        // keep a table that tells us how to map any reduced QP back to the original QP
+        std::vector<long> idxs(g.size());
+        for (size_t i = 0; i < idxs.size(); ++i)
+            idxs[i] = i;
+
+
+        // while we haven't found a p that satisfies the bounds constraints
+        while(impl::bounds_violated(p, lower, upper) )
+        {
+            // Find the most violated variable and fix its value to a constant (the bound
+            // value).
+            long most_violated_idx = 0;
+            double max_violation = 0;
+            double bounded_value = 0;
+            for (long i = 0; i < lower.size(); ++i)
+            {
+                if (!(lower(i) <= p(i) && p(i) <= upper(i)))
+                {
+                    if (lower(i)-p(i) > max_violation)
+                    {
+                        max_violation = lower(i)-p(i);
+                        most_violated_idx = i;
+                        bounded_value = lower(i);
+                    }
+                    else if (p(i)-upper(i) > max_violation)
+                    {
+                        max_violation = p(i)-upper(i);
+                        most_violated_idx = i;
+                        bounded_value = upper(i);
+                    }
+                }
+            }
+
+            // assign this variable to its final value.
+            p_(idxs[most_violated_idx]) = bounded_value;
+
+
+            // now reduce the QP by removing the variable p_(idxs[most_violated_idx]).  
+            idxs.erase(idxs.begin()+most_violated_idx);
+            // we are out of variables to remove since everything is at bounds.
+            if (idxs.size() == 0)
+                break;
+
+            lower = remove_row(lower,most_violated_idx);
+            upper = remove_row(upper,most_violated_idx);
+            g += colm(B,most_violated_idx)*bounded_value;
+            g = remove_row(g,most_violated_idx);
+            p = remove_row(p,most_violated_idx);
+            B = removerc(B,most_violated_idx, most_violated_idx);
+
+            // Removing a variable changes the radius, so we have to subtract the bounded
+            // value from the radius so as to not change the effective radius for the whole
+            // problem.
+            double squared_radius = radius*radius - bounded_value*bounded_value;
+            if (squared_radius <= 0)
+            {
+                p = 0;
+                break;
+            }
+            radius = std::sqrt(squared_radius);
+
+
+            solve_trust_region_subproblem(B,g,radius,p,eps,max_iter);
+        }
+
+        // assign the non-bound-constrained variables to their final values
+        for (size_t i = 0; i < idxs.size(); ++i)
+            p_(idxs[i]) = p(i);
+
+    }
+
+// ----------------------------------------------------------------------------------------
 // ----------------------------------------------------------------------------------------

    template <
--- a/dlib/optimization/optimization_trust_region_abstract.h
+++ b/dlib/optimization/optimization_trust_region_abstract.h
@ -47,6 +47,57 @@ namespace dlib
                  the radius constraint is active and std::abs(length(#p)-radius)/radius <= eps.
    !*/

+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename EXP1,
+        typename EXP2,
+        typename T, long NR, long NC, typename MM, typename L,
+        typename EXP3
+        >
+    void solve_trust_region_subproblem_bounded ( 
+        const matrix_exp<EXP1>& B,
+        const matrix_exp<EXP2>& g,
+        const typename EXP1::type radius,
+        matrix<T,NR,NC,MM,L>& p,
+        double eps,
+        unsigned long max_iter,
+        const matrix_exp<EXP3>& lower,
+        const matrix_exp<EXP3>& upper
+    );
+    /*!
+        requires
+            - B == trans(B)
+              (i.e. B should be a symmetric matrix)
+            - B.nr() == B.nc()
+            - is_col_vector(g) == true
+            - is_col_vector(lower) == true
+            - is_col_vector(upper) == true
+            - g.size() == B.nr()
+            - lower.size() == B.nr()
+            - upper.size() == B.nr()
+            - p is capable of containing a column vector the size of g
+              (i.e. p = g; should be a legal expression)
+            - radius > 0
+            - eps > 0
+            - max_iter > 0
+            - min(upper-lower) >= 0
+            - length(clamp(zeros_matrix(lower),lower,upper)) <= radius
+              (i.e. the lower and upper bounds can't exclude all points with the desired radius.)
+        ensures
+            - This function solves the following optimization problem:
+                Minimize: f(p) == 0.5*trans(p)*B*p + trans(g)*p
+                subject to the following constraint:
+                    - length(p) <= radius
+                    - lower(i) <= p(i) <= upper(i), for all i
+            - Solves the problem to eps accuracy.  We do this by greedily finding the most
+              violated bound constraint, locking that variable to its constrained value, removing
+              it from the problem, and then resolving.  We do that until no more constraint
+              violations are present.  Each time we just call solve_trust_region_subproblem() 
+              to get the solution and pass eps and max_iter directly to these calls to
+              solve_trust_region_subproblem().
+    !*/
+
 // ----------------------------------------------------------------------------------------

    class function_model 
--- a/dlib/test/optimization.cpp
+++ b/dlib/test/optimization.cpp
@ -1182,6 +1182,28 @@ namespace
        off = 1.0; DLIB_TEST(std::abs( poly_min_extrap(off*off, -2*off, (1-off)*(1-off)) - off) < 1e-13); 
    }

+    void test_solve_trust_region_subproblem_bounded()
+    {
+        print_spinner();
+        matrix<double> H(2,2);
+        H = 1, 0,
+        0, 1;
+        matrix<double,0,1> g, lower, upper, p, true_p;
+        g = {0, 0};
+
+        double radius = 0.5;
+        lower = {0.5, 0};
+        upper = {10, 10};
+
+
+        solve_trust_region_subproblem_bounded(H,g, radius, p,  0.001, 500, lower, upper);
+        true_p = { 0.5, 0};
+        DLIB_TEST_MSG(length(p-true_p) < 1e-12, p);
+
+    }
+
+// ----------------------------------------------------------------------------------------
+
    class optimization_tester : public tester
    {
    public:
@ -1200,6 +1222,7 @@ namespace
            test_box_constrained_optimizers(lbfgs_search_strategy(5));
            test_poly_min_extract_2nd();
            optimization_test();
+            test_solve_trust_region_subproblem_bounded();
        }
    } a;