This commit is contained in:
Davis King 2012-04-11 19:03:49 -04:00
commit 87113b6111
22 changed files with 968 additions and 94 deletions

View File

@ -19,64 +19,128 @@ namespace dlib
// ----------------------------------------------------------------------------------------
template <typename T>
typename T::edge_type& edge(
typename enable_if<is_graph<T>,typename T::edge_type>::type& edge(
T& g,
unsigned long idx,
unsigned long n
unsigned long idx_i,
unsigned long idx_j
)
{
// make sure requires clause is not broken
DLIB_ASSERT(g.has_edge(idx,n) == true,
"\tT::edge_type& edge(g, i, j)"
<< "\n\tyou have requested an invalid edge"
<< "\n\ti: " << idx
<< "\n\tj: " << n
DLIB_ASSERT(g.has_edge(idx_i,idx_j) == true,
"\tT::edge_type& edge(g, idx_i, idx_j)"
<< "\n\t you have requested an invalid edge"
<< "\n\t idx_i: " << idx_i
<< "\n\t idx_j: " << idx_j
);
for (unsigned long i = 0; i < g.node(idx).number_of_neighbors(); ++i)
for (unsigned long i = 0; i < g.node(idx_i).number_of_neighbors(); ++i)
{
if (g.node(idx).neighbor(i).index() == n)
return g.node(idx).edge(i);
if (g.node(idx_i).neighbor(i).index() == idx_j)
return g.node(idx_i).edge(i);
}
// put this here just so compilers don't complain about a lack of
// a return here
DLIB_CASSERT(false,
"\tT::edge_type& edge(g, i, j)"
<< "\n\tyou have requested an invalid edge"
<< "\n\ti: " << idx
<< "\n\tj: " << n
"\tT::edge_type& edge(g, idx_i, idx_j)"
<< "\n\t you have requested an invalid edge"
<< "\n\t idx_i: " << idx_i
<< "\n\t idx_j: " << idx_j
);
}
template <typename T>
const typename T::edge_type& edge(
const typename enable_if<is_graph<T>,typename T::edge_type>::type& edge(
const T& g,
unsigned long idx,
unsigned long n
unsigned long idx_i,
unsigned long idx_j
)
{
// make sure requires clause is not broken
DLIB_ASSERT(g.has_edge(idx,n) == true,
"\tT::edge_type& edge(g, i, j)"
<< "\n\tyou have requested an invalid edge"
<< "\n\ti: " << idx
<< "\n\tj: " << n
DLIB_ASSERT(g.has_edge(idx_i,idx_j) == true,
"\tT::edge_type& edge(g, idx_i, idx_j)"
<< "\n\t you have requested an invalid edge"
<< "\n\t idx_i: " << idx_i
<< "\n\t idx_j: " << idx_j
);
for (unsigned long i = 0; i < g.node(idx).number_of_neighbors(); ++i)
for (unsigned long i = 0; i < g.node(idx_i).number_of_neighbors(); ++i)
{
if (g.node(idx).neighbor(i).index() == n)
return g.node(idx).edge(i);
if (g.node(idx_i).neighbor(i).index() == idx_j)
return g.node(idx_i).edge(i);
}
// put this here just so compilers don't complain about a lack of
// a return here
DLIB_CASSERT(false,
"\tT::edge_type& edge(g, i, j)"
<< "\n\tyou have requested an invalid edge"
<< "\n\ti: " << idx
<< "\n\tj: " << n
"\tT::edge_type& edge(g, idx_i, idx_j)"
<< "\n\t you have requested an invalid edge"
<< "\n\t idx_i: " << idx_i
<< "\n\t idx_j: " << idx_j
);
}
// ----------------------------------------------------------------------------------------
template <typename T>
typename enable_if<is_directed_graph<T>,typename T::edge_type>::type& edge(
T& g,
unsigned long parent_idx,
unsigned long child_idx
)
{
// make sure requires clause is not broken
DLIB_ASSERT(g.has_edge(parent_idx,child_idx) == true,
"\t T::edge_type& edge(g, parent_idx, child_idx)"
<< "\n\t you have requested an invalid edge"
<< "\n\t parent_idx: " << parent_idx
<< "\n\t child_idx: " << child_idx
);
for (unsigned long i = 0; i < g.node(parent_idx).number_of_children(); ++i)
{
if (g.node(parent_idx).child(i).index() == child_idx)
return g.node(parent_idx).child_edge(i);
}
// put this here just so compilers don't complain about a lack of
// a return here
DLIB_CASSERT(false,
"\t T::edge_type& edge(g, parent_idx, child_idx)"
<< "\n\t you have requested an invalid edge"
<< "\n\t parent_idx: " << parent_idx
<< "\n\t child_idx: " << child_idx
);
}
template <typename T>
const typename enable_if<is_directed_graph<T>,typename T::edge_type>::type& edge(
const T& g,
unsigned long parent_idx,
unsigned long child_idx
)
{
// make sure requires clause is not broken
DLIB_ASSERT(g.has_edge(parent_idx,child_idx) == true,
"\t T::edge_type& edge(g, parent_idx, child_idx)"
<< "\n\t you have requested an invalid edge"
<< "\n\t parent_idx: " << parent_idx
<< "\n\t child_idx: " << child_idx
);
for (unsigned long i = 0; i < g.node(parent_idx).number_of_children(); ++i)
{
if (g.node(parent_idx).child(i).index() == child_idx)
return g.node(parent_idx).child_edge(i);
}
// put this here just so compilers don't complain about a lack of
// a return here
DLIB_ASSERT(false,
"\t T::edge_type& edge(g, parent_idx, child_idx)"
<< "\n\t you have requested an invalid edge"
<< "\n\t parent_idx: " << parent_idx
<< "\n\t child_idx: " << child_idx
);
}

View File

@ -45,6 +45,42 @@ namespace dlib
(i.e. returns g.node(i).edge(x) such that g.node(i).neighbor(x).index() == j)
!*/
// ----------------------------------------------------------------------------------------
template <
typename T
>
typename T::edge_type& edge(
T& g,
unsigned long parent_idx,
unsigned long child_idx
);
/*!
requires
- T is an implementation of directed_graph/directed_graph_kernel_abstract.h
- g.has_edge(parent_idx,child_idx)
ensures
- returns a reference to the edge data for the directed edge connecting parent
node g.node(parent_idx) to child node g.node(child_idx).
!*/
template <
typename T
>
typename const T::edge_type& edge(
const T& g,
unsigned long parent_idx,
unsigned long child_idx
);
/*!
requires
- T is an implementation of directed_graph/directed_graph_kernel_abstract.h
- g.has_edge(parent_idx,child_idx)
ensures
- returns a reference to the edge data for the directed edge connecting parent
node g.node(parent_idx) to child node g.node(child_idx).
!*/
// ----------------------------------------------------------------------------------------
template <

View File

@ -2872,6 +2872,80 @@ namespace dlib
return matrix_op<op>(op(m.ref(),lower, upper));
}
// ----------------------------------------------------------------------------------------
template <typename M>
struct op_lowerbound : basic_op_m<M>
{
typedef typename M::type type;
op_lowerbound( const M& m_, const type& thresh_) :
basic_op_m<M>(m_), thresh(thresh_){}
const type& thresh;
typedef const typename M::type const_ret_type;
const static long cost = M::cost + 2;
const_ret_type apply ( long r, long c) const
{
const type temp = this->m(r,c);
if (temp >= thresh)
return temp;
else
return thresh;
}
};
template <
typename EXP
>
const matrix_op<op_lowerbound<EXP> > lowerbound (
const matrix_exp<EXP>& m,
const typename EXP::type& thresh
)
{
typedef op_lowerbound<EXP> op;
return matrix_op<op>(op(m.ref(), thresh));
}
// ----------------------------------------------------------------------------------------
template <typename M>
struct op_upperbound : basic_op_m<M>
{
typedef typename M::type type;
op_upperbound( const M& m_, const type& thresh_) :
basic_op_m<M>(m_), thresh(thresh_){}
const type& thresh;
typedef const typename M::type const_ret_type;
const static long cost = M::cost + 2;
const_ret_type apply ( long r, long c) const
{
const type temp = this->m(r,c);
if (temp <= thresh)
return temp;
else
return thresh;
}
};
template <
typename EXP
>
const matrix_op<op_upperbound<EXP> > upperbound (
const matrix_exp<EXP>& m,
const typename EXP::type& thresh
)
{
typedef op_upperbound<EXP> op;
return matrix_op<op>(op(m.ref(), thresh));
}
// ----------------------------------------------------------------------------------------
template <typename M>

View File

@ -1707,6 +1707,42 @@ namespace dlib
- R(r,c) == m(r,c)
!*/
// ----------------------------------------------------------------------------------------
const matrix_exp lowerbound (
const matrix_exp& m,
const matrix_exp::type& thresh
);
/*!
ensures
- returns a matrix R such that:
- R::type == the same type that was in m
- R has the same dimensions as m
- for all valid r and c:
- if (m(r,c) >= thresh) then
- R(r,c) == m(r,c)
- else
- R(r,c) == thresh
!*/
// ----------------------------------------------------------------------------------------
const matrix_exp upperbound (
const matrix_exp& m,
const matrix_exp::type& thresh
);
/*!
ensures
- returns a matrix R such that:
- R::type == the same type that was in m
- R has the same dimensions as m
- for all valid r and c:
- if (m(r,c) <= thresh) then
- R(r,c) == m(r,c)
- else
- R(r,c) == thresh
!*/
// ----------------------------------------------------------------------------------------
}

View File

@ -118,6 +118,7 @@ namespace dlib
of another node in the graph.
!*/
public:
neighbor_iterator(
);
/*!

View File

@ -7,7 +7,7 @@
#include "../matrix.h"
#include "optimization_solve_qp_using_smo.h"
#include <list>
#include <vector>
// ----------------------------------------------------------------------------------------
@ -110,7 +110,8 @@ namespace dlib
>
typename matrix_type::type operator() (
const oca_problem<matrix_type>& problem,
matrix_type& w
matrix_type& w,
bool require_nonnegative_w = false
) const
{
// make sure requires clause is not broken
@ -130,10 +131,10 @@ namespace dlib
const scalar_type C = problem.get_c();
std::list<vect_type> planes;
matrix<scalar_type,0,0,mem_manager_type, layout_type> planes;
std::vector<scalar_type> bs, miss_count;
vect_type temp, alpha;
vect_type new_plane, alpha;
w.set_size(problem.get_num_dimensions(), 1);
w = 0;
@ -154,7 +155,7 @@ namespace dlib
// The flat lower bounding plane is always good to have if we know
// what it is.
bs.push_back(R_lower_bound);
planes.push_back(zeros_matrix<scalar_type>(w.size(),1));
planes = zeros_matrix(w);
alpha = uniform_matrix<scalar_type>(1,1, C);
miss_count.push_back(0);
@ -169,9 +170,12 @@ namespace dlib
// add the next cutting plane
scalar_type cur_risk;
planes.resize(planes.size()+1);
problem.get_risk(w, cur_risk, planes.back());
bs.push_back(cur_risk - dot(w,planes.back()));
problem.get_risk(w, cur_risk, new_plane);
if (planes.size() != 0)
planes = join_rows(planes, new_plane);
else
planes = new_plane;
bs.push_back(cur_risk - dot(w,new_plane));
miss_count.push_back(0);
// If alpha is empty then initialize it (we must always have sum(alpha) == C).
@ -187,24 +191,22 @@ namespace dlib
// report current status
const scalar_type risk_gap = cur_risk - (cp_obj-wnorm)/C;
if (counter > 0 && problem.optimization_status(cur_obj, cur_obj - cp_obj,
cur_risk, risk_gap, planes.size(), counter))
cur_risk, risk_gap, planes.nc(), counter))
{
break;
}
// compute kernel matrix for all the planes
K.swap(Ktmp);
K.set_size(planes.size(), planes.size());
K.set_size(planes.nc(), planes.nc());
// copy over the old K matrix
set_subm(K, 0,0, Ktmp.nr(), Ktmp.nc()) = Ktmp;
// now add the new row and column to K
long rr = 0;
for (typename std::list<vect_type>::iterator r = planes.begin(); r != planes.end(); ++r)
for (long c = 0; c < planes.nc(); ++c)
{
K(rr, Ktmp.nc()) = dot(*r, planes.back());
K(Ktmp.nc(), rr) = K(rr,Ktmp.nc());
++rr;
K(c, Ktmp.nc()) = dot(colm(planes,c), new_plane);
K(Ktmp.nc(), c) = K(c,Ktmp.nc());
}
@ -216,23 +218,22 @@ namespace dlib
eps = 1e-16;
// Note that we warm start this optimization by using the alpha from the last
// iteration as the starting point.
solve_qp_using_smo(K, vector_to_matrix(bs), alpha, eps, sub_max_iter);
if (require_nonnegative_w)
solve_qp4_using_smo(planes, K, vector_to_matrix(bs), alpha, eps, sub_max_iter);
else
solve_qp_using_smo(K, vector_to_matrix(bs), alpha, eps, sub_max_iter);
// construct the w that minimized the subproblem.
w = 0;
rr = 0;
for (typename std::list<vect_type>::iterator i = planes.begin(); i != planes.end(); ++i)
w = -(planes*alpha);
if (require_nonnegative_w)
w = lowerbound(w,0);
for (long i = 0; i < alpha.size(); ++i)
{
if (alpha(rr) != 0)
{
w -= alpha(rr)*(*i);
miss_count[rr] = 0;
}
if (alpha(i) != 0)
miss_count[i] = 0;
else
{
miss_count[rr] += 1;
}
++rr;
miss_count[i] += 1;
}
// Compute the lower bound on the true objective given to us by the cutting
@ -245,13 +246,11 @@ namespace dlib
while (max(vector_to_matrix(miss_count)) >= inactive_thresh)
{
const long idx = index_of_max(vector_to_matrix(miss_count));
typename std::list<vect_type>::iterator i0 = planes.begin();
advance(i0, idx);
planes.erase(i0);
bs.erase(bs.begin()+idx);
miss_count.erase(miss_count.begin()+idx);
K = removerc(K, idx, idx);
alpha = remove_row(alpha,idx);
planes = remove_col(planes,idx);
}
++counter;

View File

@ -124,7 +124,8 @@ namespace dlib
For reference, OCA solves optimization problems with the following form:
Minimize: f(w) == 0.5*dot(w,w) + C*R(w)
Where R(w) is a user-supplied convex function and C > 0
Where R(w) is a user-supplied convex function and C > 0. Optionally,
this object can also add the non-negativity constraint that min(w) >= 0.
For a detailed discussion you should consult the following papers
@ -149,7 +150,8 @@ namespace dlib
>
typename matrix_type::type operator() (
const oca_problem<matrix_type>& problem,
matrix_type& w
matrix_type& w,
bool require_nonnegative_w = false
) const;
/*!
requires
@ -160,6 +162,10 @@ namespace dlib
- The optimization algorithm runs until problem.optimization_status()
indicates it is time to stop.
- returns the objective value at the solution #w
- if (require_nonnegative_w == true) then
- Adds the constraint that every element of w be non-negative.
Therefore, if this argument is true then #w won't contain any
negative values.
!*/
void set_subproblem_epsilon (

View File

@ -147,7 +147,7 @@ namespace dlib
// Check how big the duality gap is and stop when it goes below eps.
// The duality gap is the gap between the objective value of the function
// we are optimizing and the value of it's primal form. This value is always
// we are optimizing and the value of its primal form. This value is always
// greater than or equal to the distance to the optimum solution so it is a
// good way to decide if we should stop. See the book referenced above for
// more information. In particular, see the part about the Wolfe Dual.
@ -211,6 +211,176 @@ namespace dlib
// ----------------------------------------------------------------------------------------
template <
typename EXP1,
typename EXP2,
typename EXP3,
typename T, long NR, long NC, typename MM, typename L
>
unsigned long solve_qp4_using_smo (
const matrix_exp<EXP1>& A,
const matrix_exp<EXP2>& Q,
const matrix_exp<EXP3>& b,
matrix<T,NR,NC,MM,L>& alpha,
T eps,
unsigned long max_iter
)
{
// make sure requires clause is not broken
DLIB_CASSERT(A.nc() == alpha.size() &&
Q.nr() == Q.nc() &&
is_col_vector(b) &&
is_col_vector(alpha) &&
b.size() == alpha.size() &&
b.size() == Q.nr() &&
alpha.size() > 0 &&
min(alpha) >= 0 &&
eps > 0 &&
max_iter > 0,
"\t void solve_qp4_using_smo()"
<< "\n\t Invalid arguments were given to this function"
<< "\n\t A.nc(): " << A.nc()
<< "\n\t Q.nr(): " << Q.nr()
<< "\n\t Q.nc(): " << Q.nc()
<< "\n\t is_col_vector(b): " << is_col_vector(b)
<< "\n\t is_col_vector(alpha): " << is_col_vector(alpha)
<< "\n\t b.size(): " << b.size()
<< "\n\t alpha.size(): " << alpha.size()
<< "\n\t Q.nr(): " << Q.nr()
<< "\n\t min(alpha): " << min(alpha)
<< "\n\t eps: " << eps
<< "\n\t max_iter: " << max_iter
);
const T C = sum(alpha);
/*
For this optimization problem, it is the case that the optimal
value of lambda is given by a simple closed form expression if we
know the optimal alpha. So what we will do is to just optimize
alpha and every now and then we will update lambda with its optimal
value. Therefore, we use essentially the same method as the
solve_qp_using_smo() routine.
*/
// compute optimal lambda for current alpha
matrix<T,NR,1,MM,L> lambda = A*alpha;
lambda = lowerbound(lambda, 0);
// Compute f'(alpha) (i.e. the gradient of f(alpha) with respect to alpha) for the current alpha.
matrix<T,NR,NC,MM,L> df = Q*alpha - b - trans(A)*lambda;
const T tau = 1000*std::numeric_limits<T>::epsilon();
T big, little;
unsigned long iter = 0;
for (; iter < max_iter; ++iter)
{
// Find the two elements of df that satisfy the following:
// - little_idx == index_of_min(df)
// - big_idx == the index of the largest element in df such that alpha(big_idx) > 0
// These two indices will tell us which two alpha values are most in violation of the KKT
// optimality conditions.
big = -std::numeric_limits<T>::max();
long big_idx = 0;
little = std::numeric_limits<T>::max();
long little_idx = 0;
for (long i = 0; i < df.nr(); ++i)
{
if (df(i) > big && alpha(i) > 0)
{
big = df(i);
big_idx = i;
}
if (df(i) < little)
{
little = df(i);
little_idx = i;
}
}
// Check how big the duality gap is and stop when it goes below eps.
// The duality gap is the gap between the objective value of the function
// we are optimizing and the value of its primal form. This value is always
// greater than or equal to the distance to the optimum solution so it is a
// good way to decide if we should stop.
if (trans(alpha)*df - C*little < eps)
{
// compute optimal lambda and recheck the duality gap to make
// sure we have really converged.
lambda = A*alpha;
lambda = lowerbound(lambda, 0);
df = Q*alpha - b - trans(A)*lambda;
if (trans(alpha)*df - C*min(df) < eps)
break;
else
continue;
}
// Save these values, we will need them later.
const T old_alpha_big = alpha(big_idx);
const T old_alpha_little = alpha(little_idx);
// Now optimize the two variables we just picked.
T quad_coef = Q(big_idx,big_idx) + Q(little_idx,little_idx) - 2*Q(big_idx, little_idx);
if (quad_coef <= tau)
quad_coef = tau;
const T delta = (big - little)/quad_coef;
alpha(big_idx) -= delta;
alpha(little_idx) += delta;
// Make sure alpha stays feasible. That is, make sure the updated alpha doesn't
// violate the non-negativity constraint.
if (alpha(big_idx) < 0)
{
// Since an alpha can't be negative we will just set it to 0 and shift all the
// weight to the other alpha.
alpha(big_idx) = 0;
alpha(little_idx) = old_alpha_big + old_alpha_little;
}
// Every 300 iterations
if ((iter%300) == 299)
{
// compute the optimal lambda for the current alpha
lambda = A*alpha;
lambda = lowerbound(lambda, 0);
// Perform this form of the update every so often because doing so can help
// avoid the buildup of numerical errors you get with the alternate update
// below.
df = Q*alpha - b - trans(A)*lambda;
}
else
{
// Now update the gradient. We will perform the equivalent of: df = Q*alpha - b;
const T delta_alpha_big = alpha(big_idx) - old_alpha_big;
const T delta_alpha_little = alpha(little_idx) - old_alpha_little;
for(long k = 0; k < df.nr(); ++k)
df(k) += Q(big_idx,k)*delta_alpha_big + Q(little_idx,k)*delta_alpha_little;;
}
}
/*
using namespace std;
cout << "SMO: " << endl;
cout << " duality gap: "<< trans(alpha)*df - C*min(df) << endl;
cout << " KKT gap: "<< big-little << endl;
cout << " iter: "<< iter+1 << endl;
cout << " eps: "<< eps << endl;
*/
return iter+1;
}
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_OPTIMIZATION_SOLVE_QP_UsING_SMO_H__

View File

@ -52,6 +52,57 @@ namespace dlib
converge to eps accuracy then the number returned will be max_iter+1.
!*/
// ----------------------------------------------------------------------------------------
template <
typename EXP1,
typename EXP2,
typename EXP3,
typename T, long NR, long NC, typename MM, typename L
>
unsigned long solve_qp4_using_smo (
const matrix_exp<EXP1>& A,
const matrix_exp<EXP2>& Q,
const matrix_exp<EXP3>& b,
matrix<T,NR,NC,MM,L>& alpha,
T eps,
unsigned long max_iter
);
/*!
requires
- A.nc() == alpha.size()
- Q.nr() == Q.nc()
- is_col_vector(b) == true
- is_col_vector(alpha) == true
- b.size() == alpha.size() == Q.nr()
- alpha.size() > 0
- min(alpha) >= 0
- eps > 0
- max_iter > 0
ensures
- Let C == sum(alpha) (i.e. C is the sum of the alpha values you
supply to this function)
- This function solves the following quadratic program:
Minimize: f(alpha,lambda) == 0.5*trans(alpha)*Q*alpha - trans(alpha)*b +
0.5*trans(lambda)*lambda - trans(lambda)*A*alpha
subject to the following constraints:
- sum(alpha) == C (i.e. the sum of alpha values doesn't change)
- min(alpha) >= 0 (i.e. all alpha values are nonnegative)
- min(lambda) >= 0 (i.e. all lambda values are nonnegative)
Where f is convex. This means that Q should be positive-semidefinite.
- The solution to the above QP will be stored in #alpha. The optimal
lambda is not output since its value is given by the following expression:
lowerbound(A*alpha,0)
- This function uses a simple implementation of the sequential minimal
optimization algorithm. It starts the algorithm with the given alpha
and it works on the problem until the duality gap (i.e. how far away
we are from the optimum solution) is less than eps. So eps controls
how accurate the solution is and smaller values result in better solutions.
- At most max_iter iterations of optimization will be performed.
- returns the number of iterations performed. If this method fails to
converge to eps accuracy then the number returned will be max_iter+1.
!*/
// ----------------------------------------------------------------------------------------
}

View File

@ -173,8 +173,11 @@ namespace dlib
{
using dlib::sparse_vector::dot;
using dlib::dot;
// The reason for using w_size_m1 and not just w.size()-1 is because
// doing it this way avoids an inane warning from gcc that can occur in some cases.
const long w_size_m1 = w.size()-1;
for (long i = 0; i < samples.size(); ++i)
dot_prods[i] = dot(colm(w,0,w.size()-1), samples(i)) - w(w.size()-1);
dot_prods[i] = dot(colm(w,0,w_size_m1), samples(i)) - w(w_size_m1);
if (is_first_call)
{

View File

@ -74,6 +74,7 @@ set (tests
metaprogramming.cpp
multithreaded_object.cpp
object_detector.cpp
oca.cpp
one_vs_all_trainer.cpp
one_vs_one_trainer.cpp
optimization.cpp

View File

@ -89,6 +89,7 @@ SRC += member_function_pointer.cpp
SRC += metaprogramming.cpp
SRC += multithreaded_object.cpp
SRC += object_detector.cpp
SRC += oca.cpp
SRC += one_vs_all_trainer.cpp
SRC += one_vs_one_trainer.cpp
SRC += optimization.cpp

View File

@ -607,6 +607,31 @@ namespace
DLIB_TEST(equal(0.0/m , zeros_matrix<double>(3,4)));
}
}
{
matrix<int> m(2,3);
m = 1,2,3,
4,5,6;
matrix<int> M(2,3);
M = m;
DLIB_TEST(upperbound(m,6) == M);
DLIB_TEST(upperbound(m,60) == M);
DLIB_TEST(lowerbound(m,-2) == M);
DLIB_TEST(lowerbound(m,0) == M);
M = 2,2,3,
4,5,6;
DLIB_TEST(lowerbound(m,2) == M);
M = 0,0,0,
0,0,0;
DLIB_TEST(upperbound(m,0) == M);
M = 1,2,3,
3,3,3;
DLIB_TEST(upperbound(m,3) == M);
}
}

84
dlib/test/oca.cpp Normal file
View File

@ -0,0 +1,84 @@
// Copyright (C) 2012 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#include <dlib/optimization.h>
#include <dlib/svm.h>
#include <sstream>
#include <string>
#include <cstdlib>
#include <ctime>
#include <vector>
#include "tester.h"
namespace
{
using namespace test;
using namespace dlib;
using namespace std;
logger dlog("test.oca");
// ----------------------------------------------------------------------------------------
class test_oca : public tester
{
public:
test_oca (
) :
tester ("test_oca",
"Runs tests on the oca component.")
{
}
void perform_test(
)
{
print_spinner();
typedef matrix<double,0,1> w_type;
w_type w;
std::vector<w_type> x;
w_type temp(2);
temp = -1, 1;
x.push_back(temp);
temp = 1, -1;
x.push_back(temp);
std::vector<double> y;
y.push_back(+1);
y.push_back(-1);
w_type true_w(3);
oca solver;
// test the version without a non-negativity constraint on w.
solver(make_oca_problem_c_svm<w_type>(2.0, 3.0, vector_to_matrix(x), vector_to_matrix(y), false, 1e-12, 40), w, false);
dlog << LINFO << w;
true_w = -0.5, 0.5, 0;
dlog << LINFO << "error: "<< max(abs(w-true_w));
DLIB_TEST(max(abs(w-true_w)) < 1e-10);
print_spinner();
// test the version with a non-negativity constraint on w.
solver(make_oca_problem_c_svm<w_type>(2.0, 3.0, vector_to_matrix(x), vector_to_matrix(y), false, 1e-12, 40), w, true);
dlog << LINFO << w;
true_w = 0, 1, 0;
dlog << LINFO << "error: "<< max(abs(w-true_w));
DLIB_TEST(max(abs(w-true_w)) < 1e-10);
}
} a;
}

View File

@ -72,6 +72,290 @@ namespace
matrix<double> Q, b;
};
// ----------------------------------------------------------------------------------------
double compute_objective_value (
const matrix<double,0,1>& w,
const matrix<double>& A,
const matrix<double,0,1>& b,
const double C
)
{
return 0.5*dot(w,w) + C*max(trans(A)*w + b);
}
// ----------------------------------------------------------------------------------------
void test_qp4_test1()
{
matrix<double> A(3,2);
A = 1,2,
-3,1,
6,7;
matrix<double,0,1> b(2);
b = 1,
2;
const double C = 2;
matrix<double,0,1> alpha(2), true_alpha(2);
alpha = C/2, C/2;
solve_qp4_using_smo(A, tmp(trans(A)*A), b, alpha, 1e-9, 800);
matrix<double,0,1> w = lowerbound(-A*alpha, 0);
dlog << LINFO << "*******************************************************";
dlog << LINFO << "w: " << trans(w);
dlog << LINFO << "computed obj: "<< compute_objective_value(w,A,b,C);
w = 0;
dlog << LINFO << "with true w obj: "<< compute_objective_value(w,A,b,C);
dlog << LINFO << "alpha: " << trans(alpha);
true_alpha = 0, 2;
dlog << LINFO << "true alpha: "<< trans(true_alpha);
dlog << LINFO << "alpha error: "<< max(abs(alpha-true_alpha));
DLIB_TEST(max(abs(alpha-true_alpha)) < 1e-9);
}
// ----------------------------------------------------------------------------------------
void test_qp4_test2()
{
matrix<double> A(3,2);
A = 1,2,
3,-1,
6,7;
matrix<double,0,1> b(2);
b = 1,
2;
const double C = 2;
matrix<double,0,1> alpha(2), true_alpha(2);
alpha = C/2, C/2;
solve_qp4_using_smo(A, tmp(trans(A)*A), b, alpha, 1e-9, 800);
matrix<double,0,1> w = lowerbound(-A*alpha, 0);
dlog << LINFO << "*******************************************************";
dlog << LINFO << "w: " << trans(w);
dlog << LINFO << "computed obj: "<< compute_objective_value(w,A,b,C);
w = 0, 0.25, 0;
dlog << LINFO << "with true w obj: "<< compute_objective_value(w,A,b,C);
dlog << LINFO << "alpha: " << trans(alpha);
true_alpha = 0.43750, 1.56250;
dlog << LINFO << "true alpha: "<< trans(true_alpha);
dlog << LINFO << "alpha error: "<< max(abs(alpha-true_alpha));
DLIB_TEST(max(abs(alpha-true_alpha)) < 1e-9);
}
// ----------------------------------------------------------------------------------------
void test_qp4_test3()
{
matrix<double> A(3,2);
A = 1,2,
-3,-1,
6,7;
matrix<double,0,1> b(2);
b = 1,
2;
const double C = 2;
matrix<double,0,1> alpha(2), true_alpha(2);
alpha = C/2, C/2;
solve_qp4_using_smo(A, tmp(trans(A)*A), b, alpha, 1e-9, 800);
matrix<double,0,1> w = lowerbound(-A*alpha, 0);
dlog << LINFO << "*******************************************************";
dlog << LINFO << "w: " << trans(w);
dlog << LINFO << "computed obj: "<< compute_objective_value(w,A,b,C);
w = 0, 2, 0;
dlog << LINFO << "with true w obj: "<< compute_objective_value(w,A,b,C);
dlog << LINFO << "alpha: " << trans(alpha);
true_alpha = 0, 2;
dlog << LINFO << "true alpha: "<< trans(true_alpha);
dlog << LINFO << "alpha error: "<< max(abs(alpha-true_alpha));
DLIB_TEST(max(abs(alpha-true_alpha)) < 1e-9);
}
// ----------------------------------------------------------------------------------------
void test_qp4_test5()
{
matrix<double> A(3,3);
A = 1,2,4,
3,1,6,
6,7,-2;
matrix<double,0,1> b(3);
b = 1,
2,
3;
const double C = 2;
matrix<double,0,1> alpha(3), true_alpha(3);
alpha = C/2, C/2, 0;
solve_qp4_using_smo(A, tmp(trans(A)*A), b, alpha, 1e-9, 800);
matrix<double,0,1> w = lowerbound(-A*alpha, 0);
dlog << LINFO << "*******************************************************";
dlog << LINFO << "w: " << trans(w);
dlog << LINFO << "computed obj: "<< compute_objective_value(w,A,b,C);
w = 0, 0, 0.11111111111111111111;
dlog << LINFO << "with true w obj: "<< compute_objective_value(w,A,b,C);
dlog << LINFO << "alpha: " << trans(alpha);
true_alpha = 0, 0.432098765432099, 1.567901234567901;
dlog << LINFO << "true alpha: "<< trans(true_alpha);
dlog << LINFO << "alpha error: "<< max(abs(alpha-true_alpha));
DLIB_TEST(max(abs(alpha-true_alpha)) < 1e-9);
}
// ----------------------------------------------------------------------------------------
void test_qp4_test4()
{
matrix<double> A(3,2);
A = 1,2,
3,1,
6,7;
matrix<double,0,1> b(2);
b = 1,
2;
const double C = 2;
matrix<double,0,1> alpha(2), true_alpha(2);
alpha = C/2, C/2;
solve_qp4_using_smo(A, tmp(trans(A)*A), b, alpha, 1e-9, 800);
matrix<double,0,1> w = lowerbound(-A*alpha, 0);
dlog << LINFO << "*******************************************************";
dlog << LINFO << "w: " << trans(w);
dlog << LINFO << "computed obj: "<< compute_objective_value(w,A,b,C);
w = 0, 0, 0;
dlog << LINFO << "with true w obj: "<< compute_objective_value(w,A,b,C);
dlog << LINFO << "alpha: " << trans(alpha);
true_alpha = 0, 2;
dlog << LINFO << "true alpha: "<< trans(true_alpha);
dlog << LINFO << "alpha error: "<< max(abs(alpha-true_alpha));
DLIB_TEST(max(abs(alpha-true_alpha)) < 1e-9);
}
void test_qp4_test6()
{
matrix<double> A(3,3);
A = 1,2,4,
3,1,6,
6,7,-2;
matrix<double,0,1> b(3);
b = -1,
-2,
-3;
const double C = 2;
matrix<double,0,1> alpha(3), true_alpha(3);
alpha = C/2, C/2, 0;
solve_qp4_using_smo(A, tmp(trans(A)*A), b, alpha, 1e-9, 800);
matrix<double,0,1> w = lowerbound(-A*alpha, 0);
dlog << LINFO << "*******************************************************";
dlog << LINFO << "w: " << trans(w);
dlog << LINFO << "computed obj: "<< compute_objective_value(w,A,b,C);
w = 0, 0, 0;
dlog << LINFO << "with true w obj: "<< compute_objective_value(w,A,b,C);
dlog << LINFO << "alpha: " << trans(alpha);
true_alpha = 2, 0, 0;
dlog << LINFO << "true alpha: "<< trans(true_alpha);
dlog << LINFO << "alpha error: "<< max(abs(alpha-true_alpha));
DLIB_TEST(max(abs(alpha-true_alpha)) < 1e-9);
}
void test_qp4_test7()
{
matrix<double> A(3,3);
A = -1,2,4,
-3,1,6,
-6,7,-2;
matrix<double,0,1> b(3);
b = -1,
-2,
3;
matrix<double> Q(3,3);
Q = 4,-5,6,
1,-4,2,
-9,-4,5;
Q = Q*trans(Q);
const double C = 2;
matrix<double,0,1> alpha(3), true_alpha(3);
alpha = C/2, C/2, 0;
solve_qp4_using_smo(A, Q, b, alpha, 1e-9, 800);
dlog << LINFO << "*******************************************************";
dlog << LINFO << "alpha: " << trans(alpha);
true_alpha = 0, 2, 0;
dlog << LINFO << "true alpha: "<< trans(true_alpha);
dlog << LINFO << "alpha error: "<< max(abs(alpha-true_alpha));
DLIB_TEST(max(abs(alpha-true_alpha)) < 1e-9);
}
// ----------------------------------------------------------------------------------------
void test_solve_qp4_using_smo()
{
test_qp4_test1();
test_qp4_test2();
test_qp4_test3();
test_qp4_test4();
test_qp4_test5();
test_qp4_test6();
test_qp4_test7();
}
// ----------------------------------------------------------------------------------------
class opt_qp_solver_tester : public tester
@ -98,6 +382,10 @@ namespace
void perform_test(
)
{
print_spinner();
test_solve_qp4_using_smo();
print_spinner();
++thetime;
typedef matrix<double,0,1> sample_type;
//dlog << LINFO << "time seed: " << thetime;

View File

@ -59,14 +59,11 @@ cd examples
mkdir build
cd build
cmake ..
cmake --build . --config Release
</code_box>
Then cmake will create whatever is needed to compile the examples on your computer. If you are using Linux
or a Mac it will create a makefile and then you can compile everything by typing:
<code_box>
make
</code_box>
Or on windows, if you have Visual Studio installed, it will make a Visual Studio project which you can then open
and compile by clicking the build button.
Note that you also need to have a C++ compiler installed on your system. There are free C++ compilers
for most operating systems. For example, Visual Studio Express is free on Windows and GCC is free and
works well on Mac OS X and Linux systems.
<h2>Compiling on Linux From Command Line</h2>
From within the examples folder, you can compile any of the examples with a single command like so:
@ -87,7 +84,7 @@ it by typing:
sudo apt-get install libx11-dev
</code_box>
<h2>Compiling on Windows using gcc</h2>
<h2>Compiling on Windows Using GCC</h2>
<p>
The commands for gcc on windows are the same as above but you may also have to link
(via the -l option) to the following libraries: gdi32, comctl32, user32, ws2_32, or imm32.
@ -95,7 +92,7 @@ sudo apt-get install libx11-dev
windows development than gcc.
</p>
<h2>Compiling on Windows using Visual Studio</h2>
<h2>Compiling on Windows Using Visual Studio</h2>
<p>
All you need to do is create an empty console project. Then add dlib/all/source.cpp to it and add the
folder containing the dlib folder to the #include search path. Then you can compile any example program

View File

@ -557,6 +557,14 @@
<name>clamp</name>
<link>dlib/matrix/matrix_utilities_abstract.h.html#clamp</link>
</item>
<item>
<name>lowerbound</name>
<link>dlib/matrix/matrix_utilities_abstract.h.html#lowerbound</link>
</item>
<item>
<name>upperbound</name>
<link>dlib/matrix/matrix_utilities_abstract.h.html#upperbound</link>
</item>
<item>
<name>linspace</name>
<link>dlib/matrix/matrix_utilities_abstract.h.html#linspace</link>

View File

@ -161,7 +161,8 @@
<li><b>Image Processing</b>
<ul>
<li>Windows BMP <a href="imaging.html#load_bmp">read</a> and <a href="imaging.html#save_bmp">write</a> support</li>
<li>Routines for <a href="imaging.html#load_image">reading</a> and
<a href="imaging.html#save_bmp">writing</a> common image formats. </li>
<li>Automatic color space conversion between various pixel types</li>
<li>Common image operations such as edge finding and morphological operations</li>
<li>Implementations of the <a href="imaging.html#get_surf_points">SURF</a>

View File

@ -43,6 +43,7 @@
<item>solve_qp_using_smo</item>
<item>solve_qp2_using_smo</item>
<item>solve_qp3_using_smo</item>
<item>solve_qp4_using_smo</item>
<item>oca</item>
<item>solve_least_squares</item>
<item>solve_least_squares_lm</item>
@ -430,6 +431,28 @@ subject to the following constraint:
</component>
<!-- ************************************************************************* -->
<component>
<name>solve_qp4_using_smo</name>
<file>dlib/optimization.h</file>
<spec_file link="true">dlib/optimization/optimization_solve_qp_using_smo_abstract.h</spec_file>
<description>
This function solves the following quadratic program:
<pre>
Minimize: f(alpha,lambda) == 0.5*trans(alpha)*Q*alpha - trans(alpha)*b +
0.5*trans(lambda)*lambda - trans(lambda)*A*alpha
subject to the following constraints:
sum(alpha) == C
min(alpha) >= 0
min(lambda) >= 0
Where f is convex. This means that Q should be positive-semidefinite.
</pre>
</description>
</component>
<!-- ************************************************************************* -->
<component>
@ -520,7 +543,8 @@ subject to the following constraint:
<pre>
Minimize: f(w) == 0.5*dot(w,w) + C*R(w)
Where R(w) is a user-supplied convex function and C > 0
Where R(w) is a user-supplied convex function and C > 0. Optionally,
this object can also add the non-negativity constraint that min(w) >= 0.
</pre>
<br/>
<br/>

View File

@ -13,8 +13,8 @@
<current>
New Stuff:
- Image Processing:
- Added the option to make the features generated by the poly_image
rotationally invariant.
- Added the option to make the features generated by poly_image rotationally
invariant.
- Added a set of routines for warping, scaling, and resizing images.
See the new "Scaling and Rotating" section of the image processing
documentation for details.
@ -27,7 +27,7 @@ New Stuff:
- Added the get_option() routines which slightly simplify option parsing
from the command line and config files.
- Added the 128bit version of Murmur hash.
- Added the rls_filter and kalman_filter objects. These are tools for
- Added the kalman_filter and rls_filter objects. These are tools for
performing Kalman filtering and recursive least squares filtering.
- Added the circular_buffer object.
@ -58,6 +58,9 @@ Other:
- Turned the array object into a single implementation object. Now arrays
can be created using the normal array&lt;type&gt; obj; syntax. Additionally,
all extensions were merged into the array object.
- Added an example program which better documents how to create training
data for the object detection tools as well as how this data can be used.
See the train_object_detector.cpp example for details.
</current>

View File

@ -80,6 +80,7 @@
<term file="optimization.html" name="solve_qp_using_smo"/>
<term file="optimization.html" name="solve_qp2_using_smo"/>
<term file="optimization.html" name="solve_qp3_using_smo"/>
<term file="optimization.html" name="solve_qp4_using_smo"/>
<term file="optimization.html" name="oca"/>
<term link="optimization.html#find_min_bobyqa" name="BOBYQA"/>
<term file="optimization.html" name="find_max"/>
@ -490,6 +491,8 @@
<term file="dlib/matrix/matrix_utilities_abstract.h.html" name="pixel_to_vector"/>
<term file="dlib/matrix/matrix_utilities_abstract.h.html" name="vector_to_pixel"/>
<term file="dlib/matrix/matrix_utilities_abstract.h.html" name="clamp"/>
<term file="dlib/matrix/matrix_utilities_abstract.h.html" name="lowerbound"/>
<term file="dlib/matrix/matrix_utilities_abstract.h.html" name="upperbound"/>
<term file="dlib/svm/sparse_vector_abstract.h.html" name="scale_by"/>

View File

@ -2,7 +2,7 @@
/*
This is an example showing how you might use dlib to create a reasonably
function command line tool for object detection. This example assumes
functional command line tool for object detection. This example assumes
you are familiar with the contents of at least the following example
programs:
- object_detector_ex.cpp
@ -13,17 +13,15 @@
This program is a command line tool for learning to detect objects in images.
Therefore, to create an object detector it requires a set of annotated training
images. To create this annotated data you will need to compile the imglab tool
included with dlib. To do this, go to the tools/imglab folder and type the
following:
images. To create this annotated data you will need to use the imglab tool
included with dlib. It is located in the tools/imglab folder and can be compiled
using the following commands.
cd tools/imglab
mkdir build
cd build
cmake ..
make
Note that you may need to install CMake (www.cmake.org) for this to work. Also,
if you are using visual studio then you should use the following command to
compile instead of "make"
cmake --build . --config Release
Note that you may need to install CMake (www.cmake.org) for this to work.
Next, lets assume you have a folder of images called /tmp/images. These images
should contain examples of the objects you want to learn to detect. You will
@ -35,17 +33,18 @@
A window will appear showing all the images. You can use the up and down arrow
keys to cycle though the images and the mouse to label objects. In particular,
holding the shift key, left clicking, and dragging the mouse will allow you to
draw boxes around the objects you which to detect. So next label all the objects
draw boxes around the objects you wish to detect. So next, label all the objects
with boxes. Note that it is important to label all the objects since any object
not labeled is implicitly assumed to be not an object we should detect.
Once you finish labeling objects go to the file menu, click save, and close the
program. This will save the object boxes back to mydataset.xml. You can verify
Once you finish labeling objects go to the file menu, click save, and then close
the program. This will save the object boxes back to mydataset.xml. You can verify
this by opening the tool again with
./imglab mydataset.xml
and observing that the boxes are present.
Returning to the present example program, we can now issue the command
Returning to the present example program, we can compile it using cmake just as we
did with the imglab tool. Once compiled, we can issue the command
./train_object_detector -tv mydataset.xml
which will train an object detection model based on our labeled data. The model
will be saved to the file object_detector.svm. Once this has finished we can use
@ -55,8 +54,8 @@
be indicated by a red box.
There are also a number of other useful command line options in the current
example program which you can explore.
There are a number of other useful command line options in the current example
program which you can explore below.
*/
@ -170,8 +169,8 @@ int main(int argc, char** argv)
image_scanner_type scanner;
setup_hashed_features(scanner, images, hash_bits);
setup_grid_detection_templates_verbose(scanner, object_locations, grid_size, grid_size);
setup_hashed_features(scanner, images, hash_bits);
structural_object_detection_trainer<image_scanner_type> trainer(scanner);
trainer.set_num_threads(threads);