mirror of
https://github.com/davisking/dlib.git
synced 2024-11-01 10:14:53 +08:00
merged
This commit is contained in:
commit
87113b6111
@ -19,64 +19,128 @@ namespace dlib
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <typename T>
|
||||
typename T::edge_type& edge(
|
||||
typename enable_if<is_graph<T>,typename T::edge_type>::type& edge(
|
||||
T& g,
|
||||
unsigned long idx,
|
||||
unsigned long n
|
||||
unsigned long idx_i,
|
||||
unsigned long idx_j
|
||||
)
|
||||
{
|
||||
// make sure requires clause is not broken
|
||||
DLIB_ASSERT(g.has_edge(idx,n) == true,
|
||||
"\tT::edge_type& edge(g, i, j)"
|
||||
<< "\n\tyou have requested an invalid edge"
|
||||
<< "\n\ti: " << idx
|
||||
<< "\n\tj: " << n
|
||||
DLIB_ASSERT(g.has_edge(idx_i,idx_j) == true,
|
||||
"\tT::edge_type& edge(g, idx_i, idx_j)"
|
||||
<< "\n\t you have requested an invalid edge"
|
||||
<< "\n\t idx_i: " << idx_i
|
||||
<< "\n\t idx_j: " << idx_j
|
||||
);
|
||||
|
||||
for (unsigned long i = 0; i < g.node(idx).number_of_neighbors(); ++i)
|
||||
for (unsigned long i = 0; i < g.node(idx_i).number_of_neighbors(); ++i)
|
||||
{
|
||||
if (g.node(idx).neighbor(i).index() == n)
|
||||
return g.node(idx).edge(i);
|
||||
if (g.node(idx_i).neighbor(i).index() == idx_j)
|
||||
return g.node(idx_i).edge(i);
|
||||
}
|
||||
|
||||
// put this here just so compilers don't complain about a lack of
|
||||
// a return here
|
||||
DLIB_CASSERT(false,
|
||||
"\tT::edge_type& edge(g, i, j)"
|
||||
<< "\n\tyou have requested an invalid edge"
|
||||
<< "\n\ti: " << idx
|
||||
<< "\n\tj: " << n
|
||||
"\tT::edge_type& edge(g, idx_i, idx_j)"
|
||||
<< "\n\t you have requested an invalid edge"
|
||||
<< "\n\t idx_i: " << idx_i
|
||||
<< "\n\t idx_j: " << idx_j
|
||||
);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
const typename T::edge_type& edge(
|
||||
const typename enable_if<is_graph<T>,typename T::edge_type>::type& edge(
|
||||
const T& g,
|
||||
unsigned long idx,
|
||||
unsigned long n
|
||||
unsigned long idx_i,
|
||||
unsigned long idx_j
|
||||
)
|
||||
{
|
||||
// make sure requires clause is not broken
|
||||
DLIB_ASSERT(g.has_edge(idx,n) == true,
|
||||
"\tT::edge_type& edge(g, i, j)"
|
||||
<< "\n\tyou have requested an invalid edge"
|
||||
<< "\n\ti: " << idx
|
||||
<< "\n\tj: " << n
|
||||
DLIB_ASSERT(g.has_edge(idx_i,idx_j) == true,
|
||||
"\tT::edge_type& edge(g, idx_i, idx_j)"
|
||||
<< "\n\t you have requested an invalid edge"
|
||||
<< "\n\t idx_i: " << idx_i
|
||||
<< "\n\t idx_j: " << idx_j
|
||||
);
|
||||
|
||||
for (unsigned long i = 0; i < g.node(idx).number_of_neighbors(); ++i)
|
||||
for (unsigned long i = 0; i < g.node(idx_i).number_of_neighbors(); ++i)
|
||||
{
|
||||
if (g.node(idx).neighbor(i).index() == n)
|
||||
return g.node(idx).edge(i);
|
||||
if (g.node(idx_i).neighbor(i).index() == idx_j)
|
||||
return g.node(idx_i).edge(i);
|
||||
}
|
||||
|
||||
// put this here just so compilers don't complain about a lack of
|
||||
// a return here
|
||||
DLIB_CASSERT(false,
|
||||
"\tT::edge_type& edge(g, i, j)"
|
||||
<< "\n\tyou have requested an invalid edge"
|
||||
<< "\n\ti: " << idx
|
||||
<< "\n\tj: " << n
|
||||
"\tT::edge_type& edge(g, idx_i, idx_j)"
|
||||
<< "\n\t you have requested an invalid edge"
|
||||
<< "\n\t idx_i: " << idx_i
|
||||
<< "\n\t idx_j: " << idx_j
|
||||
);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <typename T>
|
||||
typename enable_if<is_directed_graph<T>,typename T::edge_type>::type& edge(
|
||||
T& g,
|
||||
unsigned long parent_idx,
|
||||
unsigned long child_idx
|
||||
)
|
||||
{
|
||||
// make sure requires clause is not broken
|
||||
DLIB_ASSERT(g.has_edge(parent_idx,child_idx) == true,
|
||||
"\t T::edge_type& edge(g, parent_idx, child_idx)"
|
||||
<< "\n\t you have requested an invalid edge"
|
||||
<< "\n\t parent_idx: " << parent_idx
|
||||
<< "\n\t child_idx: " << child_idx
|
||||
);
|
||||
|
||||
for (unsigned long i = 0; i < g.node(parent_idx).number_of_children(); ++i)
|
||||
{
|
||||
if (g.node(parent_idx).child(i).index() == child_idx)
|
||||
return g.node(parent_idx).child_edge(i);
|
||||
}
|
||||
|
||||
// put this here just so compilers don't complain about a lack of
|
||||
// a return here
|
||||
DLIB_CASSERT(false,
|
||||
"\t T::edge_type& edge(g, parent_idx, child_idx)"
|
||||
<< "\n\t you have requested an invalid edge"
|
||||
<< "\n\t parent_idx: " << parent_idx
|
||||
<< "\n\t child_idx: " << child_idx
|
||||
);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
const typename enable_if<is_directed_graph<T>,typename T::edge_type>::type& edge(
|
||||
const T& g,
|
||||
unsigned long parent_idx,
|
||||
unsigned long child_idx
|
||||
)
|
||||
{
|
||||
// make sure requires clause is not broken
|
||||
DLIB_ASSERT(g.has_edge(parent_idx,child_idx) == true,
|
||||
"\t T::edge_type& edge(g, parent_idx, child_idx)"
|
||||
<< "\n\t you have requested an invalid edge"
|
||||
<< "\n\t parent_idx: " << parent_idx
|
||||
<< "\n\t child_idx: " << child_idx
|
||||
);
|
||||
|
||||
for (unsigned long i = 0; i < g.node(parent_idx).number_of_children(); ++i)
|
||||
{
|
||||
if (g.node(parent_idx).child(i).index() == child_idx)
|
||||
return g.node(parent_idx).child_edge(i);
|
||||
}
|
||||
|
||||
// put this here just so compilers don't complain about a lack of
|
||||
// a return here
|
||||
DLIB_ASSERT(false,
|
||||
"\t T::edge_type& edge(g, parent_idx, child_idx)"
|
||||
<< "\n\t you have requested an invalid edge"
|
||||
<< "\n\t parent_idx: " << parent_idx
|
||||
<< "\n\t child_idx: " << child_idx
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -45,6 +45,42 @@ namespace dlib
|
||||
(i.e. returns g.node(i).edge(x) such that g.node(i).neighbor(x).index() == j)
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename T
|
||||
>
|
||||
typename T::edge_type& edge(
|
||||
T& g,
|
||||
unsigned long parent_idx,
|
||||
unsigned long child_idx
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- T is an implementation of directed_graph/directed_graph_kernel_abstract.h
|
||||
- g.has_edge(parent_idx,child_idx)
|
||||
ensures
|
||||
- returns a reference to the edge data for the directed edge connecting parent
|
||||
node g.node(parent_idx) to child node g.node(child_idx).
|
||||
!*/
|
||||
|
||||
template <
|
||||
typename T
|
||||
>
|
||||
typename const T::edge_type& edge(
|
||||
const T& g,
|
||||
unsigned long parent_idx,
|
||||
unsigned long child_idx
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- T is an implementation of directed_graph/directed_graph_kernel_abstract.h
|
||||
- g.has_edge(parent_idx,child_idx)
|
||||
ensures
|
||||
- returns a reference to the edge data for the directed edge connecting parent
|
||||
node g.node(parent_idx) to child node g.node(child_idx).
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
|
@ -2872,6 +2872,80 @@ namespace dlib
|
||||
return matrix_op<op>(op(m.ref(),lower, upper));
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <typename M>
|
||||
struct op_lowerbound : basic_op_m<M>
|
||||
{
|
||||
typedef typename M::type type;
|
||||
|
||||
op_lowerbound( const M& m_, const type& thresh_) :
|
||||
basic_op_m<M>(m_), thresh(thresh_){}
|
||||
|
||||
const type& thresh;
|
||||
|
||||
typedef const typename M::type const_ret_type;
|
||||
const static long cost = M::cost + 2;
|
||||
|
||||
const_ret_type apply ( long r, long c) const
|
||||
{
|
||||
const type temp = this->m(r,c);
|
||||
if (temp >= thresh)
|
||||
return temp;
|
||||
else
|
||||
return thresh;
|
||||
}
|
||||
};
|
||||
|
||||
template <
|
||||
typename EXP
|
||||
>
|
||||
const matrix_op<op_lowerbound<EXP> > lowerbound (
|
||||
const matrix_exp<EXP>& m,
|
||||
const typename EXP::type& thresh
|
||||
)
|
||||
{
|
||||
typedef op_lowerbound<EXP> op;
|
||||
return matrix_op<op>(op(m.ref(), thresh));
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <typename M>
|
||||
struct op_upperbound : basic_op_m<M>
|
||||
{
|
||||
typedef typename M::type type;
|
||||
|
||||
op_upperbound( const M& m_, const type& thresh_) :
|
||||
basic_op_m<M>(m_), thresh(thresh_){}
|
||||
|
||||
const type& thresh;
|
||||
|
||||
typedef const typename M::type const_ret_type;
|
||||
const static long cost = M::cost + 2;
|
||||
|
||||
const_ret_type apply ( long r, long c) const
|
||||
{
|
||||
const type temp = this->m(r,c);
|
||||
if (temp <= thresh)
|
||||
return temp;
|
||||
else
|
||||
return thresh;
|
||||
}
|
||||
};
|
||||
|
||||
template <
|
||||
typename EXP
|
||||
>
|
||||
const matrix_op<op_upperbound<EXP> > upperbound (
|
||||
const matrix_exp<EXP>& m,
|
||||
const typename EXP::type& thresh
|
||||
)
|
||||
{
|
||||
typedef op_upperbound<EXP> op;
|
||||
return matrix_op<op>(op(m.ref(), thresh));
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <typename M>
|
||||
|
@ -1707,6 +1707,42 @@ namespace dlib
|
||||
- R(r,c) == m(r,c)
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
const matrix_exp lowerbound (
|
||||
const matrix_exp& m,
|
||||
const matrix_exp::type& thresh
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- returns a matrix R such that:
|
||||
- R::type == the same type that was in m
|
||||
- R has the same dimensions as m
|
||||
- for all valid r and c:
|
||||
- if (m(r,c) >= thresh) then
|
||||
- R(r,c) == m(r,c)
|
||||
- else
|
||||
- R(r,c) == thresh
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
const matrix_exp upperbound (
|
||||
const matrix_exp& m,
|
||||
const matrix_exp::type& thresh
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- returns a matrix R such that:
|
||||
- R::type == the same type that was in m
|
||||
- R has the same dimensions as m
|
||||
- for all valid r and c:
|
||||
- if (m(r,c) <= thresh) then
|
||||
- R(r,c) == m(r,c)
|
||||
- else
|
||||
- R(r,c) == thresh
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
}
|
||||
|
@ -118,6 +118,7 @@ namespace dlib
|
||||
of another node in the graph.
|
||||
!*/
|
||||
|
||||
public:
|
||||
neighbor_iterator(
|
||||
);
|
||||
/*!
|
||||
|
@ -7,7 +7,7 @@
|
||||
|
||||
#include "../matrix.h"
|
||||
#include "optimization_solve_qp_using_smo.h"
|
||||
#include <list>
|
||||
#include <vector>
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
@ -110,7 +110,8 @@ namespace dlib
|
||||
>
|
||||
typename matrix_type::type operator() (
|
||||
const oca_problem<matrix_type>& problem,
|
||||
matrix_type& w
|
||||
matrix_type& w,
|
||||
bool require_nonnegative_w = false
|
||||
) const
|
||||
{
|
||||
// make sure requires clause is not broken
|
||||
@ -130,10 +131,10 @@ namespace dlib
|
||||
|
||||
const scalar_type C = problem.get_c();
|
||||
|
||||
std::list<vect_type> planes;
|
||||
matrix<scalar_type,0,0,mem_manager_type, layout_type> planes;
|
||||
std::vector<scalar_type> bs, miss_count;
|
||||
|
||||
vect_type temp, alpha;
|
||||
vect_type new_plane, alpha;
|
||||
|
||||
w.set_size(problem.get_num_dimensions(), 1);
|
||||
w = 0;
|
||||
@ -154,7 +155,7 @@ namespace dlib
|
||||
// The flat lower bounding plane is always good to have if we know
|
||||
// what it is.
|
||||
bs.push_back(R_lower_bound);
|
||||
planes.push_back(zeros_matrix<scalar_type>(w.size(),1));
|
||||
planes = zeros_matrix(w);
|
||||
alpha = uniform_matrix<scalar_type>(1,1, C);
|
||||
miss_count.push_back(0);
|
||||
|
||||
@ -169,9 +170,12 @@ namespace dlib
|
||||
|
||||
// add the next cutting plane
|
||||
scalar_type cur_risk;
|
||||
planes.resize(planes.size()+1);
|
||||
problem.get_risk(w, cur_risk, planes.back());
|
||||
bs.push_back(cur_risk - dot(w,planes.back()));
|
||||
problem.get_risk(w, cur_risk, new_plane);
|
||||
if (planes.size() != 0)
|
||||
planes = join_rows(planes, new_plane);
|
||||
else
|
||||
planes = new_plane;
|
||||
bs.push_back(cur_risk - dot(w,new_plane));
|
||||
miss_count.push_back(0);
|
||||
|
||||
// If alpha is empty then initialize it (we must always have sum(alpha) == C).
|
||||
@ -187,24 +191,22 @@ namespace dlib
|
||||
// report current status
|
||||
const scalar_type risk_gap = cur_risk - (cp_obj-wnorm)/C;
|
||||
if (counter > 0 && problem.optimization_status(cur_obj, cur_obj - cp_obj,
|
||||
cur_risk, risk_gap, planes.size(), counter))
|
||||
cur_risk, risk_gap, planes.nc(), counter))
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
// compute kernel matrix for all the planes
|
||||
K.swap(Ktmp);
|
||||
K.set_size(planes.size(), planes.size());
|
||||
K.set_size(planes.nc(), planes.nc());
|
||||
// copy over the old K matrix
|
||||
set_subm(K, 0,0, Ktmp.nr(), Ktmp.nc()) = Ktmp;
|
||||
|
||||
// now add the new row and column to K
|
||||
long rr = 0;
|
||||
for (typename std::list<vect_type>::iterator r = planes.begin(); r != planes.end(); ++r)
|
||||
for (long c = 0; c < planes.nc(); ++c)
|
||||
{
|
||||
K(rr, Ktmp.nc()) = dot(*r, planes.back());
|
||||
K(Ktmp.nc(), rr) = K(rr,Ktmp.nc());
|
||||
++rr;
|
||||
K(c, Ktmp.nc()) = dot(colm(planes,c), new_plane);
|
||||
K(Ktmp.nc(), c) = K(c,Ktmp.nc());
|
||||
}
|
||||
|
||||
|
||||
@ -216,23 +218,22 @@ namespace dlib
|
||||
eps = 1e-16;
|
||||
// Note that we warm start this optimization by using the alpha from the last
|
||||
// iteration as the starting point.
|
||||
solve_qp_using_smo(K, vector_to_matrix(bs), alpha, eps, sub_max_iter);
|
||||
if (require_nonnegative_w)
|
||||
solve_qp4_using_smo(planes, K, vector_to_matrix(bs), alpha, eps, sub_max_iter);
|
||||
else
|
||||
solve_qp_using_smo(K, vector_to_matrix(bs), alpha, eps, sub_max_iter);
|
||||
|
||||
// construct the w that minimized the subproblem.
|
||||
w = 0;
|
||||
rr = 0;
|
||||
for (typename std::list<vect_type>::iterator i = planes.begin(); i != planes.end(); ++i)
|
||||
w = -(planes*alpha);
|
||||
if (require_nonnegative_w)
|
||||
w = lowerbound(w,0);
|
||||
|
||||
for (long i = 0; i < alpha.size(); ++i)
|
||||
{
|
||||
if (alpha(rr) != 0)
|
||||
{
|
||||
w -= alpha(rr)*(*i);
|
||||
miss_count[rr] = 0;
|
||||
}
|
||||
if (alpha(i) != 0)
|
||||
miss_count[i] = 0;
|
||||
else
|
||||
{
|
||||
miss_count[rr] += 1;
|
||||
}
|
||||
++rr;
|
||||
miss_count[i] += 1;
|
||||
}
|
||||
|
||||
// Compute the lower bound on the true objective given to us by the cutting
|
||||
@ -245,13 +246,11 @@ namespace dlib
|
||||
while (max(vector_to_matrix(miss_count)) >= inactive_thresh)
|
||||
{
|
||||
const long idx = index_of_max(vector_to_matrix(miss_count));
|
||||
typename std::list<vect_type>::iterator i0 = planes.begin();
|
||||
advance(i0, idx);
|
||||
planes.erase(i0);
|
||||
bs.erase(bs.begin()+idx);
|
||||
miss_count.erase(miss_count.begin()+idx);
|
||||
K = removerc(K, idx, idx);
|
||||
alpha = remove_row(alpha,idx);
|
||||
planes = remove_col(planes,idx);
|
||||
}
|
||||
|
||||
++counter;
|
||||
|
@ -124,7 +124,8 @@ namespace dlib
|
||||
For reference, OCA solves optimization problems with the following form:
|
||||
Minimize: f(w) == 0.5*dot(w,w) + C*R(w)
|
||||
|
||||
Where R(w) is a user-supplied convex function and C > 0
|
||||
Where R(w) is a user-supplied convex function and C > 0. Optionally,
|
||||
this object can also add the non-negativity constraint that min(w) >= 0.
|
||||
|
||||
|
||||
For a detailed discussion you should consult the following papers
|
||||
@ -149,7 +150,8 @@ namespace dlib
|
||||
>
|
||||
typename matrix_type::type operator() (
|
||||
const oca_problem<matrix_type>& problem,
|
||||
matrix_type& w
|
||||
matrix_type& w,
|
||||
bool require_nonnegative_w = false
|
||||
) const;
|
||||
/*!
|
||||
requires
|
||||
@ -160,6 +162,10 @@ namespace dlib
|
||||
- The optimization algorithm runs until problem.optimization_status()
|
||||
indicates it is time to stop.
|
||||
- returns the objective value at the solution #w
|
||||
- if (require_nonnegative_w == true) then
|
||||
- Adds the constraint that every element of w be non-negative.
|
||||
Therefore, if this argument is true then #w won't contain any
|
||||
negative values.
|
||||
!*/
|
||||
|
||||
void set_subproblem_epsilon (
|
||||
|
@ -147,7 +147,7 @@ namespace dlib
|
||||
|
||||
// Check how big the duality gap is and stop when it goes below eps.
|
||||
// The duality gap is the gap between the objective value of the function
|
||||
// we are optimizing and the value of it's primal form. This value is always
|
||||
// we are optimizing and the value of its primal form. This value is always
|
||||
// greater than or equal to the distance to the optimum solution so it is a
|
||||
// good way to decide if we should stop. See the book referenced above for
|
||||
// more information. In particular, see the part about the Wolfe Dual.
|
||||
@ -211,6 +211,176 @@ namespace dlib
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename EXP1,
|
||||
typename EXP2,
|
||||
typename EXP3,
|
||||
typename T, long NR, long NC, typename MM, typename L
|
||||
>
|
||||
unsigned long solve_qp4_using_smo (
|
||||
const matrix_exp<EXP1>& A,
|
||||
const matrix_exp<EXP2>& Q,
|
||||
const matrix_exp<EXP3>& b,
|
||||
matrix<T,NR,NC,MM,L>& alpha,
|
||||
T eps,
|
||||
unsigned long max_iter
|
||||
)
|
||||
{
|
||||
// make sure requires clause is not broken
|
||||
DLIB_CASSERT(A.nc() == alpha.size() &&
|
||||
Q.nr() == Q.nc() &&
|
||||
is_col_vector(b) &&
|
||||
is_col_vector(alpha) &&
|
||||
b.size() == alpha.size() &&
|
||||
b.size() == Q.nr() &&
|
||||
alpha.size() > 0 &&
|
||||
min(alpha) >= 0 &&
|
||||
eps > 0 &&
|
||||
max_iter > 0,
|
||||
"\t void solve_qp4_using_smo()"
|
||||
<< "\n\t Invalid arguments were given to this function"
|
||||
<< "\n\t A.nc(): " << A.nc()
|
||||
<< "\n\t Q.nr(): " << Q.nr()
|
||||
<< "\n\t Q.nc(): " << Q.nc()
|
||||
<< "\n\t is_col_vector(b): " << is_col_vector(b)
|
||||
<< "\n\t is_col_vector(alpha): " << is_col_vector(alpha)
|
||||
<< "\n\t b.size(): " << b.size()
|
||||
<< "\n\t alpha.size(): " << alpha.size()
|
||||
<< "\n\t Q.nr(): " << Q.nr()
|
||||
<< "\n\t min(alpha): " << min(alpha)
|
||||
<< "\n\t eps: " << eps
|
||||
<< "\n\t max_iter: " << max_iter
|
||||
);
|
||||
|
||||
const T C = sum(alpha);
|
||||
|
||||
/*
|
||||
For this optimization problem, it is the case that the optimal
|
||||
value of lambda is given by a simple closed form expression if we
|
||||
know the optimal alpha. So what we will do is to just optimize
|
||||
alpha and every now and then we will update lambda with its optimal
|
||||
value. Therefore, we use essentially the same method as the
|
||||
solve_qp_using_smo() routine.
|
||||
*/
|
||||
|
||||
// compute optimal lambda for current alpha
|
||||
matrix<T,NR,1,MM,L> lambda = A*alpha;
|
||||
lambda = lowerbound(lambda, 0);
|
||||
|
||||
// Compute f'(alpha) (i.e. the gradient of f(alpha) with respect to alpha) for the current alpha.
|
||||
matrix<T,NR,NC,MM,L> df = Q*alpha - b - trans(A)*lambda;
|
||||
|
||||
const T tau = 1000*std::numeric_limits<T>::epsilon();
|
||||
|
||||
T big, little;
|
||||
unsigned long iter = 0;
|
||||
for (; iter < max_iter; ++iter)
|
||||
{
|
||||
// Find the two elements of df that satisfy the following:
|
||||
// - little_idx == index_of_min(df)
|
||||
// - big_idx == the index of the largest element in df such that alpha(big_idx) > 0
|
||||
// These two indices will tell us which two alpha values are most in violation of the KKT
|
||||
// optimality conditions.
|
||||
big = -std::numeric_limits<T>::max();
|
||||
long big_idx = 0;
|
||||
little = std::numeric_limits<T>::max();
|
||||
long little_idx = 0;
|
||||
for (long i = 0; i < df.nr(); ++i)
|
||||
{
|
||||
if (df(i) > big && alpha(i) > 0)
|
||||
{
|
||||
big = df(i);
|
||||
big_idx = i;
|
||||
}
|
||||
if (df(i) < little)
|
||||
{
|
||||
little = df(i);
|
||||
little_idx = i;
|
||||
}
|
||||
}
|
||||
|
||||
// Check how big the duality gap is and stop when it goes below eps.
|
||||
// The duality gap is the gap between the objective value of the function
|
||||
// we are optimizing and the value of its primal form. This value is always
|
||||
// greater than or equal to the distance to the optimum solution so it is a
|
||||
// good way to decide if we should stop.
|
||||
if (trans(alpha)*df - C*little < eps)
|
||||
{
|
||||
// compute optimal lambda and recheck the duality gap to make
|
||||
// sure we have really converged.
|
||||
lambda = A*alpha;
|
||||
lambda = lowerbound(lambda, 0);
|
||||
df = Q*alpha - b - trans(A)*lambda;
|
||||
|
||||
if (trans(alpha)*df - C*min(df) < eps)
|
||||
break;
|
||||
else
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
// Save these values, we will need them later.
|
||||
const T old_alpha_big = alpha(big_idx);
|
||||
const T old_alpha_little = alpha(little_idx);
|
||||
|
||||
|
||||
// Now optimize the two variables we just picked.
|
||||
T quad_coef = Q(big_idx,big_idx) + Q(little_idx,little_idx) - 2*Q(big_idx, little_idx);
|
||||
if (quad_coef <= tau)
|
||||
quad_coef = tau;
|
||||
const T delta = (big - little)/quad_coef;
|
||||
alpha(big_idx) -= delta;
|
||||
alpha(little_idx) += delta;
|
||||
|
||||
// Make sure alpha stays feasible. That is, make sure the updated alpha doesn't
|
||||
// violate the non-negativity constraint.
|
||||
if (alpha(big_idx) < 0)
|
||||
{
|
||||
// Since an alpha can't be negative we will just set it to 0 and shift all the
|
||||
// weight to the other alpha.
|
||||
alpha(big_idx) = 0;
|
||||
alpha(little_idx) = old_alpha_big + old_alpha_little;
|
||||
}
|
||||
|
||||
|
||||
// Every 300 iterations
|
||||
if ((iter%300) == 299)
|
||||
{
|
||||
// compute the optimal lambda for the current alpha
|
||||
lambda = A*alpha;
|
||||
lambda = lowerbound(lambda, 0);
|
||||
|
||||
// Perform this form of the update every so often because doing so can help
|
||||
// avoid the buildup of numerical errors you get with the alternate update
|
||||
// below.
|
||||
df = Q*alpha - b - trans(A)*lambda;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Now update the gradient. We will perform the equivalent of: df = Q*alpha - b;
|
||||
const T delta_alpha_big = alpha(big_idx) - old_alpha_big;
|
||||
const T delta_alpha_little = alpha(little_idx) - old_alpha_little;
|
||||
|
||||
for(long k = 0; k < df.nr(); ++k)
|
||||
df(k) += Q(big_idx,k)*delta_alpha_big + Q(little_idx,k)*delta_alpha_little;;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
using namespace std;
|
||||
cout << "SMO: " << endl;
|
||||
cout << " duality gap: "<< trans(alpha)*df - C*min(df) << endl;
|
||||
cout << " KKT gap: "<< big-little << endl;
|
||||
cout << " iter: "<< iter+1 << endl;
|
||||
cout << " eps: "<< eps << endl;
|
||||
*/
|
||||
|
||||
|
||||
return iter+1;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
}
|
||||
|
||||
#endif // DLIB_OPTIMIZATION_SOLVE_QP_UsING_SMO_H__
|
||||
|
@ -52,6 +52,57 @@ namespace dlib
|
||||
converge to eps accuracy then the number returned will be max_iter+1.
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename EXP1,
|
||||
typename EXP2,
|
||||
typename EXP3,
|
||||
typename T, long NR, long NC, typename MM, typename L
|
||||
>
|
||||
unsigned long solve_qp4_using_smo (
|
||||
const matrix_exp<EXP1>& A,
|
||||
const matrix_exp<EXP2>& Q,
|
||||
const matrix_exp<EXP3>& b,
|
||||
matrix<T,NR,NC,MM,L>& alpha,
|
||||
T eps,
|
||||
unsigned long max_iter
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- A.nc() == alpha.size()
|
||||
- Q.nr() == Q.nc()
|
||||
- is_col_vector(b) == true
|
||||
- is_col_vector(alpha) == true
|
||||
- b.size() == alpha.size() == Q.nr()
|
||||
- alpha.size() > 0
|
||||
- min(alpha) >= 0
|
||||
- eps > 0
|
||||
- max_iter > 0
|
||||
ensures
|
||||
- Let C == sum(alpha) (i.e. C is the sum of the alpha values you
|
||||
supply to this function)
|
||||
- This function solves the following quadratic program:
|
||||
Minimize: f(alpha,lambda) == 0.5*trans(alpha)*Q*alpha - trans(alpha)*b +
|
||||
0.5*trans(lambda)*lambda - trans(lambda)*A*alpha
|
||||
subject to the following constraints:
|
||||
- sum(alpha) == C (i.e. the sum of alpha values doesn't change)
|
||||
- min(alpha) >= 0 (i.e. all alpha values are nonnegative)
|
||||
- min(lambda) >= 0 (i.e. all lambda values are nonnegative)
|
||||
Where f is convex. This means that Q should be positive-semidefinite.
|
||||
- The solution to the above QP will be stored in #alpha. The optimal
|
||||
lambda is not output since its value is given by the following expression:
|
||||
lowerbound(A*alpha,0)
|
||||
- This function uses a simple implementation of the sequential minimal
|
||||
optimization algorithm. It starts the algorithm with the given alpha
|
||||
and it works on the problem until the duality gap (i.e. how far away
|
||||
we are from the optimum solution) is less than eps. So eps controls
|
||||
how accurate the solution is and smaller values result in better solutions.
|
||||
- At most max_iter iterations of optimization will be performed.
|
||||
- returns the number of iterations performed. If this method fails to
|
||||
converge to eps accuracy then the number returned will be max_iter+1.
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
}
|
||||
|
@ -173,8 +173,11 @@ namespace dlib
|
||||
{
|
||||
using dlib::sparse_vector::dot;
|
||||
using dlib::dot;
|
||||
// The reason for using w_size_m1 and not just w.size()-1 is because
|
||||
// doing it this way avoids an inane warning from gcc that can occur in some cases.
|
||||
const long w_size_m1 = w.size()-1;
|
||||
for (long i = 0; i < samples.size(); ++i)
|
||||
dot_prods[i] = dot(colm(w,0,w.size()-1), samples(i)) - w(w.size()-1);
|
||||
dot_prods[i] = dot(colm(w,0,w_size_m1), samples(i)) - w(w_size_m1);
|
||||
|
||||
if (is_first_call)
|
||||
{
|
||||
|
@ -74,6 +74,7 @@ set (tests
|
||||
metaprogramming.cpp
|
||||
multithreaded_object.cpp
|
||||
object_detector.cpp
|
||||
oca.cpp
|
||||
one_vs_all_trainer.cpp
|
||||
one_vs_one_trainer.cpp
|
||||
optimization.cpp
|
||||
|
@ -89,6 +89,7 @@ SRC += member_function_pointer.cpp
|
||||
SRC += metaprogramming.cpp
|
||||
SRC += multithreaded_object.cpp
|
||||
SRC += object_detector.cpp
|
||||
SRC += oca.cpp
|
||||
SRC += one_vs_all_trainer.cpp
|
||||
SRC += one_vs_one_trainer.cpp
|
||||
SRC += optimization.cpp
|
||||
|
@ -607,6 +607,31 @@ namespace
|
||||
DLIB_TEST(equal(0.0/m , zeros_matrix<double>(3,4)));
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
matrix<int> m(2,3);
|
||||
m = 1,2,3,
|
||||
4,5,6;
|
||||
matrix<int> M(2,3);
|
||||
M = m;
|
||||
|
||||
DLIB_TEST(upperbound(m,6) == M);
|
||||
DLIB_TEST(upperbound(m,60) == M);
|
||||
DLIB_TEST(lowerbound(m,-2) == M);
|
||||
DLIB_TEST(lowerbound(m,0) == M);
|
||||
|
||||
M = 2,2,3,
|
||||
4,5,6;
|
||||
DLIB_TEST(lowerbound(m,2) == M);
|
||||
|
||||
M = 0,0,0,
|
||||
0,0,0;
|
||||
DLIB_TEST(upperbound(m,0) == M);
|
||||
|
||||
M = 1,2,3,
|
||||
3,3,3;
|
||||
DLIB_TEST(upperbound(m,3) == M);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
84
dlib/test/oca.cpp
Normal file
84
dlib/test/oca.cpp
Normal file
@ -0,0 +1,84 @@
|
||||
// Copyright (C) 2012 Davis E. King (davis@dlib.net)
|
||||
// License: Boost Software License See LICENSE.txt for the full license.
|
||||
|
||||
|
||||
#include <dlib/optimization.h>
|
||||
#include <dlib/svm.h>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <cstdlib>
|
||||
#include <ctime>
|
||||
#include <vector>
|
||||
|
||||
#include "tester.h"
|
||||
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
using namespace test;
|
||||
using namespace dlib;
|
||||
using namespace std;
|
||||
|
||||
logger dlog("test.oca");
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
class test_oca : public tester
|
||||
{
|
||||
|
||||
public:
|
||||
test_oca (
|
||||
) :
|
||||
tester ("test_oca",
|
||||
"Runs tests on the oca component.")
|
||||
{
|
||||
}
|
||||
|
||||
void perform_test(
|
||||
)
|
||||
{
|
||||
print_spinner();
|
||||
|
||||
typedef matrix<double,0,1> w_type;
|
||||
w_type w;
|
||||
|
||||
|
||||
std::vector<w_type> x;
|
||||
w_type temp(2);
|
||||
temp = -1, 1;
|
||||
x.push_back(temp);
|
||||
temp = 1, -1;
|
||||
x.push_back(temp);
|
||||
|
||||
std::vector<double> y;
|
||||
y.push_back(+1);
|
||||
y.push_back(-1);
|
||||
|
||||
w_type true_w(3);
|
||||
|
||||
oca solver;
|
||||
|
||||
// test the version without a non-negativity constraint on w.
|
||||
solver(make_oca_problem_c_svm<w_type>(2.0, 3.0, vector_to_matrix(x), vector_to_matrix(y), false, 1e-12, 40), w, false);
|
||||
dlog << LINFO << w;
|
||||
true_w = -0.5, 0.5, 0;
|
||||
dlog << LINFO << "error: "<< max(abs(w-true_w));
|
||||
DLIB_TEST(max(abs(w-true_w)) < 1e-10);
|
||||
|
||||
print_spinner();
|
||||
|
||||
// test the version with a non-negativity constraint on w.
|
||||
solver(make_oca_problem_c_svm<w_type>(2.0, 3.0, vector_to_matrix(x), vector_to_matrix(y), false, 1e-12, 40), w, true);
|
||||
dlog << LINFO << w;
|
||||
true_w = 0, 1, 0;
|
||||
dlog << LINFO << "error: "<< max(abs(w-true_w));
|
||||
DLIB_TEST(max(abs(w-true_w)) < 1e-10);
|
||||
}
|
||||
|
||||
} a;
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
@ -72,6 +72,290 @@ namespace
|
||||
matrix<double> Q, b;
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
double compute_objective_value (
|
||||
const matrix<double,0,1>& w,
|
||||
const matrix<double>& A,
|
||||
const matrix<double,0,1>& b,
|
||||
const double C
|
||||
)
|
||||
{
|
||||
return 0.5*dot(w,w) + C*max(trans(A)*w + b);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
void test_qp4_test1()
|
||||
{
|
||||
matrix<double> A(3,2);
|
||||
A = 1,2,
|
||||
-3,1,
|
||||
6,7;
|
||||
|
||||
matrix<double,0,1> b(2);
|
||||
b = 1,
|
||||
2;
|
||||
|
||||
const double C = 2;
|
||||
|
||||
matrix<double,0,1> alpha(2), true_alpha(2);
|
||||
alpha = C/2, C/2;
|
||||
|
||||
solve_qp4_using_smo(A, tmp(trans(A)*A), b, alpha, 1e-9, 800);
|
||||
matrix<double,0,1> w = lowerbound(-A*alpha, 0);
|
||||
|
||||
dlog << LINFO << "*******************************************************";
|
||||
|
||||
dlog << LINFO << "w: " << trans(w);
|
||||
|
||||
dlog << LINFO << "computed obj: "<< compute_objective_value(w,A,b,C);
|
||||
w = 0;
|
||||
dlog << LINFO << "with true w obj: "<< compute_objective_value(w,A,b,C);
|
||||
|
||||
dlog << LINFO << "alpha: " << trans(alpha);
|
||||
true_alpha = 0, 2;
|
||||
dlog << LINFO << "true alpha: "<< trans(true_alpha);
|
||||
|
||||
dlog << LINFO << "alpha error: "<< max(abs(alpha-true_alpha));
|
||||
DLIB_TEST(max(abs(alpha-true_alpha)) < 1e-9);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
void test_qp4_test2()
|
||||
{
|
||||
matrix<double> A(3,2);
|
||||
A = 1,2,
|
||||
3,-1,
|
||||
6,7;
|
||||
|
||||
matrix<double,0,1> b(2);
|
||||
b = 1,
|
||||
2;
|
||||
|
||||
const double C = 2;
|
||||
|
||||
matrix<double,0,1> alpha(2), true_alpha(2);
|
||||
alpha = C/2, C/2;
|
||||
|
||||
solve_qp4_using_smo(A, tmp(trans(A)*A), b, alpha, 1e-9, 800);
|
||||
matrix<double,0,1> w = lowerbound(-A*alpha, 0);
|
||||
|
||||
dlog << LINFO << "*******************************************************";
|
||||
|
||||
dlog << LINFO << "w: " << trans(w);
|
||||
|
||||
dlog << LINFO << "computed obj: "<< compute_objective_value(w,A,b,C);
|
||||
w = 0, 0.25, 0;
|
||||
dlog << LINFO << "with true w obj: "<< compute_objective_value(w,A,b,C);
|
||||
|
||||
dlog << LINFO << "alpha: " << trans(alpha);
|
||||
true_alpha = 0.43750, 1.56250;
|
||||
dlog << LINFO << "true alpha: "<< trans(true_alpha);
|
||||
|
||||
dlog << LINFO << "alpha error: "<< max(abs(alpha-true_alpha));
|
||||
DLIB_TEST(max(abs(alpha-true_alpha)) < 1e-9);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
void test_qp4_test3()
|
||||
{
|
||||
matrix<double> A(3,2);
|
||||
A = 1,2,
|
||||
-3,-1,
|
||||
6,7;
|
||||
|
||||
matrix<double,0,1> b(2);
|
||||
b = 1,
|
||||
2;
|
||||
|
||||
const double C = 2;
|
||||
|
||||
matrix<double,0,1> alpha(2), true_alpha(2);
|
||||
alpha = C/2, C/2;
|
||||
|
||||
solve_qp4_using_smo(A, tmp(trans(A)*A), b, alpha, 1e-9, 800);
|
||||
matrix<double,0,1> w = lowerbound(-A*alpha, 0);
|
||||
|
||||
dlog << LINFO << "*******************************************************";
|
||||
|
||||
dlog << LINFO << "w: " << trans(w);
|
||||
|
||||
dlog << LINFO << "computed obj: "<< compute_objective_value(w,A,b,C);
|
||||
w = 0, 2, 0;
|
||||
dlog << LINFO << "with true w obj: "<< compute_objective_value(w,A,b,C);
|
||||
|
||||
dlog << LINFO << "alpha: " << trans(alpha);
|
||||
true_alpha = 0, 2;
|
||||
dlog << LINFO << "true alpha: "<< trans(true_alpha);
|
||||
|
||||
dlog << LINFO << "alpha error: "<< max(abs(alpha-true_alpha));
|
||||
DLIB_TEST(max(abs(alpha-true_alpha)) < 1e-9);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
void test_qp4_test5()
|
||||
{
|
||||
matrix<double> A(3,3);
|
||||
A = 1,2,4,
|
||||
3,1,6,
|
||||
6,7,-2;
|
||||
|
||||
matrix<double,0,1> b(3);
|
||||
b = 1,
|
||||
2,
|
||||
3;
|
||||
|
||||
const double C = 2;
|
||||
|
||||
matrix<double,0,1> alpha(3), true_alpha(3);
|
||||
alpha = C/2, C/2, 0;
|
||||
|
||||
solve_qp4_using_smo(A, tmp(trans(A)*A), b, alpha, 1e-9, 800);
|
||||
matrix<double,0,1> w = lowerbound(-A*alpha, 0);
|
||||
|
||||
|
||||
dlog << LINFO << "*******************************************************";
|
||||
|
||||
dlog << LINFO << "w: " << trans(w);
|
||||
|
||||
dlog << LINFO << "computed obj: "<< compute_objective_value(w,A,b,C);
|
||||
w = 0, 0, 0.11111111111111111111;
|
||||
dlog << LINFO << "with true w obj: "<< compute_objective_value(w,A,b,C);
|
||||
|
||||
dlog << LINFO << "alpha: " << trans(alpha);
|
||||
true_alpha = 0, 0.432098765432099, 1.567901234567901;
|
||||
dlog << LINFO << "true alpha: "<< trans(true_alpha);
|
||||
|
||||
dlog << LINFO << "alpha error: "<< max(abs(alpha-true_alpha));
|
||||
DLIB_TEST(max(abs(alpha-true_alpha)) < 1e-9);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
void test_qp4_test4()
|
||||
{
|
||||
matrix<double> A(3,2);
|
||||
A = 1,2,
|
||||
3,1,
|
||||
6,7;
|
||||
|
||||
matrix<double,0,1> b(2);
|
||||
b = 1,
|
||||
2;
|
||||
|
||||
const double C = 2;
|
||||
|
||||
matrix<double,0,1> alpha(2), true_alpha(2);
|
||||
alpha = C/2, C/2;
|
||||
|
||||
solve_qp4_using_smo(A, tmp(trans(A)*A), b, alpha, 1e-9, 800);
|
||||
matrix<double,0,1> w = lowerbound(-A*alpha, 0);
|
||||
|
||||
dlog << LINFO << "*******************************************************";
|
||||
|
||||
dlog << LINFO << "w: " << trans(w);
|
||||
|
||||
dlog << LINFO << "computed obj: "<< compute_objective_value(w,A,b,C);
|
||||
w = 0, 0, 0;
|
||||
dlog << LINFO << "with true w obj: "<< compute_objective_value(w,A,b,C);
|
||||
|
||||
dlog << LINFO << "alpha: " << trans(alpha);
|
||||
true_alpha = 0, 2;
|
||||
dlog << LINFO << "true alpha: "<< trans(true_alpha);
|
||||
|
||||
dlog << LINFO << "alpha error: "<< max(abs(alpha-true_alpha));
|
||||
DLIB_TEST(max(abs(alpha-true_alpha)) < 1e-9);
|
||||
}
|
||||
|
||||
void test_qp4_test6()
|
||||
{
|
||||
matrix<double> A(3,3);
|
||||
A = 1,2,4,
|
||||
3,1,6,
|
||||
6,7,-2;
|
||||
|
||||
matrix<double,0,1> b(3);
|
||||
b = -1,
|
||||
-2,
|
||||
-3;
|
||||
|
||||
const double C = 2;
|
||||
|
||||
matrix<double,0,1> alpha(3), true_alpha(3);
|
||||
alpha = C/2, C/2, 0;
|
||||
|
||||
solve_qp4_using_smo(A, tmp(trans(A)*A), b, alpha, 1e-9, 800);
|
||||
matrix<double,0,1> w = lowerbound(-A*alpha, 0);
|
||||
|
||||
dlog << LINFO << "*******************************************************";
|
||||
|
||||
dlog << LINFO << "w: " << trans(w);
|
||||
|
||||
dlog << LINFO << "computed obj: "<< compute_objective_value(w,A,b,C);
|
||||
w = 0, 0, 0;
|
||||
dlog << LINFO << "with true w obj: "<< compute_objective_value(w,A,b,C);
|
||||
|
||||
dlog << LINFO << "alpha: " << trans(alpha);
|
||||
true_alpha = 2, 0, 0;
|
||||
dlog << LINFO << "true alpha: "<< trans(true_alpha);
|
||||
|
||||
dlog << LINFO << "alpha error: "<< max(abs(alpha-true_alpha));
|
||||
DLIB_TEST(max(abs(alpha-true_alpha)) < 1e-9);
|
||||
}
|
||||
|
||||
void test_qp4_test7()
|
||||
{
|
||||
matrix<double> A(3,3);
|
||||
A = -1,2,4,
|
||||
-3,1,6,
|
||||
-6,7,-2;
|
||||
|
||||
matrix<double,0,1> b(3);
|
||||
b = -1,
|
||||
-2,
|
||||
3;
|
||||
|
||||
matrix<double> Q(3,3);
|
||||
Q = 4,-5,6,
|
||||
1,-4,2,
|
||||
-9,-4,5;
|
||||
Q = Q*trans(Q);
|
||||
|
||||
const double C = 2;
|
||||
|
||||
matrix<double,0,1> alpha(3), true_alpha(3);
|
||||
alpha = C/2, C/2, 0;
|
||||
|
||||
solve_qp4_using_smo(A, Q, b, alpha, 1e-9, 800);
|
||||
|
||||
dlog << LINFO << "*******************************************************";
|
||||
|
||||
dlog << LINFO << "alpha: " << trans(alpha);
|
||||
true_alpha = 0, 2, 0;
|
||||
dlog << LINFO << "true alpha: "<< trans(true_alpha);
|
||||
|
||||
dlog << LINFO << "alpha error: "<< max(abs(alpha-true_alpha));
|
||||
DLIB_TEST(max(abs(alpha-true_alpha)) < 1e-9);
|
||||
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
void test_solve_qp4_using_smo()
|
||||
{
|
||||
test_qp4_test1();
|
||||
test_qp4_test2();
|
||||
test_qp4_test3();
|
||||
test_qp4_test4();
|
||||
test_qp4_test5();
|
||||
test_qp4_test6();
|
||||
test_qp4_test7();
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
class opt_qp_solver_tester : public tester
|
||||
@ -98,6 +382,10 @@ namespace
|
||||
void perform_test(
|
||||
)
|
||||
{
|
||||
print_spinner();
|
||||
test_solve_qp4_using_smo();
|
||||
print_spinner();
|
||||
|
||||
++thetime;
|
||||
typedef matrix<double,0,1> sample_type;
|
||||
//dlog << LINFO << "time seed: " << thetime;
|
||||
|
@ -59,14 +59,11 @@ cd examples
|
||||
mkdir build
|
||||
cd build
|
||||
cmake ..
|
||||
cmake --build . --config Release
|
||||
</code_box>
|
||||
Then cmake will create whatever is needed to compile the examples on your computer. If you are using Linux
|
||||
or a Mac it will create a makefile and then you can compile everything by typing:
|
||||
<code_box>
|
||||
make
|
||||
</code_box>
|
||||
Or on windows, if you have Visual Studio installed, it will make a Visual Studio project which you can then open
|
||||
and compile by clicking the build button.
|
||||
Note that you also need to have a C++ compiler installed on your system. There are free C++ compilers
|
||||
for most operating systems. For example, Visual Studio Express is free on Windows and GCC is free and
|
||||
works well on Mac OS X and Linux systems.
|
||||
|
||||
<h2>Compiling on Linux From Command Line</h2>
|
||||
From within the examples folder, you can compile any of the examples with a single command like so:
|
||||
@ -87,7 +84,7 @@ it by typing:
|
||||
sudo apt-get install libx11-dev
|
||||
</code_box>
|
||||
|
||||
<h2>Compiling on Windows using gcc</h2>
|
||||
<h2>Compiling on Windows Using GCC</h2>
|
||||
<p>
|
||||
The commands for gcc on windows are the same as above but you may also have to link
|
||||
(via the -l option) to the following libraries: gdi32, comctl32, user32, ws2_32, or imm32.
|
||||
@ -95,7 +92,7 @@ sudo apt-get install libx11-dev
|
||||
windows development than gcc.
|
||||
</p>
|
||||
|
||||
<h2>Compiling on Windows using Visual Studio</h2>
|
||||
<h2>Compiling on Windows Using Visual Studio</h2>
|
||||
<p>
|
||||
All you need to do is create an empty console project. Then add dlib/all/source.cpp to it and add the
|
||||
folder containing the dlib folder to the #include search path. Then you can compile any example program
|
||||
|
@ -557,6 +557,14 @@
|
||||
<name>clamp</name>
|
||||
<link>dlib/matrix/matrix_utilities_abstract.h.html#clamp</link>
|
||||
</item>
|
||||
<item>
|
||||
<name>lowerbound</name>
|
||||
<link>dlib/matrix/matrix_utilities_abstract.h.html#lowerbound</link>
|
||||
</item>
|
||||
<item>
|
||||
<name>upperbound</name>
|
||||
<link>dlib/matrix/matrix_utilities_abstract.h.html#upperbound</link>
|
||||
</item>
|
||||
<item>
|
||||
<name>linspace</name>
|
||||
<link>dlib/matrix/matrix_utilities_abstract.h.html#linspace</link>
|
||||
|
@ -161,7 +161,8 @@
|
||||
|
||||
<li><b>Image Processing</b>
|
||||
<ul>
|
||||
<li>Windows BMP <a href="imaging.html#load_bmp">read</a> and <a href="imaging.html#save_bmp">write</a> support</li>
|
||||
<li>Routines for <a href="imaging.html#load_image">reading</a> and
|
||||
<a href="imaging.html#save_bmp">writing</a> common image formats. </li>
|
||||
<li>Automatic color space conversion between various pixel types</li>
|
||||
<li>Common image operations such as edge finding and morphological operations</li>
|
||||
<li>Implementations of the <a href="imaging.html#get_surf_points">SURF</a>
|
||||
|
@ -43,6 +43,7 @@
|
||||
<item>solve_qp_using_smo</item>
|
||||
<item>solve_qp2_using_smo</item>
|
||||
<item>solve_qp3_using_smo</item>
|
||||
<item>solve_qp4_using_smo</item>
|
||||
<item>oca</item>
|
||||
<item>solve_least_squares</item>
|
||||
<item>solve_least_squares_lm</item>
|
||||
@ -430,6 +431,28 @@ subject to the following constraint:
|
||||
|
||||
</component>
|
||||
|
||||
<!-- ************************************************************************* -->
|
||||
|
||||
<component>
|
||||
<name>solve_qp4_using_smo</name>
|
||||
<file>dlib/optimization.h</file>
|
||||
<spec_file link="true">dlib/optimization/optimization_solve_qp_using_smo_abstract.h</spec_file>
|
||||
<description>
|
||||
This function solves the following quadratic program:
|
||||
<pre>
|
||||
Minimize: f(alpha,lambda) == 0.5*trans(alpha)*Q*alpha - trans(alpha)*b +
|
||||
0.5*trans(lambda)*lambda - trans(lambda)*A*alpha
|
||||
subject to the following constraints:
|
||||
sum(alpha) == C
|
||||
min(alpha) >= 0
|
||||
min(lambda) >= 0
|
||||
Where f is convex. This means that Q should be positive-semidefinite.
|
||||
</pre>
|
||||
|
||||
</description>
|
||||
|
||||
</component>
|
||||
|
||||
<!-- ************************************************************************* -->
|
||||
|
||||
<component>
|
||||
@ -520,7 +543,8 @@ subject to the following constraint:
|
||||
<pre>
|
||||
Minimize: f(w) == 0.5*dot(w,w) + C*R(w)
|
||||
|
||||
Where R(w) is a user-supplied convex function and C > 0
|
||||
Where R(w) is a user-supplied convex function and C > 0. Optionally,
|
||||
this object can also add the non-negativity constraint that min(w) >= 0.
|
||||
</pre>
|
||||
<br/>
|
||||
<br/>
|
||||
|
@ -13,8 +13,8 @@
|
||||
<current>
|
||||
New Stuff:
|
||||
- Image Processing:
|
||||
- Added the option to make the features generated by the poly_image
|
||||
rotationally invariant.
|
||||
- Added the option to make the features generated by poly_image rotationally
|
||||
invariant.
|
||||
- Added a set of routines for warping, scaling, and resizing images.
|
||||
See the new "Scaling and Rotating" section of the image processing
|
||||
documentation for details.
|
||||
@ -27,7 +27,7 @@ New Stuff:
|
||||
- Added the get_option() routines which slightly simplify option parsing
|
||||
from the command line and config files.
|
||||
- Added the 128bit version of Murmur hash.
|
||||
- Added the rls_filter and kalman_filter objects. These are tools for
|
||||
- Added the kalman_filter and rls_filter objects. These are tools for
|
||||
performing Kalman filtering and recursive least squares filtering.
|
||||
- Added the circular_buffer object.
|
||||
|
||||
@ -58,6 +58,9 @@ Other:
|
||||
- Turned the array object into a single implementation object. Now arrays
|
||||
can be created using the normal array<type> obj; syntax. Additionally,
|
||||
all extensions were merged into the array object.
|
||||
- Added an example program which better documents how to create training
|
||||
data for the object detection tools as well as how this data can be used.
|
||||
See the train_object_detector.cpp example for details.
|
||||
|
||||
</current>
|
||||
|
||||
|
@ -80,6 +80,7 @@
|
||||
<term file="optimization.html" name="solve_qp_using_smo"/>
|
||||
<term file="optimization.html" name="solve_qp2_using_smo"/>
|
||||
<term file="optimization.html" name="solve_qp3_using_smo"/>
|
||||
<term file="optimization.html" name="solve_qp4_using_smo"/>
|
||||
<term file="optimization.html" name="oca"/>
|
||||
<term link="optimization.html#find_min_bobyqa" name="BOBYQA"/>
|
||||
<term file="optimization.html" name="find_max"/>
|
||||
@ -490,6 +491,8 @@
|
||||
<term file="dlib/matrix/matrix_utilities_abstract.h.html" name="pixel_to_vector"/>
|
||||
<term file="dlib/matrix/matrix_utilities_abstract.h.html" name="vector_to_pixel"/>
|
||||
<term file="dlib/matrix/matrix_utilities_abstract.h.html" name="clamp"/>
|
||||
<term file="dlib/matrix/matrix_utilities_abstract.h.html" name="lowerbound"/>
|
||||
<term file="dlib/matrix/matrix_utilities_abstract.h.html" name="upperbound"/>
|
||||
|
||||
|
||||
<term file="dlib/svm/sparse_vector_abstract.h.html" name="scale_by"/>
|
||||
|
@ -2,7 +2,7 @@
|
||||
/*
|
||||
|
||||
This is an example showing how you might use dlib to create a reasonably
|
||||
function command line tool for object detection. This example assumes
|
||||
functional command line tool for object detection. This example assumes
|
||||
you are familiar with the contents of at least the following example
|
||||
programs:
|
||||
- object_detector_ex.cpp
|
||||
@ -13,17 +13,15 @@
|
||||
|
||||
This program is a command line tool for learning to detect objects in images.
|
||||
Therefore, to create an object detector it requires a set of annotated training
|
||||
images. To create this annotated data you will need to compile the imglab tool
|
||||
included with dlib. To do this, go to the tools/imglab folder and type the
|
||||
following:
|
||||
images. To create this annotated data you will need to use the imglab tool
|
||||
included with dlib. It is located in the tools/imglab folder and can be compiled
|
||||
using the following commands.
|
||||
cd tools/imglab
|
||||
mkdir build
|
||||
cd build
|
||||
cmake ..
|
||||
make
|
||||
Note that you may need to install CMake (www.cmake.org) for this to work. Also,
|
||||
if you are using visual studio then you should use the following command to
|
||||
compile instead of "make"
|
||||
cmake --build . --config Release
|
||||
Note that you may need to install CMake (www.cmake.org) for this to work.
|
||||
|
||||
Next, lets assume you have a folder of images called /tmp/images. These images
|
||||
should contain examples of the objects you want to learn to detect. You will
|
||||
@ -35,17 +33,18 @@
|
||||
A window will appear showing all the images. You can use the up and down arrow
|
||||
keys to cycle though the images and the mouse to label objects. In particular,
|
||||
holding the shift key, left clicking, and dragging the mouse will allow you to
|
||||
draw boxes around the objects you which to detect. So next label all the objects
|
||||
draw boxes around the objects you wish to detect. So next, label all the objects
|
||||
with boxes. Note that it is important to label all the objects since any object
|
||||
not labeled is implicitly assumed to be not an object we should detect.
|
||||
|
||||
Once you finish labeling objects go to the file menu, click save, and close the
|
||||
program. This will save the object boxes back to mydataset.xml. You can verify
|
||||
Once you finish labeling objects go to the file menu, click save, and then close
|
||||
the program. This will save the object boxes back to mydataset.xml. You can verify
|
||||
this by opening the tool again with
|
||||
./imglab mydataset.xml
|
||||
and observing that the boxes are present.
|
||||
|
||||
Returning to the present example program, we can now issue the command
|
||||
Returning to the present example program, we can compile it using cmake just as we
|
||||
did with the imglab tool. Once compiled, we can issue the command
|
||||
./train_object_detector -tv mydataset.xml
|
||||
which will train an object detection model based on our labeled data. The model
|
||||
will be saved to the file object_detector.svm. Once this has finished we can use
|
||||
@ -55,8 +54,8 @@
|
||||
be indicated by a red box.
|
||||
|
||||
|
||||
There are also a number of other useful command line options in the current
|
||||
example program which you can explore.
|
||||
There are a number of other useful command line options in the current example
|
||||
program which you can explore below.
|
||||
*/
|
||||
|
||||
|
||||
@ -170,8 +169,8 @@ int main(int argc, char** argv)
|
||||
|
||||
|
||||
image_scanner_type scanner;
|
||||
setup_hashed_features(scanner, images, hash_bits);
|
||||
setup_grid_detection_templates_verbose(scanner, object_locations, grid_size, grid_size);
|
||||
setup_hashed_features(scanner, images, hash_bits);
|
||||
|
||||
structural_object_detection_trainer<image_scanner_type> trainer(scanner);
|
||||
trainer.set_num_threads(threads);
|
||||
|
Loading…
Reference in New Issue
Block a user