Added an implementation of the kernel recursive least squares algorithm

--HG--
extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%402228
This commit is contained in:
Davis King 2008-05-13 01:13:18 +00:00
parent 6b3a644be8
commit bf8b42b37a
3 changed files with 437 additions and 0 deletions

View File

@ -4,6 +4,7 @@
#define DLIB_SVM_HEADER
#include "svm/svm.h"
#include "svm/krls.h"
#endif // DLIB_SVm_HEADER

268
dlib/svm/krls.h Normal file
View File

@ -0,0 +1,268 @@
// Copyright (C) 2008 Davis E. King (davisking@users.sourceforge.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_KRLs_
#define DLIB_KRLs_
#include <vector>
#include "krls_abstract.h"
#include "../matrix.h"
#include "function.h"
#include "../std_allocator.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
template <typename kernel_type>
class krls
{
    /*!
        This is an implementation of the kernel recursive least squares algorithm
        described in the paper:
            The Kernel Recursive Least Squares Algorithm by Yaakov Engel.

        It is an online kernel regression algorithm: each call to train(x,y)
        refines the learned function so that (*this)(x) estimates y.  The object
        keeps a "dictionary" of samples that passed the approximately-linearly-
        dependent (ALD) test, a weight vector alpha (one weight per dictionary
        sample), the inverse kernel matrix K_inv, and the matrix P used by the
        recursive update equations (equations 3.12 - 3.16 in the paper).
    !*/
public:
    typedef typename kernel_type::scalar_type scalar_type;
    typedef typename kernel_type::sample_type sample_type;
    typedef typename kernel_type::mem_manager_type mem_manager_type;

    // kernel_:    the kernel function this object will use
    // tolerance_: threshold for the ALD test in train().  Smaller values keep
    //             more dictionary vectors (more accurate, larger model).
    explicit krls (
        const kernel_type& kernel_,
        scalar_type tolerance_ = 0.001
    ) :
        kernel(kernel_),
        tolerance(tolerance_)
    {
        clear();
    }

    // Changes the ALD tolerance.  Affects only future calls to train();
    // already-learned state is untouched.
    void set_tolerance (scalar_type tolerance_)
    {
        tolerance = tolerance_;
    }

    scalar_type get_tolerance() const
    {
        return tolerance;
    }

    // Discards everything learned so far but keeps the kernel and the
    // tolerance setting.
    void clear ()
    {
        dictionary.clear();
        alpha.clear();
        K_inv.set_size(0,0);
        P.set_size(0,0);
    }

    // Returns the current estimate: f(x) = sum_i alpha[i]*kernel(dictionary[i], x).
    // NOTE(review): evaluation uses kernel() directly while train() uses kern()
    // (which adds a small constant) -- verify this asymmetry is intentional.
    scalar_type operator() (
        const sample_type& x
    ) const
    {
        scalar_type temp = 0;
        for (unsigned long i = 0; i < alpha.size(); ++i)
            temp += alpha[i]*kernel(dictionary[i], x);
        return temp;
    }

    // Performs one step of the KRLS recursion with the training pair (x, y).
    void train (
        const sample_type& x,
        scalar_type y
    )
    {
        const scalar_type kx = kern(x,x);
        if (alpha.size() == 0)
        {
            // set initial state since this is the first training example we have seen
            K_inv.set_size(1,1);
            K_inv(0,0) = 1/kx;

            alpha.push_back(y/kx);
            dictionary.push_back(x);
            P.set_size(1,1);
            P(0,0) = 1;
        }
        else
        {
            // fill in k: the vector of kernel products between x and every
            // dictionary sample
            k.set_size(alpha.size());
            for (long r = 0; r < k.nr(); ++r)
                k(r) = kern(x,dictionary[r]);

            // compute the error we would have if we approximated the new x sample
            // with the dictionary.  That is, do the ALD test from the KRLS paper.
            a = K_inv*k;
            const scalar_type delta = kx - trans(k)*a;

            // if this new vector isn't approximately linearly dependent on the vectors
            // in our dictionary.
            if (std::abs(delta) > tolerance)
            {
                // add x to the dictionary
                dictionary.push_back(x);

                // update K_inv by computing the new one in the temp matrix (equation 3.14)
                matrix<scalar_type,0,0,mem_manager_type> temp(K_inv.nr()+1, K_inv.nc()+1);
                // upper-left block: old K_inv plus the rank-1 correction
                for (long r = 0; r < K_inv.nr(); ++r)
                {
                    for (long c = 0; c < K_inv.nc(); ++c)
                    {
                        temp(r,c) = (K_inv + a*trans(a)/delta)(r,c);
                    }
                }
                temp(K_inv.nr(), K_inv.nc()) = 1/delta;

                // update the new sides of K_inv (the appended last row and column)
                for (long i = 0; i < K_inv.nr(); ++i)
                {
                    temp(K_inv.nr(),i) = -a(i)/delta;
                    temp(i,K_inv.nr()) = -a(i)/delta;
                }
                // put temp into K_inv
                temp.swap(K_inv);

                // Now update the P matrix (equation 3.15): grow it by one row
                // and one column while preserving the old contents.
                temp.set_size(P.nr()+1, P.nc()+1);
                for (long r = 0; r < P.nr(); ++r)
                {
                    for (long c = 0; c < P.nc(); ++c)
                    {
                        temp(r,c) = P(r,c);
                    }
                }
                // initialize the new sides of P
                for (long i = 0; i < P.nr(); ++i)
                {
                    temp(P.nr(),i) = 0;
                    temp(i,P.nc()) = 0;
                }
                temp(P.nr(), P.nc()) = 1;
                temp.swap(P);

                // now update the alpha vector (equation 3.16)
                const scalar_type k_a = (y-trans(k)*vector_to_matrix(alpha))/delta;
                for (unsigned long i = 0; i < alpha.size(); ++i)
                {
                    alpha[i] -= a(i)*k_a;
                }
                alpha.push_back(k_a);
            }
            else
            {
                // x is (nearly) linearly dependent on the dictionary, so the
                // dictionary stays fixed and only P and alpha are updated.
                q = P*a/(1+trans(a)*P*a);

                // update P (equation 3.12)
                temp_matrix = trans(a)*P;
                P -= q*temp_matrix;

                // update the alpha vector (equation 3.13)
                const scalar_type k_a = y-trans(k)*vector_to_matrix(alpha);
                // NOTE(review): (K_inv*q*k_a)(i) may be re-evaluated for every
                // i by the lazy matrix expression templates; consider hoisting
                // the product out of the loop -- measure before changing.
                for (unsigned long i = 0; i < alpha.size(); ++i)
                {
                    alpha[i] += (K_inv*q*k_a)(i);
                }
            }
        }
    }

    // Swaps all state (including the scratch matrices) with item.
    void swap (
        krls& item
    )
    {
        exchange(kernel, item.kernel);
        dictionary.swap(item.dictionary);
        alpha.swap(item.alpha);
        K_inv.swap(item.K_inv);
        P.swap(item.P);
        exchange(tolerance, item.tolerance);
        q.swap(item.q);
        a.swap(item.a);
        k.swap(item.k);
        temp_matrix.swap(item.temp_matrix);
    }

    // Number of "support vectors" currently held in the dictionary.
    unsigned long dictionary_size (
    ) const { return dictionary.size(); }

    // Packages the learned function into a decision_function object.
    decision_function<kernel_type> get_decision_function (
    ) const
    {
        return decision_function<kernel_type>(
            vector_to_matrix(alpha),
            0, // the KRLS algorithm doesn't have a bias term
            kernel,
            vector_to_matrix(dictionary)
        );
    }

    // Serializes the learned state.  The scratch matrices (q, a, k,
    // temp_matrix) are deliberately not saved; they are rebuilt as needed.
    friend void serialize(const krls& item, std::ostream& out)
    {
        serialize(item.kernel, out);
        serialize(item.dictionary, out);
        serialize(item.alpha, out);
        serialize(item.K_inv, out);
        serialize(item.P, out);
        serialize(item.tolerance, out);
    }

    // Restores state saved by serialize(), in the same field order.
    friend void deserialize(krls& item, std::istream& in)
    {
        deserialize(item.kernel, in);
        deserialize(item.dictionary, in);
        deserialize(item.alpha, in);
        deserialize(item.K_inv, in);
        deserialize(item.P, in);
        deserialize(item.tolerance, in);
    }

private:

    // The kernel actually used by train(): the user's kernel plus a small
    // constant offset.  NOTE(review): presumably this keeps the kernel matrix
    // invertible / well conditioned -- confirm against the KRLS paper.
    inline scalar_type kern (const sample_type& m1, const sample_type& m2) const
    {
        return kernel(m1,m2) + 0.001;
    }

    kernel_type kernel;

    typedef std_allocator<sample_type, mem_manager_type> alloc_sample_type;
    typedef std_allocator<scalar_type, mem_manager_type> alloc_scalar_type;
    typedef std::vector<sample_type,alloc_sample_type> dictionary_vector_type;
    typedef std::vector<scalar_type,alloc_scalar_type> alpha_vector_type;

    dictionary_vector_type dictionary;  // samples that passed the ALD test
    alpha_vector_type alpha;            // weights, one per dictionary sample

    matrix<scalar_type,0,0,mem_manager_type> K_inv;  // inverse of the dictionary kernel matrix
    matrix<scalar_type,0,0,mem_manager_type> P;      // the P matrix of the KRLS recursions

    scalar_type tolerance;  // threshold for the ALD test

    // temp variables here just so we don't have to reconstruct them over and over.  Thus,
    // they aren't really part of the state of this object.
    matrix<scalar_type,0,1,mem_manager_type> q;
    matrix<scalar_type,0,1,mem_manager_type> a;
    matrix<scalar_type,0,1,mem_manager_type> k;
    matrix<scalar_type,1,0,mem_manager_type> temp_matrix;

};
// ----------------------------------------------------------------------------------------
template <typename kernel_type>
void swap(krls<kernel_type>& a, krls<kernel_type>& b)
{ a.swap(b); }
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_KRLs_

168
dlib/svm/krls_abstract.h Normal file
View File

@ -0,0 +1,168 @@
// Copyright (C) 2008 Davis E. King (davisking@users.sourceforge.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_KRLs_ABSTRACT_
#ifdef DLIB_KRLs_ABSTRACT_
#include <cmath>
#include "../matrix/matrix_abstract.h"
#include "../algs.h"
#include "../serialize.h"
namespace dlib
{
template <
    typename kernel_type
    >
class krls
{
    /*!
        INITIAL VALUE
            - dictionary_size() == 0

        WHAT THIS OBJECT REPRESENTS
            This is an implementation of the kernel recursive least squares algorithm
            described in the paper:
                The Kernel Recursive Least Squares Algorithm by Yaakov Engel.

            The long and short of this algorithm is that it is an online kernel based
            regression algorithm.  You give it samples (x,y) and it learns the function
            f(x) == y.  For a detailed description of the algorithm read the above paper.
    !*/

public:
    typedef typename kernel_type::scalar_type scalar_type;
    typedef typename kernel_type::sample_type sample_type;
    typedef typename kernel_type::mem_manager_type mem_manager_type;

    explicit krls (
        const kernel_type& kernel_,
        scalar_type tolerance_ = 0.001
    );
    /*!
        ensures
            - this object is properly initialized
            - #get_tolerance() == tolerance_
            - #get_decision_function().kernel_function == kernel_
              (i.e. this object will use the given kernel function)
    !*/

    void set_tolerance (
        scalar_type tolerance_
    );
    /*!
        ensures
            - #get_tolerance() == tolerance_
    !*/

    scalar_type get_tolerance(
    ) const;
    /*!
        ensures
            - returns the tolerance to use for the approximately linearly dependent
              test in the KRLS algorithm.  This is a number which governs how
              accurately this object will approximate the decision function it is
              learning.  Smaller values generally result in a more accurate
              estimate while also resulting in a bigger set of support vectors in
              the learned decision function.  Bigger tolerance values result in a
              less accurate decision function but also in fewer support vectors.
    !*/

    void clear (
    );
    /*!
        ensures
            - clears out all learned data and puts this object back to its
              initial state.
              (e.g. #get_decision_function().support_vectors.size() == 0)
            - #get_tolerance() == get_tolerance()
              (i.e. doesn't change the value of the tolerance)
    !*/

    scalar_type operator() (
        const sample_type& x
    ) const;
    /*!
        ensures
            - returns the current y estimate for the given x
    !*/

    void train (
        const sample_type& x,
        scalar_type y
    );
    /*!
        ensures
            - trains this object that the given x should be mapped to the given y
              (i.e. performs one online update step of the KRLS algorithm)
    !*/

    void swap (
        krls& item
    );
    /*!
        ensures
            - swaps *this with item
    !*/

    unsigned long dictionary_size (
    ) const;
    /*!
        ensures
            - returns the number of "support vectors" in the dictionary.  That is,
              returns a number equal to get_decision_function().support_vectors.size()
    !*/

    decision_function<kernel_type> get_decision_function (
    ) const;
    /*!
        ensures
            - returns a decision function F that represents the function learned
              by this object so far.  I.e. it is the case that:
                - for all x: F(x) == (*this)(x)
    !*/

};
// ----------------------------------------------------------------------------------------
template <
    typename kernel_type
    >
void swap(
    krls<kernel_type>& a,
    krls<kernel_type>& b
)
{ a.swap(b); }
/*!
    provides a global swap function for krls objects
!*/
template <
    typename kernel_type
    >
void serialize (
    const krls<kernel_type>& item,
    std::ostream& out
);
/*!
    provides serialization support for krls objects
!*/

template <
    typename kernel_type
    >
void deserialize (
    krls<kernel_type>& item,
    std::istream& in
);
/*!
    provides deserialization support for krls objects
!*/
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_KRLs_ABSTRACT_