mirror of
https://github.com/davisking/dlib.git
synced 2024-11-01 10:14:53 +08:00
Added an implementation of the kernel recursive least squares algorithm
--HG-- extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%402228
This commit is contained in:
parent
6b3a644be8
commit
bf8b42b37a
@ -4,6 +4,7 @@
|
||||
#define DLIB_SVM_HEADER
|
||||
|
||||
#include "svm/svm.h"
|
||||
#include "svm/krls.h"
|
||||
|
||||
#endif // DLIB_SVm_HEADER
|
||||
|
||||
|
268
dlib/svm/krls.h
Normal file
268
dlib/svm/krls.h
Normal file
@ -0,0 +1,268 @@
|
||||
// Copyright (C) 2008 Davis E. King (davisking@users.sourceforge.net)
|
||||
// License: Boost Software License See LICENSE.txt for the full license.
|
||||
#ifndef DLIB_KRLs_
|
||||
#define DLIB_KRLs_
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "krls_abstract.h"
|
||||
#include "../matrix.h"
|
||||
#include "function.h"
|
||||
#include "../std_allocator.h"
|
||||
|
||||
namespace dlib
|
||||
{
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <typename kernel_type>
class krls
{
    /*!
        This is an implementation of the kernel recursive least squares algorithm
        described in the paper:
            The Kernel Recursive Least Squares Algorithm by Yaakov Engel.

        It performs online kernel regression: each call to train(x,y) updates the
        learned function f so that f(x) approximates y.  The equation numbers in
        the comments below refer to that paper.
    !*/

public:
    typedef typename kernel_type::scalar_type scalar_type;
    typedef typename kernel_type::sample_type sample_type;
    typedef typename kernel_type::mem_manager_type mem_manager_type;


    // Construct with the kernel to use and the tolerance for the
    // approximately-linearly-dependent (ALD) test, which controls the
    // accuracy/sparsity trade-off of the learned function.
    explicit krls (
        const kernel_type& kernel_,
        scalar_type tolerance_ = 0.001
    ) :
        kernel(kernel_),
        tolerance(tolerance_)
    {
        clear();
    }

    // Set the ALD tolerance.  Note that this does not reset any learned state.
    void set_tolerance (scalar_type tolerance_)
    {
        tolerance = tolerance_;
    }

    scalar_type get_tolerance() const
    {
        return tolerance;
    }

    // Discard all training data, returning this object to its initial state.
    // The kernel and tolerance are left unchanged.
    void clear ()
    {
        dictionary.clear();
        alpha.clear();

        K_inv.set_size(0,0);
        P.set_size(0,0);
    }

    // Evaluate the currently learned function at x, i.e.
    // sum_i alpha[i]*k(dictionary[i], x).
    // NOTE(review): this uses the raw kernel while train() uses the
    // regularized kern() (kernel + 0.001) -- confirm the asymmetry is intended.
    scalar_type operator() (
        const sample_type& x
    ) const
    {
        scalar_type temp = 0;
        for (unsigned long i = 0; i < alpha.size(); ++i)
            temp += alpha[i]*kernel(dictionary[i], x);

        return temp;
    }

    // Update the learned function with the training example (x, y).
    void train (
        const sample_type& x,
        scalar_type y
    )
    {
        const scalar_type kx = kern(x,x);
        if (alpha.size() == 0)
        {
            // set initial state since this is the first training example we have seen

            K_inv.set_size(1,1);
            K_inv(0,0) = 1/kx;

            alpha.push_back(y/kx);
            dictionary.push_back(x);
            P.set_size(1,1);
            P(0,0) = 1;
        }
        else
        {
            // fill in k (the vector of kernel evaluations between x and every
            // dictionary vector)
            k.set_size(alpha.size());
            for (long r = 0; r < k.nr(); ++r)
                k(r) = kern(x,dictionary[r]);

            // compute the error we would have if we approximated the new x sample
            // with the dictionary.  That is, do the ALD test from the KRLS paper.
            a = K_inv*k;
            const scalar_type delta = kx - trans(k)*a;

            // if this new vector isn't approximately linearly dependent on the vectors
            // in our dictionary.
            if (std::abs(delta) > tolerance)
            {
                // add x to the dictionary
                dictionary.push_back(x);

                // update K_inv by computing the new one in the temp matrix (equation 3.14)
                matrix<scalar_type,0,0,mem_manager_type> temp(K_inv.nr()+1, K_inv.nc()+1);
                for (long r = 0; r < K_inv.nr(); ++r)
                {
                    for (long c = 0; c < K_inv.nc(); ++c)
                    {
                        temp(r,c) = (K_inv + a*trans(a)/delta)(r,c);
                    }
                }
                temp(K_inv.nr(), K_inv.nc()) = 1/delta;

                // update the new sides of K_inv
                for (long i = 0; i < K_inv.nr(); ++i)
                {
                    temp(K_inv.nr(),i) = -a(i)/delta;
                    temp(i,K_inv.nr()) = -a(i)/delta;
                }
                // put temp into K_inv
                temp.swap(K_inv);


                // Now update the P matrix (equation 3.15).  The old P is copied
                // into the top left corner of the new, one-larger, P.
                temp.set_size(P.nr()+1, P.nc()+1);
                for (long r = 0; r < P.nr(); ++r)
                {
                    for (long c = 0; c < P.nc(); ++c)
                    {
                        temp(r,c) = P(r,c);
                    }
                }
                // initialize the new sides of P
                for (long i = 0; i < P.nr(); ++i)
                {
                    temp(P.nr(),i) = 0;
                    temp(i,P.nc()) = 0;
                }
                temp(P.nr(), P.nc()) = 1;
                temp.swap(P);

                // now update the alpha vector (equation 3.16)
                const scalar_type k_a = (y-trans(k)*vector_to_matrix(alpha))/delta;
                for (unsigned long i = 0; i < alpha.size(); ++i)
                {
                    alpha[i] -= a(i)*k_a;
                }
                alpha.push_back(k_a);
            }
            else
            {
                // x is well approximated by the current dictionary, so update
                // the coefficients without growing the dictionary.
                q = P*a/(1+trans(a)*P*a);

                // update P (equation 3.12)
                temp_matrix = trans(a)*P;
                P -= q*temp_matrix;

                // update the alpha vector (equation 3.13)
                const scalar_type k_a = y-trans(k)*vector_to_matrix(alpha);
                for (unsigned long i = 0; i < alpha.size(); ++i)
                {
                    alpha[i] += (K_inv*q*k_a)(i);
                }
            }
        }
    }

    // Swap the entire state of *this with item.
    void swap (
        krls& item
    )
    {
        exchange(kernel, item.kernel);
        dictionary.swap(item.dictionary);
        alpha.swap(item.alpha);
        K_inv.swap(item.K_inv);
        P.swap(item.P);
        exchange(tolerance, item.tolerance);
        q.swap(item.q);
        a.swap(item.a);
        k.swap(item.k);
        temp_matrix.swap(item.temp_matrix);
    }

    // Returns the number of "support vectors" currently retained by the ALD test.
    unsigned long dictionary_size (
    ) const { return dictionary.size(); }

    // Package the learned function up as a decision_function object.
    decision_function<kernel_type> get_decision_function (
    ) const
    {
        return decision_function<kernel_type>(
            vector_to_matrix(alpha),
            0, // the KRLS algorithm doesn't have a bias term
            kernel,
            vector_to_matrix(dictionary)
        );
    }

    // Serialize everything needed to reconstruct the learned function.  The
    // scratch matrices (q, a, k, temp_matrix) are intentionally omitted.
    friend void serialize(const krls& item, std::ostream& out)
    {
        serialize(item.kernel, out);
        serialize(item.dictionary, out);
        serialize(item.alpha, out);
        serialize(item.K_inv, out);
        serialize(item.P, out);
        serialize(item.tolerance, out);
    }

    friend void deserialize(krls& item, std::istream& in)
    {
        deserialize(item.kernel, in);
        deserialize(item.dictionary, in);
        deserialize(item.alpha, in);
        deserialize(item.K_inv, in);
        deserialize(item.P, in);
        deserialize(item.tolerance, in);
    }

private:

    // The kernel function plus a small constant offset.  This is what train()
    // uses everywhere -- presumably the 0.001 keeps the kernel matrix K well
    // conditioned/invertible.  TODO(review): confirm the intent of the offset.
    inline scalar_type kern (const sample_type& m1, const sample_type& m2) const
    {
        return kernel(m1,m2) + 0.001;
    }


    kernel_type kernel;

    typedef std_allocator<sample_type, mem_manager_type> alloc_sample_type;
    typedef std_allocator<scalar_type, mem_manager_type> alloc_scalar_type;
    typedef std::vector<sample_type,alloc_sample_type> dictionary_vector_type;
    typedef std::vector<scalar_type,alloc_scalar_type> alpha_vector_type;

    // the "support vectors" retained by the ALD test
    dictionary_vector_type dictionary;
    // the coefficient applied to each dictionary vector
    alpha_vector_type alpha;

    // inverse of the (regularized) kernel matrix over the dictionary vectors
    matrix<scalar_type,0,0,mem_manager_type> K_inv;
    // the P matrix from the KRLS paper
    matrix<scalar_type,0,0,mem_manager_type> P;

    scalar_type tolerance;


    // temp variables here just so we don't have to reconstruct them over and over.  Thus,
    // they aren't really part of the state of this object.
    matrix<scalar_type,0,1,mem_manager_type> q;
    matrix<scalar_type,0,1,mem_manager_type> a;
    matrix<scalar_type,0,1,mem_manager_type> k;
    matrix<scalar_type,1,0,mem_manager_type> temp_matrix;

};
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <typename kernel_type>
|
||||
void swap(krls<kernel_type>& a, krls<kernel_type>& b)
|
||||
{ a.swap(b); }
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
}
|
||||
|
||||
#endif // DLIB_KRLs_
|
||||
|
168
dlib/svm/krls_abstract.h
Normal file
168
dlib/svm/krls_abstract.h
Normal file
@ -0,0 +1,168 @@
|
||||
// Copyright (C) 2008 Davis E. King (davisking@users.sourceforge.net)
|
||||
// License: Boost Software License See LICENSE.txt for the full license.
|
||||
#undef DLIB_KRLs_ABSTRACT_
|
||||
#ifdef DLIB_KRLs_ABSTRACT_
|
||||
|
||||
#include <cmath>
|
||||
#include "../matrix/matrix_abstract.h"
|
||||
#include "../algs.h"
|
||||
#include "../serialize.h"
|
||||
|
||||
namespace dlib
|
||||
{
|
||||
|
||||
template <
    typename kernel_type
    >
class krls
{
    /*!
        INITIAL VALUE
            - dictionary_size() == 0

        WHAT THIS OBJECT REPRESENTS
            This is an implementation of the kernel recursive least squares algorithm
            described in the paper:
                The Kernel Recursive Least Squares Algorithm by Yaakov Engel.

            The long and short of this algorithm is that it is an online kernel based
            regression algorithm.  You give it samples (x,y) and it learns the function
            f(x) == y.  For a detailed description of the algorithm read the above paper.
    !*/

public:
    typedef typename kernel_type::scalar_type scalar_type;
    typedef typename kernel_type::sample_type sample_type;
    typedef typename kernel_type::mem_manager_type mem_manager_type;


    explicit krls (
        const kernel_type& kernel_,
        scalar_type tolerance_ = 0.001
    );
    /*!
        ensures
            - this object is properly initialized
            - #get_tolerance() == tolerance_
            - #get_decision_function().kernel_function == kernel_
              (i.e. this object will use the given kernel function)
    !*/

    void set_tolerance (
        scalar_type tolerance_
    );
    /*!
        ensures
            - #get_tolerance() == tolerance_
            (note: changing the tolerance does not clear any learned state)
    !*/

    scalar_type get_tolerance(
    ) const;
    /*!
        ensures
            - returns the tolerance to use for the approximately linearly dependent
              test in the KRLS algorithm.  This is a number which governs how
              accurately this object will approximate the decision function it is
              learning.  Smaller values generally result in a more accurate
              estimate while also resulting in a bigger set of support vectors in
              the learned decision function.  Bigger tolerance values result in a
              less accurate decision function but also in fewer support vectors.
    !*/

    void clear (
    );
    /*!
        ensures
            - clears out all learned data and puts this object back to its
              initial state.
              (e.g. #get_decision_function().support_vectors.size() == 0)
            - #get_tolerance() == get_tolerance()
              (i.e. doesn't change the value of the tolerance)
    !*/

    scalar_type operator() (
        const sample_type& x
    ) const;
    /*!
        ensures
            - returns the current y estimate for the given x
    !*/

    void train (
        const sample_type& x,
        scalar_type y
    );
    /*!
        ensures
            - trains this object that the given x should be mapped to the given y
    !*/

    void swap (
        krls& item
    );
    /*!
        ensures
            - swaps *this with item
    !*/

    unsigned long dictionary_size (
    ) const;
    /*!
        ensures
            - returns the number of "support vectors" in the dictionary.  That is,
              returns a number equal to get_decision_function().support_vectors.size()
    !*/

    decision_function<kernel_type> get_decision_function (
    ) const;
    /*!
        ensures
            - returns a decision function F that represents the function learned
              by this object so far.  I.e. it is the case that:
                - for all x: F(x) == (*this)(x)
    !*/

};
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
    typename kernel_type
    >
void swap(
    krls<kernel_type>& a,
    krls<kernel_type>& b
)
{ a.swap(b); }
/*!
    provides a global swap function for krls objects
!*/
|
||||
|
||||
template <
    typename kernel_type
    >
void serialize (
    const krls<kernel_type>& item,
    std::ostream& out
);
/*!
    provides serialization support for krls objects
    (writes item's learned state to the given output stream)
!*/
|
||||
|
||||
template <
    typename kernel_type
    >
void deserialize (
    krls<kernel_type>& item,
    std::istream& in
);
/*!
    provides deserialization support for krls objects
    (restores item's learned state from the given input stream)
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
}
|
||||
|
||||
#endif // DLIB_KRLs_ABSTRACT_
|
||||
|
Loading…
Reference in New Issue
Block a user