- Added a max dictionary size setting to the kcentroid object.

- Removed the "discount" mechanism and replaced it with a much less
  confusing pair of scale arguments to the training functions.
  - Changed the kcentroid's serialization format.

--HG--
extra : convert_revision : svn:fdd8eb12-d10e-0410-9acb-85c331704f74/trunk@2313
Davis King 2008-06-14 03:32:09 +00:00
parent 94ebc5c338
commit 3e1edfaa32
2 changed files with 158 additions and 94 deletions
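
For context, the reworked interface can be exercised like so (a minimal
sketch, assuming dlib's radial_basis_kernel; the kernel choice and all
parameter values here are illustrative, not taken from this commit):

    #include <dlib/svm.h>
    #include <iostream>
    using namespace dlib;

    int main()
    {
        typedef matrix<double,2,1> sample_type;
        typedef radial_basis_kernel<sample_type> kernel_type;

        // kernel, tolerance, and the new max dictionary size argument
        kcentroid<kernel_type> cent(kernel_type(0.1), 0.001, 64);

        sample_type x;
        x = 1, 2;

        cent.train(x);            // unweighted update: x gets weight 1/(N+1)
        cent.train(x, 0.5, 0.5);  // explicit scales: new = 0.5*old + 0.5*x

        // distance from x to the current centroid estimate
        std::cout << cent(x) << std::endl;
        return 0;
    }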

dlib/svm/kcentroid.h

@@ -36,52 +36,24 @@ namespace dlib
explicit kcentroid (
const kernel_type& kernel_,
scalar_type tolerance_ = 0.001
scalar_type tolerance_ = 0.001,
unsigned long max_dictionary_size_ = 1000000
) :
kernel(kernel_),
tolerance(tolerance_),
max_dis(1e6)
max_dictionary_size(max_dictionary_size_)
{
clear_dictionary();
}
void set_tolerance (scalar_type tolerance_)
{
// make sure requires clause is not broken
DLIB_ASSERT(tolerance_ >= 0,
"\tvoid kcentroid::set_tolerance"
<< "\n\tinvalid tolerance value"
<< "\n\ttolerance: " << tolerance_
<< "\n\tthis: " << this
);
tolerance = tolerance_;
}
scalar_type get_tolerance() const
{
return tolerance;
}
void set_max_discount (
scalar_type value
)
unsigned long get_max_dictionary_size() const
{
// make sure requires clause is not broken
DLIB_ASSERT(value >= 0,
"\tvoid kcentroid::set_max_discount"
<< "\n\tinvalid discount value"
<< "\n\tvalue: " << value
<< "\n\tthis: " << this
);
max_dis = value;
if (samples_seen > value)
samples_seen = value;
}
scalar_type get_max_discount(
) const
{
return max_dis;
return max_dictionary_size;
}
void clear_dictionary ()
@@ -90,6 +62,7 @@ namespace dlib
alpha.clear();
K_inv.set_size(0,0);
K.set_size(0,0);
samples_seen = 0;
bias = 0;
}
@@ -105,18 +78,50 @@ namespace dlib
return std::sqrt(kernel(x,x) + bias - 2*temp);
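// (i.e. the distance in kernel feature space between phi(x) and the
// current centroid estimate)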
}
double samples_trained (
) const
{
return samples_seen;
}
scalar_type test_and_train (
const sample_type& x
)
{
return train_and_maybe_test(x,true);
++samples_seen;
const double xscale = 1.0/samples_seen;
const double cscale = 1-xscale;
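// these scales make the update the usual running mean:
// new_centroid == ((N-1)*old_centroid + x)/N where N == samples_seen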
return train_and_maybe_test(x,cscale,xscale,true);
}
void train (
const sample_type& x
)
{
train_and_maybe_test(x,false);
++samples_seen;
const double xscale = 1.0/samples_seen;
const double cscale = 1-xscale;
train_and_maybe_test(x,cscale,xscale,false);
}
scalar_type test_and_train (
const sample_type& x,
double cscale,
double xscale
)
{
++samples_seen;
return train_and_maybe_test(x,cscale,xscale,true);
}
void train (
const sample_type& x,
double cscale,
double xscale
)
{
++samples_seen;
train_and_maybe_test(x,cscale,xscale,false);
}
void swap (
@@ -131,7 +136,6 @@ namespace dlib
exchange(tolerance, item.tolerance);
exchange(samples_seen, item.samples_seen);
exchange(bias, item.bias);
exchange(max_dis, item.max_dis);
a.swap(item.a);
k.swap(item.k);
}
@@ -149,7 +153,6 @@ namespace dlib
serialize(item.tolerance, out);
serialize(item.samples_seen, out);
serialize(item.bias, out);
serialize(item.max_dis, out);
}
friend void deserialize(kcentroid& item, std::istream& in)
@@ -162,13 +165,14 @@ namespace dlib
deserialize(item.tolerance, in);
deserialize(item.samples_seen, in);
deserialize(item.bias, in);
deserialize(item.max_dis, in);
}
private:
scalar_type train_and_maybe_test (
const sample_type& x,
double cscale,
double xscale,
bool do_test
)
{
@@ -183,7 +187,7 @@ namespace dlib
K.set_size(1,1);
K(0,0) = kx;
alpha.push_back(1.0);
alpha.push_back(xscale);
dictionary.push_back(x);
}
else
@@ -201,12 +205,25 @@ namespace dlib
// compute the error we would have if we approximated the new x sample
// with the dictionary. That is, do the ALD test from the KRLS paper.
a = K_inv*k;
const scalar_type delta = kx - trans(k)*a;
scalar_type delta = kx - trans(k)*a;
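// delta == k(x,x) - k'*inv(K)*k, i.e. the squared residual left over after
// projecting phi(x) onto the span of the current dictionary vectors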
// if this new vector isn't approximately linearly dependent on the vectors
// in our dictionary.
if (std::abs(delta) > tolerance)
{
if (dictionary.size() >= max_dictionary_size)
{
// We need to remove one of the old members of the dictionary before
// we proceed with adding a new one. So remove the oldest one.
remove_dictionary_vector(0);
// recompute k, a, and delta since they were computed with the old
// kernel matrix
k = remove_row(k,0);
a = K_inv*k;
delta = kx - trans(k)*a;
}
// add x to the dictionary
dictionary.push_back(x);
@@ -239,37 +256,68 @@ namespace dlib
// now update the alpha vector
const double alpha_scale = samples_seen/(samples_seen+1);
for (unsigned long i = 0; i < alpha.size(); ++i)
{
alpha[i] *= alpha_scale;
alpha[i] *= cscale;
}
alpha.push_back(1.0-alpha_scale);
alpha.push_back(xscale);
}
else
{
// update the alpha vector so that this new sample has been added into
// the mean vector we are accumulating
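// (this works because the ALD test above tells us that, in feature space,
// phi(x) is approximately a(0)*phi(dictionary[0]) + ... + a(m)*phi(dictionary[m]))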
const double alpha_scale = samples_seen/(samples_seen+1);
const double a_scale = 1.0-alpha_scale;
for (unsigned long i = 0; i < alpha.size(); ++i)
{
alpha[i] = alpha_scale*alpha[i] + a_scale*a(i);
alpha[i] = cscale*alpha[i] + xscale*a(i);
}
}
}
++samples_seen;
// recompute the bias term
bias = sum(pointwise_multiply(K, vector_to_matrix(alpha)*trans(vector_to_matrix(alpha))));
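// (bias == alpha'*K*alpha, the squared norm of the current centroid in
// feature space)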
if (samples_seen > max_dis)
samples_seen = max_dis;
return test_result;
}
void remove_dictionary_vector (
long i
)
/*!
requires
- 0 <= i < dictionary.size()
ensures
- #dictionary.size() == dictionary.size() - 1
- #alpha.size() == alpha.size() - 1
- updates the K_inv matrix so that it is still a proper inverse of the
kernel matrix
- also removes the necessary row and column from the K matrix
- uses the this->a member variable as scratch space, so after this
function runs its value will have changed.
!*/
{
// remove the dictionary vector
dictionary.erase(dictionary.begin()+i);
// remove the i'th vector from the inverse kernel matrix. This formula is basically
// just the reverse of the way K_inv is updated by equation 3.14 during normal training.
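// Concretely this is the block matrix inverse (Schur complement) identity:
// if M == [A b; b' c] and inv(M) == [E f; f' g] then inv(A) == E - f*f'/g.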
K_inv = removerc(K_inv,i,i) - remove_row(colm(K_inv,i)/K_inv(i,i),i)*remove_col(rowm(K_inv,i),i);
// now compute the updated alpha values to account for the fact that we just
// removed one of our dictionary vectors
a = (K_inv*remove_row(K,i)*vector_to_matrix(alpha));
// now copy over the new alpha values
alpha.resize(alpha.size()-1);
for (unsigned long k = 0; k < alpha.size(); ++k)
{
alpha[k] = a(k);
}
// update the K matrix as well
K = removerc(K,i,i);
}
typedef std_allocator<sample_type, mem_manager_type> alloc_sample_type;
typedef std_allocator<scalar_type, mem_manager_type> alloc_scalar_type;
@@ -285,9 +333,9 @@ namespace dlib
matrix<scalar_type,0,0,mem_manager_type> K;
scalar_type tolerance;
unsigned long max_dictionary_size;
scalar_type samples_seen;
scalar_type bias;
scalar_type max_dis;
// temp variables here just so we don't have to reconstruct them over and over. Thus,

dlib/svm/kcentroid_abstract.h

@@ -23,7 +23,7 @@ namespace dlib
INITIAL VALUE
- dictionary_size() == 0
- max_discount() == 1e6
- samples_trained() == 0
WHAT THIS OBJECT REPRESENTS
This is an implementation of an online algorithm for recursively estimating the
@@ -34,6 +34,13 @@ namespace dlib
and any test points. So you can use this object to predict how similar a test
point is to the data this object has been trained on (larger distances from the
centroid indicate dissimilarity/anomalous points).
Also note that the algorithm internally keeps a set of "dictionary vectors"
that are used to represent the regression function. You can force the
algorithm to use no more than a set number of vectors by setting
the 3rd constructor argument to whatever you want. However, note that
doing this causes the algorithm to bias its results towards more
recent training examples.
!*/
public:
@@ -44,7 +51,8 @@ namespace dlib
explicit kcentroid (
const kernel_type& kernel_,
scalar_type tolerance_ = 0.001
scalar_type tolerance_ = 0.001,
unsigned long max_dictionary_size_ = 1000000
);
/*!
ensures
@@ -52,16 +60,23 @@ namespace dlib
- #get_tolerance() == tolerance_
- #get_decision_function().kernel_function == kernel_
(i.e. this object will use the given kernel function)
- #get_max_dictionary_size() == max_dictionary_size_
!*/
void set_tolerance (
scalar_type tolerance_
);
unsigned long get_max_dictionary_size(
) const;
/*!
requires
- tolerance_ >= 0
ensures
- #get_tolerance() == tolerance_
- returns the maximum number of dictionary vectors this object
will use at a time. That is, dictionary_size() will never be
greater than get_max_dictionary_size().
!*/
scalar_type samples_trained (
) const;
/*!
ensures
- returns the number of samples this object has been trained on so far
!*/
scalar_type get_tolerance(
@@ -77,42 +92,12 @@ namespace dlib
less accurate estimate but also in fewer support vectors.
!*/
void set_max_discount (
scalar_type value
);
/*!
requires
- value > 0
ensures
- #get_max_discount() == value
!*/
scalar_type get_max_discount(
) const;
/*!
ensures
- If you have shown this object N samples so far then it has found
the centroid of those N samples. That is, it has found the average
of all of them in some high dimensional feature space.
- if (N <= get_max_discount()) then
- The next sample you show this object will be added to the centroid
with a weight of 1/(N+1).
- else
- The next sample you show this object will be added to the centroid
with a weight of 1/(get_max_discount()+1).
- If you think your samples are from a stationary source then you
should set the max discount to some really big number. However,
if you think the source isn't stationary then use a smaller number.
This will cause the centroid in this object to be closer to the
centroid of the more recent points.
!*/
void clear_dictionary (
);
/*!
ensures
- clears out all learned data (e.g. #dictionary_size() == 0)
- #samples_trained() == 0
!*/
scalar_type operator() (
@@ -125,6 +110,31 @@ namespace dlib
to this object so far.
!*/
void train (
const sample_type& x
);
/*!
ensures
- adds the sample x into the current estimate of the centroid
- also note that calling this function is equivalent to calling
train(x, samples_trained()/(samples_trained()+1.0), 1.0/(samples_trained()+1.0)).
That is, this function finds the normal unweighted centroid of all training points.
!*/
void train (
const sample_type& x,
double cscale,
double xscale
);
/*!
ensures
- adds the sample x into the current estimate of the centroid but
uses the user supplied scales. That is, this function performs:
- new_centroid = cscale*old_centroid + xscale*x
- This function allows you to weight different samples however
you want.
!*/
scalar_type test_and_train (
const sample_type& x
);
@@ -137,12 +147,18 @@ namespace dlib
than calling both individually.
!*/
void train (
const sample_type& x
scalar_type test_and_train (
const sample_type& x,
double cscale,
double xscale
);
/*!
ensures
- adds the sample x into the current estimate of the centroid
- calls train(x,cscale,xscale)
- returns (*this)(x)
- The reason this function exists is that train() and operator()
both compute some of the same things. So this function is more efficient
than calling both individually.
!*/
void swap (