- Added a max dictionary size setting to the kcentroid object.
- Removed the "discount" thing and replaced it with a much less confusing pair of scale arguments to the training functions.
- Changed the kcentroid's serialization format.

--HG--
extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%402313
commit 3e1edfaa32 (parent 94ebc5c338)
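For orientation, here is a minimal usage sketch of the revised interface. It is not part of the commit itself; the radial basis kernel, its 0.1 parameter, and the cap of 50 dictionary vectors are illustrative assumptions.

#include <dlib/svm.h>
#include <iostream>

int main()
{
    using namespace dlib;

    typedef matrix<double,2,1> sample_type;
    typedef radial_basis_kernel<sample_type> kernel_type;

    // kernel, tolerance, and the new third argument: the dictionary size cap
    kcentroid<kernel_type> ec(kernel_type(0.1), 0.001, 50);

    sample_type x;
    x = 1, 2;                                        // dlib's comma-initializer syntax

    ec.train(x);                                     // unweighted running mean
    std::cout << ec(x) << std::endl;                 // distance from the centroid
    std::cout << ec.samples_trained() << std::endl;  // prints 1
}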
dlib/svm/kcentroid.h

@@ -36,52 +36,24 @@ namespace dlib
         explicit kcentroid (
             const kernel_type& kernel_,
-            scalar_type tolerance_ = 0.001
+            scalar_type tolerance_ = 0.001,
+            unsigned long max_dictionary_size_ = 1000000
         ) :
             kernel(kernel_),
             tolerance(tolerance_),
-            max_dis(1e6)
+            max_dictionary_size(max_dictionary_size_)
         {
             clear_dictionary();
         }

         void set_tolerance (scalar_type tolerance_)
         {
             // make sure requires clause is not broken
             DLIB_ASSERT(tolerance_ >= 0,
                 "\tvoid kcentroid::set_tolerance"
                 << "\n\tinvalid tolerance value"
                 << "\n\ttolerance: " << tolerance_
                 << "\n\tthis: " << this
                 );
             tolerance = tolerance_;
         }

         scalar_type get_tolerance() const
         {
             return tolerance;
         }

-        void set_max_discount (
-            scalar_type value
-        )
+        unsigned long get_max_dictionary_size() const
         {
-            // make sure requires clause is not broken
-            DLIB_ASSERT(value >= 0,
-                "\tvoid kcentroid::set_max_discount"
-                << "\n\tinvalid discount value"
-                << "\n\tvalue: " << value
-                << "\n\tthis: " << this
-                );
-            max_dis = value;
-            if (samples_seen > value)
-                samples_seen = value;
-        }
-
-        scalar_type get_max_discount(
-        ) const
-        {
-            return max_dis;
+            return max_dictionary_size;
         }

         void clear_dictionary ()
@@ -90,6 +62,7 @@ namespace dlib
             alpha.clear();
+
             K_inv.set_size(0,0);
             K.set_size(0,0);
             samples_seen = 0;
             bias = 0;
         }
@@ -105,18 +78,50 @@ namespace dlib
             return std::sqrt(kernel(x,x) + bias - 2*temp);
         }

+        double samples_trained (
+        ) const
+        {
+            return samples_seen;
+        }
+
         scalar_type test_and_train (
             const sample_type& x
         )
         {
-            return train_and_maybe_test(x,true);
+            ++samples_seen;
+            const double xscale = 1.0/samples_seen;
+            const double cscale = 1-xscale;
+            return train_and_maybe_test(x,cscale,xscale,true);
         }

         void train (
             const sample_type& x
         )
         {
-            train_and_maybe_test(x,false);
+            ++samples_seen;
+            const double xscale = 1.0/samples_seen;
+            const double cscale = 1-xscale;
+            train_and_maybe_test(x,cscale,xscale,false);
         }

+        scalar_type test_and_train (
+            const sample_type& x,
+            double cscale,
+            double xscale
+        )
+        {
+            ++samples_seen;
+            return train_and_maybe_test(x,cscale,xscale,true);
+        }
+
+        void train (
+            const sample_type& x,
+            double cscale,
+            double xscale
+        )
+        {
+            ++samples_seen;
+            train_and_maybe_test(x,cscale,xscale,false);
+        }
+
         void swap (
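The cscale/xscale pair that train() and test_and_train() now compute internally is just the running-mean recursion written out: for the N-th sample, xscale = 1/N and cscale = 1 - 1/N. A self-contained illustration in plain C++ (not dlib code) of why those scales reproduce the ordinary unweighted mean:

#include <iostream>
#include <vector>

int main()
{
    const std::vector<double> data = {1.0, 2.0, 3.0, 4.0};

    double mean = 0;
    double n = 0;
    for (double x : data)
    {
        ++n;
        const double xscale = 1.0/n;    // the same scales train(x) picks above
        const double cscale = 1 - xscale;
        mean = cscale*mean + xscale*x;  // new_mean = cscale*old_mean + xscale*x
    }
    std::cout << mean << std::endl;     // prints 2.5, the ordinary sample mean
}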
@@ -131,7 +136,6 @@ namespace dlib
             exchange(tolerance, item.tolerance);
             exchange(samples_seen, item.samples_seen);
             exchange(bias, item.bias);
-            exchange(max_dis, item.max_dis);
             a.swap(item.a);
             k.swap(item.k);
         }
@@ -149,7 +153,6 @@ namespace dlib
             serialize(item.tolerance, out);
             serialize(item.samples_seen, out);
             serialize(item.bias, out);
-            serialize(item.max_dis, out);
         }

         friend void deserialize(kcentroid& item, std::istream& in)
@@ -162,13 +165,14 @@ namespace dlib
             deserialize(item.tolerance, in);
             deserialize(item.samples_seen, in);
             deserialize(item.bias, in);
-            deserialize(item.max_dis, in);
         }

     private:

         scalar_type train_and_maybe_test (
             const sample_type& x,
+            double cscale,
+            double xscale,
             bool do_test
         )
         {
@@ -183,7 +187,7 @@ namespace dlib
                 K.set_size(1,1);
                 K(0,0) = kx;

-                alpha.push_back(1.0);
+                alpha.push_back(xscale);
                 dictionary.push_back(x);
             }
             else
@@ -201,12 +205,25 @@ namespace dlib
                 // compute the error we would have if we approximated the new x sample
                 // with the dictionary.  That is, do the ALD test from the KRLS paper.
                 a = K_inv*k;
-                const scalar_type delta = kx - trans(k)*a;
+                scalar_type delta = kx - trans(k)*a;

                 // if this new vector isn't approximately linearly dependent on the vectors
                 // in our dictionary.
                 if (std::abs(delta) > tolerance)
                 {
+                    if (dictionary.size() >= max_dictionary_size)
+                    {
+                        // We need to remove one of the old members of the dictionary before
+                        // we proceed with adding a new one.  So remove the oldest one.
+                        remove_dictionary_vector(0);
+
+                        // recompute these guys since they were computed with the old
+                        // kernel matrix
+                        k = remove_row(k,0);
+                        a = K_inv*k;
+                        delta = kx - trans(k)*a;
+                    }
+
                     // add x to the dictionary
                     dictionary.push_back(x);

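For reference, the ALD (approximate linear dependence) test computed above can be written as follows; this gloss is mine, not text from the commit. With K the kernel matrix of the current dictionary, phi the feature map of the kernel, and k the vector of kernel products against the new sample:

\[
\delta \;=\; k(x,x) - \mathbf{k}^{\top} K^{-1} \mathbf{k}
\;=\; \min_{\mathbf{a}} \Bigl\| \phi(x) - \sum_i a_i\,\phi(x_i) \Bigr\|^2 ,
\qquad \mathbf{k}_i = k(x_i, x),
\]

where the minimizer is a = K^{-1}k, the quantity the code stores in this->a. So delta is the squared residual left after projecting phi(x) onto the span of the dictionary vectors, and x joins the dictionary only when that residual exceeds the tolerance.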
@@ -239,37 +256,68 @@ namespace dlib


                     // now update the alpha vector
-                    const double alpha_scale = samples_seen/(samples_seen+1);
                     for (unsigned long i = 0; i < alpha.size(); ++i)
                     {
-                        alpha[i] *= alpha_scale;
+                        alpha[i] *= cscale;
                     }
-                    alpha.push_back(1.0-alpha_scale);
+                    alpha.push_back(xscale);
                 }
                 else
                 {
                     // update the alpha vector so that this new sample has been added into
                     // the mean vector we are accumulating
-                    const double alpha_scale = samples_seen/(samples_seen+1);
-                    const double a_scale = 1.0-alpha_scale;
                     for (unsigned long i = 0; i < alpha.size(); ++i)
                     {
-                        alpha[i] = alpha_scale*alpha[i] + a_scale*a(i);
+                        alpha[i] = cscale*alpha[i] + xscale*a(i);
                     }
                 }
             }

-            ++samples_seen;

             // recompute the bias term
             bias = sum(pointwise_multiply(K, vector_to_matrix(alpha)*trans(vector_to_matrix(alpha))));

-            if (samples_seen > max_dis)
-                samples_seen = max_dis;
-
             return test_result;
         }

+        void remove_dictionary_vector (
+            long i
+        )
+        /*!
+            requires
+                - 0 <= i < dictionary.size()
+            ensures
+                - #dictionary.size() == dictionary.size() - 1
+                - #alpha.size() == alpha.size() - 1
+                - updates the K_inv matrix so that it is still a proper inverse of the
+                  kernel matrix
+                - also removes the necessary row and column from the K matrix
+                - uses the this->a variable so after this function runs that variable
+                  will contain a different value.
+        !*/
+        {
+            // remove the dictionary vector
+            dictionary.erase(dictionary.begin()+i);
+
+            // remove the i'th vector from the inverse kernel matrix.  This formula is basically
+            // just the reverse of the way K_inv is updated by equation 3.14 during normal training.
+            K_inv = removerc(K_inv,i,i) - remove_row(colm(K_inv,i)/K_inv(i,i),i)*remove_col(rowm(K_inv,i),i);
+
+            // now compute the updated alpha values to take into account that we just
+            // removed one of our dictionary vectors
+            a = (K_inv*remove_row(K,i)*vector_to_matrix(alpha));
+
+            // now copy over the new alpha values
+            alpha.resize(alpha.size()-1);
+            for (unsigned long k = 0; k < alpha.size(); ++k)
+            {
+                alpha[k] = a(k);
+            }
+
+            // update the K matrix as well
+            K = removerc(K,i,i);
+        }
+
+
         typedef std_allocator<sample_type, mem_manager_type> alloc_sample_type;
         typedef std_allocator<scalar_type, mem_manager_type> alloc_scalar_type;
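A note on the K_inv downdate above; this derivation is mine, not part of the commit. Permute the row and column being removed to the front and partition:

\[
K = \begin{pmatrix} a & b^{\top} \\ b & C \end{pmatrix},
\qquad
K^{-1} = \begin{pmatrix} e & f^{\top} \\ f & G \end{pmatrix}
\quad\Longrightarrow\quad
C^{-1} = G - \frac{f f^{\top}}{e}.
\]

In the code, e is K_inv(i,i), f is the i'th column of K_inv with its i'th entry removed, and G is removerc(K_inv,i,i), so the remove_row(...)*remove_col(...) expression is exactly the rank-one correction f f^T / e.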
@@ -285,9 +333,9 @@ namespace dlib
         matrix<scalar_type,0,0,mem_manager_type> K;

         scalar_type tolerance;
+        unsigned long max_dictionary_size;
         scalar_type samples_seen;
         scalar_type bias;
-        scalar_type max_dis;


         // temp variables here just so we don't have to reconstruct them over and over.  Thus,
dlib/svm/kcentroid_abstract.h

@@ -23,7 +23,7 @@ namespace dlib

        INITIAL VALUE
            - dictionary_size() == 0
-           - max_discount() == 1e6
+           - samples_trained() == 0

        WHAT THIS OBJECT REPRESENTS
            This is an implementation of an online algorithm for recursively estimating the
@@ -34,6 +34,13 @@ namespace dlib
            and any test points.  So you can use this object to predict how similar a test
            point is to the data this object has been trained on (larger distances from the
            centroid indicate dissimilarity/anomalous points).
+
+           Also note that the algorithm internally keeps a set of "dictionary vectors"
+           that are used to represent the regression function.  You can force the
+           algorithm to use no more than a set number of vectors by setting
+           the 3rd constructor argument to whatever you want.  However, note that
+           doing this causes the algorithm to bias its results towards more
+           recent training examples.
        !*/

    public:
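To make the "anomalous points" remark concrete, a minimal sketch; the data, the kernel width, and the idea of simply comparing the two printed distances are illustrative assumptions, not part of the commit.

#include <dlib/svm.h>
#include <iostream>

int main()
{
    using namespace dlib;

    typedef matrix<double,1,1> sample_type;
    typedef radial_basis_kernel<sample_type> kernel_type;

    kcentroid<kernel_type> ec(kernel_type(0.5), 0.001, 100);

    // train on "typical" points scattered in [0, 1)
    sample_type m;
    for (int i = 0; i < 100; ++i)
    {
        m(0) = (i%10)/10.0;
        ec.train(m);
    }

    // larger distances from the centroid suggest anomalies
    m(0) = 0.5;  std::cout << "typical point:  " << ec(m) << std::endl;
    m(0) = 100;  std::cout << "far-away point: " << ec(m) << std::endl;
}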
@@ -44,7 +51,8 @@ namespace dlib

        explicit kcentroid (
            const kernel_type& kernel_,
-           scalar_type tolerance_ = 0.001
+           scalar_type tolerance_ = 0.001,
+           unsigned long max_dictionary_size_ = 1000000
        );
        /*!
            ensures
@@ -52,16 +60,23 @@ namespace dlib
                - #get_tolerance() == tolerance_
                - #get_decision_function().kernel_function == kernel_
                  (i.e. this object will use the given kernel function)
+               - #get_max_dictionary_size() == max_dictionary_size_
        !*/

-       void set_tolerance (
-           scalar_type tolerance_
-       );
+       unsigned long get_max_dictionary_size(
+       ) const;
        /*!
-           requires
-               - tolerance_ >= 0
            ensures
-               - #get_tolerance() == tolerance_
+               - returns the maximum number of dictionary vectors this object
+                 will use at a time.  That is, dictionary_size() will never be
+                 greater than get_max_dictionary_size().
        !*/

+       scalar_type samples_trained (
+       ) const;
+       /*!
+           ensures
+               - returns the number of samples this object has been trained on so far
+       !*/
+
        scalar_type get_tolerance(
@@ -77,42 +92,12 @@ namespace dlib
                  less accurate estimate but also in fewer support vectors.
        !*/

-       void set_max_discount (
-           scalar_type value
-       );
-       /*!
-           requires
-               - value > 0
-           ensures
-               - #get_max_discount() == value
-       !*/
-
-       scalar_type get_max_discount(
-       ) const;
-       /*!
-           ensures
-               - If you have shown this object N samples so far then it has found
-                 the centroid of those N samples.  That is, it has found the average
-                 of all of them in some high dimensional feature space.
-               - if (N <= get_max_discount()) then
-                   - The next sample you show this object will be added to the centroid
-                     with a weight of 1/(N+1).
-               - else
-                   - The next sample you show this object will be added to the centroid
-                     with a weight of 1/(get_max_discount()+1).
-
-               - If you think your samples are from a stationary source then you
-                 should set the max discount to some really big number.  However,
-                 if you think the source isn't stationary then use a smaller number.
-                 This will cause the centroid in this object to be closer to the
-                 centroid of the more recent points.
-       !*/
-
        void clear_dictionary (
        );
        /*!
            ensures
                - clears out all learned data (e.g. #dictionary_size() == 0)
+               - #samples_trained() == 0
        !*/

        scalar_type operator() (
@@ -125,6 +110,31 @@ namespace dlib
                  to this object so far.
        !*/

+       void train (
+           const sample_type& x
+       );
+       /*!
+           ensures
+               - adds the sample x into the current estimate of the centroid
+               - also note that calling this function is equivalent to calling
+                 train(x, samples_trained()/(samples_trained()+1.0), 1.0/(samples_trained()+1.0)).
+                 That is, this function finds the normal unweighted centroid of all training points.
+       !*/
+
+       void train (
+           const sample_type& x,
+           double cscale,
+           double xscale
+       );
+       /*!
+           ensures
+               - adds the sample x into the current estimate of the centroid but
+                 uses a user given scale.  That is, this function performs:
+                   - new_centroid = cscale*old_centroid + xscale*x
+               - This function allows you to weight different samples however
+                 you want.
+       !*/
+
        scalar_type test_and_train (
            const sample_type& x
        );
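One natural use of the weighted overload documented above is an exponentially forgetting centroid for drifting, non-stationary streams. A hedged sketch; the helper name and the 0.99/0.01 forgetting factor are illustrative choices, not part of the commit.

#include <dlib/svm.h>

// Discount everything seen so far by 0.99 and mix in 1% of the new sample,
// so old observations fade out geometrically instead of being averaged forever.
template <typename kernel_type>
void track_drifting_mean (
    dlib::kcentroid<kernel_type>& ec,
    const typename kernel_type::sample_type& x
)
{
    ec.train(x, 0.99, 0.01);  // new_centroid = 0.99*old_centroid + 0.01*x
}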
@@ -137,12 +147,18 @@ namespace dlib
                  than calling both individually.
        !*/

-       void train (
-           const sample_type& x
+       scalar_type test_and_train (
+           const sample_type& x,
+           double cscale,
+           double xscale
        );
        /*!
            ensures
-               - adds the sample x into the current estimate of the centroid
+               - calls train(x,cscale,xscale)
+               - returns (*this)(x)
+               - The reason this function exists is because train() and operator()
+                 both compute some of the same things.  So this function is more efficient
+                 than calling both individually.
        !*/

        void swap (