dlib/examples/kcentroid_ex.cpp
Davis King 9de97a2e3e Improved this example
--HG--
extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%402341
2008-06-19 02:21:56 +00:00

117 lines
5.5 KiB
C++

/*
This is an example illustrating the use of the kcentroid object
from the dlib C++ Library.
The kcentroid object is an implementation of an algorithm that recursively
computes the centroid (i.e. average) of a set of points. The interesting
thing about dlib::kcentroid is that it does so in a kernel induced feature
space. This means that you can use it as a non-linear one-class classifier.
So you might use it to perform online novelty detection.
This example will train an instance of it on points from the sinc function.
*/
#include <iostream>
#include <vector>
#include "dlib/svm.h"
#include "dlib/statistics.h"
using namespace std;
using namespace dlib;
// Here is the sinc function we will be trying to learn with the krls
// object.
double sinc(double x)
{
if (x == 0)
return 1;
return sin(x)/x;
}
int main()
{
// Here we declare that our samples will be 2 dimensional column vectors.
typedef matrix<double,2,1> sample_type;
// Now we are making a typedef for the kind of kernel we want to use. I picked the
// radial basis kernel because it only has one parameter and generally gives good
// results without much fiddling.
typedef radial_basis_kernel<sample_type> kernel_type;
// Here we declare an instance of the kcentroid object. The first argument to the constructor
// is the kernel we wish to use. The second is a parameter that determines the numerical
// accuracy with which the object will perform part of the learning algorithm. Generally
// smaller values give better results but cause the algorithm to run slower. You just have
// to play with it to decide what balance of speed and accuracy is right for your problem.
// Here we have set it to 0.01.
kcentroid<kernel_type> test(kernel_type(0.1),0.01);
// now we train our object on a few samples of the sinc function.
sample_type m;
for (double x = -15; x <= 8; x += 1)
{
m(0) = x;
m(1) = sinc(x);
test.train(m);
}
running_stats<double> rs;
// Now lets output the distance from the centroid to some points that are from the sinc function.
// These numbers should all be similar. We will also calculate the statistics of these numbers
// by accumulating them into the running_stats object called rs. This will let us easily
// find the mean and standard deviation of the distances for use below.
cout << "Points that are on the sinc function:\n";
m(0) = -1.5; m(1) = sinc(m(0)); cout << " " << test(m) << endl; rs.add(test(m));
m(0) = -1.5; m(1) = sinc(m(0)); cout << " " << test(m) << endl; rs.add(test(m));
m(0) = -0; m(1) = sinc(m(0)); cout << " " << test(m) << endl; rs.add(test(m));
m(0) = -0.5; m(1) = sinc(m(0)); cout << " " << test(m) << endl; rs.add(test(m));
m(0) = -4.1; m(1) = sinc(m(0)); cout << " " << test(m) << endl; rs.add(test(m));
m(0) = -1.5; m(1) = sinc(m(0)); cout << " " << test(m) << endl; rs.add(test(m));
m(0) = -0.5; m(1) = sinc(m(0)); cout << " " << test(m) << endl; rs.add(test(m));
cout << endl;
// Lets output the distance from the centroid to some points that are NOT from the sinc function.
// These numbers should all be significantly bigger than previous set of numbers. We will also
// use the rs.scale() function to find out how many standard deviations they are away from the
// mean of the test points from the sinc function. So in this case our criterion for "significantly bigger"
// is > 3 or 4 standard deviations away from the above points that actually are on the sinc function.
cout << "Points that are NOT on the sinc function:\n";
m(0) = -1.5; m(1) = sinc(m(0))+4; cout << " " << test(m) << " is standard deviations from sinc: " << rs.scale(test(m)) << endl;
m(0) = -1.5; m(1) = sinc(m(0))+3; cout << " " << test(m) << " is standard deviations from sinc: " << rs.scale(test(m)) << endl;
m(0) = -0; m(1) = -sinc(m(0)); cout << " " << test(m) << " is standard deviations from sinc: " << rs.scale(test(m)) << endl;
m(0) = -0.5; m(1) = -sinc(m(0)); cout << " " << test(m) << " is standard deviations from sinc: " << rs.scale(test(m)) << endl;
m(0) = -4.1; m(1) = sinc(m(0))+2; cout << " " << test(m) << " is standard deviations from sinc: " << rs.scale(test(m)) << endl;
m(0) = -1.5; m(1) = sinc(m(0))+0.9; cout << " " << test(m) << " is standard deviations from sinc: " << rs.scale(test(m)) << endl;
m(0) = -0.5; m(1) = sinc(m(0))+1; cout << " " << test(m) << " is standard deviations from sinc: " << rs.scale(test(m)) << endl;
// The output is as follows:
/*
Points that are on the sinc function:
0.869861
0.869861
0.873182
0.872628
0.870352
0.869861
0.872628
Points that are NOT on the sinc function:
1.06306 is standard deviations from sinc: 125.137
1.0215 is standard deviations from sinc: 98.0313
0.92136 is standard deviations from sinc: 32.717
0.918282 is standard deviations from sinc: 30.7096
0.930931 is standard deviations from sinc: 38.9595
0.897916 is standard deviations from sinc: 17.4264
0.913855 is standard deviations from sinc: 27.822
*/
// So we can see that in this example the kcentroid object correctly indicates that
// the non-sinc points are definitely not points from the sinc function.
}