dlib/examples/kcentroid_ex.cpp

// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
/*
    This is an example illustrating the use of the kcentroid object 
    from the dlib C++ Library.

    The kcentroid object is an implementation of an algorithm that recursively
    computes the centroid (i.e. average) of a set of points.  The interesting
    thing about dlib::kcentroid is that it does so in a kernel induced feature
    space.  This means that you can use it as a non-linear one-class classifier.
    So you might use it to perform online novelty detection (although it has
    other uses; see the svm_pegasos or kkmeans examples).
    
    This example will train an instance of it on points from the sinc function.

*/

#include <iostream>
#include <vector>

#include "dlib/svm.h"
#include "dlib/statistics.h"

using namespace std;
using namespace dlib;

// Here is the sinc function whose points we will use to train the kcentroid
// object.
double sinc(double x)
{
    // Unnormalized sinc: sin(x)/x.  At x == 0 the quotient would be 0/0, so
    // the value 1 is returned there instead.
    return (x != 0) ? sin(x)/x : 1;
}

int main()
{
    // Here we declare that our samples will be 2 dimensional column vectors.
    // (Note that if you don't know the dimensionality of your vectors at compile time
    // you can change the 2 to a 0 and then set the size at runtime)
    typedef matrix<double,2,1> sample_type;

    // Now we are making a typedef for the kind of kernel we want to use.  I picked the
    // radial basis kernel because it only has one parameter and generally gives good
    // results without much fiddling.
    typedef radial_basis_kernel<sample_type> kernel_type;

    // Here we declare an instance of the kcentroid object.  The kcentroid has 3 parameters
    // you need to set.  The first argument to the constructor is the kernel we wish to
    // use.  The second is a parameter that determines the numerical accuracy with which
    // the object will perform the centroid estimation.  Generally, smaller values
    // give better results but cause the algorithm to attempt to use more dictionary vectors
    // (and thus run slower and use more memory).  The third argument, however, is the
    // maximum number of dictionary vectors a kcentroid is allowed to use.  So you can use
    // it to control the runtime complexity.
    kcentroid<kernel_type> test(kernel_type(0.1),0.01, 15);


    // Now we train our object on a few samples of the sinc function.
    sample_type m;
    for (double x = -15; x <= 8; x += 1)
    {
        m(0) = x;
        m(1) = sinc(x);
        test.train(m);
    }

    running_stats<double> rs;

    // Now let's output the distance from the centroid to some points that are from the sinc function.
    // These numbers should all be similar.  We will also calculate the statistics of these numbers
    // by accumulating them into the running_stats object called rs.  This will let us easily
    // find the mean and standard deviation of the distances for use below.
    cout << "Points that are on the sinc function:\n";

    // x coordinates of the test points; the y coordinate is sinc(x), so each point lies on the curve.
    const double on_sinc_x[] = { -1.5, -1.5, 0, -0.5, -4.1, -1.5, -0.5 };
    const unsigned long num_on_sinc = sizeof(on_sinc_x)/sizeof(on_sinc_x[0]);
    for (unsigned long i = 0; i < num_on_sinc; ++i)
    {
        m(0) = on_sinc_x[i];
        m(1) = sinc(m(0));

        // Evaluate the distance once and use it both for printing and for the statistics.
        const double dist = test(m);
        cout << "   " << dist << endl;
        rs.add(dist);
    }

    cout << endl;
    // Let's output the distance from the centroid to some points that are NOT from the sinc function.
    // These numbers should all be significantly bigger than the previous set of numbers.  We will also
    // use the rs.scale() function to find out how many standard deviations they are away from the
    // mean of the test points from the sinc function.  So in this case our criterion for "significantly bigger"
    // is > 3 or 4 standard deviations away from the above points that actually are on the sinc function.
    cout << "Points that are NOT on the sinc function:\n";

    // Each row is an (x, y) pair where y has been deliberately moved off the sinc curve,
    // either by adding an offset to sinc(x) or by flipping its sign.
    const double not_on_sinc[][2] = {
        { -1.5,  sinc(-1.5)+4   },
        { -1.5,  sinc(-1.5)+3   },
        {  0,   -sinc(0)        },
        { -0.5, -sinc(-0.5)     },
        { -4.1,  sinc(-4.1)+2   },
        { -1.5,  sinc(-1.5)+0.9 },
        { -0.5,  sinc(-0.5)+1   }
    };
    const unsigned long num_not_on_sinc = sizeof(not_on_sinc)/sizeof(not_on_sinc[0]);
    for (unsigned long i = 0; i < num_not_on_sinc; ++i)
    {
        m(0) = not_on_sinc[i][0];
        m(1) = not_on_sinc[i][1];

        // As above, the distance is evaluated a single time and then reused.
        const double dist = test(m);
        cout << "   " << dist << " is " << rs.scale(dist) << " standard deviations from sinc." << endl;
    }

    // And finally print out the mean and standard deviation of points that are actually from sinc().
    cout << "\nmean: " << rs.mean() << endl;
    cout << "standard deviation: " << rs.stddev() << endl;

    // The output is as follows:
    /*
        Points that are on the sinc function:
            0.869913
            0.869913
            0.873408
            0.872807
            0.870432
            0.869913
            0.872807

        Points that are NOT on the sinc function:
            1.06366 is 119.65 standard deviations from sinc.
            1.02212 is 93.8106 standard deviations from sinc.
            0.921382 is 31.1458 standard deviations from sinc.
            0.918439 is 29.3147 standard deviations from sinc.
            0.931428 is 37.3949 standard deviations from sinc.
            0.898018 is 16.6121 standard deviations from sinc.
            0.914425 is 26.8183 standard deviations from sinc.

            mean: 0.871313
            standard deviation: 0.00160756
    */

    // So we can see that in this example the kcentroid object correctly indicates that
    // the non-sinc points are definitely not points from the sinc function.
}
Added licensing comments to the example programs. --HG-- extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%402875 2009-02-17 09:45:57 +08:00			`// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt`
Added the kcentroid example --HG-- extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%402257 2008-05-23 08:26:28 +08:00			`/*`
			`This is an example illustrating the use of the kcentroid object`
			`from the dlib C++ Library.`

			`The kcentroid object is an implementation of an algorithm that recursively`
			`computes the centroid (i.e. average) of a set of points. The interesting`
			`thing about dlib::kcentroid is that it does so in a kernel induced feature`
			`space. This means that you can use it as a non-linear one-class classifier.`
Updated the examples to make more sense with respect to the updated kcentroid. --HG-- extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%402939 2009-03-16 11:26:54 +08:00			`So you might use it to perform online novelty detection (although, it has`
			`other uses, see the svm_pegasos or kkmeans examples for example).`
Added the kcentroid example --HG-- extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%402257 2008-05-23 08:26:28 +08:00
			`This example will train an instance of it on points from the sinc function.`

			`*/`

			`#include <iostream>`
			`#include <vector>`

			`#include "dlib/svm.h"`
Improved this example --HG-- extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%402341 2008-06-19 10:21:56 +08:00			`#include "dlib/statistics.h"`
Added the kcentroid example --HG-- extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%402257 2008-05-23 08:26:28 +08:00
			`using namespace std;`
			`using namespace dlib;`

			`// Here is the sinc function we will be trying to learn with the krls`
			`// object.`
			`double sinc(double x)`
			`{`
			`if (x == 0)`
			`return 1;`
			`return sin(x)/x;`
			`}`

			`int main()`
			`{`
			`// Here we declare that our samples will be 2 dimensional column vectors.`
Added some comments about setting the matrix size at runtime. --HG-- extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%402548 2008-10-09 07:42:24 +08:00			`// (Note that if you don't know the dimensionality of your vectors at compile time`
			`// you can change the 2 to a 0 and then set the size at runtime)`
Added the kcentroid example --HG-- extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%402257 2008-05-23 08:26:28 +08:00			`typedef matrix<double,2,1> sample_type;`

			`// Now we are making a typedef for the kind of kernel we want to use. I picked the`
			`// radial basis kernel because it only has one parameter and generally gives good`
			`// results without much fiddling.`
			`typedef radial_basis_kernel<sample_type> kernel_type;`

Updated the examples to make more sense with respect to the updated kcentroid. --HG-- extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%402939 2009-03-16 11:26:54 +08:00			`// Here we declare an instance of the kcentroid object. The kcentroid has 3 parameters`
			`// you need to set. The first argument to the constructor is the kernel we wish to`
			`// use. The second is a parameter that determines the numerical accuracy with which`
			`// the object will perform the centroid estimation. Generally, smaller values`
Updated the example programs so that there isn't this confusing use of the phase "support vectors" all over the place. Also fixed them to compile now that I renamed the support_vectors field in decision_function to basis_vectors. --HG-- extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%403279 2009-11-30 02:59:24 +08:00			`// give better results but cause the algorithm to attempt to use more dictionary vectors`
Updated the examples to make more sense with respect to the updated kcentroid. --HG-- extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%402939 2009-03-16 11:26:54 +08:00			`// (and thus run slower and use more memory). The third argument, however, is the`
Updated the example programs so that there isn't this confusing use of the phase "support vectors" all over the place. Also fixed them to compile now that I renamed the support_vectors field in decision_function to basis_vectors. --HG-- extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%403279 2009-11-30 02:59:24 +08:00			`// maximum number of dictionary vectors a kcentroid is allowed to use. So you can use`
Updated the examples to make more sense with respect to the updated kcentroid. --HG-- extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%402939 2009-03-16 11:26:54 +08:00			`// it to control the runtime complexity.`
			`kcentroid<kernel_type> test(kernel_type(0.1),0.01, 15);`
Added the kcentroid example --HG-- extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%402257 2008-05-23 08:26:28 +08:00
added more comments to the kcentroid example --HG-- extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%402498 2008-09-06 22:50:36 +08:00
Added the kcentroid example --HG-- extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%402257 2008-05-23 08:26:28 +08:00			`// now we train our object on a few samples of the sinc function.`
			`sample_type m;`
			`for (double x = -15; x <= 8; x += 1)`
			`{`
			`m(0) = x;`
			`m(1) = sinc(x);`
			`test.train(m);`
			`}`

Improved this example --HG-- extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%402341 2008-06-19 10:21:56 +08:00			`running_stats<double> rs;`
Added the kcentroid example --HG-- extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%402257 2008-05-23 08:26:28 +08:00
Improved this example --HG-- extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%402341 2008-06-19 10:21:56 +08:00			`// Now lets output the distance from the centroid to some points that are from the sinc function.`
			`// These numbers should all be similar. We will also calculate the statistics of these numbers`
			`// by accumulating them into the running_stats object called rs. This will let us easily`
			`// find the mean and standard deviation of the distances for use below.`
Added the kcentroid example --HG-- extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%402257 2008-05-23 08:26:28 +08:00			`cout << "Points that are on the sinc function:\n";`
Improved this example --HG-- extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%402341 2008-06-19 10:21:56 +08:00			`m(0) = -1.5; m(1) = sinc(m(0)); cout << " " << test(m) << endl; rs.add(test(m));`
			`m(0) = -1.5; m(1) = sinc(m(0)); cout << " " << test(m) << endl; rs.add(test(m));`
			`m(0) = -0; m(1) = sinc(m(0)); cout << " " << test(m) << endl; rs.add(test(m));`
			`m(0) = -0.5; m(1) = sinc(m(0)); cout << " " << test(m) << endl; rs.add(test(m));`
			`m(0) = -4.1; m(1) = sinc(m(0)); cout << " " << test(m) << endl; rs.add(test(m));`
			`m(0) = -1.5; m(1) = sinc(m(0)); cout << " " << test(m) << endl; rs.add(test(m));`
			`m(0) = -0.5; m(1) = sinc(m(0)); cout << " " << test(m) << endl; rs.add(test(m));`

			`cout << endl;`
			`// Lets output the distance from the centroid to some points that are NOT from the sinc function.`
			`// These numbers should all be significantly bigger than previous set of numbers. We will also`
			`// use the rs.scale() function to find out how many standard deviations they are away from the`
			`// mean of the test points from the sinc function. So in this case our criterion for "significantly bigger"`
			`// is > 3 or 4 standard deviations away from the above points that actually are on the sinc function.`
Added the kcentroid example --HG-- extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%402257 2008-05-23 08:26:28 +08:00			`cout << "Points that are NOT on the sinc function:\n";`
made the examples a little more clear. --HG-- extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%402345 2008-06-21 22:31:41 +08:00			`m(0) = -1.5; m(1) = sinc(m(0))+4; cout << " " << test(m) << " is " << rs.scale(test(m)) << " standard deviations from sinc." << endl;`
			`m(0) = -1.5; m(1) = sinc(m(0))+3; cout << " " << test(m) << " is " << rs.scale(test(m)) << " standard deviations from sinc." << endl;`
			`m(0) = -0; m(1) = -sinc(m(0)); cout << " " << test(m) << " is " << rs.scale(test(m)) << " standard deviations from sinc." << endl;`
			`m(0) = -0.5; m(1) = -sinc(m(0)); cout << " " << test(m) << " is " << rs.scale(test(m)) << " standard deviations from sinc." << endl;`
			`m(0) = -4.1; m(1) = sinc(m(0))+2; cout << " " << test(m) << " is " << rs.scale(test(m)) << " standard deviations from sinc." << endl;`
			`m(0) = -1.5; m(1) = sinc(m(0))+0.9; cout << " " << test(m) << " is " << rs.scale(test(m)) << " standard deviations from sinc." << endl;`
			`m(0) = -0.5; m(1) = sinc(m(0))+1; cout << " " << test(m) << " is " << rs.scale(test(m)) << " standard deviations from sinc." << endl;`
Improved this example --HG-- extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%402341 2008-06-19 10:21:56 +08:00
Added some comments and cleaned up code slightly. --HG-- extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%403420 2010-01-29 08:31:39 +08:00			`// And finally print out the mean and standard deviation of points that are actually from sinc().`
Updated the examples to make more sense with respect to the updated kcentroid. --HG-- extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%402939 2009-03-16 11:26:54 +08:00			`cout << "\nmean: " << rs.mean() << endl;`
Added some comments and cleaned up code slightly. --HG-- extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%403420 2010-01-29 08:31:39 +08:00			`cout << "standard deviation: " << rs.stddev() << endl;`
Updated the examples to make more sense with respect to the updated kcentroid. --HG-- extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%402939 2009-03-16 11:26:54 +08:00
Improved this example --HG-- extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%402341 2008-06-19 10:21:56 +08:00			`// The output is as follows:`
			`/*`
			`Points that are on the sinc function:`
Updated the examples to make more sense with respect to the updated kcentroid. --HG-- extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%402939 2009-03-16 11:26:54 +08:00			`0.869913`
			`0.869913`
			`0.873408`
			`0.872807`
			`0.870432`
			`0.869913`
			`0.872807`
Improved this example --HG-- extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%402341 2008-06-19 10:21:56 +08:00
			`Points that are NOT on the sinc function:`
Updated the examples to make more sense with respect to the updated kcentroid. --HG-- extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%402939 2009-03-16 11:26:54 +08:00			`1.06366 is 119.65 standard deviations from sinc.`
			`1.02212 is 93.8106 standard deviations from sinc.`
			`0.921382 is 31.1458 standard deviations from sinc.`
			`0.918439 is 29.3147 standard deviations from sinc.`
			`0.931428 is 37.3949 standard deviations from sinc.`
			`0.898018 is 16.6121 standard deviations from sinc.`
			`0.914425 is 26.8183 standard deviations from sinc.`

			`mean: 0.871313`
			`standard deviation: 0.00160756`
Improved this example --HG-- extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%402341 2008-06-19 10:21:56 +08:00			`*/`

			`// So we can see that in this example the kcentroid object correctly indicates that`
			`// the non-sinc points are definitely not points from the sinc function.`
Added the kcentroid example --HG-- extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%402257 2008-05-23 08:26:28 +08:00			`}`