From cd6f196eb72f13edb42f01634c08b78c28c33e45 Mon Sep 17 00:00:00 2001
From: Davis King
Date: Sun, 15 Mar 2009 23:24:08 +0000
Subject: [PATCH] Updated the kkmeans example to show how to use the new max sv
 settings of the kcentroid.

--HG--
extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%402936
---
 examples/kkmeans_ex.cpp | 42 ++++++++++++++++++++---------------------
 1 file changed, 20 insertions(+), 22 deletions(-)

diff --git a/examples/kkmeans_ex.cpp b/examples/kkmeans_ex.cpp
index 42259d136..95d8f3b7d 100644
--- a/examples/kkmeans_ex.cpp
+++ b/examples/kkmeans_ex.cpp
@@ -41,28 +41,18 @@ int main()
     typedef radial_basis_kernel<sample_type> kernel_type;
 
 
-    // Here we declare an instance of the kcentroid object.  The first argument to the constructor
-    // is the kernel we wish to use.  The second is a parameter that determines the numerical
-    // accuracy with which the object will perform part of the learning algorithm.  Generally
-    // smaller values give better results but cause the algorithm to run slower.  You just have
-    // to play with it to decide what balance of speed and accuracy is right for your problem.
-    // Here we have set it to 0.01.
-    //
-    // Also, since we are using the radial basis kernel we have to pick the RBF width parameter.
-    // Here we have it set to 0.1.  But in general, a reasonable way of picking this value is
-    // to start with some initial guess and to just run all the data through the resulting
-    // kcentroid.  Then print out kc.dictionary_size() to see how many support vectors the
-    // kcentroid object is using.  A good rule of thumb is that you should have somewhere
-    // in the range of 10-100 support vectors (but this rule isn't carved in stone).
-    // So if you aren't in that range then you can change the RBF parameter.  Making it
-    // smaller will decrease the dictionary size and making it bigger will increase the
-    // dictionary size.
-    //
-    // So what I often do is I set the kcentroid's second parameter to 0.01 or 0.001.  Then
-    // I find an RBF kernel parameter that gives me the number of support vectors that I
-    // feel is appropriate for the problem I'm trying to solve.  Again, this just comes down
-    // to playing with it and getting a feel for how things work.
-    kcentroid<kernel_type> kc(kernel_type(0.1),0.01);
+    // Here we declare an instance of the kcentroid object.  It is the object used to
+    // represent each of the centers used for clustering.  The kcentroid has 4 parameters
+    // you need to set.  The first argument to the constructor is the kernel we wish to
+    // use.  The second is a parameter that determines the numerical accuracy with which
+    // the object will perform part of the learning algorithm.  Generally, smaller values
+    // give better results but cause the algorithm to attempt to use more support vectors
+    // (and thus run slower and use more memory).  The third argument, however, is the
+    // maximum number of support vectors a kcentroid is allowed to use.  So you can use
+    // it to control the complexity.  Finally, the last argument should always be set to
+    // false when using a kcentroid for clustering (see the kcentroid docs for details on
+    // this parameter).
+    kcentroid<kernel_type> kc(kernel_type(0.1),0.01, 8, false);
 
     // Now we make an instance of the kkmeans object and tell it to use kcentroid objects
     // that are configured with the parameters from the kc object we defined above.
@@ -145,6 +135,14 @@ int main()
         cout << test(samples[i+2*num]) << "\n";
     }
 
+    // Now print out how many support vectors each center used.  Note that
+    // the maximum number of 8 was reached.  If you went back to the kcentroid
+    // constructor and changed the 8 to some bigger number you would see that these
+    // numbers would go up.  However, 8 is all we need to correctly cluster this dataset.
+    cout << "num sv for center 0: " << test.get_kcentroid(0).dictionary_size() << endl;
+    cout << "num sv for center 1: " << test.get_kcentroid(1).dictionary_size() << endl;
+    cout << "num sv for center 2: " << test.get_kcentroid(2).dictionary_size() << endl;
+
 }
 
 