- Added a max dictionary size setting to the kcentroid object.

- Removed the "discount" mechanism and replaced it with a much less
  confusing pair of scale arguments to the training functions.
  - Changed the kcentroid's serialization format.

--HG--
extra : convert_revision : svn:fdd8eb12-d10e-0410-9acb-85c331704f74/trunk@2313
Davis King 2008-06-14 03:32:09 +00:00
parent 94ebc5c338
commit 3e1edfaa32
2 changed files with 158 additions and 94 deletions
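
For context, the reworked interface can be exercised like so (a minimal
sketch, assuming dlib's radial_basis_kernel; the kernel choice and all
parameter values here are illustrative, not taken from this commit):

    #include <dlib/svm.h>
    #include <iostream>
    using namespace dlib;

    int main()
    {
        typedef matrix<double,2,1> sample_type;
        typedef radial_basis_kernel<sample_type> kernel_type;

        // kernel, tolerance, and the new max dictionary size argument
        kcentroid<kernel_type> cent(kernel_type(0.1), 0.001, 64);

        sample_type x;
        x = 1, 2;

        cent.train(x);            // unweighted update: x gets weight 1/(N+1)
        cent.train(x, 0.5, 0.5);  // explicit scales: new = 0.5*old + 0.5*x

        // distance from x to the current centroid estimate
        std::cout << cent(x) << std::endl;
        return 0;
    }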

dlib/svm/kcentroid.h

@@ -36,52 +36,24 @@ namespace dlib
explicit kcentroid (
const kernel_type& kernel_,
scalar_type tolerance_ = 0.001
scalar_type tolerance_ = 0.001,
unsigned long max_dictionary_size_ = 1000000
) :
kernel(kernel_),
tolerance(tolerance_),
max_dis(1e6)
max_dictionary_size(max_dictionary_size_)
{
clear_dictionary();
}
void set_tolerance (scalar_type tolerance_)
{
// make sure requires clause is not broken
DLIB_ASSERT(tolerance_ >= 0,
"\tvoid kcentroid::set_tolerance"
<< "\n\tinvalid tolerance value"
<< "\n\ttolerance: " << tolerance_
<< "\n\tthis: " << this
);
tolerance = tolerance_;
}
scalar_type get_tolerance() const
{
return tolerance;
}
void set_max_discount (
scalar_type value
)
unsigned long get_max_dictionary_size() const
{
// make sure requires clause is not broken
DLIB_ASSERT(value >= 0,
"\tvoid kcentroid::set_max_discount"
<< "\n\tinvalid discount value"
<< "\n\tvalue: " << value
<< "\n\tthis: " << this
);
max_dis = value;
if (samples_seen > value)
samples_seen = value;
}
scalar_type get_max_discount(
) const
{
return max_dis;
return max_dictionary_size;
}
void clear_dictionary ()
@@ -90,6 +62,7 @@ namespace dlib
alpha.clear();
K_inv.set_size(0,0);
K.set_size(0,0);
samples_seen = 0;
bias = 0;
}
@@ -105,18 +78,50 @@ namespace dlib
return std::sqrt(kernel(x,x) + bias - 2*temp);
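// (i.e. the distance in kernel feature space between phi(x) and the
// current centroid estimate)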
}
double samples_trained (
) const
{
return samples_seen;
}
scalar_type test_and_train (
const sample_type& x
)
{
return train_and_maybe_test(x,true);
++samples_seen;
const double xscale = 1.0/samples_seen;
const double cscale = 1-xscale;
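// these scales make the update the usual running mean:
// new_centroid == ((N-1)*old_centroid + x)/N where N == samples_seen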
return train_and_maybe_test(x,cscale,xscale,true);
}
void train (
const sample_type& x
)
{
train_and_maybe_test(x,false);
++samples_seen;
const double xscale = 1.0/samples_seen;
const double cscale = 1-xscale;
train_and_maybe_test(x,cscale,xscale,false);
}
scalar_type test_and_train (
const sample_type& x,
double cscale,
double xscale
)
{
++samples_seen;
return train_and_maybe_test(x,cscale,xscale,true);
}
void train (
const sample_type& x,
double cscale,
double xscale
)
{
++samples_seen;
train_and_maybe_test(x,cscale,xscale,false);
}
void swap (
@@ -131,7 +136,6 @@ namespace dlib
exchange(tolerance, item.tolerance);
exchange(samples_seen, item.samples_seen);
exchange(bias, item.bias);
exchange(max_dis, item.max_dis);
a.swap(item.a);
k.swap(item.k);
}
@@ -149,7 +153,6 @@ namespace dlib
serialize(item.tolerance, out);
serialize(item.samples_seen, out);
serialize(item.bias, out);
serialize(item.max_dis, out);
}
friend void deserialize(kcentroid& item, std::istream& in)
@@ -162,13 +165,14 @@ namespace dlib
deserialize(item.tolerance, in);
deserialize(item.samples_seen, in);
deserialize(item.bias, in);
deserialize(item.max_dis, in);
}
private:
scalar_type train_and_maybe_test (
const sample_type& x,
double cscale,
double xscale,
bool do_test
)
{
@@ -183,7 +187,7 @@ namespace dlib
K.set_size(1,1);
K(0,0) = kx;
alpha.push_back(1.0);
alpha.push_back(xscale);
dictionary.push_back(x);
}
else
@@ -201,12 +205,25 @@ namespace dlib
// compute the error we would have if we approximated the new x sample
// with the dictionary. That is, do the ALD test from the KRLS paper.
a = K_inv*k;
const scalar_type delta = kx - trans(k)*a;
scalar_type delta = kx - trans(k)*a;
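// delta == k(x,x) - k'*inv(K)*k, i.e. the squared residual left over after
// projecting phi(x) onto the span of the current dictionary vectors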
// if this new vector isn't approximately linearly dependent on the vectors
// in our dictionary.
if (std::abs(delta) > tolerance)
{
if (dictionary.size() >= max_dictionary_size)
{
// We need to remove one of the old members of the dictionary before
// we proceed with adding a new one. So remove the oldest one.
remove_dictionary_vector(0);
// recompute k, a, and delta since they were computed with the old
// kernel matrix
k = remove_row(k,0);
a = K_inv*k;
delta = kx - trans(k)*a;
}
// add x to the dictionary
dictionary.push_back(x);
@@ -239,37 +256,68 @@ namespace dlib
// now update the alpha vector
const double alpha_scale = samples_seen/(samples_seen+1);
for (unsigned long i = 0; i < alpha.size(); ++i)
{
alpha[i] *= alpha_scale;
alpha[i] *= cscale;
}
alpha.push_back(1.0-alpha_scale);
alpha.push_back(xscale);
}
else
{
// update the alpha vector so that this new sample has been added into
// the mean vector we are accumulating
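// (this works because the ALD test above tells us that, in feature space,
// phi(x) is approximately a(0)*phi(dictionary[0]) + ... + a(m)*phi(dictionary[m]))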
const double alpha_scale = samples_seen/(samples_seen+1);
const double a_scale = 1.0-alpha_scale;
for (unsigned long i = 0; i < alpha.size(); ++i)
{
alpha[i] = alpha_scale*alpha[i] + a_scale*a(i);
alpha[i] = cscale*alpha[i] + xscale*a(i);
}
}
}
++samples_seen;
// recompute the bias term
bias = sum(pointwise_multiply(K, vector_to_matrix(alpha)*trans(vector_to_matrix(alpha))));
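// (bias == alpha'*K*alpha, the squared norm of the current centroid in
// feature space)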
if (samples_seen > max_dis)
samples_seen = max_dis;
return test_result;
}
void remove_dictionary_vector (
long i
)
/*!
requires
- 0 <= i < dictionary.size()
ensures
- #dictionary.size() == dictionary.size() - 1
- #alpha.size() == alpha.size() - 1
- updates the K_inv matrix so that it is still a proper inverse of the
kernel matrix
- also removes the necessary row and column from the K matrix
- uses the this->a member variable as scratch space, so after this
function runs its value will have changed.
!*/
{
// remove the dictionary vector
dictionary.erase(dictionary.begin()+i);
// remove the i'th vector from the inverse kernel matrix. This formula is basically
// just the reverse of the way K_inv is updated by equation 3.14 during normal training.
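// Concretely this is the block matrix inverse (Schur complement) identity:
// if M == [A b; b' c] and inv(M) == [E f; f' g] then inv(A) == E - f*f'/g.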
K_inv = removerc(K_inv,i,i) - remove_row(colm(K_inv,i)/K_inv(i,i),i)*remove_col(rowm(K_inv,i),i);
// now compute the updated alpha values to account for the fact that we just
// removed one of our dictionary vectors
a = (K_inv*remove_row(K,i)*vector_to_matrix(alpha));
// now copy over the new alpha values
alpha.resize(alpha.size()-1);
for (unsigned long k = 0; k < alpha.size(); ++k)
{
alpha[k] = a(k);
}
// update the K matrix as well
K = removerc(K,i,i);
}
typedef std_allocator<sample_type, mem_manager_type> alloc_sample_type;
typedef std_allocator<scalar_type, mem_manager_type> alloc_scalar_type;
@@ -285,9 +333,9 @@ namespace dlib
matrix<scalar_type,0,0,mem_manager_type> K;
scalar_type tolerance;
unsigned long max_dictionary_size;
scalar_type samples_seen;
scalar_type bias;
scalar_type max_dis;
// temp variables here just so we don't have to reconstruct them over and over. Thus,

dlib/svm/kcentroid_abstract.h

@@ -23,7 +23,7 @@ namespace dlib
INITIAL VALUE
- dictionary_size() == 0
- max_discount() == 1e6
- samples_trained() == 0
WHAT THIS OBJECT REPRESENTS
This is an implementation of an online algorithm for recursively estimating the
@@ -34,6 +34,13 @@ namespace dlib
and any test points. So you can use this object to predict how similar a test
point is to the data this object has been trained on (larger distances from the
centroid indicate dissimilarity/anomalous points).
Also note that the algorithm internally keeps a set of "dictionary vectors"
that are used to represent the regression function. You can force the
algorithm to use no more than a set number of vectors by setting
the 3rd constructor argument to whatever you want. However, note that
doing this causes the algorithm to bias its results towards more
recent training examples.
!*/
public:
@@ -44,7 +51,8 @@ namespace dlib
explicit kcentroid (
const kernel_type& kernel_,
scalar_type tolerance_ = 0.001
scalar_type tolerance_ = 0.001,
unsigned long max_dictionary_size_ = 1000000
);
/*!
ensures
@@ -52,16 +60,23 @@ namespace dlib
- #get_tolerance() == tolerance_
- #get_decision_function().kernel_function == kernel_
(i.e. this object will use the given kernel function)
- #get_max_dictionary_size() == max_dictionary_size_
!*/
void set_tolerance (
scalar_type tolerance_
);
unsigned long get_max_dictionary_size(
) const;
/*!
requires
- tolerance_ >= 0
ensures
- #get_tolerance() == tolerance_
- returns the maximum number of dictionary vectors this object
will use at a time. That is, dictionary_size() will never be
greater than get_max_dictionary_size().
!*/
scalar_type samples_trained (
) const;
/*!
ensures
- returns the number of samples this object has been trained on so far
!*/
scalar_type get_tolerance(
@@ -77,42 +92,12 @@ namespace dlib
less accurate estimate but also in fewer support vectors.
!*/
void set_max_discount (
scalar_type value
);
/*!
requires
- value > 0
ensures
- #get_max_discount() == value
!*/
scalar_type get_max_discount(
) const;
/*!
ensures
- If you have shown this object N samples so far then it has found
the centroid of those N samples. That is, it has found the average
of all of them in some high dimensional feature space.
- if (N <= get_max_discount()) then
- The next sample you show this object will be added to the centroid
with a weight of 1/(N+1).
- else
- The next sample you show this object will be added to the centroid
with a weight of 1/(get_max_discount()+1).
- If you think your samples are from a stationary source then you
should set the max discount to some really big number. However,
if you think the source isn't stationary then use a smaller number.
This will cause the centroid in this object to be closer to the
centroid of the more recent points.
!*/
void clear_dictionary (
);
/*!
ensures
- clears out all learned data (e.g. #dictionary_size() == 0)
- #samples_trained() == 0
!*/
scalar_type operator() (
@@ -125,6 +110,31 @@ namespace dlib
to this object so far.
!*/
void train (
const sample_type& x
);
/*!
ensures
- adds the sample x into the current estimate of the centroid
- also note that calling this function is equivalent to calling
train(x, samples_trained()/(samples_trained()+1.0), 1.0/(samples_trained()+1.0)).
That is, this function finds the normal unweighted centroid of all training points.
!*/
void train (
const sample_type& x,
double cscale,
double xscale
);
/*!
ensures
- adds the sample x into the current estimate of the centroid but
uses the user supplied scales. That is, this function performs:
- new_centroid = cscale*old_centroid + xscale*x
- This function allows you to weight different samples however
you want.
!*/
scalar_type test_and_train (
const sample_type& x
);
@@ -137,12 +147,18 @@ namespace dlib
than calling both individually.
!*/
void train (
const sample_type& x
scalar_type test_and_train (
const sample_type& x,
double cscale,
double xscale
);
/*!
ensures
- adds the sample x into the current estimate of the centroid
- calls train(x,cscale,xscale)
- returns (*this)(x)
- The reason this function exists is that train() and operator()
both compute some of the same things. So this function is more efficient
than calling both individually.
!*/
void swap (