Cleaned up the kkmeans class and made it actually use the min_change parameter.

--HG--
extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%402510
This commit is contained in:
Davis King 2008-09-09 00:43:16 +00:00
parent 290494bb44
commit 5d1aad0304
2 changed files with 19 additions and 8 deletions

View File

@ -101,7 +101,7 @@ namespace dlib
void train (
const T& samples,
const U& initial_centers,
long max_iter = 1000000
long max_iter = 1000
)
{
do_train(vector_to_matrix(samples),vector_to_matrix(initial_centers),max_iter);
@ -132,6 +132,13 @@ namespace dlib
scalar_type min_change_
)
{
// make sure requires clause is not broken
// NOTE: C++ relational operators do not chain. `0 <= x < 1` parses as
// `(0 <= x) < 1`, i.e. a bool compared against 1, which is false for every
// non-negative x and true for negative x -- the opposite of the intended
// range check. Each bound has to be tested explicitly.
DLIB_ASSERT( 0 <= min_change_ && min_change_ < 1,
"\tvoid kkmeans::set_min_change()"
<< "\n\tInvalid arguments to this function"
<< "\n\tthis: " << this
<< "\n\tmin_change_: " << min_change_
);
min_change = min_change_;
}
@ -184,7 +191,7 @@ namespace dlib
void do_train (
const matrix_type& samples,
const matrix_type2& initial_centers,
long max_iter = 1000000
long max_iter = 1000
)
{
COMPILE_TIME_ASSERT((is_same_type<typename matrix_type::type, sample_type>::value));
@ -214,10 +221,13 @@ namespace dlib
// loop until the centers stabilize
long count = 0;
while (assignment_changed && count < max_iter)
const unsigned long min_num_change = static_cast<unsigned long>(min_change*samples.size());
unsigned long num_changed = min_num_change;
while (assignment_changed && count < max_iter && num_changed >= min_num_change)
{
++count;
assignment_changed = false;
num_changed = 0;
// loop over all the samples and assign them to their closest centers
for (long i = 0; i < samples.size(); ++i)
@ -240,6 +250,7 @@ namespace dlib
{
assignments[i] = best_center;
assignment_changed = true;
++num_changed;
}
}

View File

@ -111,7 +111,7 @@ namespace dlib
);
/*!
requires
- matrix_type and matrix_type2 must either be dlib::matrix objects or convertable to dlib::matrix
- matrix_type and matrix_type2 must either be dlib::matrix objects or convertible to dlib::matrix
via vector_to_matrix()
- matrix_type::type == sample_type (i.e. matrix_type should contain sample_type objects)
- matrix_type2::type == sample_type (i.e. matrix_type2 should contain sample_type objects)
@ -122,8 +122,8 @@ namespace dlib
- performs k-means clustering of the given set of samples. The initial center points
are taken from the initial_centers argument.
- loops over the data and continues to refine the clustering until either less than
get_min_change() fraction of the cluster centers move or we have done max_iter iterations
over the data.
get_min_change() fraction of the data points change clusters or we have done max_iter
iterations over the data.
- After this function finishes you can call the operator() function below
to determine which centroid a given sample is closest to.
!*/
@ -153,8 +153,8 @@ namespace dlib
) const;
/*!
ensures
- returns the minimum fraction of centers that need to change
in an iteration of kmeans for the algorithm to keep going.
- returns the minimum fraction of data points that need to change
centers in an iteration of kmeans for the algorithm to keep going.
!*/
void swap (