Added spectral_cluster()

This commit is contained in:
Davis King 2015-02-11 07:50:27 -05:00
parent 2e5d2c46c6
commit f99e940b28
3 changed files with 122 additions and 0 deletions

View File

@ -5,6 +5,7 @@
#include "clustering/modularity_clustering.h"
#include "clustering/chinese_whispers.h"
#include "clustering/spectral_cluster.h"
#include "svm/kkmeans.h"
#endif // DLIB_CLuSTERING_

View File

@ -0,0 +1,78 @@
// Copyright (C) 2015 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_SPECTRAL_CLUSTEr_H_
#define DLIB_SPECTRAL_CLUSTEr_H_
#include "spectral_cluster_abstract.h"
#include <vector>
#include "../matrix.h"
#include "../svm/kkmeans.h"
namespace dlib
{
// Clusters samples into num_clusters groups using normalized spectral
// clustering (the spec comment in spectral_cluster_abstract.h cites Ng, Jordan,
// and Weiss):
//   1. Build a symmetric similarity matrix from k() with a zero diagonal.
//   2. Scale it on both sides by diagm(sqrt(reciprocal(row sums))).
//   3. Keep the num_clusters singular vectors with the largest singular values.
//   4. Normalize each row of that matrix to unit length and run k-means on the
//      rows.
//
// Parameters:
//   k            - similarity function; k(samples[i], samples[j]) is evaluated
//                  once per unordered pair i < j and converted to double.
//   samples      - container with a std::vector compatible interface.
//   num_clusters - number of clusters to produce, must be > 0.
//
// Returns a vector A with A.size() == samples.size() where A[i] is the cluster
// index assigned to samples[i] and 0 <= A[i] < num_clusters.
template <
    typename kernel_type,
    typename vector_type
    >
std::vector<unsigned long> spectral_cluster (
    const kernel_type& k,
    const vector_type& samples,
    const unsigned long num_clusters
)
{
    DLIB_CASSERT(num_clusters > 0,
        "\t std::vector<unsigned long> spectral_cluster(k,samples,num_clusters)"
        << "\n\t num_clusters can't be 0."
        );

    if (num_clusters == 1)
    {
        // nothing to do, just assign everything to the 0 cluster.
        return std::vector<unsigned long>(samples.size(), 0);
    }

    const unsigned long num_samples = static_cast<unsigned long>(samples.size());
    if (num_samples < num_clusters)
    {
        // There are fewer samples than requested clusters (possibly none at all).
        // The spectral embedding below would need more columns than the
        // decomposition can supply, so give every sample its own cluster.  This
        // keeps the result the same size as samples with every label below
        // num_clusters.
        std::vector<unsigned long> assignments(num_samples);
        for (unsigned long i = 0; i < num_samples; ++i)
            assignments[i] = i;
        return assignments;
    }

    // compute the similarity matrix.  Only the upper triangle is evaluated and
    // then mirrored, so k() is effectively treated as symmetric.
    matrix<double> K(samples.size(), samples.size());
    for (long r = 0; r < K.nr(); ++r)
        for (long c = r+1; c < K.nc(); ++c)
            K(r,c) = K(c,r) = static_cast<double>(k(samples[r], samples[c]));
    // Self-similarity is ignored.
    for (long r = 0; r < K.nr(); ++r)
        K(r,r) = 0;

    // D holds the row sums of K (each sample's total similarity to the others).
    matrix<double,0,1> D(K.nr());
    for (long r = 0; r < K.nr(); ++r)
        D(r) = sum(rowm(K,r));
    // Scale K symmetrically by the inverse square roots of the row sums.
    // NOTE(review): this relies on dlib's reciprocal() mapping 0 to 0 so that a
    // sample with no similarity to anything does not inject inf/NaN here --
    // confirm against the dlib matrix documentation.
    D = sqrt(reciprocal(D));
    K = diagm(D)*K*diagm(D);

    matrix<double> u,w,v;
    // Use the normal SVD routine unless the matrix is really big, then use the fast
    // approximate version.
    if (K.nr() < 1000)
        svd3(K,u,w,v);
    else
        svd_fast(K,u,w,v, num_clusters+100, 5);

    // Pick out the eigenvectors associated with the largest eigenvalues.
    rsort_columns(v,w);
    v = colm(v, range(0,num_clusters-1));

    // Now build the normalized spectral vectors, one for each input vector.
    std::vector<matrix<double,0,1> > spec_samps, centers;
    spec_samps.reserve(v.nr());
    for (long r = 0; r < v.nr(); ++r)
    {
        spec_samps.push_back(trans(rowm(v,r)));
        // A row can be all zeros (for instance when a sample has zero similarity
        // to every other sample).  Dividing by its length would then fill the
        // vector with NaN and corrupt the k-means step, so leave such rows as they
        // are.
        const double len = length(spec_samps.back());
        if (len != 0)
            spec_samps.back() /= len;
    }

    // Finally do the K-means clustering
    pick_initial_centers(num_clusters, centers, spec_samps);
    find_clusters_using_kmeans(spec_samps, centers);

    // And then compute the cluster assignments based on the output of K-means.
    std::vector<unsigned long> assignments;
    assignments.reserve(spec_samps.size());
    for (unsigned long i = 0; i < spec_samps.size(); ++i)
        assignments.push_back(nearest_center(centers, spec_samps[i]));
    return assignments;
}
}
#endif // DLIB_SPECTRAL_CLUSTEr_H_

View File

@ -0,0 +1,43 @@
// Copyright (C) 2015 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_SPECTRAL_CLUSTEr_ABSTRACT_H_
#ifdef DLIB_SPECTRAL_CLUSTEr_ABSTRACT_H_
#include <vector>
namespace dlib
{
template <
typename kernel_type,
typename vector_type
>
std::vector<unsigned long> spectral_cluster (
const kernel_type& k,
const vector_type& samples,
const unsigned long num_clusters
);
/*!
    requires
        - samples must be something with an interface compatible with std::vector.
        - The following expression must evaluate to a double or float:
            k(samples[i], samples[j])
        - num_clusters > 0
    ensures
        - Performs the spectral clustering algorithm described in the paper:
            On spectral clustering: Analysis and an algorithm by Ng, Jordan, and Weiss.
          and returns the results.
        - This function clusters the input data samples into num_clusters clusters and
          returns a vector that indicates which cluster each sample falls into.  In
          particular, we return an array A such that:
            - A.size() == samples.size()
            - A[i] == the cluster assignment of samples[i].
            - for all valid i:  0 <= A[i] < num_clusters
        - The "similarity" of samples[i] with samples[j] is given by
          k(samples[i],samples[j]).  This means that k() should output a number >= 0
          and the number should be larger for samples that are more similar.
        - The implementation evaluates k(samples[i],samples[j]) only for i < j and
          uses that value for both orderings of the pair, so k() is expected to be
          symmetric.
        - The similarity of a sample with itself is never evaluated; it is taken to
          be 0.
!*/
}
#endif // DLIB_SPECTRAL_CLUSTEr_ABSTRACT_H_