2010-05-13 10:46:17 +08:00
|
|
|
// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
|
|
|
|
/*
|
|
|
|
|
|
|
|
This is an example illustrating the use of the linear_manifold_regularizer
|
|
|
|
and empirical_kernel_map from the dlib C++ Library.
|
|
|
|
|
|
|
|
This example program assumes you are familiar with some general elements of
|
|
|
|
the library. In particular, you should have at least read the svm_ex.cpp
|
|
|
|
and matrix_ex.cpp examples. You should also have read the empirical_kernel_map_ex.cpp
|
|
|
|
example program as the present example builds upon it.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
This program shows an example of what is called semi-supervised learning.
|
|
|
|
That is, a small amount of labeled data is augmented with a large amount
|
|
|
|
of unlabeled data. A learning algorithm is then run on all the data
|
|
|
|
and the hope is that by including the unlabeled data we will end up with
|
|
|
|
a better result.
|
|
|
|
|
|
|
|
|
|
|
|
In this particular example we will generate 200,000 sample points of
|
|
|
|
unlabeled data along with 2 samples of labeled data. The sample points
|
2010-05-14 08:52:19 +08:00
|
|
|
will be drawn randomly from two concentric circles. One labeled data
|
|
|
|
point will be drawn from each circle. The goal is to learn to
|
2010-05-13 10:46:17 +08:00
|
|
|
correctly separate the two circles using only the 2 labeled points
|
|
|
|
and the unlabeled data.
|
|
|
|
|
|
|
|
To do this we will first run an approximate form of k nearest neighbors
|
|
|
|
to determine which of the unlabeled samples are closest together. We will
|
2010-05-14 08:52:19 +08:00
|
|
|
then make the manifold assumption, that is, we will assume that points close
|
|
|
|
to each other should share the same classification label.
|
2010-05-13 10:46:17 +08:00
|
|
|
|
|
|
|
Once we have determined which points are near neighbors we will use the
|
|
|
|
empirical_kernel_map and linear_manifold_regularizer to transform all the
|
|
|
|
data points into a new vector space where any linear rule will have similar
|
2010-05-14 08:52:19 +08:00
|
|
|
output for points which we have decided are near neighbors.
|
2010-05-13 10:46:17 +08:00
|
|
|
|
2010-05-14 08:52:19 +08:00
|
|
|
Finally, we will classify all the unlabeled data according to which of
|
|
|
|
the two labeled points are nearest. Normally this would not work but by
|
|
|
|
using the manifold assumption we will be able to successfully classify
|
|
|
|
all the unlabeled data.
|
2010-05-13 10:46:17 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
For further information on this subject you should begin with the following
|
|
|
|
paper as it discusses a very similar application of manifold regularization.
|
|
|
|
|
|
|
|
Beyond the Point Cloud: from Transductive to Semi-supervised Learning
|
|
|
|
by Vikas Sindhwani, Partha Niyogi, and Mikhail Belkin
|
2010-05-14 08:52:19 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
******** SAMPLE PROGRAM OUTPUT ********
|
|
|
|
|
|
|
|
Testing manifold regularization with an intrinsic_regularization_strength of 0.
|
|
|
|
number of edges generated: 49998
|
|
|
|
Running simple test...
|
|
|
|
error: 0.37022
|
|
|
|
error: 0.44036
|
|
|
|
error: 0.376715
|
|
|
|
error: 0.307545
|
|
|
|
error: 0.463455
|
|
|
|
error: 0.426065
|
|
|
|
error: 0.416155
|
|
|
|
error: 0.288295
|
|
|
|
error: 0.400115
|
|
|
|
error: 0.46347
|
|
|
|
|
|
|
|
Testing manifold regularization with an intrinsic_regularization_strength of 10000.
|
|
|
|
number of edges generated: 49998
|
|
|
|
Running simple test...
|
|
|
|
error: 0
|
|
|
|
error: 0
|
|
|
|
error: 0
|
|
|
|
error: 0
|
|
|
|
error: 0
|
|
|
|
error: 0
|
|
|
|
error: 0
|
|
|
|
error: 0
|
|
|
|
error: 0
|
|
|
|
error: 0
|
|
|
|
|
|
|
|
|
2010-05-13 10:46:17 +08:00
|
|
|
*/
|
|
|
|
|
|
|
|
#include "dlib/manifold_regularization.h"
|
|
|
|
#include "dlib/svm.h"
|
|
|
|
#include "dlib/rand.h"
|
|
|
|
#include "dlib/statistics.h"
|
|
|
|
#include <iostream>
|
|
|
|
#include <vector>
|
2010-06-13 23:23:39 +08:00
|
|
|
#include <ctime>
|
2010-05-13 10:46:17 +08:00
|
|
|
|
|
|
|
|
|
|
|
using namespace std;
|
|
|
|
using namespace dlib;
|
|
|
|
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
|
|
|
|
// First lets make a typedef for the kind of samples we will be using.
|
|
|
|
typedef matrix<double, 0, 1> sample_type;
|
|
|
|
|
|
|
|
// We will be using the radial_basis_kernel in this example program.
|
|
|
|
typedef radial_basis_kernel<sample_type> kernel_type;
|
|
|
|
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
|
|
|
|
void generate_circle (
|
|
|
|
std::vector<sample_type>& samples,
|
|
|
|
double radius,
|
|
|
|
const long num
|
|
|
|
);
|
|
|
|
/*!
|
|
|
|
requires
|
|
|
|
- num > 0
|
|
|
|
- radius > 0
|
|
|
|
ensures
|
|
|
|
- generates num points centered at (0,0) with the given radius. Adds these
|
|
|
|
points into the given samples vector.
|
|
|
|
!*/
|
|
|
|
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
|
|
|
|
void test_manifold_regularization (
|
|
|
|
const double intrinsic_regularization_strength
|
|
|
|
);
|
|
|
|
/*!
|
|
|
|
ensures
|
|
|
|
- Runs an example test using the linear_manifold_regularizer with the given
|
|
|
|
intrinsic_regularization_strength.
|
|
|
|
!*/
|
|
|
|
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
|
|
|
|
int main()
|
|
|
|
{
|
|
|
|
// Run the test without any manifold regularization.
|
|
|
|
test_manifold_regularization(0);
|
|
|
|
|
|
|
|
// Run the test with manifold regularization. You can think of this number as
|
|
|
|
// a measure of how much we trust the manifold assumption. So if you are really
|
|
|
|
// confident that you can select neighboring points which should have the same
|
|
|
|
// classification then make this number big.
|
|
|
|
test_manifold_regularization(10000.0);
|
|
|
|
}
|
|
|
|
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
|
|
|
|
void test_manifold_regularization (
|
|
|
|
const double intrinsic_regularization_strength
|
|
|
|
)
|
|
|
|
{
|
|
|
|
cout << "Testing manifold regularization with an intrinsic_regularization_strength of "
|
|
|
|
<< intrinsic_regularization_strength << ".\n";
|
|
|
|
|
|
|
|
std::vector<sample_type> samples;
|
|
|
|
|
|
|
|
// Declare an instance of the kernel we will be using.
|
|
|
|
const kernel_type kern(0.1);
|
|
|
|
|
|
|
|
const unsigned long num_points = 100000;
|
|
|
|
|
|
|
|
// create a large dataset with two concentric circles. There will be 100000 points on each circle
|
|
|
|
// for a total of 200000 samples.
|
2010-05-14 08:52:19 +08:00
|
|
|
generate_circle(samples, 2, num_points); // circle of radius 2
|
|
|
|
generate_circle(samples, 4, num_points); // circle of radius 4
|
2010-05-13 10:46:17 +08:00
|
|
|
|
|
|
|
// Create a set of sample_pairs that tells us which samples are "close" and should thus
|
|
|
|
// be classified similarly. These edges will be used to define the manifold regularizer.
|
|
|
|
// To find these edges we use a simple function that samples point pairs randomly and
|
|
|
|
// returns the top 5% with the shortest edges.
|
|
|
|
std::vector<sample_pair> edges;
|
|
|
|
find_percent_shortest_edges_randomly(samples, squared_euclidean_distance(), 0.05, 1000000, time(0), edges);
|
|
|
|
|
|
|
|
cout << "number of edges generated: " << edges.size() << endl;
|
|
|
|
|
|
|
|
empirical_kernel_map<kernel_type> ekm;
|
|
|
|
|
|
|
|
// Since the circles are not linearly separable we will use an empirical kernel map to
|
2010-05-14 08:52:19 +08:00
|
|
|
// map them into a space where they are separable. We create an empirical_kernel_map
|
2010-05-13 10:46:17 +08:00
|
|
|
// using a random subset of our data samples as basis samples. Note, however, that even
|
|
|
|
// though the circles are linearly separable in this new space given by the empirical_kernel_map
|
|
|
|
// we still won't be able to correctly classify all the points given just the 2 labeled examples.
|
|
|
|
// We will need to make use of the nearest neighbor information stored in edges. To do that
|
2010-05-14 08:52:19 +08:00
|
|
|
// we will use the linear_manifold_regularizer.
|
2010-05-13 10:46:17 +08:00
|
|
|
ekm.load(kern, randomly_subsample(samples, 50));
|
|
|
|
|
|
|
|
// Project all the samples into the span of our 50 basis samples
|
|
|
|
for (unsigned long i = 0; i < samples.size(); ++i)
|
|
|
|
samples[i] = ekm.project(samples[i]);
|
|
|
|
|
|
|
|
|
2010-05-14 08:52:19 +08:00
|
|
|
// Now create the manifold regularizer. The result is a transformation matrix that
|
|
|
|
// embodies the manifold assumption discussed above.
|
2010-05-13 10:46:17 +08:00
|
|
|
linear_manifold_regularizer<sample_type> lmr;
|
2010-05-17 03:11:48 +08:00
|
|
|
// use_gaussian_weights is a function object that tells lmr how to weight each edge. In this
|
|
|
|
// case we let the weight decay as edges get longer. So shorter edges are more important than
|
|
|
|
// longer edges.
|
2010-05-13 10:46:17 +08:00
|
|
|
lmr.build(samples, edges, use_gaussian_weights(0.1));
|
2010-05-14 08:52:19 +08:00
|
|
|
const matrix<double> T = lmr.get_transformation_matrix(intrinsic_regularization_strength);
|
2010-05-13 10:46:17 +08:00
|
|
|
|
|
|
|
// Apply the transformation generated by the linear_manifold_regularizer to
|
|
|
|
// all our samples.
|
|
|
|
for (unsigned long i = 0; i < samples.size(); ++i)
|
|
|
|
samples[i] = T*samples[i];
|
|
|
|
|
|
|
|
|
|
|
|
// For convenience, generate a projection_function and merge the transformation
|
2010-05-17 03:11:48 +08:00
|
|
|
// matrix T into it. That is, we will have: proj(x) == T*ekm.project(x).
|
2010-05-13 10:46:17 +08:00
|
|
|
projection_function<kernel_type> proj = ekm.get_projection_function();
|
|
|
|
proj.weights = T*proj.weights;
|
|
|
|
|
|
|
|
cout << "Running simple test..." << endl;
|
|
|
|
|
|
|
|
// Pick 2 different labeled points. One on the inner circle and another on the outer.
|
|
|
|
// For each of these test points we will see if using the single plane that separates
|
2010-05-14 08:52:19 +08:00
|
|
|
// them is a good way to separate the concentric circles. We also do this a bunch
|
2010-05-13 10:46:17 +08:00
|
|
|
// of times with different randomly chosen points so we can see how robust the result is.
|
|
|
|
for (int itr = 0; itr < 10; ++itr)
|
|
|
|
{
|
|
|
|
std::vector<sample_type> test_points;
|
2010-05-14 08:52:19 +08:00
|
|
|
// generate a random point from the radius 2 circle
|
|
|
|
generate_circle(test_points, 2, 1);
|
|
|
|
// generate a random point from the radius 4 circle
|
|
|
|
generate_circle(test_points, 4, 1);
|
2010-05-13 10:46:17 +08:00
|
|
|
|
|
|
|
// project the two test points into kernel space. Recall that this projection_function
|
|
|
|
// has the manifold regularizer incorporated into it.
|
|
|
|
const sample_type class1_point = proj(test_points[0]);
|
|
|
|
const sample_type class2_point = proj(test_points[1]);
|
|
|
|
|
|
|
|
double num_wrong = 0;
|
|
|
|
|
|
|
|
// Now attempt to classify all the data samples according to which point
|
|
|
|
// they are closest to. The output of this program shows that without manifold
|
|
|
|
// regularization this test will fail but with it it will perfectly classify
|
|
|
|
// all the points.
|
|
|
|
for (unsigned long i = 0; i < samples.size(); ++i)
|
|
|
|
{
|
|
|
|
double distance_to_class1 = length(samples[i] - class1_point);
|
|
|
|
double distance_to_class2 = length(samples[i] - class2_point);
|
|
|
|
|
|
|
|
bool predicted_as_class_1 = (distance_to_class1 < distance_to_class2);
|
|
|
|
|
|
|
|
bool really_is_class_1 = (i < num_points);
|
|
|
|
|
|
|
|
// now count how many times we make a mistake
|
|
|
|
if (predicted_as_class_1 != really_is_class_1)
|
|
|
|
++num_wrong;
|
|
|
|
}
|
|
|
|
|
|
|
|
cout << "error: "<< num_wrong/samples.size() << endl;
|
|
|
|
}
|
|
|
|
|
|
|
|
cout << endl;
|
|
|
|
}
|
|
|
|
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
|
2011-05-05 05:24:12 +08:00
|
|
|
dlib::rand rnd;
|
2010-05-13 10:46:17 +08:00
|
|
|
|
|
|
|
void generate_circle (
|
|
|
|
std::vector<sample_type>& samples,
|
|
|
|
double radius,
|
|
|
|
const long num
|
|
|
|
)
|
|
|
|
{
|
|
|
|
sample_type m(2,1);
|
|
|
|
|
|
|
|
for (long i = 0; i < num; ++i)
|
|
|
|
{
|
|
|
|
double sign = 1;
|
|
|
|
if (rnd.get_random_double() < 0.5)
|
|
|
|
sign = -1;
|
|
|
|
m(0) = 2*radius*rnd.get_random_double()-radius;
|
|
|
|
m(1) = sign*sqrt(radius*radius - m(0)*m(0));
|
|
|
|
|
|
|
|
samples.push_back(m);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
|