|
|
@ -2587,6 +2587,202 @@ namespace
|
|
|
|
DLIB_TEST_MSG(error_after < error_before, "multi channel error increased after training");
|
|
|
|
DLIB_TEST_MSG(error_after < error_before, "multi channel error increased after training");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
void test_loss_binary_log_per_pixel_learned_params_on_trivial_two_pixel_task()
|
|
|
|
|
|
|
|
{
|
|
|
|
|
|
|
|
print_spinner();
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
::std::vector<matrix<float>> x({ matrix<float,2,1>({ -1, 1 }) });
|
|
|
|
|
|
|
|
::std::vector<matrix<float>> y({ matrix<float,2,1>({ -1, 1 }) });
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
using net_type = loss_binary_log_per_pixel<con<1,1,1,1,1,input<matrix<float>>>>;
|
|
|
|
|
|
|
|
net_type net;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
dnn_trainer<net_type> trainer(net, sgd(0,0));
|
|
|
|
|
|
|
|
trainer.set_learning_rate(1e7);
|
|
|
|
|
|
|
|
trainer.set_max_num_epochs(1);
|
|
|
|
|
|
|
|
trainer.train(x, y);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
const tensor& learned_params = layer<1>(net).layer_details().get_layer_params();
|
|
|
|
|
|
|
|
const float* learned_params_data = learned_params.host();
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
DLIB_TEST(learned_params_data[0] > 1e5);
|
|
|
|
|
|
|
|
DLIB_TEST(abs(learned_params_data[1]) < 1);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
void test_loss_binary_log_per_pixel_outputs_on_trivial_task()
|
|
|
|
|
|
|
|
{
|
|
|
|
|
|
|
|
print_spinner();
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
constexpr int input_height = 7;
|
|
|
|
|
|
|
|
constexpr int input_width = 5;
|
|
|
|
|
|
|
|
constexpr int output_height = input_height;
|
|
|
|
|
|
|
|
constexpr int output_width = input_width;
|
|
|
|
|
|
|
|
constexpr int num_samples = 7;
|
|
|
|
|
|
|
|
constexpr int filter_height = 3;
|
|
|
|
|
|
|
|
constexpr int filter_width = 3;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
::std::vector<matrix<double>> x(num_samples);
|
|
|
|
|
|
|
|
::std::vector<matrix<float>> y(num_samples);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
matrix<double> xtmp(input_height, input_width);
|
|
|
|
|
|
|
|
matrix<float> ytmp(output_height, output_width);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
::std::default_random_engine generator(16);
|
|
|
|
|
|
|
|
::std::normal_distribution<double> n01(0);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
const auto z = 0.674490; // This should give us a 50/50 split between the classes
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Generate training data: random inputs x, and the corresponding target outputs y
|
|
|
|
|
|
|
|
for (int ii = 0; ii < num_samples; ++ii) {
|
|
|
|
|
|
|
|
for (int jj = 0; jj < input_height; ++jj) {
|
|
|
|
|
|
|
|
for (int kk = 0; kk < input_width; ++kk) {
|
|
|
|
|
|
|
|
xtmp(jj, kk) = n01(generator);
|
|
|
|
|
|
|
|
ytmp(jj, kk) = std::abs(xtmp(jj, kk)) > z ? 1.f : -1.f;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
x[ii] = xtmp;
|
|
|
|
|
|
|
|
y[ii] = ytmp;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
using net_type = loss_binary_log_per_pixel<con<1,1,1,1,1,relu<con<10,1,1,1,1,input<matrix<double>>>>>>;
|
|
|
|
|
|
|
|
net_type net;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
dnn_trainer<net_type> trainer(net, sgd(0, 0.9));
|
|
|
|
|
|
|
|
trainer.set_learning_rate(1);
|
|
|
|
|
|
|
|
trainer.set_max_num_epochs(800);
|
|
|
|
|
|
|
|
trainer.train(x, y);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// The learning task is easy, so the net should have no problem
|
|
|
|
|
|
|
|
// getting all the outputs right.
|
|
|
|
|
|
|
|
const auto response = net(x);
|
|
|
|
|
|
|
|
for (int ii = 0; ii < num_samples; ++ii)
|
|
|
|
|
|
|
|
for (int jj = 0; jj < output_height; ++jj)
|
|
|
|
|
|
|
|
for (int kk = 0; kk < output_width; ++kk)
|
|
|
|
|
|
|
|
DLIB_TEST((response[ii](jj,kk) > 0) == (y[ii](jj,kk) > 0));
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
void test_loss_binary_log_per_pixel_with_noise_and_pixels_to_ignore()
|
|
|
|
|
|
|
|
{
|
|
|
|
|
|
|
|
// Test learning when some pixels are to be ignored, etc.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
print_spinner();
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
constexpr int input_height = 5;
|
|
|
|
|
|
|
|
constexpr int input_width = 7;
|
|
|
|
|
|
|
|
constexpr int output_height = input_height;
|
|
|
|
|
|
|
|
constexpr int output_width = input_width;
|
|
|
|
|
|
|
|
const int num_samples = 1000;
|
|
|
|
|
|
|
|
const double ignore_probability = 0.5;
|
|
|
|
|
|
|
|
const double noise_probability = 0.05;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
::std::default_random_engine generator(16);
|
|
|
|
|
|
|
|
::std::bernoulli_distribution ignore(ignore_probability);
|
|
|
|
|
|
|
|
::std::bernoulli_distribution noise_occurrence(noise_probability);
|
|
|
|
|
|
|
|
::std::bernoulli_distribution noisy_label(0.5);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
::std::vector<matrix<double>> x(num_samples);
|
|
|
|
|
|
|
|
::std::vector<matrix<float>> y(num_samples);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
::std::vector<int> truth_histogram(2);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
matrix<double> xtmp(input_height, input_width);
|
|
|
|
|
|
|
|
matrix<float> ytmp(output_height, output_width);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// The function to be learned.
|
|
|
|
|
|
|
|
const auto ground_truth = [](const matrix<double>& x, int row, int column) {
|
|
|
|
|
|
|
|
double sum = 0.0;
|
|
|
|
|
|
|
|
const int first_column = std::max(0, column - 1);
|
|
|
|
|
|
|
|
const int last_column = std::min(static_cast<int>(x.nc() - 1), column + 1);
|
|
|
|
|
|
|
|
for (int c = first_column; c <= last_column; ++c) {
|
|
|
|
|
|
|
|
sum += x(row, c);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
DLIB_TEST(sum < 2.0 * (last_column - first_column + 1));
|
|
|
|
|
|
|
|
return sum > (last_column - first_column + 1);
|
|
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
for ( int ii = 0; ii < num_samples; ++ii ) {
|
|
|
|
|
|
|
|
for ( int jj = 0; jj < input_height; ++jj ) {
|
|
|
|
|
|
|
|
for ( int kk = 0; kk < input_width; ++kk ) {
|
|
|
|
|
|
|
|
// Generate numbers between 0 and 2.
|
|
|
|
|
|
|
|
double value = static_cast<double>(ii + jj + kk) / 10.0;
|
|
|
|
|
|
|
|
value -= (static_cast<int>(value) / 2) * 2;
|
|
|
|
|
|
|
|
DLIB_TEST(value >= 0.0 && value < 2.0);
|
|
|
|
|
|
|
|
xtmp(jj, kk) = value;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
x[ii] = xtmp;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
for ( int jj = 0; jj < output_height; ++jj ) {
|
|
|
|
|
|
|
|
for ( int kk = 0; kk < output_width; ++kk ) {
|
|
|
|
|
|
|
|
const bool truth = ground_truth(x[ii], jj, kk);
|
|
|
|
|
|
|
|
++truth_histogram[truth];
|
|
|
|
|
|
|
|
if (ignore(generator)) {
|
|
|
|
|
|
|
|
ytmp(jj, kk) = 0.f;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
else if (noise_occurrence(generator)) {
|
|
|
|
|
|
|
|
ytmp(jj, kk) = noisy_label(generator) ? 1.f : -1.f;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
else {
|
|
|
|
|
|
|
|
ytmp(jj, kk) = truth ? 1.f : -1.f;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
y[ii] = ytmp;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
const int num_total_elements = num_samples * output_height * output_width;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
{ // Require a reasonably balanced truth histogram in order to make sure that a trivial classifier is not enough
|
|
|
|
|
|
|
|
const int required_min_histogram_value = static_cast<int>(::std::ceil(num_total_elements / 2.0 * 0.375));
|
|
|
|
|
|
|
|
for (auto histogram_value : truth_histogram) {
|
|
|
|
|
|
|
|
DLIB_TEST_MSG(histogram_value >= required_min_histogram_value,
|
|
|
|
|
|
|
|
"Histogram value = " << histogram_value << ", required = " << required_min_histogram_value);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
using net_type = loss_binary_log_per_pixel<con<1,1,input_width,1,1,input<matrix<double>>>>;
|
|
|
|
|
|
|
|
net_type net;
|
|
|
|
|
|
|
|
sgd defsolver(0,0.9);
|
|
|
|
|
|
|
|
dnn_trainer<net_type> trainer(net, defsolver);
|
|
|
|
|
|
|
|
trainer.set_learning_rate(0.1);
|
|
|
|
|
|
|
|
trainer.set_min_learning_rate(0.01);
|
|
|
|
|
|
|
|
trainer.set_mini_batch_size(50);
|
|
|
|
|
|
|
|
trainer.set_max_num_epochs(170);
|
|
|
|
|
|
|
|
trainer.train(x, y);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
const ::std::vector<matrix<float>> predictions = net(x);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
int num_correct = 0;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
for ( int ii = 0; ii < num_samples; ++ii ) {
|
|
|
|
|
|
|
|
const matrix<float>& prediction = predictions[ii];
|
|
|
|
|
|
|
|
DLIB_TEST(prediction.nr() == output_height);
|
|
|
|
|
|
|
|
DLIB_TEST(prediction.nc() == output_width);
|
|
|
|
|
|
|
|
for ( int jj = 0; jj < output_height; ++jj )
|
|
|
|
|
|
|
|
for ( int kk = 0; kk < output_width; ++kk )
|
|
|
|
|
|
|
|
if ( (prediction(jj, kk) > 0.f) == ground_truth(x[ii], jj, kk) )
|
|
|
|
|
|
|
|
++num_correct;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// First some sanity checks.
|
|
|
|
|
|
|
|
const int num_correct_max = num_total_elements;
|
|
|
|
|
|
|
|
DLIB_TEST(num_correct_max == ::std::accumulate(truth_histogram.begin(), truth_histogram.end(), 0));
|
|
|
|
|
|
|
|
DLIB_TEST_MSG(num_correct <= num_correct_max,
|
|
|
|
|
|
|
|
"Number of correctly classified elements = " << num_correct << ", max = " << num_correct_max);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// This is the real test, verifying that we have actually learned something.
|
|
|
|
|
|
|
|
const int num_correct_required = static_cast<int>(::std::ceil(0.9 * num_correct_max));
|
|
|
|
|
|
|
|
DLIB_TEST_MSG(num_correct >= num_correct_required,
|
|
|
|
|
|
|
|
"Number of correctly classified elements = " << num_correct << ", required = " << num_correct_required);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
void test_loss_multiclass_per_pixel_learned_params_on_trivial_single_pixel_task()
|
|
|
|
void test_loss_multiclass_per_pixel_learned_params_on_trivial_single_pixel_task()
|
|
|
@ -3429,6 +3625,9 @@ namespace
|
|
|
|
test_multioutput_linear_regression();
|
|
|
|
test_multioutput_linear_regression();
|
|
|
|
test_simple_autoencoder();
|
|
|
|
test_simple_autoencoder();
|
|
|
|
test_loss_mean_squared_per_channel_and_pixel();
|
|
|
|
test_loss_mean_squared_per_channel_and_pixel();
|
|
|
|
|
|
|
|
test_loss_binary_log_per_pixel_learned_params_on_trivial_two_pixel_task();
|
|
|
|
|
|
|
|
test_loss_binary_log_per_pixel_outputs_on_trivial_task();
|
|
|
|
|
|
|
|
test_loss_binary_log_per_pixel_with_noise_and_pixels_to_ignore();
|
|
|
|
test_loss_multiclass_per_pixel_learned_params_on_trivial_single_pixel_task();
|
|
|
|
test_loss_multiclass_per_pixel_learned_params_on_trivial_single_pixel_task();
|
|
|
|
test_loss_multiclass_per_pixel_activations_on_trivial_single_pixel_task();
|
|
|
|
test_loss_multiclass_per_pixel_activations_on_trivial_single_pixel_task();
|
|
|
|
test_loss_multiclass_per_pixel_outputs_on_trivial_task();
|
|
|
|
test_loss_multiclass_per_pixel_outputs_on_trivial_task();
|
|
|
|