diff --git a/dlib/dnn/loss.h b/dlib/dnn/loss.h
index 53e26dd37..326482085 100644
--- a/dlib/dnn/loss.h
+++ b/dlib/dnn/loss.h
@@ -1677,6 +1677,137 @@ namespace dlib
     template <typename SUBNET>
     using loss_mean_squared = add_loss_layer<loss_mean_squared_,SUBNET>;
 
+// ----------------------------------------------------------------------------------------
+
+    class loss_epsilon_insensitive_
+    {
+    public:
+
+        typedef float training_label_type;
+        typedef float output_label_type;
+
+        loss_epsilon_insensitive_() = default;
+        loss_epsilon_insensitive_(double eps) : eps(eps)
+        {
+            DLIB_CASSERT(eps >= 0, "You can't set a negative error epsilon.");
+        }
+
+        double get_epsilon () const { return eps; }
+        void set_epsilon(double e)
+        {
+            DLIB_CASSERT(e >= 0, "You can't set a negative error epsilon.");
+            eps = e;
+        }
+
+        template <
+            typename SUB_TYPE,
+            typename label_iterator
+            >
+        void to_label (
+            const tensor& input_tensor,
+            const SUB_TYPE& sub,
+            label_iterator iter
+        ) const
+        {
+            DLIB_CASSERT(sub.sample_expansion_factor() == 1);
+
+            const tensor& output_tensor = sub.get_output();
+
+            DLIB_CASSERT(output_tensor.nr() == 1 &&
+                         output_tensor.nc() == 1 &&
+                         output_tensor.k() == 1);
+            DLIB_CASSERT(input_tensor.num_samples() == output_tensor.num_samples());
+
+            const float* out_data = output_tensor.host();
+            for (long i = 0; i < output_tensor.num_samples(); ++i)
+            {
+                *iter++ = out_data[i];
+            }
+        }
+
+
+        template <
+            typename const_label_iterator,
+            typename SUBNET
+            >
+        double compute_loss_value_and_gradient (
+            const tensor& input_tensor,
+            const_label_iterator truth,
+            SUBNET& sub
+        ) const
+        {
+            const tensor& output_tensor = sub.get_output();
+            tensor& grad = sub.get_gradient_input();
+
+            DLIB_CASSERT(sub.sample_expansion_factor() == 1);
+            DLIB_CASSERT(input_tensor.num_samples() != 0);
+            DLIB_CASSERT(input_tensor.num_samples()%sub.sample_expansion_factor() == 0);
+            DLIB_CASSERT(input_tensor.num_samples() == grad.num_samples());
+            DLIB_CASSERT(input_tensor.num_samples() == output_tensor.num_samples());
+            DLIB_CASSERT(output_tensor.nr() == 1 &&
+                         output_tensor.nc() == 1 &&
+                         output_tensor.k() == 1);
+            DLIB_CASSERT(grad.nr() == 1 &&
+                         grad.nc() == 1 &&
+                         grad.k() == 1);
+
+            // The loss we output is the average loss over the mini-batch.
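+            // The loss's derivative with respect to a network output is 0 inside the
+            // +/-eps dead zone, and +1 or -1 (times the 1/num_samples scaling) when the
+            // error falls above or below that zone.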
+            const double scale = 1.0/output_tensor.num_samples();
+            double loss = 0;
+            float* g = grad.host_write_only();
+            const float* out_data = output_tensor.host();
+            for (long i = 0; i < output_tensor.num_samples(); ++i)
+            {
+                const float y = *truth++;
+                const float err = out_data[i]-y;
+                if (err > eps)
+                {
+                    loss += scale*(err-eps);
+                    g[i] = scale;
+                }
+                else if (err < -eps)
+                {
+                    loss += scale*(eps-err);
+                    g[i] = -scale;
+                }
+                else
+                {
+                    // host_write_only() returns uninitialized memory, so the gradient must
+                    // be written explicitly even inside the epsilon dead zone.
+                    g[i] = 0;
+                }
+            }
+            return loss;
+        }
+
+        friend void serialize(const loss_epsilon_insensitive_& item, std::ostream& out)
+        {
+            serialize("loss_epsilon_insensitive_", out);
+            serialize(item.eps, out);
+        }
+
+        friend void deserialize(loss_epsilon_insensitive_& item, std::istream& in)
+        {
+            std::string version;
+            deserialize(version, in);
+            if (version != "loss_epsilon_insensitive_")
+                throw serialization_error("Unexpected version found while deserializing dlib::loss_epsilon_insensitive_.");
+            deserialize(item.eps, in);
+        }
+
+        friend std::ostream& operator<<(std::ostream& out, const loss_epsilon_insensitive_& item)
+        {
+            out << "loss_epsilon_insensitive epsilon: " << item.eps;
+            return out;
+        }
+
+        friend void to_xml(const loss_epsilon_insensitive_& item, std::ostream& out)
+        {
+            out << "<loss_epsilon_insensitive epsilon='" << item.eps << "'/>";
+        }
+
+    private:
+        double eps = 1;
+
+    };
+
+    template <typename SUBNET>
+    using loss_epsilon_insensitive = add_loss_layer<loss_epsilon_insensitive_,SUBNET>;
+
 // ----------------------------------------------------------------------------------------
 
     class loss_mean_squared_multioutput_
diff --git a/dlib/dnn/loss_abstract.h b/dlib/dnn/loss_abstract.h
index 7ef8f702d..fdb81cb54 100644
--- a/dlib/dnn/loss_abstract.h
+++ b/dlib/dnn/loss_abstract.h
@@ -779,6 +779,106 @@ namespace dlib
     template <typename SUBNET>
     using loss_ranking = add_loss_layer<loss_ranking_,SUBNET>;
 
+// ----------------------------------------------------------------------------------------
+
+    class loss_epsilon_insensitive_
+    {
+        /*!
+            WHAT THIS OBJECT REPRESENTS
+                This object implements the loss layer interface defined above by
+                EXAMPLE_LOSS_LAYER_.  In particular, it implements the epsilon insensitive
+                loss, which is appropriate for regression problems.  Specifically, the
+                loss function is:
+                    loss(y1,y2) = abs(y1-y2)<epsilon ? 0 : abs(y1-y2)-epsilon
+                That is, it behaves like the absolute error loss except that mistakes of
+                magnitude smaller than epsilon are ignored.
+        !*/
+    public:
+
+        typedef float training_label_type;
+        typedef float output_label_type;
+
+        loss_epsilon_insensitive_(
+        ) = default;
+        /*!
+            ensures
+                - #get_epsilon() == 1
+        !*/
+
+        loss_epsilon_insensitive_(
+            double eps
+        );
+        /*!
+            requires
+                - eps >= 0
+            ensures
+                - #get_epsilon() == eps
+        !*/
+
+        double get_epsilon (
+        ) const;
+        /*!
+            ensures
+                - returns the epsilon value used in the loss function.  Mistakes in the
+                  regressor smaller than get_epsilon() are ignored by the loss function.
+        !*/
+
+        void set_epsilon(
+            double eps
+        );
+        /*!
+            requires
+                - eps >= 0
+            ensures
+                - #get_epsilon() == eps
+        !*/
+
+        template <
+            typename SUB_TYPE,
+            typename label_iterator
+            >
+        void to_label (
+            const tensor& input_tensor,
+            const SUB_TYPE& sub,
+            label_iterator iter
+        ) const;
+        /*!
+            This function has the same interface as EXAMPLE_LOSS_LAYER_::to_label() except
+            it has the additional calling requirements that:
+                - sub.get_output().nr() == 1
+                - sub.get_output().nc() == 1
+                - sub.get_output().k() == 1
+                - sub.get_output().num_samples() == input_tensor.num_samples()
+                - sub.sample_expansion_factor() == 1
+            and the output label is the predicted continuous variable.
+        !*/
+
+        template <
+            typename const_label_iterator,
+            typename SUBNET
+            >
+        double compute_loss_value_and_gradient (
+            const tensor& input_tensor,
+            const_label_iterator truth,
+            SUBNET& sub
+        ) const;
+        /*!
+            This function has the same interface as EXAMPLE_LOSS_LAYER_::compute_loss_value_and_gradient()
+            except it has the additional calling requirements that:
+                - sub.get_output().nr() == 1
+                - sub.get_output().nc() == 1
+                - sub.get_output().k() == 1
+                - sub.get_output().num_samples() == input_tensor.num_samples()
+                - sub.sample_expansion_factor() == 1
+        !*/
+
+    };
+
+    template <typename SUBNET>
+    using loss_epsilon_insensitive = add_loss_layer<loss_epsilon_insensitive_,SUBNET>;
+
 // ----------------------------------------------------------------------------------------
 
     class loss_mean_squared_
diff --git a/dlib/test/dnn.cpp b/dlib/test/dnn.cpp
index 703b89ec6..61077e642 100644
--- a/dlib/test/dnn.cpp
+++ b/dlib/test/dnn.cpp
@@ -2116,6 +2116,53 @@ namespace
 
     }
 
+// ----------------------------------------------------------------------------------------
+
+    void test_simple_linear_regression_eil()
+    {
+        print_spinner();
+        const int num_samples = 1000;
+        ::std::vector<matrix<double>> x(num_samples);
+        ::std::vector<float> y(num_samples);
+        ::std::default_random_engine generator(16);
+        ::std::normal_distribution<float> distribution(0,0.0001);
+        const float true_intercept = 50.0;
+        const float true_slope = 10.0;
+        for ( int ii = 0; ii < num_samples; ++ii )
+        {
+            const double val = static_cast<double>(ii)/10;
+            matrix<double> tmp(1,1);
+            tmp = val;
+            x[ii] = tmp;
+            y[ii] = (true_intercept + true_slope*static_cast<float>(val) + distribution(generator));
+        }
+
+        using net_type = loss_epsilon_insensitive<fc<1, input<matrix<double>>>>;
+        net_type net(0.01);
+        layer<1>(net).layer_details().set_bias_learning_rate_multiplier(300);
+        sgd defsolver(0,0.9);
+        dnn_trainer<net_type> trainer(net, defsolver);
+        trainer.set_learning_rate(1e-5);
+        trainer.set_min_learning_rate(1e-8);
+        trainer.set_mini_batch_size(50);
+        trainer.set_max_num_epochs(570);
+        trainer.train(x, y);
+
+        const float slope = layer<1>(net).layer_details().get_weights().host()[0];
+        const float slope_error = abs(true_slope - slope);
+        const float intercept = layer<1>(net).layer_details().get_biases().host()[0];
+        const float intercept_error = abs(true_intercept - intercept);
+        const float eps_slope = 0.01, eps_intercept = 0.1;
+
+        dlog << LINFO << "slope_error: "<< slope_error;
+        dlog << LINFO << "intercept_error: "<< intercept_error;
+        DLIB_TEST_MSG(slope_error <= eps_slope,
+                      "Expected slope = " << true_slope << " Estimated slope = " << slope << " Error limit = " << eps_slope);
+        DLIB_TEST_MSG(intercept_error <= eps_intercept,
+                      "Expected intercept = " << true_intercept << " Estimated intercept = " << intercept << " Error limit = " << eps_intercept);
+
+    }
+
 // ----------------------------------------------------------------------------------------
 
     void test_simple_linear_regression_with_mult_prev()
@@ -2950,6 +2997,7 @@ namespace
         test_copy_tensor_add_to_cpu();
         test_concat();
         test_simple_linear_regression();
+        test_simple_linear_regression_eil();
         test_simple_linear_regression_with_mult_prev();
         test_multioutput_linear_regression();
         test_simple_autoencoder();
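
For reference, a minimal sketch of how the new loss layer could be used for regression, mirroring the unit test above.  The network layout, toy data, epsilon value, and trainer settings are illustrative only and are not part of the patch.

    #include <dlib/dnn.h>
    #include <iostream>
    using namespace dlib;

    int main()
    {
        // A 1D linear regressor trained with the epsilon insensitive loss.
        using net_type = loss_epsilon_insensitive<fc<1, input<matrix<double>>>>;
        net_type net(0.1);  // errors smaller than 0.1 are ignored by the loss

        // Toy data following y = 2*x + 1 (hypothetical, chosen for illustration).
        std::vector<matrix<double>> x;
        std::vector<float> y;
        for (int i = 0; i < 100; ++i)
        {
            matrix<double> samp(1,1);
            samp = i/10.0;
            x.push_back(samp);
            y.push_back(2*(i/10.0f) + 1);
        }

        dnn_trainer<net_type> trainer(net, sgd(0,0.9));
        trainer.set_learning_rate(1e-5);
        trainer.set_min_learning_rate(1e-7);
        trainer.set_mini_batch_size(25);
        trainer.set_max_num_epochs(500);
        trainer.train(x, y);

        // to_label() is used under the hood: net(sample) returns the predicted float.
        const float pred = net(x[0]);
        std::cout << "prediction for x[0]: " << pred << std::endl;
        return 0;
    }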