mirror of https://github.com/davisking/dlib.git
synced 2024-11-01 10:14:53 +08:00
Added loss_epsilon_insensitive_ layer
This commit is contained in:
parent 6137540b27
commit 2b0a4a6f6d
131 dlib/dnn/loss.h
@@ -1677,6 +1677,137 @@ namespace dlib

    template <typename SUBNET>
    using loss_mean_squared = add_loss_layer<loss_mean_squared_, SUBNET>;

// ----------------------------------------------------------------------------------------

    class loss_epsilon_insensitive_
    {
    public:

        typedef float training_label_type;
        typedef float output_label_type;

        loss_epsilon_insensitive_() = default;
        loss_epsilon_insensitive_(double eps) : eps(eps)
        {
            DLIB_CASSERT(eps >= 0, "You can't set a negative error epsilon.");
        }

        double get_epsilon () const { return eps; }
        void set_epsilon(double e)
        {
            DLIB_CASSERT(e >= 0, "You can't set a negative error epsilon.");
            eps = e;
        }

        template <
            typename SUB_TYPE,
            typename label_iterator
            >
        void to_label (
            const tensor& input_tensor,
            const SUB_TYPE& sub,
            label_iterator iter
        ) const
        {
            DLIB_CASSERT(sub.sample_expansion_factor() == 1);

            const tensor& output_tensor = sub.get_output();

            DLIB_CASSERT(output_tensor.nr() == 1 &&
                         output_tensor.nc() == 1 &&
                         output_tensor.k() == 1);
            DLIB_CASSERT(input_tensor.num_samples() == output_tensor.num_samples());

            const float* out_data = output_tensor.host();
            for (long i = 0; i < output_tensor.num_samples(); ++i)
            {
                *iter++ = out_data[i];
            }
        }

        template <
            typename const_label_iterator,
            typename SUBNET
            >
        double compute_loss_value_and_gradient (
            const tensor& input_tensor,
            const_label_iterator truth,
            SUBNET& sub
        ) const
        {
            const tensor& output_tensor = sub.get_output();
            tensor& grad = sub.get_gradient_input();

            DLIB_CASSERT(sub.sample_expansion_factor() == 1);
            DLIB_CASSERT(input_tensor.num_samples() != 0);
            DLIB_CASSERT(input_tensor.num_samples()%sub.sample_expansion_factor() == 0);
            DLIB_CASSERT(input_tensor.num_samples() == grad.num_samples());
            DLIB_CASSERT(input_tensor.num_samples() == output_tensor.num_samples());
            DLIB_CASSERT(output_tensor.nr() == 1 &&
                         output_tensor.nc() == 1 &&
                         output_tensor.k() == 1);
            DLIB_CASSERT(grad.nr() == 1 &&
                         grad.nc() == 1 &&
                         grad.k() == 1);

            // The loss we output is the average loss over the mini-batch.
            const double scale = 1.0/output_tensor.num_samples();
            double loss = 0;
            float* g = grad.host_write_only();
            const float* out_data = output_tensor.host();
            for (long i = 0; i < output_tensor.num_samples(); ++i)
            {
                const float y = *truth++;
                const float err = out_data[i]-y;
                if (err > eps)
                {
                    loss += scale*(err-eps);
                    g[i] = scale;
                }
                else if (err < -eps)
                {
                    loss += scale*(eps-err);
                    g[i] = -scale;
                }
                else
                {
                    // Inside the epsilon dead zone the loss is flat, so the gradient is
                    // zero.  This must be written explicitly because host_write_only()
                    // returns uninitialized memory.
                    g[i] = 0;
                }
            }
            return loss;
        }

        friend void serialize(const loss_epsilon_insensitive_& item, std::ostream& out)
        {
            serialize("loss_epsilon_insensitive_", out);
            serialize(item.eps, out);
        }

        friend void deserialize(loss_epsilon_insensitive_& item, std::istream& in)
        {
            std::string version;
            deserialize(version, in);
            if (version != "loss_epsilon_insensitive_")
                throw serialization_error("Unexpected version found while deserializing dlib::loss_epsilon_insensitive_.");
            deserialize(item.eps, in);
        }

        friend std::ostream& operator<<(std::ostream& out, const loss_epsilon_insensitive_& item)
        {
            out << "loss_epsilon_insensitive epsilon: " << item.eps;
            return out;
        }

        friend void to_xml(const loss_epsilon_insensitive_& item, std::ostream& out)
        {
            out << "<loss_epsilon_insensitive_ epsilon='" << item.eps << "'/>";
        }

    private:
        double eps = 1;

    };

    template <typename SUBNET>
    using loss_epsilon_insensitive = add_loss_layer<loss_epsilon_insensitive_, SUBNET>;

// ----------------------------------------------------------------------------------------

    class loss_mean_squared_multioutput_
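For reference, the per-sample loss and gradient implemented by compute_loss_value_and_gradient above reduce to the following standalone sketch (illustrative only, not part of the commit; the batch version additionally scales everything by 1/num_samples):

    #include <cmath>

    // Per-sample epsilon-insensitive loss: zero inside the dead zone,
    // absolute error minus epsilon outside it.
    double eil_loss(double pred, double truth, double eps)
    {
        const double err = std::abs(pred - truth);
        return err < eps ? 0 : err - eps;
    }

    // Matching subgradient of the loss with respect to the network output.
    double eil_gradient(double pred, double truth, double eps)
    {
        const double err = pred - truth;
        if (err > eps)  return 1;    // prediction too high: descent pushes the output down
        if (err < -eps) return -1;   // prediction too low: descent pushes the output up
        return 0;                    // inside the dead zone: no gradient
    }

For example, with eps = 1 a prediction of 10.4 against a target of 10 incurs no loss, while a prediction of 12.5 incurs a loss of 1.5 and a gradient of +1.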
@@ -779,6 +779,106 @@ namespace dlib

    template <typename SUBNET>
    using loss_ranking = add_loss_layer<loss_ranking_, SUBNET>;

// ----------------------------------------------------------------------------------------

    class loss_epsilon_insensitive_
    {
        /*!
            WHAT THIS OBJECT REPRESENTS
                This object implements the loss layer interface defined above by
                EXAMPLE_LOSS_LAYER_.  In particular, it implements the epsilon insensitive
                loss, which is appropriate for regression problems.  This loss function is:
                    loss(y1,y2) = abs(y1-y2)<epsilon ? 0 : abs(y1-y2)-epsilon

                Therefore, the loss is basically just the abs() loss except there is a dead
                zone around zero, causing the loss to not care about mistakes of magnitude
                smaller than epsilon.
        !*/
    public:

        typedef float training_label_type;
        typedef float output_label_type;

        loss_epsilon_insensitive_(
        ) = default;
        /*!
            ensures
                - #get_epsilon() == 1
        !*/

        loss_epsilon_insensitive_(
            double eps
        );
        /*!
            requires
                - eps >= 0
            ensures
                - #get_epsilon() == eps
        !*/

        double get_epsilon (
        ) const;
        /*!
            ensures
                - returns the epsilon value used in the loss function.  Mistakes in the
                  regressor smaller than get_epsilon() are ignored by the loss function.
        !*/

        void set_epsilon(
            double eps
        );
        /*!
            requires
                - eps >= 0
            ensures
                - #get_epsilon() == eps
        !*/

        template <
            typename SUB_TYPE,
            typename label_iterator
            >
        void to_label (
            const tensor& input_tensor,
            const SUB_TYPE& sub,
            label_iterator iter
        ) const;
        /*!
            This function has the same interface as EXAMPLE_LOSS_LAYER_::to_label() except
            it has the additional calling requirements that:
                - sub.get_output().nr() == 1
                - sub.get_output().nc() == 1
                - sub.get_output().k() == 1
                - sub.get_output().num_samples() == input_tensor.num_samples()
                - sub.sample_expansion_factor() == 1
            and the output label is the predicted continuous variable.
        !*/

        template <
            typename const_label_iterator,
            typename SUBNET
            >
        double compute_loss_value_and_gradient (
            const tensor& input_tensor,
            const_label_iterator truth,
            SUBNET& sub
        ) const;
        /*!
            This function has the same interface as EXAMPLE_LOSS_LAYER_::compute_loss_value_and_gradient()
            except it has the additional calling requirements that:
                - sub.get_output().nr() == 1
                - sub.get_output().nc() == 1
                - sub.get_output().k() == 1
                - sub.get_output().num_samples() == input_tensor.num_samples()
                - sub.sample_expansion_factor() == 1
        !*/

    };

    template <typename SUBNET>
    using loss_epsilon_insensitive = add_loss_layer<loss_epsilon_insensitive_, SUBNET>;
// ----------------------------------------------------------------------------------------

    class loss_mean_squared_
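As a usage illustration (not part of the diff, with illustrative layer sizes), the new loss drops into a regression network the same way loss_mean_squared does; the test added below builds exactly such a net:

    #include <dlib/dnn.h>
    using namespace dlib;

    // Hypothetical single-output regression net trained with the
    // epsilon-insensitive loss.
    using net_type = loss_epsilon_insensitive<fc<1, relu<fc<10, input<matrix<float>>>>>>;

    int main()
    {
        net_type net(0.1);  // regression errors smaller than 0.1 are ignored by the loss
        // ... fill samples/targets and train with dnn_trainer<net_type> as usual.
    }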
@@ -2116,6 +2116,53 @@ namespace

    }

// ----------------------------------------------------------------------------------------

    void test_simple_linear_regression_eil()
    {
        print_spinner();
        const int num_samples = 1000;
        ::std::vector<matrix<double>> x(num_samples);
        ::std::vector<float> y(num_samples);
        ::std::default_random_engine generator(16);
        ::std::normal_distribution<float> distribution(0,0.0001);
        const float true_intercept = 50.0;
        const float true_slope = 10.0;
        for ( int ii = 0; ii < num_samples; ++ii )
        {
            const double val = static_cast<double>(ii)/10;
            matrix<double> tmp(1,1);
            tmp = val;
            x[ii] = tmp;
            y[ii] = (true_intercept + true_slope*static_cast<float>(val) + distribution(generator));
        }

        using net_type = loss_epsilon_insensitive<fc<1, input<matrix<double>>>>;
        net_type net(0.01);
        layer<1>(net).layer_details().set_bias_learning_rate_multiplier(300);
        sgd defsolver(0,0.9);
        dnn_trainer<net_type> trainer(net, defsolver);
        trainer.set_learning_rate(1e-5);
        trainer.set_min_learning_rate(1e-8);
        trainer.set_mini_batch_size(50);
        trainer.set_max_num_epochs(570);
        trainer.train(x, y);

        const float slope = layer<1>(net).layer_details().get_weights().host()[0];
        const float slope_error = abs(true_slope - slope);
        const float intercept = layer<1>(net).layer_details().get_biases().host()[0];
        const float intercept_error = abs(true_intercept - intercept);
        const float eps_slope = 0.01, eps_intercept = 0.1;

        dlog << LINFO << "slope_error: "<< slope_error;
        dlog << LINFO << "intercept_error: "<< intercept_error;
        DLIB_TEST_MSG(slope_error <= eps_slope,
                      "Expected slope = " << true_slope << " Estimated slope = " << slope << " Error limit = " << eps_slope);
        DLIB_TEST_MSG(intercept_error <= eps_intercept,
                      "Expected intercept = " << true_intercept << " Estimated intercept = " << intercept << " Error limit = " << eps_intercept);

    }

// ----------------------------------------------------------------------------------------

    void test_simple_linear_regression_with_mult_prev()
@@ -2950,6 +2997,7 @@ namespace

        test_copy_tensor_add_to_cpu();
        test_concat();
        test_simple_linear_regression();
        test_simple_linear_regression_eil();
        test_simple_linear_regression_with_mult_prev();
        test_multioutput_linear_regression();
        test_simple_autoencoder();