loss multibinary log (#2141)

* add loss_multilabel_log

* add alias template for loss_multilabel_log

* add missing assert

* increment truth iterator

* rename loss to loss_multibinary_log

* rename loss to loss_multibinary_log

* explicitly capture dims in lambda
Adrià Arrufat 2020-08-24 11:15:16 +09:00 committed by GitHub
parent d7ca478b79
commit dd06c1169b
3 changed files with 265 additions and 0 deletions


@@ -5,6 +5,7 @@
#include "loss_abstract.h"
#include "core.h"
#include "utilities.h"
#include "../matrix.h"
#include "../cuda/tensor_tools.h"
#include "../geometry.h"
@@ -784,6 +785,123 @@ namespace dlib
inline bool operator== (const loss_multimulticlass_log_::classifier_output& lhs, const std::string& rhs)
{ return rhs == static_cast<const std::string&>(lhs); }
// ----------------------------------------------------------------------------------------
class loss_multibinary_log_
{
public:
typedef std::vector<float> training_label_type;
typedef std::vector<float> output_label_type;
template <
typename SUB_TYPE,
typename label_iterator
>
void to_label (
const tensor& input_tensor,
const SUB_TYPE& sub,
label_iterator iter
) const
{
const tensor& output_tensor = sub.get_output();
DLIB_CASSERT(sub.sample_expansion_factor() == 1);
DLIB_CASSERT(output_tensor.nr() == 1 && output_tensor.nc() == 1);
DLIB_CASSERT(input_tensor.num_samples() == output_tensor.num_samples());
// Note that output_tensor.k() should match the number of labels.
const float* out_data = output_tensor.host();
for (long i = 0; i < output_tensor.num_samples(); ++i)
{
output_label_type predictions(output_tensor.k(), 0);
for (long k = 0; k < output_tensor.k(); ++k)
{
predictions[k] = out_data[i * output_tensor.k() + k];
}
*iter++ = std::move(predictions);
}
}
template <
typename const_label_iterator,
typename SUBNET
>
double compute_loss_value_and_gradient (
const tensor& input_tensor,
const_label_iterator truth,
SUBNET& sub
) const
{
const tensor& output_tensor = sub.get_output();
tensor& grad = sub.get_gradient_input();
DLIB_CASSERT(sub.sample_expansion_factor() == 1);
DLIB_CASSERT(input_tensor.num_samples() != 0);
DLIB_CASSERT(input_tensor.num_samples() % sub.sample_expansion_factor() == 0);
DLIB_CASSERT(input_tensor.num_samples() == grad.num_samples());
DLIB_CASSERT(input_tensor.num_samples() == output_tensor.num_samples());
DLIB_CASSERT(output_tensor.nr() == 1 && output_tensor.nc() == 1);
DLIB_CASSERT(grad.nr() == 1 && grad.nc() == 1);
tt::sigmoid(grad, output_tensor);
// The loss we output is the average loss over the mini-batch.
const double scale = 1.0 / output_tensor.num_samples();
double loss = 0;
float* g = grad.host();
const float* out_data = output_tensor.host();
for (long i = 0; i < output_tensor.num_samples(); ++i, ++truth)
{
for (long k = 0; k < output_tensor.k(); ++k)
{
const float y = (*truth)[k];
DLIB_CASSERT(y != 0, "y: " << y);
const size_t idx = i * output_tensor.k() + k;
if (y > 0)
{
const float temp = log1pexp(-out_data[idx]);
loss += y * scale * temp;
g[idx] = y * scale * (g[idx] - 1);
}
else
{
const float temp = -(-out_data[idx] - log1pexp(-out_data[idx]));
loss += -y * scale * temp;
g[idx] = -y * scale * g[idx];
}
}
}
return loss;
}
friend void serialize(const loss_multibinary_log_&, std::ostream& out)
{
serialize("loss_multibinary_log_", out);
}
friend void deserialize(loss_multibinary_log_&, std::istream& in)
{
std::string version;
deserialize(version, in);
if (version != "loss_multibinary_log_")
throw serialization_error("Unexpected version found while deserializing dlib::loss_multibinary_log_.");
}
friend std::ostream& operator<<(std::ostream& out, const loss_multibinary_log_& )
{
out << "loss_multibinary_log";
return out;
}
friend void to_xml(const loss_multibinary_log_& /*item*/, std::ostream& out)
{
out << "<loss_multibinary_log/>";
}
};
template <typename SUBNET>
using loss_multibinary_log = add_loss_layer<loss_multibinary_log_, SUBNET>;
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------

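For reference, the per-output loss and gradient computed by compute_loss_value_and_gradient() above reduce to a binary cross-entropy with logits, weighted by the label magnitude. Writing z for the raw network output of one category, y for its signed label, t = 1 if y > 0 and t = 0 otherwise, and N for the mini-batch size, the code computes

    \ell(z, y) = \frac{|y|}{N}\,\log\bigl(1 + e^{-\operatorname{sign}(y)\,z}\bigr),
    \qquad
    \frac{\partial \ell}{\partial z} = \frac{|y|}{N}\,\bigl(\sigma(z) - t\bigr),

where \sigma(z) = 1/(1 + e^{-z}) is the value placed into grad by tt::sigmoid before the loop adjusts it.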

@@ -693,6 +693,89 @@ namespace dlib
inline bool operator== (const loss_multimulticlass_log_::classifier_output& lhs, const std::string& rhs)
{ return rhs == static_cast<const std::string&>(lhs); }
// ----------------------------------------------------------------------------------------
class loss_multibinary_log_
{
/*!
WHAT THIS OBJECT REPRESENTS
This object implements the loss layer interface defined above by
EXAMPLE_LOSS_LAYER_. In particular, it implements a collection of
binary classifiers using the log loss, which is appropriate for
multi-label classification problems where each sample can belong to zero
or more categories. Therefore, there are two possible kinds of labels
when using this loss: positive (> 0) and negative (< 0).
The absolute value of the label represents its weight. Putting a larger
weight on a sample increases the importance of getting its prediction
correct during training. A good rule of thumb is to use weights with
absolute value 1 unless you have a very unbalanced training dataset,
in which case give a larger weight to the class with fewer training examples.
This loss will cause the network to produce outputs > 0 when predicting a
member of the positive classes and values < 0 otherwise.
To be more specific, this object contains a sigmoid layer followed by a
cross-entropy layer.
An example will make its use clear. Suppose you want to make a classifier
for cats and dogs, but what happens if both appear in one image? Or
neither? This layer allows you to handle those cases by using the
following labels:
- std::vector<float> dog_label = {1.f, -1.f};
- std::vector<float> cat_label = {-1.f , 1.f};
- std::vector<float> both_label = {1.f, 1.f};
- std::vector<float> none_label = {-1.f, -1.f};
!*/
public:
typedef std::vector<float> training_label_type;
typedef std::vector<float> output_label_type;
template <
typename SUB_TYPE,
typename label_iterator
>
void to_label (
const tensor& input_tensor,
const SUB_TYPE& sub,
label_iterator iter
) const;
/*!
This function has the same interface as EXAMPLE_LOSS_LAYER_::to_label() except
it has the additional calling requirements that:
- sub.get_output().nr() == 1
- sub.get_output().nc() == 1
- sub.get_output().num_samples() == input_tensor.num_samples()
- sub.sample_expansion_factor() == 1
and the output labels are the raw scores for each classified object. If a score
is > 0 then the classifier is predicting the +1 class for that category, otherwise
it is predicting the -1 class.
!*/
template <
typename const_label_iterator,
typename SUBNET
>
double compute_loss_value_and_gradient (
const tensor& input_tensor,
const_label_iterator truth,
SUBNET& sub
) const;
/*!
This function has the same interface as EXAMPLE_LOSS_LAYER_::compute_loss_value_and_gradient()
except it has the additional calling requirements that:
- sub.get_output().nr() == 1
- sub.get_output().nc() == 1
- sub.get_output().num_samples() == input_tensor.num_samples()
- sub.sample_expansion_factor() == 1
- all values pointed to by truth are std::vectors of non-zero elements.
Nominally they should be +1 or -1, each indicating the desired class label.
!*/
};
template <typename SUBNET>
using loss_multibinary_log = add_loss_layer<loss_multibinary_log_, SUBNET>;
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------

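As a rough usage sketch of the new loss (mirroring the test below; the layer sizes, synthetic data, and trainer settings are illustrative only), a small two-output classifier could look like this:

#include <dlib/dnn.h>
#include <cmath>
#include <iostream>
#include <vector>

using namespace dlib;

// Two independent binary outputs, e.g. the dog/cat example from the documentation above.
using net_type = loss_multibinary_log<fc<2, relu<fc<10, input<matrix<float, 0, 1>>>>>>;

int main()
{
    // Tiny synthetic dataset: label each 3D point by whether its norm is small
    // (first output) and/or large (second output). Both can be true at once.
    dlib::rand rnd;
    std::vector<matrix<float, 0, 1>> samples;
    std::vector<std::vector<float>> labels;
    for (int i = 0; i < 256; ++i)
    {
        matrix<float, 0, 1> x = matrix_cast<float>(randm(3, 1)) * rnd.get_double_in_range(1, 9);
        const float norm = std::sqrt(sum(squared(x)));
        labels.push_back({norm < 6 ? 1.f : -1.f, norm >= 3 ? 1.f : -1.f});
        samples.push_back(std::move(x));
    }

    net_type net;
    dnn_trainer<net_type> trainer(net);
    trainer.set_learning_rate(0.1);
    trainer.set_min_learning_rate(1e-3);
    trainer.set_mini_batch_size(64);
    trainer.train(samples, labels);

    // Predictions are raw scores: > 0 means the positive class for that output.
    const auto preds = net(samples);
    std::cout << "first sample scores: " << preds[0][0] << " " << preds[0][1] << std::endl;
}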

@@ -3348,6 +3348,69 @@ namespace
}
}
// ----------------------------------------------------------------------------------------
void test_loss_multibinary_log()
{
print_spinner();
dlib::rand rnd;
const long dims = 3;
const std::vector<float> empty_label(2, -1.f);
std::vector<matrix<float, 0, 1>> samples;
std::vector<std::vector<float>> labels(128, empty_label);
for (size_t i = 0; i < labels.size(); ++i)
{
matrix<float, 0, 1> x = matrix_cast<float>(randm(dims, 1)) * rnd.get_double_in_range(1, 9);
const auto norm = sqrt(sum(squared(x)));
if (norm < 3)
{
labels[i][0] = 1.f;
}
else if (3 <= norm && norm < 6)
{
labels[i][0] = 1.f;
labels[i][1] = 1.f;
}
else
{
labels[i][1] = 1.f;
}
samples.push_back(std::move(x));
}
using net_type = loss_multibinary_log<fc<2, relu<bn_fc<fc<10, input<matrix<float, 0, 1>>>>>>>;
net_type net;
auto compute_error = [&net, &samples, &labels, dims]()
{
const auto preds = net(samples);
double num_wrong = 0;
for (size_t i = 0; i < labels.size(); ++i)
{
for (size_t j = 0; j < labels[i].size(); ++j)
{
if ((labels[i][j] == 1 && preds[i][j] < 0) ||
(labels[i][j] == -1 && preds[i][j] > 0))
++num_wrong;
}
}
return num_wrong / labels.size() / dims;
};
dnn_trainer<net_type> trainer(net);
const auto error_before = compute_error();
trainer.set_learning_rate(0.1);
trainer.set_iterations_without_progress_threshold(10);
trainer.set_mini_batch_size(128);
trainer.set_min_learning_rate(1e-3);
trainer.train(samples, labels);
const auto error_after = compute_error();
DLIB_TEST_MSG(error_after < error_before && error_after == 0, "multibinary_log error not zero after training: " << error_after);
}
// ----------------------------------------------------------------------------------------
void test_tensor_resize_bilinear(long samps, long k, long nr, long nc, long onr, long onc)
@@ -3771,6 +3834,7 @@ namespace
test_loss_multiclass_per_pixel_with_noise_and_pixels_to_ignore();
test_loss_multiclass_per_pixel_weighted();
test_loss_multiclass_log_weighted();
test_loss_multibinary_log();
test_serialization();
test_loss_dot();
test_loss_multimulticlass_log();