mirror of https://github.com/davisking/dlib.git
loss multibinary log (#2141)
* add loss_multilabel_log
* add alias template for loss_multilabel_log
* add missing assert
* increment truth iterator
* rename loss to loss_multibinary_log
* rename loss to loss_multibinary_log
* explicitly capture dims in lambda
This commit is contained in:
parent d7ca478b79
commit dd06c1169b
118  dlib/dnn/loss.h
@@ -5,6 +5,7 @@
#include "loss_abstract.h"
#include "core.h"
#include "utilities.h"
#include "../matrix.h"
#include "../cuda/tensor_tools.h"
#include "../geometry.h"
@@ -784,6 +785,123 @@ namespace dlib
    inline bool operator== (const loss_multimulticlass_log_::classifier_output& lhs, const std::string& rhs)
    { return rhs == static_cast<const std::string&>(lhs); }

// ----------------------------------------------------------------------------------------

    class loss_multibinary_log_
    {
    public:
        typedef std::vector<float> training_label_type;
        typedef std::vector<float> output_label_type;

        template <
            typename SUB_TYPE,
            typename label_iterator
            >
        void to_label (
            const tensor& input_tensor,
            const SUB_TYPE& sub,
            label_iterator iter
        ) const
        {
            const tensor& output_tensor = sub.get_output();
            DLIB_CASSERT(sub.sample_expansion_factor() == 1);
            DLIB_CASSERT(output_tensor.nr() == 1 && output_tensor.nc() == 1);
            DLIB_CASSERT(input_tensor.num_samples() == output_tensor.num_samples());

            // Note that output_tensor.k() should match the number of labels.

            const float* out_data = output_tensor.host();
            for (long i = 0; i < output_tensor.num_samples(); ++i)
            {
                output_label_type predictions(output_tensor.k(), 0);
                for (long k = 0; k < output_tensor.k(); ++k)
                {
                    predictions[k] = out_data[i * output_tensor.k() + k];
                }
                *iter++ = std::move(predictions);
            }
        }

        template <
            typename const_label_iterator,
            typename SUBNET
            >
        double compute_loss_value_and_gradient (
            const tensor& input_tensor,
            const_label_iterator truth,
            SUBNET& sub
        ) const
        {
            const tensor& output_tensor = sub.get_output();
            tensor& grad = sub.get_gradient_input();

            DLIB_CASSERT(sub.sample_expansion_factor() == 1);
            DLIB_CASSERT(input_tensor.num_samples() != 0);
            DLIB_CASSERT(input_tensor.num_samples() % sub.sample_expansion_factor() == 0);
            DLIB_CASSERT(input_tensor.num_samples() == grad.num_samples());
            DLIB_CASSERT(input_tensor.num_samples() == output_tensor.num_samples());
            DLIB_CASSERT(output_tensor.nr() == 1 && output_tensor.nc() == 1);
            DLIB_CASSERT(grad.nr() == 1 && grad.nc() == 1);

            tt::sigmoid(grad, output_tensor);

            // The loss we output is the average loss over the mini-batch.
            const double scale = 1.0 / output_tensor.num_samples();
            double loss = 0;
            float* g = grad.host();
            const float* out_data = output_tensor.host();
            for (long i = 0; i < output_tensor.num_samples(); ++i, ++truth)
            {
                for (long k = 0; k < output_tensor.k(); ++k)
                {
                    const float y = (*truth)[k];
                    DLIB_CASSERT(y != 0, "y: " << y);
                    const size_t idx = i * output_tensor.k() + k;
                    if (y > 0)
                    {
                        const float temp = log1pexp(-out_data[idx]);
                        loss += y * scale * temp;
                        g[idx] = y * scale * (g[idx] - 1);
                    }
                    else
                    {
                        const float temp = -(-out_data[idx] - log1pexp(-out_data[idx]));
                        loss += -y * scale * temp;
                        g[idx] = -y * scale * g[idx];
                    }
                }
            }
            return loss;
        }

        friend void serialize(const loss_multibinary_log_&, std::ostream& out)
        {
            serialize("loss_multibinary_log_", out);
        }

        friend void deserialize(loss_multibinary_log_&, std::istream& in)
        {
            std::string version;
            deserialize(version, in);
            if (version != "loss_multibinary_log_")
                throw serialization_error("Unexpected version found while deserializing dlib::loss_multibinary_log_.");
        }

        friend std::ostream& operator<<(std::ostream& out, const loss_multibinary_log_& )
        {
            out << "loss_multibinary_log";
            return out;
        }

        friend void to_xml(const loss_multibinary_log_& /*item*/, std::ostream& out)
        {
            out << "<loss_multibinary_log/>";
        }
    };

    template <typename SUBNET>
    using loss_multibinary_log = add_loss_layer<loss_multibinary_log_, SUBNET>;

// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
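As a reading aid (a minimal sketch, not part of this commit), the loop above is per-element binary cross-entropy on a logit: for a logit z and a signed, weighted label y (nominally +1 or -1), the loss contribution is |y| * log(1 + exp(-sign(y) * z)), and the gradient with respect to z is |y| * (sigmoid(z) - 1) for positive labels and |y| * sigmoid(z) for negative ones; the layer then multiplies everything by scale = 1/num_samples to average over the mini-batch. The standalone C++ sketch below (local helper names, no dlib dependency) spells that out for a single element:

    #include <cmath>
    #include <iostream>

    int main()
    {
        // Local re-implementations of the helpers used by the layer above.
        auto log1pexp = [](double x) { return std::log1p(std::exp(x)); };
        auto sigmoid  = [](double x) { return 1.0 / (1.0 + std::exp(-x)); };

        const double z = 0.7;   // network output (logit) for one category of one sample
        const double y = -1.0;  // label: negative class with weight 1

        double loss = 0, grad = 0;
        if (y > 0)
        {
            loss = y * log1pexp(-z);        // y * log(1 + exp(-z))
            grad = y * (sigmoid(z) - 1.0);  // matches g[idx] = y * (sigmoid(z) - 1)
        }
        else
        {
            // z + log1pexp(-z) == log1pexp(z), which is the form written in the layer above.
            loss = -y * log1pexp(z);
            grad = -y * sigmoid(z);         // matches g[idx] = -y * sigmoid(z)
        }
        // The layer additionally multiplies loss and gradient by 1.0 / num_samples.
        std::cout << "loss = " << loss << ", grad = " << grad << "\n";
        return 0;
    }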
@@ -693,6 +693,89 @@ namespace dlib
    inline bool operator== (const loss_multimulticlass_log_::classifier_output& lhs, const std::string& rhs)
    { return rhs == static_cast<const std::string&>(lhs); }

// ----------------------------------------------------------------------------------------

    class loss_multibinary_log_
    {
        /*!
            WHAT THIS OBJECT REPRESENTS
                This object implements the loss layer interface defined above by
                EXAMPLE_LOSS_LAYER_.  In particular, it implements a collection of
                binary classifiers using the log loss, which is appropriate for
                binary classification problems where each sample can belong to zero
                or more categories.  Therefore, when using this loss there are two
                possible classes of labels: positive (> 0) and negative (< 0).
                The absolute value of the label represents its weight.  Putting a
                larger weight on a sample increases the importance of getting its
                prediction correct during training.  A good rule of thumb is to use
                weights with absolute value 1 unless you have a very unbalanced
                training dataset; in that case, give a larger weight to the class
                with fewer training examples.

                This loss will cause the network to produce outputs > 0 when
                predicting a member of the positive classes and values < 0 otherwise.

                To be more specific, this object contains a sigmoid layer followed
                by a cross-entropy layer.

                An example will make its use clear.  Suppose you want to make a
                classifier for cats and dogs, but what happens if both appear in one
                image?  Or neither?  This layer allows you to handle those cases by
                using the following labels:
                    - std::vector<float> dog_label  = {1.f, -1.f};
                    - std::vector<float> cat_label  = {-1.f, 1.f};
                    - std::vector<float> both_label = {1.f, 1.f};
                    - std::vector<float> none_label = {-1.f, -1.f};
        !*/
    public:
        typedef std::vector<float> training_label_type;
        typedef std::vector<float> output_label_type;

        template <
            typename SUB_TYPE,
            typename label_iterator
            >
        void to_label (
            const tensor& input_tensor,
            const SUB_TYPE& sub,
            label_iterator iter
        ) const;
        /*!
            This function has the same interface as EXAMPLE_LOSS_LAYER_::to_label() except
            it has the additional calling requirements that:
                - sub.get_output().nr() == 1
                - sub.get_output().nc() == 1
                - sub.get_output().num_samples() == input_tensor.num_samples()
                - sub.sample_expansion_factor() == 1
            and the output labels are the raw scores for each classified object.  If a
            score is > 0 then the classifier is predicting the +1 class for that
            category, otherwise it is predicting the -1 class.
        !*/

        template <
            typename const_label_iterator,
            typename SUBNET
            >
        double compute_loss_value_and_gradient (
            const tensor& input_tensor,
            const_label_iterator truth,
            SUBNET& sub
        ) const;
        /*!
            This function has the same interface as EXAMPLE_LOSS_LAYER_::compute_loss_value_and_gradient()
            except it has the additional calling requirements that:
                - sub.get_output().nr() == 1
                - sub.get_output().nc() == 1
                - sub.get_output().num_samples() == input_tensor.num_samples()
                - sub.sample_expansion_factor() == 1
                - all values pointed to by truth are std::vectors of non-zero elements.
                  Nominally they should be +1 or -1, each indicating the desired class label.
        !*/
    };

    template <typename SUBNET>
    using loss_multibinary_log = add_loss_layer<loss_multibinary_log_, SUBNET>;

// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
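To make the cat/dog example in the documentation above concrete, here is a minimal usage sketch (not from this commit; the network layout, feature dimensionality, and synthetic data are invented for illustration) showing how loss_multibinary_log is typically wired into a network and trained with dnn_trainer, mirroring the test added further down:

    #include <dlib/dnn.h>
    #include <iostream>
    #include <vector>

    using namespace dlib;

    // Hypothetical toy network: 2 outputs, one score per binary category (dog, cat).
    using net_type = loss_multibinary_log<fc<2, relu<fc<16, input<matrix<float, 0, 1>>>>>>;

    int main()
    {
        // Labels follow the convention documented above: one +1/-1 entry per category.
        const std::vector<float> dog_label = {1.f, -1.f};
        const std::vector<float> cat_label = {-1.f, 1.f};

        // Tiny synthetic dataset of 3-dimensional feature vectors, purely for illustration.
        std::vector<matrix<float, 0, 1>> samples;
        std::vector<std::vector<float>> labels;
        for (int i = 0; i < 64; ++i)
        {
            matrix<float, 0, 1> x = matrix_cast<float>(randm(3, 1));
            samples.push_back(x);
            labels.push_back(i % 2 == 0 ? dog_label : cat_label);
        }

        net_type net;
        dnn_trainer<net_type> trainer(net);
        trainer.set_learning_rate(0.1);
        trainer.set_min_learning_rate(1e-3);
        trainer.set_mini_batch_size(32);
        trainer.train(samples, labels);

        // Predictions are raw scores; a score > 0 means that category is predicted present.
        const auto predictions = net(samples);
        std::cout << "first sample scores: " << predictions[0][0] << ", " << predictions[0][1] << "\n";
        return 0;
    }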
@@ -3348,6 +3348,69 @@ namespace
        }
    }

// ----------------------------------------------------------------------------------------

    void test_loss_multibinary_log()
    {
        print_spinner();
        dlib::rand rnd;

        const long dims = 3;
        const std::vector<float> empty_label(2, -1.f);
        std::vector<matrix<float, 0, 1>> samples;
        std::vector<std::vector<float>> labels(128, empty_label);

        for (size_t i = 0; i < labels.size(); ++i)
        {
            matrix<float, 0, 1> x = matrix_cast<float>(randm(dims, 1)) * rnd.get_double_in_range(1, 9);
            const auto norm = sqrt(sum(squared(x)));
            if (norm < 3)
            {
                labels[i][0] = 1.f;
            }
            else if (3 <= norm && norm < 6)
            {
                labels[i][0] = 1.f;
                labels[i][1] = 1.f;
            }
            else
            {
                labels[i][1] = 1.f;
            }
            samples.push_back(std::move(x));
        }

        using net_type = loss_multibinary_log<fc<2, relu<bn_fc<fc<10, input<matrix<float, 0, 1>>>>>>>;
        net_type net;

        auto compute_error = [&net, &samples, &labels, dims]()
        {
            const auto preds = net(samples);
            double num_wrong = 0;
            for (size_t i = 0; i < labels.size(); ++i)
            {
                for (size_t j = 0; j < labels[i].size(); ++j)
                {
                    if ((labels[i][j] == 1 && preds[i][j] < 0) ||
                        (labels[i][j] == -1 && preds[i][j] > 0))
                        ++num_wrong;
                }
            }
            return num_wrong / labels.size() / dims;
        };

        dnn_trainer<net_type> trainer(net);
        const auto error_before = compute_error();
        trainer.set_learning_rate(0.1);
        trainer.set_iterations_without_progress_threshold(10);
        trainer.set_mini_batch_size(128);
        trainer.set_min_learning_rate(1e-3);
        trainer.train(samples, labels);
        const auto error_after = compute_error();

        DLIB_TEST_MSG(error_after < error_before && error_after == 0, "multibinary_log error increased after training");
    }

// ----------------------------------------------------------------------------------------

    void test_tensor_resize_bilinear(long samps, long k, long nr, long nc, long onr, long onc)
@@ -3771,6 +3834,7 @@ namespace
        test_loss_multiclass_per_pixel_with_noise_and_pixels_to_ignore();
        test_loss_multiclass_per_pixel_weighted();
        test_loss_multiclass_log_weighted();
        test_loss_multibinary_log();
        test_serialization();
        test_loss_dot();
        test_loss_multimulticlass_log();