add loss_mean_squared_per_channel_and_pixel (#1863)

Adrià Arrufat 2019-08-28 20:25:08 +09:00 committed by Davis E. King
parent efd4e27488
commit 170877da88
3 changed files with 266 additions and 2 deletions

dlib/dnn/loss.h

@@ -2891,7 +2891,149 @@ namespace dlib
// ----------------------------------------------------------------------------------------
template<long _num_channels>
class loss_mean_squared_per_channel_and_pixel_
{
public:
typedef std::array<matrix<float>, _num_channels> training_label_type;
typedef std::array<matrix<float>, _num_channels> output_label_type;
template <
typename SUB_TYPE,
typename label_iterator
>
void to_label (
const tensor& input_tensor,
const SUB_TYPE& sub,
label_iterator iter
) const
{
DLIB_CASSERT(sub.sample_expansion_factor() == 1);
const tensor& output_tensor = sub.get_output();
DLIB_CASSERT(output_tensor.k() == _num_channels, "output k = " << output_tensor.k());
DLIB_CASSERT(input_tensor.num_samples() == output_tensor.num_samples());
const float* out_data = output_tensor.host();
for (long i = 0; i < output_tensor.num_samples(); ++i, ++iter)
{
for (long k = 0; k < output_tensor.k(); ++k)
{
(*iter)[k].set_size(output_tensor.nr(), output_tensor.nc());
for (long r = 0; r < output_tensor.nr(); ++r)
{
for (long c = 0; c < output_tensor.nc(); ++c)
{
(*iter)[k].operator()(r, c) = out_data[tensor_index(output_tensor, i, k, r, c)];
}
}
}
}
}
template <
typename const_label_iterator,
typename SUBNET
>
double compute_loss_value_and_gradient (
const tensor& input_tensor,
const_label_iterator truth,
SUBNET& sub
) const
{
const tensor& output_tensor = sub.get_output();
tensor& grad = sub.get_gradient_input();
DLIB_CASSERT(sub.sample_expansion_factor() == 1);
DLIB_CASSERT(input_tensor.num_samples() != 0);
DLIB_CASSERT(input_tensor.num_samples() % sub.sample_expansion_factor() == 0);
DLIB_CASSERT(input_tensor.num_samples() == grad.num_samples());
DLIB_CASSERT(input_tensor.num_samples() == output_tensor.num_samples());
DLIB_CASSERT(output_tensor.k() == _num_channels);
DLIB_CASSERT(output_tensor.nr() == grad.nr() &&
output_tensor.nc() == grad.nc() &&
output_tensor.k() == grad.k());
for (long idx = 0; idx < output_tensor.num_samples(); ++idx)
{
const_label_iterator truth_matrix_ptr = (truth + idx);
DLIB_CASSERT((*truth_matrix_ptr).size() == _num_channels);
for (long k = 0; k < output_tensor.k(); ++k)
{
DLIB_CASSERT((*truth_matrix_ptr)[k].nr() == output_tensor.nr() &&
(*truth_matrix_ptr)[k].nc() == output_tensor.nc(),
"truth size = " << (*truth_matrix_ptr)[k].nr() << " x " << (*truth_matrix_ptr)[k].nc() << ", "
"output size = " << output_tensor.nr() << " x " << output_tensor.nc());
}
}
// The loss we output is the average loss over the mini-batch, and also over every channel and element of the matrix outputs.
const double scale = 1.0 / (output_tensor.num_samples() * output_tensor.k() * output_tensor.nr() * output_tensor.nc());
double loss = 0;
float* const g = grad.host();
const float* out_data = output_tensor.host();
for (long i = 0; i < output_tensor.num_samples(); ++i, ++truth)
{
for (long k = 0; k < output_tensor.k(); ++k)
{
for (long r = 0; r < output_tensor.nr(); ++r)
{
for (long c = 0; c < output_tensor.nc(); ++c)
{
const float y = (*truth)[k].operator()(r, c);
const size_t idx = tensor_index(output_tensor, i, k, r, c);
const float temp1 = y - out_data[idx];
const float temp2 = scale*temp1;
loss += temp2*temp1;
g[idx] = -temp2;
}
}
}
}
return loss;
}
friend void serialize(const loss_mean_squared_per_channel_and_pixel_& , std::ostream& out)
{
serialize("loss_mean_squared_per_channel_and_pixel_", out);
}
friend void deserialize(loss_mean_squared_per_channel_and_pixel_& , std::istream& in)
{
std::string version;
deserialize(version, in);
if (version != "loss_mean_squared_per_channel_and_pixel_")
throw serialization_error("Unexpected version found while deserializing dlib::loss_mean_squared_per_channel_and_pixel_.");
}
friend std::ostream& operator<<(std::ostream& out, const loss_mean_squared_per_channel_and_pixel_& )
{
out << "loss_mean_squared_per_channel_and_pixel";
return out;
}
friend void to_xml(const loss_mean_squared_per_channel_and_pixel_& /*item*/, std::ostream& out)
{
out << "<loss_mean_squared_per_channel_and_pixel/>";
}
private:
static size_t tensor_index(const tensor& t, long sample, long k, long row, long column)
{
// See: https://github.com/davisking/dlib/blob/4dfeb7e186dd1bf6ac91273509f687293bd4230a/dlib/dnn/tensor_abstract.h#L38
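// e.g. (hypothetical sizes) with t.k() == 2, t.nr() == 3, t.nc() == 4:
//   tensor_index(t, 1, 0, 2, 3) == ((1*2 + 0)*3 + 2)*4 + 3 == 35
// i.e. samples vary slowest, then channels, then rows, then columns.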
return ((sample * t.k() + k) * t.nr() + row) * t.nc() + column;
}
};
template <long num_channels, typename SUBNET>
using loss_mean_squared_per_channel_and_pixel = add_loss_layer<loss_mean_squared_per_channel_and_pixel_<num_channels>, SUBNET>;
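For reference, a sketch of the quantity the loop in compute_loss_value_and_gradient above computes: with N = output_tensor.num_samples(), K = _num_channels, and an R x C spatial output, the returned loss and the per-element gradient written into grad are

\[
L = \frac{1}{NKRC}\sum_{i,k,r,c}\left(y_{ikrc} - \hat{y}_{ikrc}\right)^2,
\qquad
g_{ikrc} = -\frac{1}{NKRC}\left(y_{ikrc} - \hat{y}_{ikrc}\right),
\]

where y is the truth label and \hat{y} is the network output. Note the stored gradient drops the factor of 2 from the analytic derivative of L, which matches the convention used by the other mean squared losses in this file.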
// ----------------------------------------------------------------------------------------
class loss_dot_
{
public:

dlib/dnn/loss_abstract.h

@@ -1495,7 +1495,68 @@ namespace dlib
// ----------------------------------------------------------------------------------------
template<long _num_channels>
class loss_mean_squared_per_channel_and_pixel_
{
/*!
WHAT THIS OBJECT REPRESENTS
This object implements the loss layer interface defined above by
EXAMPLE_LOSS_LAYER_. In particular, it implements the mean squared loss,
which is appropriate for regression problems. It is basically just like
loss_mean_squared_per_pixel_ except that it computes the loss over all
channels, not just the first one.
!*/
public:
typedef std::array<matrix<float>, _num_channels> training_label_type;
typedef std::array<matrix<float>, _num_channels> output_label_type;
template <
typename SUB_TYPE,
typename label_iterator
>
void to_label (
const tensor& input_tensor,
const SUB_TYPE& sub,
label_iterator iter
) const;
/*!
This function has the same interface as EXAMPLE_LOSS_LAYER_::to_label() except
it has the additional calling requirements that:
- sub.get_output().num_samples() == input_tensor.num_samples()
- sub.get_output().k() == _num_channels
- sub.sample_expansion_factor() == 1
and the output labels are the predicted continuous variables.
!*/
template <
typename const_label_iterator,
typename SUBNET
>
double compute_loss_value_and_gradient (
const tensor& input_tensor,
const_label_iterator truth,
SUBNET& sub
) const;
/*!
This function has the same interface as EXAMPLE_LOSS_LAYER_::compute_loss_value_and_gradient()
except it has the additional calling requirements that:
- sub.get_output().k() == _num_channels
- sub.get_output().num_samples() == input_tensor.num_samples()
- sub.sample_expansion_factor() == 1
- for all idx such that 0 <= idx < sub.get_output().num_samples() and
all k such that 0 <= k < _num_channels:
- sub.get_output().nr() == (*(truth + idx))[k].nr()
- sub.get_output().nc() == (*(truth + idx))[k].nc()
!*/
};
template <long num_channels, typename SUBNET>
using loss_mean_squared_per_channel_and_pixel = add_loss_layer<loss_mean_squared_per_channel_and_pixel_<num_channels>, SUBNET>;
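To illustrate how the new loss is meant to be used, here is a minimal usage sketch that is not part of this commit; the 3-channel output, the layer sizes, and the rgb_pixel input are arbitrary choices for illustration:

#include <dlib/dnn.h>
#include <array>
using namespace dlib;

// With dlib's default conv padding these stride-1 layers preserve nr()/nc(), so each
// of the 3 predicted channels has the same size as the input image.
using net_type = loss_mean_squared_per_channel_and_pixel<3,
                     con<3, 1, 1, 1, 1,
                     relu<con<16, 3, 3, 1, 1,
                     input<matrix<rgb_pixel>>>>>>;

int main()
{
    net_type net;

    matrix<rgb_pixel> img(32, 32);
    assign_all_pixels(img, rgb_pixel(0, 0, 0));

    // The output label type is one matrix<float> per channel.
    std::array<matrix<float>, 3> prediction = net(img);

    // Training labels have the same shape, e.g.:
    //   std::vector<std::array<matrix<float>, 3>> targets;  // one array per image
    //   dnn_trainer<net_type> trainer(net);
    //   trainer.train(images, targets);
    return 0;
}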
// ----------------------------------------------------------------------------------------
class loss_dot_
{
/*!
WHAT THIS OBJECT REPRESENTS

dlib/test/dnn.cpp

@@ -2495,6 +2495,66 @@ namespace
DLIB_TEST_MSG(error_after < 1e-6, "Autoencoder error after training = " << error_after);
}
// ----------------------------------------------------------------------------------------
void test_loss_mean_squared_per_channel_and_pixel()
{
print_spinner();
const int num_samples = 1000;
const long num_channels = 2;
const long dimension = 3;
::std::vector<matrix<float>> inputs;
::std::vector<::std::array<matrix<float>, num_channels>> labels;
for (int i = 0; i < num_samples; ++i)
{
matrix<float> x = matrix_cast<float>(randm(5, dimension));
matrix<float> w = matrix_cast<float>(randm(num_channels, 5));
matrix<float> y = w * x;
DLIB_CASSERT(y.nr() == num_channels);
::std::array<matrix<float>, num_channels> y_arr;
// convert y to an array of matrices
for (long c = 0; c < num_channels; ++c)
{
y_arr[c] = rowm(y, c);
}
inputs.push_back(::std::move(x));
labels.push_back(::std::move(y_arr));
}
const long num_outputs = num_channels * dimension;
using net_type = loss_mean_squared_per_channel_and_pixel<num_channels,
extract<0, num_channels, 1, dimension,
fc<num_outputs,
relu<bn_fc<fc<500,
input<matrix<float>>>>>>>>;
net_type net;
const auto compute_error = [&inputs, &labels, &net, num_channels]()
{
const auto out = net(inputs);
double error = 0.0;
for (size_t i = 0; i < out.size(); ++i)
{
for (size_t c = 0; c < num_channels; ++c)
{
error += mean(squared(out[i][c] - labels[i][c]));
}
}
return error / out.size() / num_channels;
};
const auto error_before = compute_error();
dnn_trainer<net_type> trainer(net);
trainer.set_learning_rate(0.1);
trainer.set_iterations_without_progress_threshold(500);
trainer.set_min_learning_rate(1e-6);
trainer.set_mini_batch_size(50);
trainer.train(inputs, labels);
const auto error_after = compute_error();
DLIB_TEST_MSG(error_after < error_before, "multi channel error increased after training");
}
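Once trained, reading an individual prediction back out of the per-channel label type is plain array and matrix indexing; the short follow-up below is not part of the commit and reuses the net, inputs, num_channels, and dimension names from the test above:

// Hypothetical follow-up to the test above: run one sample through the trained net
// and inspect channel 0 of its 1 x dimension prediction.
const ::std::array<matrix<float>, num_channels> prediction = net(inputs[0]);
DLIB_TEST(prediction[0].nr() == 1 && prediction[0].nc() == dimension);
const float first_value = prediction[0](0, 0);  // channel 0, row 0, column 0
(void)first_value;  // just showing how to index a single channel/pixel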
// ----------------------------------------------------------------------------------------
void test_loss_multiclass_per_pixel_learned_params_on_trivial_single_pixel_task()
@@ -3252,6 +3312,7 @@ namespace
test_simple_linear_regression_with_mult_prev();
test_multioutput_linear_regression();
test_simple_autoencoder();
test_loss_mean_squared_per_channel_and_pixel();
test_loss_multiclass_per_pixel_learned_params_on_trivial_single_pixel_task();
test_loss_multiclass_per_pixel_activations_on_trivial_single_pixel_task();
test_loss_multiclass_per_pixel_outputs_on_trivial_task();