Changed the loss layer interface to use two typedefs, output_label_type and
training_label_type, instead of a single label_type.  This way, the label
type used for training can be distinct from the type output by the network.
This change breaks backwards compatibility with the previous API.
Davis King 2016-11-05 21:12:33 -04:00
parent 25ccbc4294
commit 1c19a80a39
6 changed files with 92 additions and 67 deletions
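
The practical effect of the change is that every loss layer now advertises two label types: the one it consumes during training and the one it produces at inference. A minimal sketch of the new surface (the loss name here is illustrative, not part of this commit):

    // Hypothetical loss layer showing the split interface.
    class my_loss_
    {
    public:
        // Consumed by dnn_trainer::train() and compute_loss().
        typedef unsigned long training_label_type;
        // Produced by add_loss_layer::operator() via to_label().
        typedef unsigned long output_label_type;
        // ... to_tensor(), to_label(), compute_loss_value_and_gradient(), etc.
    };

For every loss touched in this commit the two types happen to coincide, but the split makes it legal for them to differ, for example training against a compact label while outputting a richer result.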

View File

@ -2120,12 +2120,12 @@ namespace dlib
{
private:
// We don't want anyone making these no_label_type objects. They are here only to
// allow add_loss_layer::label_type and dnn_trainer::label_type to exist which avoids
// needing to overload add_loss_layer and dnn_trainer for supervised and unsupervised
// losses. It also can be a type to use in template metaprogramming to indicate
// "no label". So here we make the constructor private with the exception that
// add_loss_layer objects can make it (again, just to simplify add_loss_layer's
// implementation).
// allow add_loss_layer::training_label_type and dnn_trainer::training_label_type
// to exist which avoids needing to overload add_loss_layer and dnn_trainer for
// supervised and unsupervised losses. It also can be a type to use in template
// metaprogramming to indicate "no label". So here we make the constructor private
// with the exception that add_loss_layer objects can make it (again, just to
// simplify add_loss_layer's implementation).
no_label_type(){};
template <typename LOSS_DETAILS, typename SUBNET> friend class add_loss_layer;
template < typename net_type, typename solver_type > friend class dnn_trainer;
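
Since training_label_type resolves to no_label_type for unsupervised losses, code can branch on supervision at compile time. A sketch of the kind of check the trainer itself performs later in this commit (is_supervised is an illustrative helper, not dlib API):

    #include <type_traits>
    #include <dlib/dnn.h>

    // Compile-time supervision test; mirrors the trainer's
    // std::is_same<no_label_type, training_label_type> check below.
    template <typename net_type>
    constexpr bool is_supervised()
    {
        return !std::is_same<dlib::no_label_type,
                             typename net_type::training_label_type>::value;
    }
    // e.g. static_assert(is_supervised<my_net_type>(), "this code needs labels");
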
@ -2137,14 +2137,25 @@ namespace dlib
class add_loss_layer
{
template <typename T, typename enabled=void>
struct get_loss_layer_label_type
struct get_loss_layer_training_label_type
{
typedef no_label_type type;
};
template <typename T>
struct get_loss_layer_label_type<T,typename std::enable_if<sizeof(typename T::label_type)!=0>::type>
struct get_loss_layer_training_label_type<T,typename std::enable_if<sizeof(typename T::training_label_type)!=0>::type>
{
typedef typename T::label_type type;
typedef typename T::training_label_type type;
};
template <typename T, typename enabled=void>
struct get_loss_layer_output_label_type
{
typedef no_label_type type;
};
template <typename T>
struct get_loss_layer_output_label_type<T,typename std::enable_if<sizeof(typename T::output_label_type)!=0>::type>
{
typedef typename T::output_label_type type;
};
public:
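
The get_loss_layer_*_label_type helpers above use the classic sizeof-based SFINAE idiom: the partial specialization is viable only when T actually declares the nested typedef, and the primary template falls back to no_label_type otherwise. The same idiom in isolation (a self-contained sketch with stand-in types):

    #include <type_traits>

    struct no_label_type {};

    // Primary template: used when T has no nested training_label_type.
    template <typename T, typename enabled = void>
    struct get_training_label_type { typedef no_label_type type; };

    // Viable only when T::training_label_type exists, because sizeof()
    // of a missing type is a substitution failure, not an error.
    template <typename T>
    struct get_training_label_type<T,
        typename std::enable_if<sizeof(typename T::training_label_type)!=0>::type>
    { typedef typename T::training_label_type type; };

    struct supervised   { typedef float training_label_type; };
    struct unsupervised {};

    static_assert(std::is_same<get_training_label_type<supervised>::type,
                               float>::value, "detected");
    static_assert(std::is_same<get_training_label_type<unsupervised>::type,
                               no_label_type>::value, "defaulted");
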
@ -2154,7 +2165,8 @@ namespace dlib
const static size_t num_layers = subnet_type::num_layers + 1;
// Note that the loss layer doesn't count as an additional computational layer.
const static size_t num_computational_layers = subnet_type::num_computational_layers;
typedef typename get_loss_layer_label_type<LOSS_DETAILS>::type label_type;
typedef typename get_loss_layer_training_label_type<LOSS_DETAILS>::type training_label_type;
typedef typename get_loss_layer_output_label_type<LOSS_DETAILS>::type output_label_type;
static_assert(is_nonloss_layer_type<SUBNET>::value,
"SUBNET must be of type add_layer, add_skip_layer, or add_tag_layer.");
@ -2250,19 +2262,19 @@ namespace dlib
(*this)(temp_tensor, obegin);
}
const label_type& operator() (const input_type& x)
const output_label_type& operator() (const input_type& x)
{
(*this)(&x, &x+1, &temp_label);
return temp_label;
}
template <typename iterable_type>
std::vector<label_type> operator() (
std::vector<output_label_type> operator() (
const iterable_type& data,
size_t batch_size = 128
)
{
std::vector<label_type> results(std::distance(data.begin(), data.end()));
std::vector<output_label_type> results(std::distance(data.begin(), data.end()));
auto o = results.begin();
auto i = data.begin();
auto num_remaining = results.size();
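
The remainder of this function, outside the hunk, consumes the range in chunks of at most batch_size. The same chunking pattern as a standalone sketch, with process() standing in for the per-batch network call:

    #include <algorithm>
    #include <cstddef>
    #include <iterator>

    template <typename iterator, typename func>
    void for_each_batch(iterator begin, iterator end, size_t batch_size, func process)
    {
        size_t num_remaining = std::distance(begin, end);
        while (num_remaining != 0)
        {
            const size_t num = std::min(batch_size, num_remaining);
            process(begin, std::next(begin, num)); // stand-in for the network call
            std::advance(begin, num);
            num_remaining -= num;
        }
    }
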
@ -2426,7 +2438,7 @@ namespace dlib
// These two objects don't logically contribute to the state of this object. They
// are here to prevent them from being reallocated over and over.
label_type temp_label;
output_label_type temp_label;
resizable_tensor temp_tensor;
};

View File

@ -619,9 +619,12 @@ namespace dlib
typedef typename subnet_type::input_type input_type;
const static size_t num_computational_layers = subnet_type::num_computational_layers;
const static size_t num_layers = subnet_type::num_layers + 1;
// If LOSS_DETAILS is an unsupervised loss then label_type==no_label_type.
// If LOSS_DETAILS is an unsupervised loss then training_label_type==no_label_type.
// Otherwise it is defined as follows:
typedef typename LOSS_DETAILS::label_type label_type;
typedef typename LOSS_DETAILS::training_label_type training_label_type;
// Similarly, if LOSS_DETAILS doesn't provide any output conversion then
// output_label_type==no_label_type.
typedef typename LOSS_DETAILS::output_label_type output_label_type;
@ -768,7 +771,7 @@ namespace dlib
- x.num_samples()%sample_expansion_factor() == 0
- x.num_samples() > 0
- obegin == iterator pointing to the start of a range of
x.num_samples()/sample_expansion_factor() label_type elements.
x.num_samples()/sample_expansion_factor() output_label_type elements.
ensures
- runs x through the network and writes the output to the range at obegin.
- loss_details().to_label() is used to write the network output into
@ -786,7 +789,7 @@ namespace dlib
- [ibegin, iend) is an iterator range over input_type objects.
- std::distance(ibegin,iend) > 0
- obegin == iterator pointing to the start of a range of
std::distance(ibegin,iend) label_type elements.
std::distance(ibegin,iend) output_label_type elements.
ensures
- runs [ibegin,iend) through the network and writes the output to the range
at obegin.
@ -796,18 +799,18 @@ namespace dlib
// -------------
const label_type& operator() (
const output_label_type& operator() (
const input_type& x
);
/*!
ensures
- runs a single object, x, through the network and returns the output.
- loss_details().to_label() is used to convert the network output into a
label_type.
output_label_type.
!*/
template <typename iterable_type>
std::vector<label_type> operator() (
std::vector<output_label_type> operator() (
const iterable_type& data,
size_t batch_size = 128
);
@ -826,7 +829,7 @@ namespace dlib
items. Using a batch_size > 1 can be faster because it better exploits
the available hardware parallelism.
- loss_details().to_label() is used to convert the network output into a
label_type.
output_label_type.
!*/
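
Taken together, the two overloads keep inference code unchanged apart from the element type of the results. A usage sketch, where net is assumed to be a trained network and samples a std::vector of its input_type:

    auto label  = net(samples[0]);    // single sample -> const output_label_type&
    auto labels = net(samples, 128);  // whole range   -> std::vector<output_label_type>
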
// -------------
@ -844,7 +847,7 @@ namespace dlib
- x.num_samples()%sample_expansion_factor() == 0
- x.num_samples() > 0
- lbegin == iterator pointing to the start of a range of
x.num_samples()/sample_expansion_factor() label_type elements.
x.num_samples()/sample_expansion_factor() training_label_type elements.
ensures
- runs x through the network, compares the output to the expected output
pointed to by lbegin, and returns the resulting loss.
@ -864,7 +867,7 @@ namespace dlib
- [ibegin, iend) is an iterator range over input_type objects.
- std::distance(ibegin,iend) > 0
- lbegin == iterator pointing to the start of a range of
std::distance(ibegin,iend) label_type elements.
std::distance(ibegin,iend) training_label_type elements.
ensures
- runs [ibegin,iend) through the network, compares the output to the
expected output pointed to by lbegin, and returns the resulting loss.
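
The loss overloads mirror this, except the truth range is typed as training_label_type. Under the same assumptions, with truth holding one training label per sample:

    // truth : std::vector<net_type::training_label_type>, truth.size() == samples.size()
    double loss = net.compute_loss(samples.begin(), samples.end(), truth.begin());
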
@ -880,7 +883,7 @@ namespace dlib
);
/*!
requires
- LOSS_DETAILS is an unsupervised loss. i.e. label_type==no_label_type.
- LOSS_DETAILS is an unsupervised loss. i.e. training_label_type==no_label_type.
- sample_expansion_factor() != 0
(i.e. to_tensor() must have been called to set sample_expansion_factor()
to something non-zero.)
@ -898,7 +901,7 @@ namespace dlib
);
/*!
requires
- LOSS_DETAILS is an unsupervised loss. i.e. label_type==no_label_type.
- LOSS_DETAILS is an unsupervised loss. i.e. training_label_type==no_label_type.
- [ibegin, iend) is an iterator range over input_type objects.
- std::distance(ibegin,iend) > 0
ensures
@ -921,7 +924,7 @@ namespace dlib
- x.num_samples()%sample_expansion_factor() == 0
- x.num_samples() > 0
- lbegin == iterator pointing to the start of a range of
x.num_samples()/sample_expansion_factor() label_type elements.
x.num_samples()/sample_expansion_factor() training_label_type elements.
ensures
- runs x through the network, compares the output to the expected output
pointed to by lbegin, and computes parameter and data gradients with
@ -944,7 +947,7 @@ namespace dlib
- [ibegin, iend) is an iterator range over input_type objects.
- std::distance(ibegin,iend) > 0
- lbegin == iterator pointing to the start of a range of
std::distance(ibegin,iend) label_type elements.
std::distance(ibegin,iend) training_label_type elements.
ensures
- runs [ibegin,iend) through the network, compares the output to the
expected output pointed to by lbegin, and computes parameter and data
@ -961,7 +964,7 @@ namespace dlib
);
/*!
requires
- LOSS_DETAILS is an unsupervised loss. i.e. label_type==no_label_type.
- LOSS_DETAILS is an unsupervised loss. i.e. training_label_type==no_label_type.
- sample_expansion_factor() != 0
(i.e. to_tensor() must have been called to set sample_expansion_factor()
to something non-zero.)
@ -982,7 +985,7 @@ namespace dlib
);
/*!
requires
- LOSS_DETAILS is an unsupervised loss. i.e. label_type==no_label_type.
- LOSS_DETAILS is an unsupervised loss. i.e. training_label_type==no_label_type.
- [ibegin, iend) is an iterator range over input_type objects.
- std::distance(ibegin,iend) > 0
ensures

View File

@ -21,7 +21,8 @@ namespace dlib
{
public:
typedef float label_type;
typedef float training_label_type;
typedef float output_label_type;
template <
typename SUB_TYPE,
@ -128,7 +129,8 @@ namespace dlib
{
public:
typedef float label_type;
typedef float training_label_type;
typedef float output_label_type;
template <
typename SUB_TYPE,
@ -244,7 +246,8 @@ namespace dlib
{
public:
typedef unsigned long label_type;
typedef unsigned long training_label_type;
typedef unsigned long output_label_type;
template <
typename SUB_TYPE,
@ -468,7 +471,8 @@ namespace dlib
public:
typedef std::vector<mmod_rect> label_type;
typedef std::vector<mmod_rect> training_label_type;
typedef std::vector<mmod_rect> output_label_type;
loss_mmod_() {}
@ -494,7 +498,7 @@ namespace dlib
DLIB_CASSERT(sub.sample_expansion_factor() == 1, sub.sample_expansion_factor());
std::vector<intermediate_detection> dets_accum;
label_type final_dets;
output_label_type final_dets;
for (long i = 0; i < output_tensor.num_samples(); ++i)
{
tensor_to_dets(input_tensor, output_tensor, i, dets_accum, adjust_threshold, sub);
@ -865,7 +869,7 @@ namespace dlib
{
public:
typedef unsigned long label_type;
typedef unsigned long training_label_type;
template <

View File

@ -33,14 +33,16 @@ namespace dlib
Finally, note that there are two broad flavors of loss layer, supervised
and unsupervised. The EXAMPLE_LOSS_LAYER_ as shown here is a supervised
layer. To make an unsupervised loss you simply leave out the label_type
typedef, to_label(), and the truth iterator argument to
layer. To make an unsupervised loss you simply leave out the
training_label_type typedef and the truth iterator argument to
compute_loss_value_and_gradient().
!*/
public:
typedef whatever_type_you_use_for_labels label_type;
// In most cases training_label_type and output_label_type will be the same type.
typedef whatever_type_you_use_for_training_labels training_label_type;
typedef whatever_type_you_use_for_output_labels output_label_type;
EXAMPLE_LOSS_LAYER_ (
);
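
This is the spot where the two typedefs can genuinely diverge. A hedged sketch of a custom loss whose training labels are class indices but whose outputs carry full per-class scores (everything below is illustrative; only the typedef and method names come from this spec):

    class loss_scores_  // hypothetical custom loss
    {
    public:
        // Train against a single class index...
        typedef unsigned long training_label_type;
        // ...but report the whole score vector at inference time.
        typedef std::vector<float> output_label_type;

        // to_label() would fill output_label_type elements from the output
        // tensor, while compute_loss_value_and_gradient() would read
        // training_label_type elements from the truth range.
    };
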
@ -77,9 +79,9 @@ namespace dlib
- input_tensor.num_samples()%sub.sample_expansion_factor() == 0.
- iter == an iterator pointing to the beginning of a range of
input_tensor.num_samples()/sub.sample_expansion_factor() elements. Moreover,
they must be label_type elements.
they must be output_label_type elements.
ensures
- Converts the output of the provided network to label_type objects and
- Converts the output of the provided network to output_label_type objects and
stores the results into the range indicated by iter. In particular, for
all valid i, it will be the case that:
*(iter+i/sub.sample_expansion_factor()) is populated based on the output of
@ -108,7 +110,7 @@ namespace dlib
layer<i>(sub).get_output().
- truth == an iterator pointing to the beginning of a range of
input_tensor.num_samples()/sub.sample_expansion_factor() elements. Moreover,
they must be label_type elements.
they must be training_label_type elements.
- for all valid i:
- *(truth+i/sub.sample_expansion_factor()) is the label of the ith sample in
input_tensor.
@ -167,7 +169,8 @@ namespace dlib
!*/
public:
typedef float label_type;
typedef float training_label_type;
typedef float output_label_type;
template <
typename SUB_TYPE,
@ -234,7 +237,8 @@ namespace dlib
!*/
public:
typedef float label_type;
typedef float training_label_type;
typedef float output_label_type;
template <
typename SUB_TYPE,
@ -306,7 +310,8 @@ namespace dlib
public:
typedef unsigned long label_type;
typedef unsigned long training_label_type;
typedef unsigned long output_label_type;
template <
typename SUB_TYPE,
@ -443,7 +448,8 @@ namespace dlib
public:
typedef std::vector<mmod_rect> label_type;
typedef std::vector<mmod_rect> training_label_type;
typedef std::vector<mmod_rect> output_label_type;
loss_mmod_(
);

View File

@ -30,20 +30,20 @@ namespace dlib
namespace impl
{
template <typename label_type>
template <typename training_label_type>
struct dnn_job_t
{
dnn_job_t() = default;
dnn_job_t(const dnn_job_t&) = delete;
dnn_job_t& operator=(const dnn_job_t&) = delete;
std::vector<std::vector<label_type>> labels;
std::vector<std::vector<training_label_type>> labels;
std::vector<resizable_tensor> t;
std::vector<int> have_data; // have_data[i] is true if there is data in labels[i] and t[i].
};
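
Each job carries one labels slot and one tensor slot per device, so a mini-batch is split before being enqueued. A sketch of such a split (an illustrative helper, not necessarily the trainer's exact policy):

    #include <vector>

    // Split labels into num_devices contiguous, nearly equal chunks,
    // one per dnn_job_t slot.
    template <typename label_t>
    std::vector<std::vector<label_t>> split_for_devices(
        const std::vector<label_t>& labels, size_t num_devices)
    {
        std::vector<std::vector<label_t>> out(num_devices);
        const size_t chunk = (labels.size() + num_devices - 1) / num_devices;
        for (size_t i = 0; i < labels.size(); ++i)
            out[i / chunk].push_back(labels[i]);
        return out;
    }
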
template <typename label_type>
void swap(dnn_job_t<label_type>& a, dnn_job_t<label_type>& b)
template <typename training_label_type>
void swap(dnn_job_t<training_label_type>& a, dnn_job_t<training_label_type>& b)
{
a.labels.swap(b.labels);
a.t.swap(b.t);
@ -63,12 +63,12 @@ namespace dlib
static_assert(is_loss_layer_type<net_type>::value,
"The last layer in a network must be a loss layer.");
typedef typename net_type::label_type label_type;
typedef typename net_type::training_label_type training_label_type;
typedef typename net_type::input_type input_type;
const static size_t num_computational_layers = net_type::num_computational_layers;
const static size_t num_layers = net_type::num_layers;
private:
typedef impl::dnn_job_t<label_type> job_t;
typedef impl::dnn_job_t<training_label_type> job_t;
public:
dnn_trainer() = delete;
@ -184,7 +184,7 @@ namespace dlib
void train_one_step (
const std::vector<input_type>& data,
const std::vector<label_type>& labels
const std::vector<training_label_type>& labels
)
{
DLIB_CASSERT(data.size() == labels.size());
@ -261,7 +261,7 @@ namespace dlib
void train (
const std::vector<input_type>& data,
const std::vector<label_type>& labels
const std::vector<training_label_type>& labels
)
{
DLIB_CASSERT(data.size() == labels.size() && data.size() > 0);
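
On the training side, usage is unchanged apart from the type name; callers still pass plain vectors. A sketch, with the network type as a placeholder:

    my_net_type net;                              // hypothetical network type
    dlib::dnn_trainer<my_net_type> trainer(net);  // default sgd solver

    std::vector<my_net_type::input_type> data;            // filled elsewhere
    std::vector<my_net_type::training_label_type> labels; // one label per sample

    trainer.train(data, labels);  // requires data.size() == labels.size() > 0
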
@ -322,7 +322,7 @@ namespace dlib
{
DLIB_CASSERT(data.size() > 0);
const bool has_unsupervised_loss = std::is_same<no_label_type, label_type>::value;
const bool has_unsupervised_loss = std::is_same<no_label_type, training_label_type>::value;
static_assert(has_unsupervised_loss,
"You can only call this version of train() when using an unsupervised loss.");
@ -562,7 +562,7 @@ namespace dlib
void thread() try
{
label_type pick_which_run_update;
training_label_type pick_which_run_update;
job_t next_job;
std::vector<dlib::future<double>> losses(devices.size());
@ -591,7 +591,7 @@ namespace dlib
++main_iteration_counter;
// Call compute_parameter_gradients() and update_parameters() but pick the
// right version for unsupervised or supervised training based on the type
// of label_type.
// of training_label_type.
for (size_t i = 0; i < devices.size(); ++i)
tp[i]->add_task_by_value([&,i](double& loss){ loss = compute_parameter_gradients(i, next_job, pick_which_run_update); }, losses[i]);
// aggregate loss values from all the network computations.
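
The dispatch above fans one gradient computation out per device and pulls the losses back through dlib::future objects. The same dispatch-and-aggregate pattern in isolation, with a trivial stand-in for the per-task work:

    #include <dlib/threads.h>
    #include <iostream>
    #include <memory>
    #include <vector>

    int main()
    {
        const size_t num_devices = 2;  // stand-in for the GPU count
        std::vector<std::unique_ptr<dlib::thread_pool>> tp;
        for (size_t i = 0; i < num_devices; ++i)
            tp.emplace_back(new dlib::thread_pool(1)); // one pool per device

        std::vector<dlib::future<double>> losses(num_devices);
        for (size_t i = 0; i < num_devices; ++i)
            tp[i]->add_task_by_value([i](double& loss){ loss = 1.0/(i+1); }, losses[i]);

        double total = 0;
        for (auto& l : losses)
            total += l.get();  // get() waits for the task to complete
        std::cout << "aggregate loss: " << total << "\n";
    }
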
@ -988,7 +988,7 @@ namespace dlib
data_iterator dend
)
{
typename std::vector<label_type>::iterator nothing;
typename std::vector<training_label_type>::iterator nothing;
send_job(dbegin, dend, nothing);
}

View File

@ -47,7 +47,7 @@ namespace dlib
public:
typedef typename net_type::label_type label_type;
typedef typename net_type::training_label_type training_label_type;
typedef typename net_type::input_type input_type;
const static size_t num_computational_layers = net_type::num_computational_layers;
@ -341,14 +341,14 @@ namespace dlib
void train (
const std::vector<input_type>& data,
const std::vector<label_type>& labels
const std::vector<training_label_type>& labels
);
/*!
requires
- data.size() == labels.size()
- data.size() > 0
- net_type uses a supervised loss.
i.e. net_type::label_type != no_label_type.
i.e. net_type::training_label_type != no_label_type.
ensures
- Trains a supervised neural network based on the given training data.
The goal of training is to find the network parameters that minimize
@ -374,7 +374,7 @@ namespace dlib
requires
- data.size() > 0
- net_type uses an unsupervised loss.
i.e. net_type::label_type == no_label_type.
i.e. net_type::training_label_type == no_label_type.
ensures
- Trains an unsupervised neural network based on the given training data.
The goal of training is to find the network parameters that minimize
@ -395,14 +395,14 @@ namespace dlib
void train_one_step (
const std::vector<input_type>& data,
const std::vector<label_type>& labels
const std::vector<training_label_type>& labels
);
/*!
requires
- data.size() == labels.size()
- data.size() > 0
- net_type uses a supervised loss.
i.e. net_type::label_type != no_label_type.
i.e. net_type::training_label_type != no_label_type.
ensures
- Performs one stochastic gradient update step based on the mini-batch of
data and labels supplied to this function. In particular, calling
@ -433,7 +433,7 @@ namespace dlib
- std::advance(lbegin, std::distance(dbegin, dend) - 1) is dereferenceable
- std::distance(dbegin, dend) > 0
- net_type uses a supervised loss.
i.e. net_type::label_type != no_label_type.
i.e. net_type::training_label_type != no_label_type.
ensures
- Performs one stochastic gradient update step based on the mini-batch of
data and labels supplied to this function. In particular, calling
@ -457,7 +457,7 @@ namespace dlib
requires
- data.size() > 0
- net_type uses an unsupervised loss.
i.e. net_type::label_type == no_label_type.
i.e. net_type::training_label_type == no_label_type.
ensures
- Performs one stochastic gradient update step based on the mini-batch of
data supplied to this function. In particular, calling train_one_step()
@ -485,7 +485,7 @@ namespace dlib
requires
- std::distance(dbegin, dend) > 0
- net_type uses an unsupervised loss.
i.e. net_type::label_type == no_label_type.
i.e. net_type::training_label_type == no_label_type.
ensures
- Performs one stochastic gradient update step based on the mini-batch of
data supplied to this function. In particular, calling train_one_step()