From 1c19a80a39ff763bc4dc8d7362c081b361a28869 Mon Sep 17 00:00:00 2001
From: Davis King
Date: Sat, 5 Nov 2016 21:12:33 -0400
Subject: [PATCH] Changed the loss layer interface to use two typedefs,
 output_label_type and training_label_type, instead of a single label_type.

This way, the label type used for training can be distinct from the type
output by the network. This change breaks backwards compatibility with the
previous API.
---
 dlib/dnn/core.h             | 40 ++++++++++++++++++++++++-------------
 dlib/dnn/core_abstract.h    | 35 +++++++++++++++++---------------
 dlib/dnn/loss.h             | 16 +++++++++------
 dlib/dnn/loss_abstract.h    | 26 ++++++++++++++----------
 dlib/dnn/trainer.h          | 24 +++++++++++-----------
 dlib/dnn/trainer_abstract.h | 18 ++++++++---------
 6 files changed, 92 insertions(+), 67 deletions(-)

diff --git a/dlib/dnn/core.h b/dlib/dnn/core.h
index 6d806f49b..493ecb41b 100644
--- a/dlib/dnn/core.h
+++ b/dlib/dnn/core.h
@@ -2120,12 +2120,12 @@ namespace dlib
     {
     private:
         // We don't want anyone making these no_label_type objects. They are here only to
-        // allow add_loss_layer::label_type and dnn_trainer::label_type to exist which avoids
-        // needing to overload add_loss_layer and dnn_trainer for supervised an unsupervised
-        // losses. It also can be a type to use in template metaprogramming to indicate
-        // "no label". So here we make the constructor private with the exception that
-        // add_loss_layer objects can make it (again, just to simplify add_loss_layer's
-        // implementation).
+        // allow add_loss_layer::training_label_type and dnn_trainer::training_label_type
+        // to exist which avoids needing to overload add_loss_layer and dnn_trainer for
+        // supervised and unsupervised losses. It also can be a type to use in template
+        // metaprogramming to indicate "no label". So here we make the constructor private
+        // with the exception that add_loss_layer objects can make it (again, just to
+        // simplify add_loss_layer's implementation).
         no_label_type(){};
         template <typename LOSS_DETAILS, typename SUBNET> friend class add_loss_layer;
         template < typename net_type, typename solver_type > friend class dnn_trainer;
@@ -2137,14 +2137,25 @@ namespace dlib
     class add_loss_layer
     {
         template <typename T, typename enabled=void>
-        struct get_loss_layer_label_type
+        struct get_loss_layer_training_label_type
         {
             typedef no_label_type type;
         };
         template <typename T>
-        struct get_loss_layer_label_type<T,typename std::enable_if<sizeof(typename T::label_type)!=0>::type>
+        struct get_loss_layer_training_label_type<T,typename std::enable_if<sizeof(typename T::training_label_type)!=0>::type>
         {
-            typedef typename T::label_type type;
+            typedef typename T::training_label_type type;
+        };
+
+        template <typename T, typename enabled=void>
+        struct get_loss_layer_output_label_type
+        {
+            typedef no_label_type type;
+        };
+        template <typename T>
+        struct get_loss_layer_output_label_type<T,typename std::enable_if<sizeof(typename T::output_label_type)!=0>::type>
+        {
+            typedef typename T::output_label_type type;
         };

     public:
@@ -2154,7 +2165,8 @@ namespace dlib
         const static size_t num_layers = subnet_type::num_layers + 1;
         // Note that the loss layer doesn't count as an additional computational layer.
         const static size_t num_computational_layers = subnet_type::num_computational_layers;
-        typedef typename get_loss_layer_label_type<LOSS_DETAILS>::type label_type;
+        typedef typename get_loss_layer_training_label_type<LOSS_DETAILS>::type training_label_type;
+        typedef typename get_loss_layer_output_label_type<LOSS_DETAILS>::type output_label_type;

         static_assert(is_nonloss_layer_type<SUBNET>::value,
             "SUBNET must be of type add_layer, add_skip_layer, or add_tag_layer.");
@@ -2250,19 +2262,19 @@ namespace dlib
             (*this)(temp_tensor, obegin);
         }

-        const label_type& operator() (const input_type& x)
+        const output_label_type& operator() (const input_type& x)
         {
             (*this)(&x, &x+1, &temp_label);
             return temp_label;
         }

         template <typename iterable_type>
-        std::vector<label_type> operator() (
+        std::vector<output_label_type> operator() (
            const iterable_type& data,
            size_t batch_size = 128
        )
        {
-            std::vector<label_type> results(std::distance(data.begin(), data.end()));
+            std::vector<output_label_type> results(std::distance(data.begin(), data.end()));
             auto o = results.begin();
             auto i = data.begin();
             auto num_remaining = results.size();
@@ -2426,7 +2438,7 @@ namespace dlib

         // These two objects don't logically contribute to the state of this object. They
         // are here to prevent them from being reallocated over and over.
-        label_type temp_label;
+        output_label_type temp_label;
         resizable_tensor temp_tensor;
     };
diff --git a/dlib/dnn/core_abstract.h b/dlib/dnn/core_abstract.h
index 9ad37bbe0..7d256634e 100644
--- a/dlib/dnn/core_abstract.h
+++ b/dlib/dnn/core_abstract.h
@@ -619,9 +619,12 @@ namespace dlib
         typedef typename subnet_type::input_type input_type;
         const static size_t num_computational_layers = subnet_type::num_computational_layers;
         const static size_t num_layers = subnet_type::num_layers + 1;
-        // If LOSS_DETAILS is an unsupervised loss then label_type==no_label_type.
+        // If LOSS_DETAILS is an unsupervised loss then training_label_type==no_label_type.
         // Otherwise it is defined as follows:
-        typedef typename LOSS_DETAILS::label_type label_type;
+        typedef typename LOSS_DETAILS::training_label_type training_label_type;
+        // Similarly, if LOSS_DETAILS doesn't provide any output conversion then
+        // output_label_type==no_label_type.
+        typedef typename LOSS_DETAILS::output_label_type output_label_type;
@@ -768,7 +771,7 @@ namespace dlib
                 - x.num_samples()%sample_expansion_factor() == 0
                 - x.num_samples() > 0
                 - obegin == iterator pointing to the start of a range of
-                  x.num_samples()/sample_expansion_factor() label_type elements.
+                  x.num_samples()/sample_expansion_factor() output_label_type elements.
             ensures
                 - runs x through the network and writes the output to the range at obegin.
                 - loss_details().to_label() is used to write the network output into
@@ -786,7 +789,7 @@ namespace dlib
                 - [ibegin, iend) is an iterator range over input_type objects.
                 - std::distance(ibegin,iend) > 0
                 - obegin == iterator pointing to the start of a range of
-                  std::distance(ibegin,iend) label_type elements.
+                  std::distance(ibegin,iend) output_label_type elements.
             ensures
                 - runs [ibegin,iend) through the network and writes the output to the
                   range at obegin.
@@ -796,18 +799,18 @@ namespace dlib

         // -------------

-        const label_type& operator() (
+        const output_label_type& operator() (
             const input_type& x
         );
         /*!
             ensures
                 - runs a single object, x, through the network and returns the output.
                 - loss_details().to_label() is used to convert the network output into a
-                  label_type.
+                  output_label_type.
         !*/

         template <typename iterable_type>
-        std::vector<label_type> operator() (
+        std::vector<output_label_type> operator() (
             const iterable_type& data,
             size_t batch_size = 128
         );
         /*!
@@ -826,7 +829,7 @@ namespace dlib
                   items. Using a batch_size > 1 can be faster because it better exploits
                   the available hardware parallelism.
                 - loss_details().to_label() is used to convert the network output into a
-                  label_type.
+                  output_label_type.
         !*/

         // -------------

@@ -844,7 +847,7 @@ namespace dlib
                 - x.num_samples()%sample_expansion_factor() == 0
                 - x.num_samples() > 0
                 - lbegin == iterator pointing to the start of a range of
-                  x.num_samples()/sample_expansion_factor() label_type elements.
+                  x.num_samples()/sample_expansion_factor() training_label_type elements.
             ensures
                 - runs x through the network, compares the output to the expected output
                   pointed to by lbegin, and returns the resulting loss.
@@ -864,7 +867,7 @@ namespace dlib
                 - [ibegin, iend) is an iterator range over input_type objects.
                 - std::distance(ibegin,iend) > 0
                 - lbegin == iterator pointing to the start of a range of
-                  std::distance(ibegin,iend) label_type elements.
+                  std::distance(ibegin,iend) training_label_type elements.
             ensures
                 - runs [ibegin,iend) through the network, compares the output to the
                   expected output pointed to by lbegin, and returns the resulting loss.
@@ -880,7 +883,7 @@ namespace dlib
         );
         /*!
             requires
-                - LOSS_DETAILS is an unsupervised loss. i.e. label_type==no_label_type.
+                - LOSS_DETAILS is an unsupervised loss. i.e. training_label_type==no_label_type.
                 - sample_expansion_factor() != 0
                   (i.e. to_tensor() must have been called to set sample_expansion_factor()
                   to something non-zero.)
@@ -898,7 +901,7 @@ namespace dlib
         );
         /*!
             requires
-                - LOSS_DETAILS is an unsupervised loss. i.e. label_type==no_label_type.
+                - LOSS_DETAILS is an unsupervised loss. i.e. training_label_type==no_label_type.
                 - [ibegin, iend) is an iterator range over input_type objects.
                 - std::distance(ibegin,iend) > 0
             ensures
@@ -921,7 +924,7 @@ namespace dlib
                 - x.num_samples()%sample_expansion_factor() == 0
                 - x.num_samples() > 0
                 - lbegin == iterator pointing to the start of a range of
-                  x.num_samples()/sample_expansion_factor() label_type elements.
+                  x.num_samples()/sample_expansion_factor() training_label_type elements.
             ensures
                 - runs x through the network, compares the output to the expected output
                   pointed to by lbegin, and computes parameter and data gradients with
@@ -944,7 +947,7 @@ namespace dlib
                 - [ibegin, iend) is an iterator range over input_type objects.
                 - std::distance(ibegin,iend) > 0
                 - lbegin == iterator pointing to the start of a range of
-                  std::distance(ibegin,iend) label_type elements.
+                  std::distance(ibegin,iend) training_label_type elements.
             ensures
                 - runs [ibegin,iend) through the network, compares the output to the
                   expected output pointed to by lbegin, and computes parameter and data
@@ -961,7 +964,7 @@ namespace dlib
         );
         /*!
             requires
-                - LOSS_DETAILS is an unsupervised loss. i.e. label_type==no_label_type.
+                - LOSS_DETAILS is an unsupervised loss. i.e. training_label_type==no_label_type.
                 - sample_expansion_factor() != 0
                   (i.e. to_tensor() must have been called to set sample_expansion_factor()
                   to something non-zero.)
@@ -982,7 +985,7 @@ namespace dlib
         );
         /*!
             requires
-                - LOSS_DETAILS is an unsupervised loss. i.e. label_type==no_label_type.
+                - LOSS_DETAILS is an unsupervised loss. i.e. training_label_type==no_label_type.
                 - [ibegin, iend) is an iterator range over input_type objects.
                 - std::distance(ibegin,iend) > 0
             ensures
diff --git a/dlib/dnn/loss.h b/dlib/dnn/loss.h
index cf75adba7..93bf04425 100644
--- a/dlib/dnn/loss.h
+++ b/dlib/dnn/loss.h
@@ -21,7 +21,8 @@ namespace dlib
     {
     public:

-        typedef float label_type;
+        typedef float training_label_type;
+        typedef float output_label_type;

         template <
             typename SUB_TYPE,
@@ -128,7 +129,8 @@ namespace dlib
     {
     public:

-        typedef float label_type;
+        typedef float training_label_type;
+        typedef float output_label_type;

         template <
             typename SUB_TYPE,
@@ -244,7 +246,8 @@ namespace dlib
     {
     public:

-        typedef unsigned long label_type;
+        typedef unsigned long training_label_type;
+        typedef unsigned long output_label_type;

         template <
             typename SUB_TYPE,
@@ -468,7 +471,8 @@ namespace dlib

     public:

-        typedef std::vector<mmod_rect> label_type;
+        typedef std::vector<mmod_rect> training_label_type;
+        typedef std::vector<mmod_rect> output_label_type;

         loss_mmod_() {}

@@ -494,7 +498,7 @@ namespace dlib
             DLIB_CASSERT(sub.sample_expansion_factor() == 1, sub.sample_expansion_factor());

             std::vector<intermediate_detection> dets_accum;
-            label_type final_dets;
+            output_label_type final_dets;
             for (long i = 0; i < output_tensor.num_samples(); ++i)
             {
                 tensor_to_dets(input_tensor, output_tensor, i, dets_accum, adjust_threshold, sub);
@@ -865,7 +869,7 @@ namespace dlib
     {
     public:

-        typedef unsigned long label_type;
+        typedef unsigned long training_label_type;

         template <
diff --git a/dlib/dnn/loss_abstract.h b/dlib/dnn/loss_abstract.h
index 6193e90e0..e8227c5f3 100644
--- a/dlib/dnn/loss_abstract.h
+++ b/dlib/dnn/loss_abstract.h
@@ -33,14 +33,16 @@ namespace dlib
                 Finally, note that there are two broad flavors of loss layer, supervised
                 and unsupervised. The EXAMPLE_LOSS_LAYER_ as shown here is a supervised
-                layer. To make an unsupervised loss you simply leave out the label_type
-                typedef, to_label(), and the truth iterator argument to
+                layer. To make an unsupervised loss you simply leave out the
+                training_label_type typedef and the truth iterator argument to
                 compute_loss_value_and_gradient().
         !*/

     public:

-        typedef whatever_type_you_use_for_labels label_type;
+        // In most cases training_label_type and output_label_type will be the same type.
+        typedef whatever_type_you_use_for_training_labels training_label_type;
+        typedef whatever_type_you_use_for_output_labels output_label_type;

         EXAMPLE_LOSS_LAYER_ (
         );
@@ -77,9 +79,9 @@ namespace dlib
                 - input_tensor.num_samples()%sub.sample_expansion_factor() == 0.
                 - iter == an iterator pointing to the beginning of a range of
                   input_tensor.num_samples()/sub.sample_expansion_factor() elements. Moreover,
-                  they must be label_type elements.
+                  they must be output_label_type elements.
             ensures
-                - Converts the output of the provided network to label_type objects and
+                - Converts the output of the provided network to output_label_type objects and
                   stores the results into the range indicated by iter. In particular, for
                   all valid i, it will be the case that:
                     *(iter+i/sub.sample_expansion_factor()) is populated based on the output of
@@ -108,7 +110,7 @@ namespace dlib
                   layer(sub).get_output().
                 - truth == an iterator pointing to the beginning of a range of
                   input_tensor.num_samples()/sub.sample_expansion_factor() elements. Moreover,
-                  they must be label_type elements.
+                  they must be training_label_type elements.
                 - for all valid i:
                     - *(truth+i/sub.sample_expansion_factor()) is the label of the ith sample
                       in input_tensor.
@@ -167,7 +169,8 @@ namespace dlib
         !*/

     public:

-        typedef float label_type;
+        typedef float training_label_type;
+        typedef float output_label_type;

         template <
             typename SUB_TYPE,
@@ -234,7 +237,8 @@ namespace dlib
         !*/

     public:

-        typedef float label_type;
+        typedef float training_label_type;
+        typedef float output_label_type;

         template <
             typename SUB_TYPE,
@@ -306,7 +310,8 @@ namespace dlib

     public:

-        typedef unsigned long label_type;
+        typedef unsigned long training_label_type;
+        typedef unsigned long output_label_type;

         template <
             typename SUB_TYPE,
@@ -443,7 +448,8 @@ namespace dlib

     public:

-        typedef std::vector<mmod_rect> label_type;
+        typedef std::vector<mmod_rect> training_label_type;
+        typedef std::vector<mmod_rect> output_label_type;

         loss_mmod_(
         );
diff --git a/dlib/dnn/trainer.h b/dlib/dnn/trainer.h
index 47617f65d..95410b2ea 100644
--- a/dlib/dnn/trainer.h
+++ b/dlib/dnn/trainer.h
@@ -30,20 +30,20 @@ namespace dlib
     namespace impl
     {
-        template <typename label_type>
+        template <typename training_label_type>
         struct dnn_job_t
         {
             dnn_job_t() = default;
             dnn_job_t(const dnn_job_t&) = delete;
             dnn_job_t& operator=(const dnn_job_t&) = delete;

-            std::vector<std::vector<label_type>> labels;
+            std::vector<std::vector<training_label_type>> labels;
             std::vector<resizable_tensor> t;
             std::vector<bool> have_data; // have_data[i] is true if there is data in labels[i] and t[i].
         };

-        template <typename label_type>
-        void swap(dnn_job_t<label_type>& a, dnn_job_t<label_type>& b)
+        template <typename training_label_type>
+        void swap(dnn_job_t<training_label_type>& a, dnn_job_t<training_label_type>& b)
         {
             a.labels.swap(b.labels);
             a.t.swap(b.t);
@@ -63,12 +63,12 @@ namespace dlib
         static_assert(is_loss_layer_type<net_type>::value,
             "The last layer in a network must be a loss layer.");

-        typedef typename net_type::label_type label_type;
+        typedef typename net_type::training_label_type training_label_type;
         typedef typename net_type::input_type input_type;
         const static size_t num_computational_layers = net_type::num_computational_layers;
         const static size_t num_layers = net_type::num_layers;
     private:
-        typedef impl::dnn_job_t<label_type> job_t;
+        typedef impl::dnn_job_t<training_label_type> job_t;
     public:

         dnn_trainer() = delete;
@@ -184,7 +184,7 @@ namespace dlib
         void train_one_step (
             const std::vector<input_type>& data,
-            const std::vector<label_type>& labels
+            const std::vector<training_label_type>& labels
         )
         {
             DLIB_CASSERT(data.size() == labels.size());
@@ -261,7 +261,7 @@ namespace dlib
         void train (
             const std::vector<input_type>& data,
-            const std::vector<label_type>& labels
+            const std::vector<training_label_type>& labels
         )
         {
             DLIB_CASSERT(data.size() == labels.size() && data.size() > 0);
@@ -322,7 +322,7 @@ namespace dlib
         {
             DLIB_CASSERT(data.size() > 0);

-            const bool has_unsupervised_loss = std::is_same<no_label_type,label_type>::value;
+            const bool has_unsupervised_loss = std::is_same<no_label_type,training_label_type>::value;
             static_assert(has_unsupervised_loss,
                 "You can only call this version of train() when using an unsupervised loss.");
@@ -562,7 +562,7 @@ namespace dlib
         void thread() try
         {
-            label_type pick_which_run_update;
+            training_label_type pick_which_run_update;
             job_t next_job;

             std::vector<dlib::future<double>> losses(devices.size());
@@ -591,7 +591,7 @@ namespace dlib
                 ++main_iteration_counter;
                 // Call compute_parameter_gradients() and update_parameters() but pick the
                 // right version for unsupervised or supervised training based on the type
-                // of label_type.
+                // of training_label_type.
                 for (size_t i = 0; i < devices.size(); ++i)
                     tp[i]->add_task_by_value([&,i](double& loss){ loss = compute_parameter_gradients(i, next_job, pick_which_run_update); }, losses[i]);

                 // aggregate loss values from all the network computations.
@@ -988,7 +988,7 @@ namespace dlib
             data_iterator dend
         )
         {
-            typename std::vector<label_type>::iterator nothing;
+            typename std::vector<training_label_type>::iterator nothing;
             send_job(dbegin, dend, nothing);
         }
diff --git a/dlib/dnn/trainer_abstract.h b/dlib/dnn/trainer_abstract.h
index 5258a2339..a8920f6f6 100644
--- a/dlib/dnn/trainer_abstract.h
+++ b/dlib/dnn/trainer_abstract.h
@@ -47,7 +47,7 @@ namespace dlib

     public:

-        typedef typename net_type::label_type label_type;
+        typedef typename net_type::training_label_type training_label_type;
         typedef typename net_type::input_type input_type;
         const static size_t num_computational_layers = net_type::num_computational_layers;
@@ -341,14 +341,14 @@ namespace dlib
         void train (
             const std::vector<input_type>& data,
-            const std::vector<label_type>& labels
+            const std::vector<training_label_type>& labels
         );
         /*!
             requires
                 - data.size() == labels.size()
                 - data.size() > 0
                 - net_type uses a supervised loss.
-                  i.e. net_type::label_type != no_label_type.
+                  i.e. net_type::training_label_type != no_label_type.
             ensures
                 - Trains a supervised neural network based on the given training data.
                   The goal of training is to find the network parameters that minimize
@@ -374,7 +374,7 @@ namespace dlib
             requires
                 - data.size() > 0
                 - net_type uses an unsupervised loss.
-                  i.e. net_type::label_type == no_label_type.
+                  i.e. net_type::training_label_type == no_label_type.
             ensures
                 - Trains an unsupervised neural network based on the given training data.
                   The goal of training is to find the network parameters that minimize
@@ -395,14 +395,14 @@ namespace dlib
         void train_one_step (
             const std::vector<input_type>& data,
-            const std::vector<label_type>& labels
+            const std::vector<training_label_type>& labels
         );
         /*!
             requires
                 - data.size() == labels.size()
                 - data.size() > 0
                 - net_type uses a supervised loss.
-                  i.e. net_type::label_type != no_label_type.
+                  i.e. net_type::training_label_type != no_label_type.
             ensures
                 - Performs one stochastic gradient update step based on the mini-batch of
                   data and labels supplied to this function. In particular, calling
@@ -433,7 +433,7 @@ namespace dlib
                 - std::advance(lbegin, std::distance(dbegin, dend) - 1) is dereferencable
                 - std::distance(dbegin, dend) > 0
                 - net_type uses a supervised loss.
-                  i.e. net_type::label_type != no_label_type.
+                  i.e. net_type::training_label_type != no_label_type.
             ensures
                 - Performs one stochastic gradient update step based on the mini-batch of
                   data and labels supplied to this function. In particular, calling
@@ -457,7 +457,7 @@ namespace dlib
             requires
                 - data.size() > 0
                 - net_type uses an unsupervised loss.
-                  i.e. net_type::label_type == no_label_type.
+                  i.e. net_type::training_label_type == no_label_type.
             ensures
                 - Performs one stochastic gradient update step based on the mini-batch of
                   data supplied to this function. In particular, calling train_one_step()
@@ -485,7 +485,7 @@ namespace dlib
             requires
                 - std::distance(dbegin, dend) > 0
                 - net_type uses an unsupervised loss.
-                  i.e. net_type::label_type == no_label_type.
+                  i.e. net_type::training_label_type == no_label_type.
             ensures
                 - Performs one stochastic gradient update step based on the mini-batch of
                   data supplied to this function. In particular, calling train_one_step()
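
The sketch below (illustrative only, not part of the patch above) shows how the two
new typedefs surface in client code. The network architecture, solver, and layer
sizes here are arbitrary assumptions chosen for brevity; the relevant points are
that dnn_trainer::train() consumes training_label_type elements, while
add_loss_layer::operator() returns output_label_type values produced by
loss_details().to_label(). For loss_multiclass_log the two typedefs happen to name
the same type (unsigned long, per the loss.h hunk above), but the split allows a
loss layer to declare them differently.

#include <dlib/dnn.h>
#include <type_traits>
#include <vector>

using namespace dlib;

// An illustrative toy classifier; the layer widths are assumptions, not
// anything prescribed by the patch.
using net_type = loss_multiclass_log<fc<10, relu<fc<32, input<matrix<float>>>>>>;

int main()
{
    // After this patch the two label typedefs are distinct names, even when,
    // as here, they resolve to the same underlying type.
    static_assert(std::is_same<net_type::training_label_type, unsigned long>::value, "");
    static_assert(std::is_same<net_type::output_label_type, unsigned long>::value, "");

    std::vector<matrix<float>> samples;  // training inputs
    std::vector<unsigned long> labels;   // training_label_type elements
    // ... fill samples and labels with real data before training ...

    net_type net;
    dnn_trainer<net_type> trainer(net, sgd());
    trainer.train(samples, labels);      // train() consumes training_label_type

    // operator() converts the network output via loss_details().to_label()
    // and returns output_label_type values.
    std::vector<unsigned long> predictions = net(samples);
    return 0;
}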