From afe19fcb8bef83335274c28d1a6ffac421ef1888 Mon Sep 17 00:00:00 2001 From: Davis King Date: Sat, 5 Sep 2020 18:33:04 -0400 Subject: [PATCH] Made the DNN layer visiting routines more convenient. Now the user doesn't have to supply a visitor capable of visiting all layers, but instead just the ones they are interested in. Also added visit_computational_layers() and visit_computational_layers_range() since those capture a very common use case more concisely than visit_layers(). That is, users generally want to mess with the computational layers specifically as those are the stateful layers. --- dlib/dnn/core.h | 279 +++++++++++++++--------------- dlib/dnn/core_abstract.h | 81 ++++++++- dlib/dnn/trainer.h | 2 +- dlib/test/dnn.cpp | 16 +- examples/dnn_introduction3_ex.cpp | 22 +-- 5 files changed, 236 insertions(+), 164 deletions(-) diff --git a/dlib/dnn/core.h b/dlib/dnn/core.h index 416fb2507..e18a5a1fa 100644 --- a/dlib/dnn/core.h +++ b/dlib/dnn/core.h @@ -3471,140 +3471,6 @@ namespace dlib return impl_test_layer(l, 0.01); } -// ---------------------------------------------------------------------------------------- - - namespace impl - { - template - struct vlp_loop - { - template - static typename std::enable_if::value>::type invoke_functor(T&& , size_t& , U&& ) - { - // intentionally left empty - } - - template - static typename std::enable_if::value>::type invoke_functor(T&& v , size_t& comp_i, U&& l ) - { - v(comp_i, l.layer_details().get_layer_params()); - ++comp_i; - } - - template < - typename net_type, - typename visitor - > - static void visit( - size_t comp_i, - net_type& net, - visitor&& v - ) - { - invoke_functor(v, comp_i, layer(net)); - vlp_loop::visit(comp_i, net,v); - } - }; - - template - struct vlp_loop - { - template < - typename net_type, - typename visitor - > - static void visit( - size_t, - net_type&, - visitor&& - ) - { - // Base case of recursion. Don't do anything. - } - }; - - } - - template < - typename net_type, - typename visitor - > - void visit_layer_parameters( - net_type& net, - visitor v - ) - { - size_t comp_i = 0; - impl::vlp_loop<0, net_type::num_layers>::visit(comp_i, net, v); - } - -// ---------------------------------------------------------------------------------------- - - namespace impl - { - template - struct vlpg_loop - { - template - static typename std::enable_if::value>::type invoke_functor(T&& , size_t& , U&& ) - { - // intentionally left empty - } - - template - static typename std::enable_if::value>::type invoke_functor(T&& v , size_t& comp_i, U&& l ) - { - v(comp_i, l.get_parameter_gradient()); - ++comp_i; - } - - template < - typename net_type, - typename visitor - > - static void visit( - size_t comp_i, - net_type& net, - visitor&& v - ) - { - invoke_functor(v, comp_i, layer(net)); - vlpg_loop::visit(comp_i, net,v); - } - }; - - template - struct vlpg_loop - { - template < - typename net_type, - typename visitor - > - static void visit( - size_t, - net_type&, - visitor&& - ) - { - // Base case of recursion. Don't do anything. - } - }; - - } - - template < - typename net_type, - typename visitor - > - void visit_layer_parameter_gradients( - net_type& net, - visitor v - ) - { - size_t comp_i = 0; - impl::vlpg_loop<0, net_type::num_layers>::visit(comp_i, net, v); - } - // ---------------------------------------------------------------------------------------- namespace impl @@ -3621,7 +3487,9 @@ namespace dlib visitor&& v ) { - v(i, layer(net)); + // Call whatever version of the visitor the user provided. 
+ call_if_valid(v, i, layer(net)); + call_if_valid(v, layer(net)); vl_loop::visit(net,v); } }; @@ -3655,7 +3523,9 @@ namespace dlib ) { vl_loop_backwards::visit(net,v); - v(i, layer(net)); + // Call whatever version of the visitor the user provided. + call_if_valid(v, i, layer(net)); + call_if_valid(v, layer(net)); } }; @@ -3751,7 +3621,7 @@ namespace dlib visitor&& v ) { - v(next_net); + call_if_valid(v, next_net); vl_until_tag::visit(net,layer(net),v); } @@ -3766,7 +3636,7 @@ namespace dlib visitor&& v ) { - v(next_net); + call_if_valid(v, next_net); } template < @@ -3780,7 +3650,7 @@ namespace dlib visitor&& v ) { - v(next_net); + call_if_valid(v, next_net); } }; } @@ -3798,6 +3668,137 @@ namespace dlib impl::vl_until_tag<0,tag_id>::visit(net, net, v); } +// ---------------------------------------------------------------------------------------- + + namespace impl + { + template < + typename visitor + > + class visitor_computational_layer + { + public: + explicit visitor_computational_layer(visitor& v) : v_(v) {} + + template + void operator()(size_t idx, add_layer& l) const + { + // Call whatever version of the visitor the user provided. + call_if_valid(v_, idx, l.layer_details()); + call_if_valid(v_, l.layer_details()); + } + private: + + visitor& v_; + }; + } + + template < + typename net_type, + typename visitor + > + void visit_computational_layers( + net_type& net, + visitor v + ) + { + visit_layers(net, impl::visitor_computational_layer(v)); + } + + template < + size_t begin, + size_t end, + typename net_type, + typename visitor + > + void visit_computational_layers_range( + net_type& net, + visitor v + ) + { + visit_layers_range(net, impl::visitor_computational_layer(v)); + } + +// ---------------------------------------------------------------------------------------- + + namespace impl + { + template < + typename visitor + > + class visit_layer_parameters + { + public: + explicit visit_layer_parameters(visitor& v) : v_(v) {} + + template + void operator()(layer& l) + { + // Call whatever version of the visitor the user provided. + const bool visitor_called = call_if_valid(v_, computational_layer_idx, l.get_layer_params()) || + call_if_valid(v_, l.get_layer_params()); + DLIB_CASSERT(visitor_called, "A visitor function with an incorrect signature was given to visit_layer_parameters()"); + ++computational_layer_idx; + } + private: + + size_t computational_layer_idx = 0; + visitor& v_; + }; + } + + template < + typename net_type, + typename visitor + > + void visit_layer_parameters( + net_type& net, + visitor v + ) + { + visit_computational_layers(net, impl::visit_layer_parameters(v)); + } + +// ---------------------------------------------------------------------------------------- + + namespace impl + { + template < + typename visitor + > + class visit_layer_parameter_gradients + { + public: + explicit visit_layer_parameter_gradients(visitor& v) : v_(v) {} + + template + void operator()(add_layer& l) + { + // Call whatever version of the visitor the user provided. 
+ const bool visitor_called = call_if_valid(v_, computational_layer_idx, l.get_parameter_gradient()) || + call_if_valid(v_, l.get_parameter_gradient()); + DLIB_CASSERT(visitor_called, "A visitor function with an incorrect signature was given to visit_layer_parameter_gradients()"); + ++computational_layer_idx; + } + private: + + size_t computational_layer_idx = 0; + visitor& v_; + }; + } + + template < + typename net_type, + typename visitor + > + void visit_layer_parameter_gradients( + net_type& net, + visitor v + ) + { + visit_layers(net, impl::visit_layer_parameter_gradients(v)); + } + // ---------------------------------------------------------------------------------------- } diff --git a/dlib/dnn/core_abstract.h b/dlib/dnn/core_abstract.h index 80394da7f..be4532caf 100644 --- a/dlib/dnn/core_abstract.h +++ b/dlib/dnn/core_abstract.h @@ -1676,6 +1676,8 @@ namespace dlib add_tag_layer. - v is a function object with a signature equivalent to: v(size_t idx, tensor& t) + or: + v(tensor& t) ensures - Loops over all the computational layers (i.e. layers with parameters, as opposed to loss, tag, or input layers) in net and passes their parameters to @@ -1709,6 +1711,8 @@ namespace dlib add_tag_layer. - v is a function object with a signature equivalent to: v(size_t idx, tensor& t) + or: + v(tensor& t) ensures - Loops over all the computational layers (i.e. layers with parameters, as opposed to loss, tag, or input layers) in net and passes their parameter @@ -1743,7 +1747,9 @@ namespace dlib add_tag_layer. - v is a function object with a signature equivalent to: v(size_t idx, any_net_type& t) - That is, it must take a size_t and then any of the network types such as + or: + v(any_net_type& t) + That is, it takes an optional size_t and then any of the network types such as add_layer, add_loss_layer, etc. ensures - Loops over all the layers in net and calls v() on them. To be specific, this @@ -1767,7 +1773,9 @@ namespace dlib add_tag_layer. - v is a function object with a signature equivalent to: v(size_t idx, any_net_type& t) - That is, it must take a size_t and then any of the network types such as + or: + v(any_net_type& t) + That is, it takes an optional size_t and then any of the network types such as add_layer, add_loss_layer, etc. ensures - Loops over all the layers in net and calls v() on them. The loop happens in @@ -1778,6 +1786,64 @@ namespace dlib v(i-1, layer(net)); !*/ +// ---------------------------------------------------------------------------------------- + + template < + typename net_type, + typename visitor + > + void visit_computational_layers( + net_type& net, + visitor v + ); + /*! + requires + - net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or + add_tag_layer. + - v is a function object with a signature equivalent to: + v(size_t idx, any_computational_layer& t) + or: + v(any_computational_layer& t) + That is, it takes an optional size_t and then any of the computational layers. E.g. + one of the layer types defined in dlib/dnn/layers_abstract.h like fc_ or conv_. + ensures + - Loops over all the computational layers in net and calls v() on them. To be specific, this + function essentially performs the following: + + for (size_t i = 0; i < net_type::num_layers; ++i) + if (layer(net) is an add_layer type, i.e. 
it adds a computational layer) + v(i, layer(net).layer_details()); + !*/ + + template < + size_t begin, + size_t end, + typename net_type, + typename visitor + > + void visit_computational_layers_range( + net_type& net, + visitor v + ); + /*! + requires + - net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or + add_tag_layer. + - v is a function object with a signature equivalent to: + v(size_t idx, any_computational_layer& t) + or: + v(any_computational_layer& t) + That is, it takes an optional size_t and then any of the computational layers. E.g. + one of the layer types defined in dlib/dnn/layers_abstract.h like fc_ or conv_. + ensures + - Loops over all the computational layers in the range [begin,end) in net and calls v() + on them. To be specific, this function essentially performs the following: + + for (size_t i = begin; i < end; ++i) + if (layer(net) is an add_layer type, i.e. it adds a computational layer) + v(i, layer(net).layer_details()); + !*/ + // ---------------------------------------------------------------------------------------- template < @@ -1796,13 +1862,14 @@ namespace dlib add_tag_layer. - v is a function object with a signature equivalent to: v(size_t idx, any_net_type& t) - That is, it must take a size_t and then any of the network types such as + or: + v(any_net_type& t) + That is, it takes an optional size_t and then any of the network types such as add_layer, add_loss_layer, etc. - begin <= end <= net_type::num_layers ensures - Loops over the layers in the range [begin,end) in net and calls v() on them. - The loop happens in the reverse order of visit_layers(). To be specific, - this function essentially performs the following: + To be specific, this function essentially performs the following: for (size_t i = begin; i < end; ++i) v(i, layer(net)); @@ -1824,7 +1891,9 @@ namespace dlib add_tag_layer. - v is a function object with a signature equivalent to: v(size_t idx, any_net_type& t) - That is, it must take a size_t and then any of the network types such as + or: + v(any_net_type& t) + That is, it takes an optional size_t and then any of the network types such as add_layer, add_loss_layer, etc. - begin <= end <= net_type::num_layers ensures diff --git a/dlib/dnn/trainer.h b/dlib/dnn/trainer.h index 3243ae347..1ab43e8c8 100644 --- a/dlib/dnn/trainer.h +++ b/dlib/dnn/trainer.h @@ -667,7 +667,7 @@ namespace dlib // periodically copy these tensors to all the other devices to make sure the // different GPUs don't go out of sync. 
            std::vector<tensor*> reference_params;
-            visit_layer_parameters(devices[0]->net, [&](size_t, tensor& t) { reference_params.push_back(&t); });
+            visit_layer_parameters(devices[0]->net, [&](tensor& t) { reference_params.push_back(&t); });
 
             // If no external thread pools vector was passed, then create one that will
             // be automatically destructed as soon as the dnn_trainer object goes out of
diff --git a/dlib/test/dnn.cpp b/dlib/test/dnn.cpp
index b24e98ff2..22dfa9cf4 100644
--- a/dlib/test/dnn.cpp
+++ b/dlib/test/dnn.cpp
@@ -1995,14 +1995,14 @@ namespace
                                 pres>
-                                >>>>>>>>>>>;
+                                >>>>>>>>>>>>;
 
         net_type2 pnet;
 
-        DLIB_TEST_MSG(pnet.num_layers == 131, pnet.num_layers);
-        DLIB_TEST_MSG(pnet.num_computational_layers == 109, pnet.num_computational_layers);
+        DLIB_TEST_MSG(pnet.num_layers == 132, pnet.num_layers);
+        DLIB_TEST_MSG(pnet.num_computational_layers == 110, pnet.num_computational_layers);
 
         std::vector<bool> hit(pnet.num_computational_layers, false);
         size_t count = 0;
@@ -2017,6 +2017,14 @@ namespace
         for (auto x : hit2)
             DLIB_TEST(x);
         DLIB_TEST(count == pnet.num_computational_layers);
+
+        int num_relus = 0;
+        visit_computational_layers(pnet, [&num_relus](relu_&) { ++num_relus; });
+        DLIB_TEST(num_relus == 10);
+
+        DLIB_TEST(layer(pnet).layer_details().get_alpha() == 0.01f);
+        visit_computational_layers(pnet, [](leaky_relu_& l) { l = leaky_relu_(0.001f); });
+        DLIB_TEST(layer(pnet).layer_details().get_alpha() == 0.001f);
     }
 
     float tensor_read_cpu(const tensor& t, long i, long k, long r, long c)
diff --git a/examples/dnn_introduction3_ex.cpp b/examples/dnn_introduction3_ex.cpp
index daf8147b6..6ba1b63a0 100644
--- a/examples/dnn_introduction3_ex.cpp
+++ b/examples/dnn_introduction3_ex.cpp
@@ -47,16 +47,10 @@ public:
     visitor_weight_decay_multiplier(double new_weight_decay_multiplier_) :
         new_weight_decay_multiplier(new_weight_decay_multiplier_) {}
 
-    template <typename input_layer_type>
-    void operator()(size_t , input_layer_type& ) const
+    template <typename layer>
+    void operator()(layer& l) const
     {
-        // ignore other layers
-    }
-
-    template <typename T, typename U, typename E>
-    void operator()(size_t , add_layer<T, U, E>& l) const
-    {
-        set_weight_decay_multiplier(l.layer_details(), new_weight_decay_multiplier);
+        set_weight_decay_multiplier(l, new_weight_decay_multiplier);
     }
 
 private:
@@ -98,7 +92,7 @@ int main() try
 
     // We can use the visit_layers function to modify the weight decay of the entire
     // network:
-    visit_layers(net, visitor_weight_decay_multiplier(0.001));
+    visit_computational_layers(net, visitor_weight_decay_multiplier(0.001));
 
     // We can also use predefined visitors to affect the learning rate of the whole
     // network.
@@ -109,14 +103,14 @@ int main() try
     // visitor that is very similar to the one defined in this example.
 
     // Usually, we want to freeze the network, except for the top layers:
-    visit_layers(net.subnet().subnet(), visitor_weight_decay_multiplier(0));
+    visit_computational_layers(net.subnet().subnet(), visitor_weight_decay_multiplier(0));
     set_all_learning_rate_multipliers(net.subnet().subnet(), 0);
 
     // Alternatively, we can use the visit_layers_range to modify only a specific set of
     // layers:
-    visit_layers_range<0, 2>(net, visitor_weight_decay_multiplier(1));
+    visit_computational_layers_range<0, 2>(net, visitor_weight_decay_multiplier(1));
 
-    // Sometimes we might want to set the learning rate differently thoughout the network.
+    // Sometimes we might want to set the learning rate differently throughout the network.
     // Here we show how to adjust the learning rate at the different ResNet50's
     // convolutional blocks:
     set_learning_rate_multipliers_range< 0, 2>(net, 1);
@@ -143,7 +137,7 @@ int main() try
     // We can also print the number of parameters of the network:
     cout << "number of network parameters: " << count_parameters(net) << endl;
 
-    // From this point on, we can finetune the new network using this pretrained backbone
+    // From this point on, we can fine-tune the new network using this pretrained backbone
     // on another task, such as the one shown in dnn_metric_learning_on_images_ex.cpp.
 
     return EXIT_SUCCESS;
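
Note: a minimal usage sketch of the visitor behavior this patch enables (illustrative only, not part of the diff; tweak_network and the templated net_type parameter are placeholder names for whatever dlib network is in use). It mirrors the patterns exercised by the patch's own test and trainer changes: the visitor only needs a signature for the layer types it cares about, and the index argument is optional.

    #include <dlib/dnn.h>
    #include <iostream>

    // Sketch only: exercises the visitor signatures accepted after this patch.
    template <typename net_type>
    void tweak_network(net_type& net)
    {
        using namespace dlib;

        // A visitor no longer needs a catch-all overload.  This lambda only
        // matches leaky_relu_ layers; every other computational layer is
        // skipped automatically.
        visit_computational_layers(net, [](leaky_relu_& l) { l = leaky_relu_(0.001f); });

        // The index argument is now optional.  Here every parameter tensor is
        // zeroed without caring which computational layer owns it.
        visit_layer_parameters(net, [](tensor& t) { t = 0; });

        // The two-argument form still works when the layer's position is needed.
        visit_computational_layers(net, [](size_t idx, relu_&)
        {
            std::cout << "relu layer at index " << idx << std::endl;
        });
    }

The dispatch goes through call_if_valid(), so visit_layers() and visit_computational_layers() simply skip layers a visitor cannot accept, while visit_layer_parameters() and visit_layer_parameter_gradients() assert if the visitor matches neither supported signature.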