mirror of
https://github.com/davisking/dlib.git
synced 2024-11-01 10:14:53 +08:00
Made the DNN layer visiting routines more convenient.
Now the user doesn't have to supply a visitor capable of visiting all layers, but instead just the ones they are interested in. Also added visit_computational_layers() and visit_computational_layers_range() since those capture a very common use case more concisely than visit_layers(). That is, users generally want to mess with the computational layers specifically as those are the stateful layers.
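For example, with this change a visitor only needs to be callable for the layer types it actually cares about; every other layer is silently skipped. A minimal sketch (the relu_ and leaky_relu_ lambdas mirror the test added further down in this diff; the surrounding function and network type are hypothetical):

    #include <dlib/dnn.h>
    using namespace dlib;

    template <typename net_type>
    void tweak_activations(net_type& net)
    {
        // Count only the relu layers; all other computational layers are ignored
        // because the lambda isn't callable with them.
        int num_relus = 0;
        visit_computational_layers(net, [&num_relus](relu_&) { ++num_relus; });

        // Modify only the leaky_relu layers, leaving everything else untouched.
        visit_computational_layers(net, [](leaky_relu_& l) { l = leaky_relu_(0.001f); });
    }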
This commit is contained in:
parent 7dcc7b4ebc
commit afe19fcb8b

dlib/dnn/core.h (279 changed lines)
@@ -3471,140 +3471,6 @@ namespace dlib
         return impl_test_layer(l, 0.01);
     }
 
-// ----------------------------------------------------------------------------------------
-
-    namespace impl
-    {
-        template <size_t i, size_t num>
-        struct vlp_loop
-        {
-            template <typename T, typename U>
-            static typename std::enable_if<!is_add_layer<U>::value>::type invoke_functor(T&& , size_t& , U&& )
-            {
-                // intentionally left empty
-            }
-
-            template <typename T, typename U>
-            static typename std::enable_if<is_add_layer<U>::value>::type invoke_functor(T&& v , size_t& comp_i, U&& l )
-            {
-                v(comp_i, l.layer_details().get_layer_params());
-                ++comp_i;
-            }
-
-            template <
-                typename net_type,
-                typename visitor
-                >
-            static void visit(
-                size_t comp_i,
-                net_type& net,
-                visitor&& v
-            )
-            {
-                invoke_functor(v, comp_i, layer<i>(net));
-                vlp_loop<i+1, num>::visit(comp_i, net,v);
-            }
-        };
-
-        template <size_t num>
-        struct vlp_loop<num,num>
-        {
-            template <
-                typename net_type,
-                typename visitor
-                >
-            static void visit(
-                size_t,
-                net_type&,
-                visitor&&
-            )
-            {
-                // Base case of recursion. Don't do anything.
-            }
-        };
-
-    }
-
-    template <
-        typename net_type,
-        typename visitor
-        >
-    void visit_layer_parameters(
-        net_type& net,
-        visitor v
-    )
-    {
-        size_t comp_i = 0;
-        impl::vlp_loop<0, net_type::num_layers>::visit(comp_i, net, v);
-    }
-
-// ----------------------------------------------------------------------------------------
-
-    namespace impl
-    {
-        template <size_t i, size_t num>
-        struct vlpg_loop
-        {
-            template <typename T, typename U>
-            static typename std::enable_if<!is_add_layer<U>::value>::type invoke_functor(T&& , size_t& , U&& )
-            {
-                // intentionally left empty
-            }
-
-            template <typename T, typename U>
-            static typename std::enable_if<is_add_layer<U>::value>::type invoke_functor(T&& v , size_t& comp_i, U&& l )
-            {
-                v(comp_i, l.get_parameter_gradient());
-                ++comp_i;
-            }
-
-            template <
-                typename net_type,
-                typename visitor
-                >
-            static void visit(
-                size_t comp_i,
-                net_type& net,
-                visitor&& v
-            )
-            {
-                invoke_functor(v, comp_i, layer<i>(net));
-                vlpg_loop<i+1, num>::visit(comp_i, net,v);
-            }
-        };
-
-        template <size_t num>
-        struct vlpg_loop<num,num>
-        {
-            template <
-                typename net_type,
-                typename visitor
-                >
-            static void visit(
-                size_t,
-                net_type&,
-                visitor&&
-            )
-            {
-                // Base case of recursion. Don't do anything.
-            }
-        };
-
-    }
-
-    template <
-        typename net_type,
-        typename visitor
-        >
-    void visit_layer_parameter_gradients(
-        net_type& net,
-        visitor v
-    )
-    {
-        size_t comp_i = 0;
-        impl::vlpg_loop<0, net_type::num_layers>::visit(comp_i, net, v);
-    }
-
 // ----------------------------------------------------------------------------------------
 
     namespace impl
@@ -3621,7 +3487,9 @@ namespace dlib
                 visitor&& v
             )
             {
-                v(i, layer<i>(net));
+                // Call whatever version of the visitor the user provided.
+                call_if_valid(v, i, layer<i>(net));
+                call_if_valid(v, layer<i>(net));
                 vl_loop<i+1, num>::visit(net,v);
             }
         };
@@ -3655,7 +3523,9 @@ namespace dlib
             )
             {
                 vl_loop_backwards<i+1, num>::visit(net,v);
-                v(i, layer<i>(net));
+                // Call whatever version of the visitor the user provided.
+                call_if_valid(v, i, layer<i>(net));
+                call_if_valid(v, layer<i>(net));
             }
         };
 
@@ -3751,7 +3621,7 @@ namespace dlib
                 visitor&& v
            )
            {
-                v(next_net);
+                call_if_valid(v, next_net);
                 vl_until_tag<i+1,tag_id>::visit(net,layer<i+1>(net),v);
            }
 
@@ -3766,7 +3636,7 @@ namespace dlib
                 visitor&& v
            )
            {
-                v(next_net);
+                call_if_valid(v, next_net);
            }
 
            template <
@@ -3780,7 +3650,7 @@ namespace dlib
                 visitor&& v
            )
            {
-                v(next_net);
+                call_if_valid(v, next_net);
            }
        };
    }
@@ -3798,6 +3668,137 @@ namespace dlib
         impl::vl_until_tag<0,tag_id>::visit(net, net, v);
     }
 
+// ----------------------------------------------------------------------------------------
+
+    namespace impl
+    {
+        template <
+            typename visitor
+            >
+        class visitor_computational_layer
+        {
+        public:
+            explicit visitor_computational_layer(visitor& v) : v_(v) {}
+
+            template <typename T, typename U, typename E>
+            void operator()(size_t idx, add_layer<T,U,E>& l) const
+            {
+                // Call whatever version of the visitor the user provided.
+                call_if_valid(v_, idx, l.layer_details());
+                call_if_valid(v_, l.layer_details());
+            }
+        private:
+
+            visitor& v_;
+        };
+    }
+
+    template <
+        typename net_type,
+        typename visitor
+        >
+    void visit_computational_layers(
+        net_type& net,
+        visitor v
+    )
+    {
+        visit_layers(net, impl::visitor_computational_layer<visitor>(v));
+    }
+
+    template <
+        size_t begin,
+        size_t end,
+        typename net_type,
+        typename visitor
+        >
+    void visit_computational_layers_range(
+        net_type& net,
+        visitor v
+    )
+    {
+        visit_layers_range<begin,end>(net, impl::visitor_computational_layer<visitor>(v));
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    namespace impl
+    {
+        template <
+            typename visitor
+            >
+        class visit_layer_parameters
+        {
+        public:
+            explicit visit_layer_parameters(visitor& v) : v_(v) {}
+
+            template <typename layer>
+            void operator()(layer& l)
+            {
+                // Call whatever version of the visitor the user provided.
+                const bool visitor_called = call_if_valid(v_, computational_layer_idx, l.get_layer_params()) ||
+                                            call_if_valid(v_, l.get_layer_params());
+                DLIB_CASSERT(visitor_called, "A visitor function with an incorrect signature was given to visit_layer_parameters()");
+                ++computational_layer_idx;
+            }
+        private:
+
+            size_t computational_layer_idx = 0;
+            visitor& v_;
+        };
+    }
+
+    template <
+        typename net_type,
+        typename visitor
+        >
+    void visit_layer_parameters(
+        net_type& net,
+        visitor v
+    )
+    {
+        visit_computational_layers(net, impl::visit_layer_parameters<visitor>(v));
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    namespace impl
+    {
+        template <
+            typename visitor
+            >
+        class visit_layer_parameter_gradients
+        {
+        public:
+            explicit visit_layer_parameter_gradients(visitor& v) : v_(v) {}
+
+            template <typename T, typename U, typename E>
+            void operator()(add_layer<T,U,E>& l)
+            {
+                // Call whatever version of the visitor the user provided.
+                const bool visitor_called = call_if_valid(v_, computational_layer_idx, l.get_parameter_gradient()) ||
+                                            call_if_valid(v_, l.get_parameter_gradient());
+                DLIB_CASSERT(visitor_called, "A visitor function with an incorrect signature was given to visit_layer_parameter_gradients()");
+                ++computational_layer_idx;
+            }
+        private:
+
+            size_t computational_layer_idx = 0;
+            visitor& v_;
+        };
+    }
+
+    template <
+        typename net_type,
+        typename visitor
+        >
+    void visit_layer_parameter_gradients(
+        net_type& net,
+        visitor v
+    )
+    {
+        visit_layers(net, impl::visit_layer_parameter_gradients<visitor>(v));
+    }
+
 // ----------------------------------------------------------------------------------------
 
 }
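The reworked visit_layer_parameters() and visit_layer_parameter_gradients() above accept either visitor signature. A brief sketch (the tensor-only lambda mirrors the dnn_trainer change further down in this diff; the gradient-zeroing lambda is a hypothetical illustration that relies on dlib::tensor's assignment from a scalar):

    #include <dlib/dnn.h>
    #include <vector>
    using namespace dlib;

    template <typename net_type>
    void collect_params_and_zero_gradients(net_type& net)
    {
        // The visitor may now take just the parameter tensor...
        std::vector<tensor*> params;
        visit_layer_parameters(net, [&](tensor& t) { params.push_back(&t); });

        // ...or it may still take the computational layer index as well.
        visit_layer_parameter_gradients(net, [](size_t idx, tensor& grad) {
            (void)idx;  // index of the computational layer, available if needed
            grad = 0;   // set every element of the gradient tensor to zero
        });
    }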
@@ -1676,6 +1676,8 @@ namespace dlib
               add_tag_layer.
             - v is a function object with a signature equivalent to:
                 v(size_t idx, tensor& t)
+              or:
+                v(tensor& t)
         ensures
             - Loops over all the computational layers (i.e. layers with parameters, as
               opposed to loss, tag, or input layers) in net and passes their parameters to
@@ -1709,6 +1711,8 @@ namespace dlib
               add_tag_layer.
             - v is a function object with a signature equivalent to:
                 v(size_t idx, tensor& t)
+              or:
+                v(tensor& t)
         ensures
             - Loops over all the computational layers (i.e. layers with parameters, as
               opposed to loss, tag, or input layers) in net and passes their parameter
@@ -1743,7 +1747,9 @@ namespace dlib
               add_tag_layer.
             - v is a function object with a signature equivalent to:
                 v(size_t idx, any_net_type& t)
-              That is, it must take a size_t and then any of the network types such as
+              or:
+                v(any_net_type& t)
+              That is, it takes an optional size_t and then any of the network types such as
               add_layer, add_loss_layer, etc.
         ensures
             - Loops over all the layers in net and calls v() on them. To be specific, this
@@ -1767,7 +1773,9 @@ namespace dlib
               add_tag_layer.
             - v is a function object with a signature equivalent to:
                 v(size_t idx, any_net_type& t)
-              That is, it must take a size_t and then any of the network types such as
+              or:
+                v(any_net_type& t)
+              That is, it takes an optional size_t and then any of the network types such as
               add_layer, add_loss_layer, etc.
         ensures
             - Loops over all the layers in net and calls v() on them. The loop happens in
@@ -1778,6 +1786,64 @@ namespace dlib
                 v(i-1, layer<i-1>(net));
     !*/
 
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename net_type,
+        typename visitor
+        >
+    void visit_computational_layers(
+        net_type& net,
+        visitor v
+    );
+    /*!
+        requires
+            - net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or
+              add_tag_layer.
+            - v is a function object with a signature equivalent to:
+                v(size_t idx, any_computational_layer& t)
+              or:
+                v(any_computational_layer& t)
+              That is, it takes an optional size_t and then any of the computational layers. E.g.
+              one of the layer types defined in dlib/dnn/layers_abstract.h like fc_ or conv_.
+        ensures
+            - Loops over all the computational layers in net and calls v() on them. To be specific, this
+              function essentially performs the following:
+
+                for (size_t i = 0; i < net_type::num_layers; ++i)
+                    if (layer<i>(net) is an add_layer type, i.e. it adds a computational layer)
+                        v(i, layer<i>(net).layer_details());
+    !*/
+
+    template <
+        size_t begin,
+        size_t end,
+        typename net_type,
+        typename visitor
+        >
+    void visit_computational_layers_range(
+        net_type& net,
+        visitor v
+    );
+    /*!
+        requires
+            - net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or
+              add_tag_layer.
+            - v is a function object with a signature equivalent to:
+                v(size_t idx, any_computational_layer& t)
+              or:
+                v(any_computational_layer& t)
+              That is, it takes an optional size_t and then any of the computational layers. E.g.
+              one of the layer types defined in dlib/dnn/layers_abstract.h like fc_ or conv_.
+        ensures
+            - Loops over all the computational layers in the range [begin,end) in net and calls v()
+              on them. To be specific, this function essentially performs the following:
+
+                for (size_t i = begin; i < end; ++i)
+                    if (layer<i>(net) is an add_layer type, i.e. it adds a computational layer)
+                        v(i, layer<i>(net).layer_details());
+    !*/
+
 // ----------------------------------------------------------------------------------------
 
     template <
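A short usage sketch of the range overload documented above (assumes a network object named net with at least five layers; visitor_weight_decay_multiplier is the visitor from the example program changed near the end of this diff, and the printing lambda assumes <iostream> plus the operator<< that dlib's built-in layers provide):

    // Apply a visitor only to the computational layers with index in [0, 2).
    visit_computational_layers_range<0, 2>(net, visitor_weight_decay_multiplier(1));

    // A generic lambda is callable for every computational layer, so this prints
    // each computational layer in [0, 5) together with its index.
    visit_computational_layers_range<0, 5>(net, [](size_t idx, auto& l) {
        std::cout << "layer " << idx << ": " << l << std::endl;
    });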
@@ -1796,13 +1862,14 @@ namespace dlib
               add_tag_layer.
             - v is a function object with a signature equivalent to:
                 v(size_t idx, any_net_type& t)
-              That is, it must take a size_t and then any of the network types such as
+              or:
+                v(any_net_type& t)
+              That is, it takes an optional size_t and then any of the network types such as
               add_layer, add_loss_layer, etc.
             - begin <= end <= net_type::num_layers
         ensures
             - Loops over the layers in the range [begin,end) in net and calls v() on them.
-              The loop happens in the reverse order of visit_layers(). To be specific,
-              this function essentially performs the following:
+              To be specific, this function essentially performs the following:
 
                 for (size_t i = begin; i < end; ++i)
                     v(i, layer<i>(net));
@@ -1824,7 +1891,9 @@ namespace dlib
               add_tag_layer.
             - v is a function object with a signature equivalent to:
                 v(size_t idx, any_net_type& t)
-              That is, it must take a size_t and then any of the network types such as
+              or:
+                v(any_net_type& t)
+              That is, it takes an optional size_t and then any of the network types such as
               add_layer, add_loss_layer, etc.
             - begin <= end <= net_type::num_layers
         ensures
@@ -667,7 +667,7 @@ namespace dlib
             // periodically copy these tensors to all the other devices to make sure the
             // different GPUs don't go out of sync.
             std::vector<tensor*> reference_params;
-            visit_layer_parameters(devices[0]->net, [&](size_t, tensor& t) { reference_params.push_back(&t); });
+            visit_layer_parameters(devices[0]->net, [&](tensor& t) { reference_params.push_back(&t); });
 
             // If no external thread pools vector was passed, then create one that will
             // be automatically destructed as soon as the dnn_trainer object goes out of
@@ -1995,14 +1995,14 @@ namespace
             pres<res<res<res_down< // 2 prelu layers here
             tag4<repeat<9,pres,    // 9 groups, each containing 2 prelu layers
             res_down<
-            res<
+            leaky_relu<res<
             input<matrix<unsigned char>>
-            >>>>>>>>>>>;
+            >>>>>>>>>>>>;
 
         net_type2 pnet;
 
-        DLIB_TEST_MSG(pnet.num_layers == 131, pnet.num_layers);
-        DLIB_TEST_MSG(pnet.num_computational_layers == 109, pnet.num_computational_layers);
+        DLIB_TEST_MSG(pnet.num_layers == 132, pnet.num_layers);
+        DLIB_TEST_MSG(pnet.num_computational_layers == 110, pnet.num_computational_layers);
 
         std::vector<bool> hit(pnet.num_computational_layers, false);
         size_t count = 0;
@@ -2017,6 +2017,14 @@ namespace
         for (auto x : hit2)
             DLIB_TEST(x);
         DLIB_TEST(count == pnet.num_computational_layers);
+
+        int num_relus = 0;
+        visit_computational_layers(pnet, [&num_relus](relu_&) { ++num_relus; });
+        DLIB_TEST(num_relus == 10);
+
+        DLIB_TEST(layer<leaky_relu>(pnet).layer_details().get_alpha() == 0.01f);
+        visit_computational_layers(pnet, [](leaky_relu_& l) { l = leaky_relu_(0.001f); });
+        DLIB_TEST(layer<leaky_relu>(pnet).layer_details().get_alpha() == 0.001f);
     }
 
     float tensor_read_cpu(const tensor& t, long i, long k, long r, long c)
@@ -47,16 +47,10 @@ public:
     visitor_weight_decay_multiplier(double new_weight_decay_multiplier_) :
         new_weight_decay_multiplier(new_weight_decay_multiplier_) {}
 
-    template<typename input_layer_type>
-    void operator()(size_t , input_layer_type& ) const
+    template <typename layer>
+    void operator()(layer& l) const
     {
-        // ignore other layers
-    }
-
-    template <typename T, typename U, typename E>
-    void operator()(size_t , add_layer<T,U,E>& l) const
-    {
-        set_weight_decay_multiplier(l.layer_details(), new_weight_decay_multiplier);
+        set_weight_decay_multiplier(l, new_weight_decay_multiplier);
     }
 
 private:
@@ -98,7 +92,7 @@ int main() try
 
     // We can use the visit_layers function to modify the weight decay of the entire
    // network:
-    visit_layers(net, visitor_weight_decay_multiplier(0.001));
+    visit_computational_layers(net, visitor_weight_decay_multiplier(0.001));
 
     // We can also use predefined visitors to affect the learning rate of the whole
     // network.
@@ -109,14 +103,14 @@ int main() try
     // visitor that is very similar to the one defined in this example.
 
     // Usually, we want to freeze the network, except for the top layers:
-    visit_layers(net.subnet().subnet(), visitor_weight_decay_multiplier(0));
+    visit_computational_layers(net.subnet().subnet(), visitor_weight_decay_multiplier(0));
     set_all_learning_rate_multipliers(net.subnet().subnet(), 0);
 
     // Alternatively, we can use the visit_layers_range to modify only a specific set of
     // layers:
-    visit_layers_range<0, 2>(net, visitor_weight_decay_multiplier(1));
+    visit_computational_layers_range<0, 2>(net, visitor_weight_decay_multiplier(1));
 
-    // Sometimes we might want to set the learning rate differently thoughout the network.
+    // Sometimes we might want to set the learning rate differently throughout the network.
     // Here we show how to use adjust the learning rate at the different ResNet50's
     // convolutional blocks:
     set_learning_rate_multipliers_range< 0, 2>(net, 1);
@@ -143,7 +137,7 @@ int main() try
     // We can also print the number of parameters of the network:
     cout << "number of network parameters: " << count_parameters(net) << endl;
 
-    // From this point on, we can finetune the new network using this pretrained backbone
+    // From this point on, we can fine-tune the new network using this pretrained backbone
     // on another task, such as the one showed in dnn_metric_learning_on_images_ex.cpp.
 
     return EXIT_SUCCESS;