mirror of https://github.com/davisking/dlib.git
Add scale_prev layer (#2171)
* Add scale_prev layer
* remove comment and fix gradient
* add test for scale_ and scale_prev_ layers
parent 77e6255fdd
commit 9d60949a3a
@@ -2723,6 +2723,128 @@ namespace dlib
    using scale9_ = scale_<tag9>;
    using scale10_ = scale_<tag10>;

// ----------------------------------------------------------------------------------------

    template <
        template<typename> class tag
        >
    class scale_prev_
    {
    public:
        const static unsigned long id = tag_id<tag>::id;

        scale_prev_()
        {
        }

        template <typename SUBNET>
        void setup (const SUBNET& /*sub*/)
        {
        }
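
        // forward() multiplies every channel of the previous layer's output by the
        // matching per-channel value taken from the tagged layer:
        //   output(n,k,r,c) = src(n,k,r,c) * scales(n,k)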
        template <typename SUBNET>
        void forward(const SUBNET& sub, resizable_tensor& output)
        {
            auto&& src = sub.get_output();
            auto&& scales = layer<tag>(sub).get_output();
            DLIB_CASSERT(scales.num_samples() == src.num_samples() &&
                         scales.k()           == src.k() &&
                         scales.nr()          == 1 &&
                         scales.nc()          == 1,
                         "scales.k(): " << scales.k() <<
                         "\nsrc.k(): " << src.k()
            );

            output.copy_size(src);
            tt::scale_channels(false, output, src, scales);
        }
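
        // backward() produces two gradients.  The gradient with respect to the previous
        // layer's output is the incoming gradient scaled channel-wise,
        //   d_src(n,k,r,c) = scales(n,k) * gradient_input(n,k,r,c),
        // which is another scale_channels() call accumulated into sub.get_gradient_input().
        // The gradient with respect to the tagged layer's output is the per-channel dot
        // product
        //   d_scales(n,k) = sum over r,c of src(n,k,r,c) * gradient_input(n,k,r,c),
        // computed below by aliasing the 4D tensors as matrices with num_samples()*k()
        // rows and nr()*nc() columns and taking row-wise dot products.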
        template <typename SUBNET>
        void backward(const tensor& gradient_input, SUBNET& sub, tensor& /*params_grad*/)
        {
            auto&& src = sub.get_output();
            auto&& scales = layer<tag>(sub).get_output();
            tt::scale_channels(true, sub.get_gradient_input(), gradient_input, scales);

            if (reshape_src.num_samples() != src.num_samples())
            {
                reshape_scales = alias_tensor(src.num_samples()*src.k());
                reshape_src = alias_tensor(src.num_samples()*src.k(),src.nr()*src.nc());
            }

            auto&& scales_grad = layer<tag>(sub).get_gradient_input();
            auto sgrad = reshape_scales(scales_grad);
            tt::dot_prods(true, sgrad, reshape_src(src), reshape_src(gradient_input));
        }
        const tensor& get_layer_params() const { return params; }
        tensor& get_layer_params() { return params; }

        inline dpoint map_input_to_output (const dpoint& p) const { return p; }
        inline dpoint map_output_to_input (const dpoint& p) const { return p; }

        friend void serialize(const scale_prev_& item, std::ostream& out)
        {
            serialize("scale_prev_", out);
            serialize(item.reshape_scales, out);
            serialize(item.reshape_src, out);
        }

        friend void deserialize(scale_prev_& item, std::istream& in)
        {
            std::string version;
            deserialize(version, in);
            if (version != "scale_prev_")
                throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::scale_prev_.");
            deserialize(item.reshape_scales, in);
            deserialize(item.reshape_src, in);
        }

        friend std::ostream& operator<<(std::ostream& out, const scale_prev_& /*item*/)
        {
            out << "scale_prev"<<id;
            return out;
        }

        friend void to_xml(const scale_prev_& /*item*/, std::ostream& out)
        {
            out << "<scale_prev tag='"<<id<<"'/>\n";
        }

    private:
        alias_tensor reshape_scales;
        alias_tensor reshape_src;
        resizable_tensor params;
    };

    template <
        template<typename> class tag,
        typename SUBNET
        >
    using scale_prev = add_layer<scale_prev_<tag>, SUBNET>;

    template <typename SUBNET> using scale_prev1 = scale_prev<tag1, SUBNET>;
    template <typename SUBNET> using scale_prev2 = scale_prev<tag2, SUBNET>;
    template <typename SUBNET> using scale_prev3 = scale_prev<tag3, SUBNET>;
    template <typename SUBNET> using scale_prev4 = scale_prev<tag4, SUBNET>;
    template <typename SUBNET> using scale_prev5 = scale_prev<tag5, SUBNET>;
    template <typename SUBNET> using scale_prev6 = scale_prev<tag6, SUBNET>;
    template <typename SUBNET> using scale_prev7 = scale_prev<tag7, SUBNET>;
    template <typename SUBNET> using scale_prev8 = scale_prev<tag8, SUBNET>;
    template <typename SUBNET> using scale_prev9 = scale_prev<tag9, SUBNET>;
    template <typename SUBNET> using scale_prev10 = scale_prev<tag10, SUBNET>;

    using scale_prev1_ = scale_prev_<tag1>;
    using scale_prev2_ = scale_prev_<tag2>;
    using scale_prev3_ = scale_prev_<tag3>;
    using scale_prev4_ = scale_prev_<tag4>;
    using scale_prev5_ = scale_prev_<tag5>;
    using scale_prev6_ = scale_prev_<tag6>;
    using scale_prev7_ = scale_prev_<tag7>;
    using scale_prev8_ = scale_prev_<tag8>;
    using scale_prev9_ = scale_prev_<tag9>;
    using scale_prev10_ = scale_prev_<tag10>;

// ----------------------------------------------------------------------------------------

    class relu_
@@ -2656,6 +2656,80 @@ namespace dlib
// ----------------------------------------------------------------------------------------

    template <
        template<typename> class tag
        >
    class scale_prev_
    {
        /*!
            WHAT THIS OBJECT REPRESENTS
                This is an implementation of the EXAMPLE_COMPUTATIONAL_LAYER_ interface
                defined above.  This layer scales the output channels of the tagged layer
                by multiplying them with the output of the previous layer.  It is exactly
                the same as the scale_ layer, but with the inputs swapped, which is useful
                since it allows mapping between inputs and outputs of this layer.  To be
                specific:
                    - Let INPUT  == sub.get_output()
                    - Let SCALES == layer<tag>(sub).get_output()
                    - This layer takes INPUT and SCALES as input.
                    - The output of this layer has the same dimensions as INPUT.
                    - This layer requires:
                        - SCALES.num_samples() == INPUT.num_samples()
                        - SCALES.k()  == INPUT.k()
                        - SCALES.nr() == 1
                        - SCALES.nc() == 1
                    - The output tensor is produced by pointwise multiplying SCALES with
                      INPUT at each spatial location.  Therefore, if OUT is the output of
                      this layer then we would have:
                        OUT(n,k,r,c) == INPUT(n,k,r,c)*SCALES(n,k)
        !*/

    public:
        scale_prev_(
        );

        template <typename SUBNET> void setup (const SUBNET& sub);
        template <typename SUBNET> void forward(const SUBNET& sub, resizable_tensor& output);
        template <typename SUBNET> void backward(const tensor& gradient_input, SUBNET& sub, tensor& params_grad);
        dpoint map_input_to_output(dpoint p) const;
        dpoint map_output_to_input(dpoint p) const;
        const tensor& get_layer_params() const;
        tensor& get_layer_params();
        /*!
            These functions are implemented as described in the EXAMPLE_COMPUTATIONAL_LAYER_ interface.
        !*/
    };


    template <
        template<typename> class tag,
        typename SUBNET
        >
    using scale_prev = add_layer<scale_prev_<tag>, SUBNET>;

    // Here we add some convenient aliases for using scale_prev_ with the tag layers.
    template <typename SUBNET> using scale_prev1 = scale_prev<tag1, SUBNET>;
    template <typename SUBNET> using scale_prev2 = scale_prev<tag2, SUBNET>;
    template <typename SUBNET> using scale_prev3 = scale_prev<tag3, SUBNET>;
    template <typename SUBNET> using scale_prev4 = scale_prev<tag4, SUBNET>;
    template <typename SUBNET> using scale_prev5 = scale_prev<tag5, SUBNET>;
    template <typename SUBNET> using scale_prev6 = scale_prev<tag6, SUBNET>;
    template <typename SUBNET> using scale_prev7 = scale_prev<tag7, SUBNET>;
    template <typename SUBNET> using scale_prev8 = scale_prev<tag8, SUBNET>;
    template <typename SUBNET> using scale_prev9 = scale_prev<tag9, SUBNET>;
    template <typename SUBNET> using scale_prev10 = scale_prev<tag10, SUBNET>;
    using scale_prev1_ = scale_prev_<tag1>;
    using scale_prev2_ = scale_prev_<tag2>;
    using scale_prev3_ = scale_prev_<tag3>;
    using scale_prev4_ = scale_prev_<tag4>;
    using scale_prev5_ = scale_prev_<tag5>;
    using scale_prev6_ = scale_prev_<tag6>;
    using scale_prev7_ = scale_prev_<tag7>;
    using scale_prev8_ = scale_prev_<tag8>;
    using scale_prev9_ = scale_prev_<tag9>;
    using scale_prev10_ = scale_prev_<tag10>;
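
    // Illustrative sketch, not part of this commit: because the inputs are swapped
    // relative to scale_, the data tensor can flow straight through as the "previous"
    // layer while a tagged side branch computes the per-channel scales, which is the
    // shape of a squeeze-and-excitation style gate.  The se_gate name, the sig/relu
    // bottleneck, and the channel counts below are assumptions made purely for this
    // example; it presumes SUBNET already produces N channels so that the requirement
    // SCALES.k() == INPUT.k() holds.
    template <long N, typename SUBNET>
    using se_gate = scale_prev2<skip1<
                    tag2<sig<con<N,1,1,1,1,        // one scale per channel, in [0,1]
                    relu<con<N/4,1,1,1,1,          // bottleneck on the pooled features
                    avg_pool_everything<           // global average pooling: nr()==nc()==1
                    tag1<SUBNET>>>>>>>>>;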

// ----------------------------------------------------------------------------------------

    template<
        template<typename> class... TAG_TYPES
        >
@@ -3706,6 +3706,58 @@ namespace
    }

    void test_layers_scale_and_scale_prev()
    {
        print_spinner();
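
        // Both networks compute the same thing: the 3-channel input image is scaled
        // per channel by the 1x1x3 output of the con layer, which operates on the
        // globally averaged input.  net_type1 writes this with scale_ (the previous
        // layer supplies the scales, the tagged layer tag1 supplies the data), while
        // net_type2 writes it with scale_prev_ (skip1 makes the input image the
        // previous layer, i.e. the data, and tag2 marks the con output, i.e. the scales).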
        using net_type1 = scale1<con<3,1,1,1,1,avg_pool_everything<tag1<input_rgb_image>>>>;
        using net_type2 = scale_prev2<skip1<tag2<con<3,1,1,1,1,avg_pool_everything<tag1<input_rgb_image>>>>>>;

        dlib::tt::tensor_rand rnd;
        dlib::resizable_tensor x(1, 3, 64, 64);
        rnd.fill_gaussian(x);
        net_type1 net1;
        net_type2 net2;
        net1.forward(x);
        net2.forward(x);

        // make sure both convolutional layers have the same weights
        layer<3>(net2).layer_details() = layer<1>(net1).layer_details();
        const auto& params1 = layer<1>(net1).layer_details().get_layer_params();
        const auto& params2 = layer<3>(net2).layer_details().get_layer_params();
        DLIB_CASSERT(params1.size() == params2.size());
        for (size_t i = 0; i < params1.size(); ++i)
        {
            DLIB_CASSERT(*(params1.begin() + i) == *(params2.begin() + i));
        }
        net2.forward(x);

        // make sure both outputs are the same
        const auto& out1 = net1.get_output();
        const auto& out2 = net2.get_output();
        DLIB_TEST(out1.size() == out2.size());
        for (size_t i = 0; i < out1.size(); ++i)
        {
            DLIB_TEST(*(out1.begin() + i) == *(out2.begin() + i));
        }

        // make sure gradients are the same (within some precision)
        const double epsilon = 1e-4;
        dlib::resizable_tensor gradient(out1);
        rnd.fill_gaussian(gradient);

        net1.back_propagate_error(x, gradient);
        const auto& grad1 = layer<1>(net1).get_parameter_gradient();

        net2.back_propagate_error(x, gradient);
        const auto& grad2 = layer<3>(net2).get_parameter_gradient();

        DLIB_TEST(grad1.size() == grad2.size());
        for (size_t i = 0; i < grad1.size(); ++i)
        {
            DLIB_TEST(::std::abs(*(grad1.begin() + i) - *(grad2.begin() + i)) < epsilon);
        }
    }

// ----------------------------------------------------------------------------------------

    // This test really just checks if the mmod loss goes negative when a whole lot of overlapping
@@ -3887,6 +3939,7 @@ namespace
            test_loss_dot();
            test_loss_multimulticlass_log();
            test_loss_mmod();
            test_layers_scale_and_scale_prev();
        }

        void perform_test()