Refactored the image pyramid code. Now there is just one templated object,
pyramid_down, and you give it the downsampling amount as a template argument.
2024-11-01 10:14:53 +08:00 · 2013-10-03 21:36:51 -04:00 · 2013-10-03 21:36:51 -04:00 · e0a6e30581
commit e0a6e30581
parent 39ed906c39
7 changed files with 653 additions and 2262 deletions
--- a/dlib/image_transforms/image_pyramid.h
+++ b/dlib/image_transforms/image_pyramid.h
--- a/dlib/image_transforms/image_pyramid_abstract.h
+++ b/dlib/image_transforms/image_pyramid_abstract.h
@@ -10,11 +10,18 @@
 namespace dlib
 {

+    template <
+        unsigned int N
+        >
    class pyramid_down : noncopyable
    {
        /*!
+            REQUIREMENTS ON N
+                N > 1
+
            WHAT THIS OBJECT REPRESENTS
-                This is a simple functor to help create image pyramids.
+                This is a simple functor to help create image pyramids.  In particular, it
+                downsamples images at a ratio of N to N-1.


                WARNING, when mapping rectangles from one layer of a pyramid
@@ -43,18 +50,14 @@ namespace dlib
                - pixel_traits<typename in_image_type::type>::has_alpha == false
                - pixel_traits<typename out_image_type::type>::has_alpha == false
            ensures
-                - #down will contain an image that is roughly half the size of the original
-                  image.  To be specific, this function performs the following steps:
-                    - 1. Applies a 5x5 Gaussian filter to the original image to smooth it a little.
-                    - 2. Every other row and column is discarded to create an image half the size
-                         of the original.  This smaller image is stored in #down.
-                - if both input and output images contain RGB pixels then the downsampled image will
+                - #down will contain an image that is roughly (N-1)/N times the size of the
+                  original image.  
+                - If both input and output images contain RGB pixels then the downsampled image will
                  be in color.  Otherwise, the downsampling will be performed in a grayscale mode.
                - The location of a point P in original image will show up at point point_down(P)
                  in the #down image.  
-                - Note that some points on the border of the original image will correspond to 
-                  points outside the #down image.  This is because the 5x5 filter is not applied 
-                  at the borders.
+                - Note that some points on the border of the original image might correspond to 
+                  points outside the #down image.  
        !*/

    // -------------------------------
@@ -151,61 +154,6 @@ namespace dlib

    };

-// ----------------------------------------------------------------------------------------
-
-    class pyramid_down_3_2 : noncopyable
-    {
-        /*!
-            WHAT THIS OBJECT REPRESENTS
-                This is a function object with an interface identical to pyramid_down (defined
-                at the top of this file) except that it downsamples images at a ratio of 3 to 2
-                instead of 2 to 1.
-        !*/
-    };
-
-
-// ----------------------------------------------------------------------------------------
-
-    class pyramid_down_4_3 : noncopyable
-    {
-        /*!
-            WHAT THIS OBJECT REPRESENTS
-                This is a function object with an interface identical to pyramid_down (defined
-                at the top of this file) except that it downsamples images at a ratio of 4 to 3
-                instead of 2 to 1.
-        !*/
-    };
-
-// ----------------------------------------------------------------------------------------
-
-    class pyramid_down_5_4 : noncopyable
-    {
-        /*!
-            WHAT THIS OBJECT REPRESENTS
-                This is a function object with an interface identical to pyramid_down (defined
-                at the top of this file) except that it downsamples images at a ratio of 5 to 4
-                instead of 2 to 1.
-        !*/
-    };
-
-// ----------------------------------------------------------------------------------------
-
-    template <
-        unsigned int N
-        >
-    class pyramid_down_generic : noncopyable
-    {
-        /*!
-            REQUIREMENTS ON N
-                N > 1
-
-            WHAT THIS OBJECT REPRESENTS
-                This is a function object with an interface identical to pyramid_down
-                (defined at the top of this file) except that it downsamples images at a
-                ratio of N to N-1 instead of 2 to 1.
-        !*/
-    };
-
 // ----------------------------------------------------------------------------------------

    class pyramid_disable : noncopyable
--- a/dlib/test/object_detector.cpp
+++ b/dlib/test/object_detector.cpp
@@ -379,7 +379,7 @@ namespace
        make_simple_test_data(images, object_locations);

        typedef hashed_feature_image<hog_image<3,3,1,4,hog_signed_gradient,hog_full_interpolation> > feature_extractor_type;
-        typedef scan_image_pyramid<pyramid_down, feature_extractor_type> image_scanner_type;
+        typedef scan_image_pyramid<pyramid_down<2>, feature_extractor_type> image_scanner_type;
        image_scanner_type scanner;
        const rectangle object_box = compute_box_dimensions(1,35*35);
        scanner.add_detection_template(object_box, create_grid_detection_template(object_box,2,2));
@@ -463,7 +463,7 @@ namespace
        make_simple_test_data(images, object_locations);

        typedef hashed_feature_image<hog_image<3,3,1,4,hog_signed_gradient,hog_full_interpolation> > feature_extractor_type;
-        typedef scan_image_pyramid<pyramid_down, feature_extractor_type> image_scanner_type;
+        typedef scan_image_pyramid<pyramid_down<2>, feature_extractor_type> image_scanner_type;
        image_scanner_type scanner;
        const rectangle object_box = compute_box_dimensions(1,35*35);
        std::vector<rectangle> mboxes;
@@ -512,7 +512,7 @@ namespace
        make_simple_test_data(images, object_locations);

        typedef hashed_feature_image<fine_hog_image<3,3,2,4,hog_signed_gradient> > feature_extractor_type;
-        typedef scan_image_pyramid<pyramid_down, feature_extractor_type> image_scanner_type;
+        typedef scan_image_pyramid<pyramid_down<2>, feature_extractor_type> image_scanner_type;
        image_scanner_type scanner;
        const rectangle object_box = compute_box_dimensions(1,35*35);
        scanner.add_detection_template(object_box, create_grid_detection_template(object_box,2,2));
@@ -555,7 +555,7 @@ namespace
        make_simple_test_data(images, object_locations);

        typedef hashed_feature_image<poly_image<2> > feature_extractor_type;
-        typedef scan_image_pyramid<pyramid_down, feature_extractor_type> image_scanner_type;
+        typedef scan_image_pyramid<pyramid_down<2>, feature_extractor_type> image_scanner_type;
        image_scanner_type scanner;
        const rectangle object_box = compute_box_dimensions(1,35*35);
        scanner.add_detection_template(object_box, create_grid_detection_template(object_box,2,2));
@@ -598,7 +598,7 @@ namespace
        make_simple_test_data(images, object_locations);

        typedef hashed_feature_image<poly_image<2> > feature_extractor_type;
-        typedef scan_image_pyramid<pyramid_down_3_2, feature_extractor_type> image_scanner_type;
+        typedef scan_image_pyramid<pyramid_down<3>, feature_extractor_type> image_scanner_type;
        image_scanner_type scanner;
        const rectangle object_box = compute_box_dimensions(1,35*35);
        std::vector<rectangle> mboxes;
@@ -647,12 +647,12 @@ namespace
        make_simple_test_data(images, object_locations);

        typedef nearest_neighbor_feature_image<poly_image<5> > feature_extractor_type;
-        typedef scan_image_pyramid<pyramid_down, feature_extractor_type> image_scanner_type;
+        typedef scan_image_pyramid<pyramid_down<2>, feature_extractor_type> image_scanner_type;
        image_scanner_type scanner;

        setup_grid_detection_templates(scanner, object_locations, 2, 2);
        feature_extractor_type nnfe;
-        pyramid_down pyr_down;
+        pyramid_down<2> pyr_down;
        poly_image<5> polyi;
        nnfe.set_basis(randomly_sample_image_features(images, pyr_down, polyi, 80));
        scanner.copy_configuration(nnfe);
@@ -697,7 +697,7 @@ namespace
        image_scanner_type scanner;

        feature_extractor_type nnfe;
-        pyramid_down pyr_down;
+        pyramid_down<2> pyr_down;
        poly_image<5> polyi;
        nnfe.set_basis(randomly_sample_image_features(images, pyr_down, polyi, 80));
        scanner.copy_configuration(nnfe);
@@ -737,7 +737,7 @@ namespace
        std::vector<std::vector<rectangle> > object_locations;
        make_simple_test_data(images, object_locations);

-        typedef scan_image_pyramid<pyramid_down_5_4, very_simple_feature_extractor> image_scanner_type;
+        typedef scan_image_pyramid<pyramid_down<5>, very_simple_feature_extractor> image_scanner_type;
        image_scanner_type scanner;
        const rectangle object_box = compute_box_dimensions(1,70*70);
        scanner.add_detection_template(object_box, create_grid_detection_template(object_box,2,2));
@@ -771,7 +771,7 @@ namespace

    class pyramid_down_funny : noncopyable
    {
-        pyramid_down pyr;
+        pyramid_down<2> pyr;
    public:

        template <typename T>
--- a/dlib/test/pyramid_down.cpp
+++ b/dlib/test/pyramid_down.cpp
@@ -23,7 +23,7 @@ namespace
 void test_pyramid_down_grayscale()
 {
    array2d<unsigned char> img, down;
-    pyramid_down pyr;
+    pyramid_down<2> pyr;

    img.set_size(300,264);

@@ -52,7 +52,7 @@ void test_pyramid_down_rgb()
 {
    array2d<rgb_pixel> img;
    array2d<bgr_pixel> down;
-    pyramid_down pyr;
+    pyramid_down<2> pyr;

    img.set_size(231, 351);

@@ -325,59 +325,59 @@ void test_pyramid_down_small_sizes()
            test_pyramid_down_rgb();

            print_spinner();
-            dlog << LINFO << "call test_pyramid_down_small_sizes<pyramid_down>();";
-            test_pyramid_down_small_sizes<pyramid_down>();
-            dlog << LINFO << "call test_pyramid_down_small_sizes<pyramid_down_3_2>();";
-            test_pyramid_down_small_sizes<pyramid_down_3_2>();
-            dlog << LINFO << "call test_pyramid_down_small_sizes<pyramid_down_4_3>();";
-            test_pyramid_down_small_sizes<pyramid_down_4_3>();
-            dlog << LINFO << "call test_pyramid_down_small_sizes<pyramid_down_5_4>();";
-            test_pyramid_down_small_sizes<pyramid_down_5_4>();
+            dlog << LINFO << "call test_pyramid_down_small_sizes<pyramid_down<2> >();";
+            test_pyramid_down_small_sizes<pyramid_down<2> >();
+            dlog << LINFO << "call test_pyramid_down_small_sizes<pyramid_down<3> >();";
+            test_pyramid_down_small_sizes<pyramid_down<3> >();
+            dlog << LINFO << "call test_pyramid_down_small_sizes<pyramid_down<4> >();";
+            test_pyramid_down_small_sizes<pyramid_down<4> >();
+            dlog << LINFO << "call test_pyramid_down_small_sizes<pyramid_down<5> >();";
+            test_pyramid_down_small_sizes<pyramid_down<5> >();
            dlog << LINFO << "call test_pyramid_down_small_sizes<pyramid_disable>();";
            test_pyramid_down_small_sizes<pyramid_disable>();
-            dlog << LINFO << "call test_pyramid_down_small_sizes<pyramid_down_generic<3> >();";
-            test_pyramid_down_small_sizes<pyramid_down_generic<3> >();
+            dlog << LINFO << "call test_pyramid_down_small_sizes<pyramid_down<9> >();";
+            test_pyramid_down_small_sizes<pyramid_down<9> >();

            print_spinner();
-            dlog << LINFO << "call test_pyramid_down_rgb2<pyramid_down>();";
-            test_pyramid_down_rgb2<pyramid_down>();
+            dlog << LINFO << "call test_pyramid_down_rgb2<pyramid_down<2> >();";
+            test_pyramid_down_rgb2<pyramid_down<2> >();

            print_spinner();
-            dlog << LINFO << "call test_pyramid_down_rgb2<pyramid_down_3_2>();";
-            test_pyramid_down_rgb2<pyramid_down_3_2>();
+            dlog << LINFO << "call test_pyramid_down_rgb2<pyramid_down<3> >();";
+            test_pyramid_down_rgb2<pyramid_down<3> >();

            print_spinner();
-            dlog << LINFO << "call test_pyramid_down_rgb2<pyramid_down_4_3>();";
-            test_pyramid_down_rgb2<pyramid_down_4_3>();
+            dlog << LINFO << "call test_pyramid_down_rgb2<pyramid_down<4> >();";
+            test_pyramid_down_rgb2<pyramid_down<4> >();

            print_spinner();
-            dlog << LINFO << "call test_pyramid_down_rgb2<pyramid_down_5_4>();";
-            test_pyramid_down_rgb2<pyramid_down_5_4>();
+            dlog << LINFO << "call test_pyramid_down_rgb2<pyramid_down<5> >();";
+            test_pyramid_down_rgb2<pyramid_down<5> >();

            print_spinner();
-            dlog << LINFO << "call test_pyramid_down_rgb2<pyramid_down_generic<5> >();";
-            test_pyramid_down_rgb2<pyramid_down_generic<5> >();
+            dlog << LINFO << "call test_pyramid_down_rgb2<pyramid_down<8> >();";
+            test_pyramid_down_rgb2<pyramid_down<8> >();


            print_spinner();
-            dlog << LINFO << "call test_pyramid_down_grayscale2<pyramid_down>();";
-            test_pyramid_down_grayscale2<pyramid_down>();
+            dlog << LINFO << "call test_pyramid_down_grayscale2<pyramid_down<2> >();";
+            test_pyramid_down_grayscale2<pyramid_down<2> >();

            print_spinner();
-            dlog << LINFO << "call test_pyramid_down_grayscale2<pyramid_down_3_2>();";
-            test_pyramid_down_grayscale2<pyramid_down_3_2>();
+            dlog << LINFO << "call test_pyramid_down_grayscale2<pyramid_down<3> >();";
+            test_pyramid_down_grayscale2<pyramid_down<3> >();

            print_spinner();
-            dlog << LINFO << "call test_pyramid_down_grayscale2<pyramid_down_4_3>();";
-            test_pyramid_down_grayscale2<pyramid_down_4_3>();
+            dlog << LINFO << "call test_pyramid_down_grayscale2<pyramid_down<4> >();";
+            test_pyramid_down_grayscale2<pyramid_down<4> >();

            print_spinner();
-            dlog << LINFO << "call test_pyramid_down_grayscale2<pyramid_down_5_4>();";
-            test_pyramid_down_grayscale2<pyramid_down_5_4>();
+            dlog << LINFO << "call test_pyramid_down_grayscale2<pyramid_down<5> >();";
+            test_pyramid_down_grayscale2<pyramid_down<5> >();

            print_spinner();
-            dlog << LINFO << "call test_pyramid_down_grayscale2<pyramid_down_generic<6> >();";
-            test_pyramid_down_grayscale2<pyramid_down_generic<6> >();
+            dlog << LINFO << "call test_pyramid_down_grayscale2<pyramid_down<6> >();";
+            test_pyramid_down_grayscale2<pyramid_down<6> >();
        }
    } a;

--- a/examples/object_detector_advanced_ex.cpp
+++ b/examples/object_detector_advanced_ex.cpp
@@ -196,7 +196,7 @@ int main()
        make_simple_test_data(images, object_locations);


-        typedef scan_image_pyramid<pyramid_down_5_4, very_simple_feature_extractor> image_scanner_type;
+        typedef scan_image_pyramid<pyramid_down<5>, very_simple_feature_extractor> image_scanner_type;
        image_scanner_type scanner;
        // Instead of using setup_grid_detection_templates() like in object_detector_ex.cpp, lets manually
        // setup the sliding window box.  We use a window with the same shape as the white boxes we
--- a/examples/object_detector_ex.cpp
+++ b/examples/object_detector_ex.cpp
@@ -145,11 +145,12 @@ int main()
                      parameters yourself.  They are automatically populated by the 
                      structural_object_detection_trainer.

-                The sliding window classifiers described above are applied to every level of an image
-                pyramid.   So you need to tell scan_image_pyramid what kind of pyramid you want to
-                use.  In this case we are using pyramid_down which downsamples each pyramid layer by
-                half (dlib also contains other version of pyramid_down which result in finer grained
-                pyramids).
+                The sliding window classifiers described above are applied to every level of an
+                image pyramid.  So you need to tell scan_image_pyramid what kind of pyramid you want
+                to use.  In this case we are using pyramid_down<2> which downsamples each pyramid
+                layer by half (if you want to use a finer image pyramid then just change the
+                template argument to a larger value.  For example, using pyramid_down<5> would
+                downsample each layer by a ratio of 5 to 4).

                Finally, some of the feature extraction zones are allowed to move freely within the
                object box.  This means that when we are sliding the classifier over an image, some
@@ -168,7 +169,7 @@ int main()
                feature extraction regions.
        */
        typedef hashed_feature_image<hog_image<3,3,1,4,hog_signed_gradient,hog_full_interpolation> > feature_extractor_type;
-        typedef scan_image_pyramid<pyramid_down, feature_extractor_type> image_scanner_type;
+        typedef scan_image_pyramid<pyramid_down<2>, feature_extractor_type> image_scanner_type;
        image_scanner_type scanner;

        // The hashed_feature_image in the scanner needs to be supplied with a hash function capable 
--- a/examples/train_object_detector.cpp
+++ b/examples/train_object_detector.cpp
@@ -133,7 +133,7 @@ int main(int argc, char** argv)


        typedef hashed_feature_image<hog_image<4,4,1,9,hog_signed_gradient,hog_full_interpolation> > feature_extractor_type;
-        typedef scan_image_pyramid<pyramid_down_3_2, feature_extractor_type> image_scanner_type;
+        typedef scan_image_pyramid<pyramid_down<3>, feature_extractor_type> image_scanner_type;

        if (parser.option("t") || parser.option("cross-validate"))
        {