Added scan_image_movable_parts()

2024-11-01 10:14:53 +08:00 · 2012-07-15 20:16:40 -04:00 · 2012-07-15 20:16:40 -04:00 · 3a2b8ee4a1
commit 3a2b8ee4a1
parent ab57847911
2 changed files with 286 additions and 7 deletions
--- a/dlib/image_processing/scan_image.h
+++ b/dlib/image_processing/scan_image.h
@ -22,20 +22,20 @@ namespace dlib

        inline rectangle bounding_box_of_rects (
            const std::vector<std::pair<unsigned int, rectangle> >& rects,
-            const point& origin
+            const point& position
        )
        /*!
            ensures
                - returns the smallest rectangle that contains all the 
                  rectangles in rects.  That is, returns the rectangle that
-                  contains translate_rect(rects[i].second,origin) for all valid i.
+                  contains translate_rect(rects[i].second,position) for all valid i.
        !*/
        {
            rectangle rect;

            for (unsigned long i = 0; i < rects.size(); ++i)
            {
-                rect += translate_rect(rects[i].second,origin);
+                rect += translate_rect(rects[i].second,position);
            }

            return rect;
@ -72,7 +72,7 @@ namespace dlib
    double sum_of_rects_in_images (
        const image_array_type& images,
        const std::vector<std::pair<unsigned int, rectangle> >& rects,
-        const point& origin
+        const point& position
    )
    {
        DLIB_ASSERT(all_images_same_size(images),
@ -101,13 +101,89 @@ namespace dlib
        for (unsigned long i = 0; i < rects.size(); ++i)
        {
            const typename image_array_type::type& img = images[rects[i].first];
-            const rectangle rect = get_rect(img).intersect(translate_rect(rects[i].second,origin));
+            const rectangle rect = get_rect(img).intersect(translate_rect(rects[i].second,position));
            temp += sum(matrix_cast<ptype>(subm(array_to_matrix(img), rect)));
        }

        return static_cast<double>(temp);
    }

+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename image_array_type
+        >
+    double sum_of_rects_in_images_movable_parts (
+        const image_array_type& images,
+        const rectangle& window,
+        const std::vector<std::pair<unsigned int, rectangle> >& fixed_rects,
+        const std::vector<std::pair<unsigned int, rectangle> >& movable_rects,
+        const point& position
+    )
+    {
+        DLIB_ASSERT(all_images_same_size(images) && center(window) == point(0,0),
+            "\t double sum_of_rects_in_images_movable_parts()"
+            << "\n\t Invalid arguments given to this function."
+            << "\n\t all_images_same_size(images): " << all_images_same_size(images)
+            << "\n\t center(window): " << center(window)
+        );
+#ifdef ENABLE_ASSERTS
+        for (unsigned long i = 0; i < fixed_rects.size(); ++i)
+        {
+            DLIB_ASSERT(fixed_rects[i].first < images.size(),
+                "\t double sum_of_rects_in_images_movable_parts()"
+                << "\n\t fixed_rects["<<i<<"].first must refer to a valid image."
+                << "\n\t fixed_rects["<<i<<"].first: " << fixed_rects[i].first 
+                << "\n\t images.size(): " << images.size() 
+            );
+        }
+        for (unsigned long i = 0; i < movable_rects.size(); ++i)
+        {
+            DLIB_ASSERT(movable_rects[i].first < images.size(),
+                "\t double sum_of_rects_in_images_movable_parts()"
+                << "\n\t movable_rects["<<i<<"].first must refer to a valid image."
+                << "\n\t movable_rects["<<i<<"].first: " << movable_rects[i].first 
+                << "\n\t images.size(): " << images.size() 
+            );
+            DLIB_ASSERT(center(movable_rects[i].second) == point(0,0),
+                "\t double sum_of_rects_in_images_movable_parts()"
+                << "\n\t movable_rects["<<i<<"].second: " << movable_rects[i].second 
+            );
+        }
+#endif
+        typedef typename image_array_type::type::type pixel_type;
+        typedef typename promote<pixel_type>::type ptype;
+        // Returns TOTAL_FIXED + TOTAL_MOVABLE as specified in scan_image_abstract.h; temp accumulates both.
+        ptype temp = 0;
+
+        // TOTAL_FIXED: add up the pixels under every fixed rect translated to position (clipped to the image).
+        for (unsigned long i = 0; i < fixed_rects.size(); ++i)
+        {
+            const typename image_array_type::type& img = images[fixed_rects[i].first];
+            const rectangle rect = get_rect(img).intersect(translate_rect(fixed_rects[i].second,position));
+            temp += sum(matrix_cast<ptype>(subm(array_to_matrix(img), rect)));
+        }
+
+        if (images.size() > 0)
+        {
+            // TOTAL_MOVABLE: add, for every movable rect, its best response inside the window (never below 0).
+            array2d<ptype> tempimg(images[0].nr(), images[0].nc());
+            for (unsigned long i = 0; i < movable_rects.size(); ++i)
+            {
+                const typename image_array_type::type& img = images[movable_rects[i].first];
+                // NOTE(review): presumably sum_filter() adds the box sum around each pixel into tempimg -- confirm.
+                assign_all_pixels(tempimg, 0);
+                sum_filter(img, tempimg, movable_rects[i].second);
+                // std::max<ptype>: a bare std::max(0, x) only compiles when ptype is int (deduction conflict otherwise).
+                const rectangle rect = get_rect(tempimg).intersect(translate_rect(window,position));
+                if (rect.is_empty() == false)
+                    temp += std::max<ptype>(0,max(matrix_cast<ptype>(subm(array_to_matrix(tempimg), rect))));
+            }
+        }
+
+        return static_cast<double>(temp);
+    }
+
 // ----------------------------------------------------------------------------------------

    template <
@ -188,6 +264,120 @@ namespace dlib
        }
    }

+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename image_array_type
+        >
+    void scan_image_movable_parts (
+        std::vector<std::pair<double, point> >& dets,
+        const image_array_type& images,
+        const rectangle& window,
+        const std::vector<std::pair<unsigned int, rectangle> >& fixed_rects,
+        const std::vector<std::pair<unsigned int, rectangle> >& movable_rects,
+        const double thresh,
+        const unsigned long max_dets
+    )
+    {
+        DLIB_ASSERT(images.size() > 0 && all_images_same_size(images) && 
+                    center(window) == point(0,0) && window.area() > 0,
+            "\t void scan_image_movable_parts()"
+            << "\n\t Invalid arguments given to this function."
+            << "\n\t all_images_same_size(images): " << all_images_same_size(images)
+            << "\n\t center(window): " << center(window)
+            << "\n\t window.area():  " << window.area() 
+            << "\n\t images.size():  " << images.size() 
+        );
+#ifdef ENABLE_ASSERTS
+        for (unsigned long i = 0; i < fixed_rects.size(); ++i)
+        {
+            DLIB_ASSERT(fixed_rects[i].first < images.size(),
+                "\t void scan_image_movable_parts()"
+                << "\n\t Invalid arguments given to this function."
+                << "\n\t fixed_rects["<<i<<"].first must refer to a valid image."
+                << "\n\t fixed_rects["<<i<<"].first: " << fixed_rects[i].first 
+                << "\n\t images.size(): " << images.size() 
+            );
+        }
+        for (unsigned long i = 0; i < movable_rects.size(); ++i)
+        {
+            DLIB_ASSERT(movable_rects[i].first < images.size(),
+                "\t void scan_image_movable_parts()"
+                << "\n\t Invalid arguments given to this function."
+                << "\n\t movable_rects["<<i<<"].first must refer to a valid image."
+                << "\n\t movable_rects["<<i<<"].first: " << movable_rects[i].first 
+                << "\n\t images.size(): " << images.size() 
+            );
+            DLIB_ASSERT(center(movable_rects[i].second) == point(0,0) &&
+                        movable_rects[i].second.area() > 0,
+                "\t void scan_image_movable_parts()"
+                << "\n\t Invalid arguments given to this function."
+                << "\n\t movable_rects["<<i<<"].second: " << movable_rects[i].second 
+                << "\n\t movable_rects["<<i<<"].second.area(): " << movable_rects[i].second.area()
+            );
+        }
+#endif
+
+        // dets is purely an output: whatever it held on entry is discarded before scanning starts.
+        dets.clear();
+        if (max_dets == 0)
+            return;
+        if (movable_rects.size() == 0 && fixed_rects.size() == 0)
+            return;
+
+
+        typedef typename image_array_type::type::type pixel_type;
+        typedef typename promote<pixel_type>::type ptype;
+        // accum[r][c] is built up to be the score reported for a detection at point(c,r).
+        array2d<ptype> accum(images[0].nr(), images[0].nc());
+        assign_all_pixels(accum, 0);
+        // Fixed parts.  NOTE(review): presumably sum_filter() adds each rect's box sums into accum -- confirm.
+        for (unsigned long i = 0; i < fixed_rects.size(); ++i)
+            sum_filter(images[fixed_rects[i].first], accum, fixed_rects[i].second);
+        // Movable parts.  NOTE(review): max_filter() presumably adds the window-wise max of temp (floored at 0) -- confirm.
+        array2d<ptype> temp(accum.nr(), accum.nc());
+        for (unsigned long i = 0; i < movable_rects.size(); ++i)
+        {
+            const rectangle rect = movable_rects[i].second;
+            assign_all_pixels(temp, 0);
+            sum_filter(images[movable_rects[i].first], temp, rect);
+            max_filter(temp, accum, window.width(), window.height(), 0);  
+        }
+
+        // TODO, make this block its own function and reuse it in scan_image().
+        unsigned long count = 0;  // how many locations have scored >= thresh so far
+        dlib::rand rnd;
+        for (long r = 0; r < accum.nr(); ++r)
+        {
+            for (long c = 0; c < accum.nc(); ++c)
+            {
+                const ptype cur_sum = accum[r][c];
+                if (cur_sum >= thresh)
+                {
+                    ++count;
+
+                    if (dets.size() < max_dets)
+                    {
+                        dets.push_back(std::make_pair(cur_sum, point(c,r)));
+                    }
+                    else 
+                    {
+                        // The idea here is to cause us to randomly sample possible detection
+                        // locations throughout the image rather than just stopping the detection
+                        // procedure once we hit the max_dets limit. So this method will result
+                        // in a random subsample of all the detections >= thresh being in dets
+                        // at the end of scan_image_movable_parts().
+                        const unsigned long random_index = rnd.get_random_32bit_number()%count;
+                        if (random_index < dets.size())
+                        {
+                            dets[random_index] = std::make_pair(cur_sum, point(c,r));
+                        }
+                    }
+                }
+            }
+        }
+    }
+
 // ----------------------------------------------------------------------------------------

 }
--- a/dlib/image_processing/scan_image_abstract.h
+++ b/dlib/image_processing/scan_image_abstract.h
@ -38,7 +38,7 @@ namespace dlib
    double sum_of_rects_in_images (
        const image_array_type& images,
        const std::vector<std::pair<unsigned int, rectangle> >& rects,
-        const point& origin
+        const point& position
    );
    /*!
        requires
@ -50,11 +50,52 @@ namespace dlib
              (i.e. all the rectangles must reference valid elements of images)
        ensures
            - returns the sum of the pixels inside the given rectangles.  To be precise, 
-              let RECT_SUM[i] = sum of pixels inside the rectangle translate_rect(rects[i].second, origin) 
+              let RECT_SUM[i] = sum of pixels inside the rectangle translate_rect(rects[i].second, position) 
              from the image images[rects[i].first].  Then this function returns the 
              sum of RECT_SUM[i] for all the valid values of i.
    !*/

+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename image_array_type
+        >
+    double sum_of_rects_in_images_movable_parts (
+        const image_array_type& images,
+        const rectangle& window,
+        const std::vector<std::pair<unsigned int, rectangle> >& fixed_rects,
+        const std::vector<std::pair<unsigned int, rectangle> >& movable_rects,
+        const point& position
+    );
+    /*!
+        requires
+            - image_array_type             == an implementation of array/array_kernel_abstract.h
+            - image_array_type::type       == an implementation of array2d/array2d_kernel_abstract.h
+            - image_array_type::type::type == a scalar pixel type (e.g. int rather than rgb_pixel)
+            - all_images_same_size(images) == true
+            - center(window) == point(0,0)
+            - for all valid i: 
+                - fixed_rects[i].first < images.size()
+                  (i.e. all the rectangles must reference valid elements of images)
+            - for all valid i: 
+                - movable_rects[i].first < images.size()
+                  (i.e. all the rectangles must reference valid elements of images)
+                - center(movable_rects[i].second) == point(0,0) 
+        ensures
+            - returns the sum of the pixels inside fixed_rects as well as the sum of the pixels
+              inside movable_rects when these latter rectangles are placed at their highest
+              scoring locations inside the given window.  To be precise: 
+                - let RECT_SUM(r,x) = sum of pixels inside the rectangle translate_rect(r.second, x) 
+                  from the image images[r.first].
+                - let WIN_MAX(i) = The maximum value of RECT_SUM(movable_rects[i],X) when maximizing
+                  over all the X such that translate_rect(window,position).contains(X) == true.
+
+                - let TOTAL_FIXED   == sum over all elements R in fixed_rects of: RECT_SUM(R,position)
+                - let TOTAL_MOVABLE == sum over all valid i of: max(WIN_MAX(i), 0)
+
+              Then this function returns TOTAL_FIXED + TOTAL_MOVABLE.
+    !*/
+
 // ----------------------------------------------------------------------------------------

    template <
@ -90,6 +131,54 @@ namespace dlib
                  test.  
    !*/

+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename image_array_type
+        >
+    void scan_image_movable_parts (
+        std::vector<std::pair<double, point> >& dets,
+        const image_array_type& images,
+        const rectangle& window,
+        const std::vector<std::pair<unsigned int, rectangle> >& fixed_rects,
+        const std::vector<std::pair<unsigned int, rectangle> >& movable_rects,
+        const double thresh,
+        const unsigned long max_dets
+    );
+    /*!
+        requires
+            - image_array_type             == an implementation of array/array_kernel_abstract.h
+            - image_array_type::type       == an implementation of array2d/array2d_kernel_abstract.h
+            - image_array_type::type::type == a scalar pixel type (e.g. int rather than rgb_pixel)
+            - images.size() > 0
+            - all_images_same_size(images) == true
+            - center(window) == point(0,0)
+            - window.area() > 0
+            - for all valid i: 
+                - fixed_rects[i].first < images.size()
+                  (i.e. all the rectangles must reference valid elements of images)
+            - for all valid i: 
+                - movable_rects[i].first < images.size()
+                  (i.e. all the rectangles must reference valid elements of images)
+                - center(movable_rects[i].second) == point(0,0) 
+                - movable_rects[i].second.area() > 0
+        ensures
+            - Scans the given window over the images and reports the locations with a score greater
+              than or equal to thresh.
+            - Specifically, we have:
+                - #dets.size() <= max_dets
+                  (note that dets is cleared before new detections are added by scan_image_movable_parts())
+                - for all valid i:
+                    - #dets[i].first == sum_of_rects_in_images_movable_parts(images,
+                                                                             window,
+                                                                             fixed_rects,
+                                                                             movable_rects,
+                                                                             #dets[i].second) >= thresh
+            - if (there are more than max_dets locations that pass the above threshold test) then
+                - #dets == a random subsample of all the locations which passed the threshold
+                  test.  
+    !*/
+
 // ----------------------------------------------------------------------------------------

 }