Added scan_image_custom

2024-11-01 10:14:53 +08:00 · 2013-08-17 22:02:13 -04:00 · 2013-08-17 22:02:13 -04:00 · 09b4ec67eb
commit 09b4ec67eb
parent cf7e70769a
3 changed files with 792 additions and 0 deletions
--- a/dlib/image_processing.h
+++ b/dlib/image_processing.h
@ -11,6 +11,7 @@
 #include "image_processing/scan_image_pyramid_tools.h"
 #include "image_processing/setup_hashed_features.h"
 #include "image_processing/scan_image_boxes.h"
+#include "image_processing/scan_image_custom.h"
 #include "image_processing/remove_unobtainable_rectangles.h"

 #endif // DLIB_IMAGE_PROCESSInG_H___
--- a/dlib/image_processing/scan_image_custom.h
+++ b/dlib/image_processing/scan_image_custom.h
@ -0,0 +1,401 @@
+// Copyright (C) 2013  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#ifndef DLIB_SCAN_IMAGE_CuSTOM_H__
+#define DLIB_SCAN_IMAGE_CuSTOM_H__
+
+#include "scan_image_custom_abstract.h"
+#include "../matrix.h"
+#include "../geometry.h"
+#include <algorithm>
+#include <utility>
+#include <vector>
+#include "../image_processing/full_object_detection.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename Feature_extractor_type
+        >
+    class scan_image_custom : noncopyable
+    {
+        // Scores the candidate rectangles reported by a feature extractor against a weight vector.
+    public:
+        // The contract of every public member is spelled out in scan_image_custom_abstract.h.
+        typedef matrix<double,0,1> feature_vector_type;
+        typedef Feature_extractor_type feature_extractor_type;
+        // Constructs an object with is_loaded_with_image() == false.
+        scan_image_custom (
+        );  
+        // Passes img to feats.load(), which is also handed search_rects to fill in.
+        template <
+            typename image_type
+            >
+        void load (
+            const image_type& img
+        );
+        // Returns the loaded_with_image flag (set by load(), restored by deserialize()).
+        inline bool is_loaded_with_image (
+        ) const;
+        // Forwards fe to feats.copy_configuration().
+        inline void copy_configuration(
+            const feature_extractor_type& fe
+        );
+        // Read-only access to the wrapped feature extractor.
+        const Feature_extractor_type& get_feature_extractor (
+        ) const { return feats; }
+        // Copies only item's feature extractor configuration; search_rects and the loaded flag are untouched.
+        inline void copy_configuration (
+            const scan_image_custom& item
+        );
+        // Returns feats.get_num_dimensions().
+        inline long get_num_dimensions (
+        ) const;
+        // Clears dets, then stores every search rectangle scoring >= thresh, highest score first.
+        void detect (
+            const feature_vector_type& w,
+            std::vector<std::pair<double, rectangle> >& dets,
+            const double thresh
+        ) const;
+        // Passes get_best_matching_rect(obj.get_rect()) and psi on to feats.get_feature_vector().
+        void get_feature_vector (
+            const full_object_detection& obj,
+            feature_vector_type& psi
+        ) const;
+        // Returns full_object_detection(rect); the weight vector argument is unused.
+        full_object_detection get_full_object_detection (
+            const rectangle& rect,
+            const feature_vector_type& w
+        ) const;
+        // Returns the search rectangle r with the largest rect.intersect(r).area()/(rect+r).area().
+        const rectangle get_best_matching_rect (
+            const rectangle& rect
+        ) const;
+        // Always returns 1.
+        inline unsigned long get_num_detection_templates (
+        ) const { return 1; }
+        // Always returns 0.
+        inline unsigned long get_num_movable_components_per_detection_template (
+        ) const { return 0; }
+        // The serialization routines are friends because they access the private members directly.
+        template <typename T>
+        friend void serialize (
+            const scan_image_custom<T>& item,
+            std::ostream& out
+        );
+
+        template <typename T>
+        friend void deserialize (
+            scan_image_custom<T>& item,
+            std::istream& in 
+        );
+
+    private:
+        static bool compare_pair_rect (
+            const std::pair<double, rectangle>& a,
+            const std::pair<double, rectangle>& b
+        )
+        {
+            return a.first < b.first;  // compares scores only; the rectangles are ignored
+        }
+
+        // Declares has_compute_object_score<T>, which the overloads below use to choose how to score.
+        DLIB_MAKE_HAS_MEMBER_FUNCTION_TEST(
+            has_compute_object_score,
+            double, 
+            compute_object_score,
+            ( const matrix<double,0,1>& w, const rectangle& obj) const
+        );
+        // Used when fe_type provides compute_object_score(): ask the extractor for each score directly.
+        template <typename fe_type>
+        typename enable_if<has_compute_object_score<fe_type> >::type compute_all_rect_scores (
+            const fe_type& feats,
+            const feature_vector_type& w,
+            std::vector<std::pair<double, rectangle> >& dets,
+            const double thresh
+        ) const
+        {
+            for (unsigned long i = 0; i < search_rects.size(); ++i)
+            {
+                const double score = feats.compute_object_score(w, search_rects[i]);
+                if (score >= thresh)
+                {
+                    dets.push_back(std::make_pair(score, search_rects[i]));
+                }
+            }
+        }
+        // Used otherwise: compute each score from get_feature_vector() and dot().
+        template <typename fe_type>
+        typename disable_if<has_compute_object_score<fe_type> >::type compute_all_rect_scores (
+            const fe_type& feats,
+            const feature_vector_type& w,
+            std::vector<std::pair<double, rectangle> >& dets,
+            const double thresh
+        ) const
+        {
+            matrix<double,0,1> psi(w.size());
+            psi = 0;
+            double prev_dot = 0;
+            for (unsigned long i = 0; i < search_rects.size(); ++i)
+            {
+                // get_feature_vector() adds into psi rather than overwriting it, so psi holds a
+                // running sum and each score is the change in dot(psi, w).  The running state is
+                // zeroed periodically (before items 499, 999, ...) to bound rounding error.
+                if ((i%500) == 499)
+                {
+                    psi = 0;
+                    prev_dot = 0;
+                }
+
+                feats.get_feature_vector(search_rects[i], psi);
+                const double cur_dot = dot(psi, w);
+                const double score = cur_dot - prev_dot;
+                if (score >= thresh)
+                {
+                    dets.push_back(std::make_pair(score, search_rects[i]));
+                }
+                prev_dot = cur_dot;
+            }
+        }
+
+        // State: the extractor, the candidate rectangles passed through feats.load(), and the loaded flag.
+        feature_extractor_type feats;
+        std::vector<rectangle> search_rects;
+        bool loaded_with_image;
+    };
+
+// ----------------------------------------------------------------------------------------
+
+    // Writes item to out using format version 1: the version tag, the feature extractor,
+    // the candidate rectangles, the loaded flag and finally the feature dimensionality,
+    // which deserialize() reads back as a consistency check.
+    template <typename T>
+    void serialize (const scan_image_custom<T>& item, std::ostream& out)
+    {
+        const int format_version = 1;
+        serialize(format_version, out);
+
+        serialize(item.feats, out);
+        serialize(item.search_rects, out);
+        serialize(item.loaded_with_image, out);
+
+        const long num_dims = item.get_num_dimensions();
+        serialize(num_dims, out);
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    // Restores item from in.  Throws serialization_error when the stream was not written
+    // with format version 1, or when the dimensionality stored in the stream disagrees
+    // with what the freshly deserialized feature extractor reports.
+    template <typename T>
+    void deserialize (scan_image_custom<T>& item, std::istream& in)
+    {
+        int format_version = 0;
+        deserialize(format_version, in);
+        if (format_version != 1)
+        {
+            throw serialization_error("Unsupported version found when deserializing a scan_image_custom object.");
+        }
+
+        deserialize(item.feats, in);
+        deserialize(item.search_rects, in);
+        deserialize(item.loaded_with_image, in);
+
+        // Guard against loading data saved by an older revision of a feature extractor
+        // whose number of dimensions has since changed, which is an easy mistake to make
+        // while an extractor is still being developed.
+        long saved_dims;
+        deserialize(saved_dims, in);
+        const bool dims_agree = (item.get_num_dimensions() == saved_dims);
+        if (!dims_agree)
+        {
+            throw serialization_error("Number of dimensions in serialized scan_image_custom doesn't match the expected number.");
+        }
+    }
+
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+//                         scan_image_custom member functions
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+
+    // Default constructor: the scanner starts out with no image loaded.
+    template <typename Feature_extractor_type>
+    scan_image_custom<Feature_extractor_type>::scan_image_custom ()
+        : loaded_with_image(false)
+    {
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    // Hands img and search_rects to the feature extractor's load(); the scanner is
+    // marked as loaded only after that call has returned.
+    template <typename Feature_extractor_type>
+    template <typename image_type>
+    void scan_image_custom<Feature_extractor_type>::load (const image_type& img)
+    {
+        feats.load(img, search_rects);
+        loaded_with_image = true;
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    // Returns the flag that load() sets and deserialize() restores.
+    template <typename Feature_extractor_type>
+    bool scan_image_custom<Feature_extractor_type>::is_loaded_with_image () const
+    {
+        return loaded_with_image;
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    // Configures the internal feature extractor from fe by forwarding to its
+    // copy_configuration().
+    template <typename Feature_extractor_type>
+    void scan_image_custom<Feature_extractor_type>::copy_configuration (
+        const feature_extractor_type& fe
+    )
+    {
+        feats.copy_configuration(fe);
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    // Copies the feature extractor configuration of another scanner.  The candidate
+    // rectangles and the loaded flag of *this are left as they were.
+    template <typename Feature_extractor_type>
+    void scan_image_custom<Feature_extractor_type>::copy_configuration (
+        const scan_image_custom& item
+    )
+    {
+        feats.copy_configuration(item.feats);
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    // Dimensionality of the feature vectors, as reported by the feature extractor.
+    template <typename Feature_extractor_type>
+    long scan_image_custom<Feature_extractor_type>::get_num_dimensions () const
+    {
+        return feats.get_num_dimensions();
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    // Scores every candidate rectangle against w and leaves those scoring >= thresh in
+    // dets, ordered from highest to lowest score.  Previous contents of dets are dropped.
+    template <typename Feature_extractor_type>
+    void scan_image_custom<Feature_extractor_type>::detect (
+        const feature_vector_type& w,
+        std::vector<std::pair<double, rectangle> >& dets,
+        const double thresh
+    ) const
+    {
+        // make sure requires clause is not broken
+        DLIB_ASSERT(is_loaded_with_image() &&
+                    w.size() >= get_num_dimensions(), 
+            "\t void scan_image_custom::detect()"
+            << "\n\t Invalid inputs were given to this function "
+            << "\n\t is_loaded_with_image(): " << is_loaded_with_image()
+            << "\n\t w.size():               " << w.size()
+            << "\n\t get_num_dimensions():   " << get_num_dimensions()
+            << "\n\t this: " << this
+            );
+
+        dets.clear();
+        compute_all_rect_scores(feats, w, dets, thresh);
+
+        // compare_pair_rect is an ascending comparison, so sorting through the reverse
+        // iterators leaves dets in descending score order.
+        std::sort(dets.rbegin(), dets.rend(), compare_pair_rect);
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    // Maps rect onto the candidate rectangle it matches best, where the match quality of
+    // a candidate c is rect.intersect(c).area() / (rect+c).area().  A default constructed
+    // rectangle is returned when no candidate achieves a quality above -1 (for example
+    // when there are no candidates at all).
+    template <typename Feature_extractor_type>
+    const rectangle scan_image_custom<Feature_extractor_type>::get_best_matching_rect (
+        const rectangle& rect
+    ) const
+    {
+        // make sure requires clause is not broken
+        DLIB_ASSERT(is_loaded_with_image(),
+            "\t const rectangle scan_image_custom::get_best_matching_rect()"
+            << "\n\t Invalid inputs were given to this function "
+            << "\n\t is_loaded_with_image(): " << is_loaded_with_image()
+            << "\n\t this: " << this
+            );
+
+        rectangle winner;
+        double winning_quality = -1;
+        for (unsigned long idx = 0; idx < search_rects.size(); ++idx)
+        {
+            const rectangle& candidate = search_rects[idx];
+            const double shared_area = rect.intersect(candidate).area();
+            const double combined_area = (rect+candidate).area();
+            const double quality = shared_area/combined_area;
+            if (quality > winning_quality)
+            {
+                winning_quality = quality;
+                winner = candidate;
+            }
+        }
+        return winner;
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    // Wraps rect in a full_object_detection.  The weight vector argument is accepted but
+    // not used.
+    template <typename Feature_extractor_type>
+    full_object_detection scan_image_custom<Feature_extractor_type>::get_full_object_detection (
+        const rectangle& rect,
+        const feature_vector_type& /*w*/
+    ) const
+    {
+        return full_object_detection(rect);
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    // Snaps obj's rectangle to the best matching candidate rectangle and asks the feature
+    // extractor to output that candidate's feature vector into psi.
+    template <typename Feature_extractor_type>
+    void scan_image_custom<Feature_extractor_type>::get_feature_vector (
+        const full_object_detection& obj,
+        feature_vector_type& psi
+    ) const
+    {
+        // make sure requires clause is not broken
+        DLIB_ASSERT(is_loaded_with_image() &&
+                    psi.size() >= get_num_dimensions() &&
+                    obj.num_parts() == 0,
+            "\t void scan_image_custom::get_feature_vector()"
+            << "\n\t Invalid inputs were given to this function "
+            << "\n\t is_loaded_with_image(): " << is_loaded_with_image()
+            << "\n\t psi.size():             " << psi.size()
+            << "\n\t get_num_dimensions():   " << get_num_dimensions()
+            << "\n\t obj.num_parts():                            " << obj.num_parts()
+            << "\n\t this: " << this
+            );
+
+        const rectangle matched_rect = get_best_matching_rect(obj.get_rect());
+        feats.get_feature_vector(matched_rect, psi);
+    }
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_SCAN_IMAGE_CuSTOM_H__
+
--- a/dlib/image_processing/scan_image_custom_abstract.h
+++ b/dlib/image_processing/scan_image_custom_abstract.h
@ -0,0 +1,390 @@
+// Copyright (C) 2013  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#undef DLIB_SCAN_IMAGE_CuSTOM_ABSTRACT_H__
+#ifdef DLIB_SCAN_IMAGE_CuSTOM_ABSTRACT_H__
+
+#include <vector>
+#include "../matrix.h"
+#include "../geometry.h"
+#include "../image_processing/full_object_detection_abstract.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+    class example_feature_extractor
+    {
+        /*!
+            WHAT THIS OBJECT REPRESENTS
+                This object defines the interface a feature extractor must implement if it
+                is to be used with the scan_image_custom object defined at the bottom of
+                this file.  
+
+                In this case, the purpose of a feature extractor is to associate a
+                complete feature vector with each rectangle in an image.  In particular,
+                each rectangle is scored by taking the dot product between this feature
+                vector and a weight vector.  If this score is greater than or equal to a
+                threshold then the rectangle is output as a detection.
+        !*/
+
+    public:
+
+        template <
+            typename image_type
+            >
+        void load (
+            const image_type& image,
+            std::vector<rectangle>& candidate_objects
+        );
+        /*!
+            ensures
+                - Loads the given image into this feature extractor.  This means that
+                  subsequent calls to get_feature_vector() will return the feature vector
+                  corresponding to locations in the image given to load().
+                - #candidate_objects == a set of bounding boxes in the given image that
+                  might contain objects of interest.  These are the locations that will be
+                  checked for the presence of objects when this feature extractor is used
+                  with the scan_image_custom object.
+
+        !*/
+
+        void copy_configuration (
+            const example_feature_extractor& item
+        );
+        /*!
+            ensures
+                - Copies all the state information of item into *this, except for state
+                  information populated by load().  More precisely, given two
+                  feature extractor objects S1 and S2, the following sequence of
+                  instructions should always result in both of them having the exact same
+                  state:
+                    S2.copy_configuration(S1);
+                    S1.load(img, temp);
+                    S2.load(img, temp);
+        !*/
+
+        unsigned long get_num_dimensions (
+        ) const;
+        /*!
+            ensures
+                - returns the dimensionality of the feature vectors output by this object.
+        !*/
+
+        void get_feature_vector (
+            const rectangle& obj,
+            matrix<double,0,1>& psi
+        ) const;
+        /*!
+            requires
+                - psi.size() >= get_num_dimensions()
+                  (i.e. psi must have preallocated its memory before this function is called)
+            ensures
+                - This function computes the feature vector associated with the given rectangle
+                  in obj.  This rectangle is interpreted as a bounding box within the last image
+                  given to this->load() and a feature vector describing that bounding box is 
+                  output into psi.
+                - The feature vector is added into psi.  That is, it does not overwrite the
+                  previous contents of psi, but instead, it adds the vector to psi.
+                - The dimensionality of the vector added to psi is get_num_dimensions().  This
+                  means that elements of psi after psi(get_num_dimensions()-1) are not modified.
+                - #psi.size() == psi.size()
+                  (i.e. this function does not change the size of the psi vector)
+        !*/
+
+        double compute_object_score (
+            const matrix<double,0,1>& w,
+            const rectangle& obj
+        ) const;
+        /*!
+            requires
+                - w.size() >= get_num_dimensions()
+            ensures
+                - This function returns the dot product between the feature vector for
+                  object box obj and the given w vector.  That is, this function computes
+                  the same number as the following code snippet:
+                     matrix<double,0,1> psi(w.size());
+                     psi = 0;
+                     get_feature_vector(obj, psi);
+                     return dot(psi, w);
+                  The point of the compute_object_score() routine is to compute this dot
+                  product in a much more efficient way than directly calling
+                  get_feature_vector() and dot().  Therefore, compute_object_score() is an
+                  optional function.  If you can't think of a faster way to compute these
+                  scores then do not implement compute_object_score() and the
+                  scan_image_custom object will simply compute these scores for you.
+                  However, it is often the case that there is something clever you can do
+                  to make this computation faster.  If that is the case, then you can
+                  provide an implementation of this function with your feature extractor
+                  and then scan_image_custom will use it instead of using the default
+                  calculation method shown in the above code snippet.
+        !*/
+
+    };
+
+// ----------------------------------------------------------------------------------------
+    
+    void serialize( 
+        const example_feature_extractor& item, 
+        std::ostream& out
+    );
+    /*!
+        provides serialization support for feature extractors.  scan_image_custom's own
+        serialize() calls this routine on the feature extractor it contains.
+    !*/
+
+    void deserialize( 
+        example_feature_extractor& item, 
+        std::istream& in
+    );
+    /*!
+        provides deserialization support for feature extractors.  scan_image_custom's own
+        deserialize() calls this routine on the feature extractor it contains.
+    !*/
+
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename Feature_extractor_type
+        >
+    class scan_image_custom : noncopyable
+    {
+        /*!
+            REQUIREMENTS ON Feature_extractor_type
+                - must be an object with an interface compatible with the
+                  example_feature_extractor defined at the top of this file.
+
+            INITIAL VALUE
+                - is_loaded_with_image() == false
+
+            WHAT THIS OBJECT REPRESENTS
+                This object is a tool for running a classifier over an image with the goal
+                of localizing each object present.  The localization is in the form of the
+                bounding box around each object of interest.  
+
+                Unlike the scan_image_pyramid and scan_image_boxes objects, this image
+                scanner delegates all the work of constructing the object feature vector to
+                its Feature_extractor_type template argument.  That is, scan_image_custom
+                simply asks the supplied feature extractor what boxes in the image we
+                should investigate and then asks the feature extractor for the complete
+                feature vector for each box.  That is, scan_image_custom does not apply any
+                kind of pyramiding or other higher level processing to the features coming
+                out of the feature extractor.  That means that when you use
+                scan_image_custom it is completely up to you to define the feature vector
+                used with each image box.
+
+            THREAD SAFETY
+                Concurrent access to an instance of this object is not safe and should be
+                protected by a mutex lock except for the case where you are copying the
+                configuration (via copy_configuration()) of a scan_image_custom object to
+                many other threads.  In this case, it is safe to copy the configuration of
+                a shared object so long as no other operations are performed on it.
+        !*/
+
+    public:
+
+        typedef matrix<double,0,1> feature_vector_type;
+        typedef Feature_extractor_type feature_extractor_type;
+
+        scan_image_custom (
+        );  
+        /*!
+            ensures
+                - this object is properly initialized
+        !*/
+
+        template <
+            typename image_type
+            >
+        void load (
+            const image_type& img
+        );
+        /*!
+            requires
+                - image_type must be a type with the following properties:
+                    - image_type objects can be loaded into Feature_extractor_type
+                      objects via Feature_extractor_type::load().
+            ensures
+                - #is_loaded_with_image() == true
+                - Calls get_feature_extractor().load() on the given image.  That is, we
+                  will have loaded the image into the feature extractor in this
+                  scan_image_custom object.  We will also have stored the candidate
+                  object locations generated by the feature extractor and will scan
+                  over them when this->detect() is called.
+                - This object is ready to run a classifier over img to detect object
+                  locations.  Call detect() to do this.
+        !*/
+
+        bool is_loaded_with_image (
+        ) const;
+        /*!
+            ensures
+                - returns true if this object has been loaded with an image to process and
+                  false otherwise.
+        !*/
+
+        const feature_extractor_type& get_feature_extractor (
+        ) const; 
+        /*!
+            ensures
+                - returns a const reference to the feature_extractor_type object used 
+                  internally for local feature extraction.  
+        !*/
+
+        void copy_configuration(
+            const feature_extractor_type& fe
+        );
+        /*!
+            ensures
+                - This function performs the equivalent of
+                  get_feature_extractor().copy_configuration(fe) (i.e. this function allows
+                  you to configure the parameters of the underlying feature extractor used
+                  by a scan_image_custom object)
+        !*/
+
+        void copy_configuration (
+            const scan_image_custom& item
+        );
+        /*!
+            ensures
+                - Copies all the state information of item into *this, except for state
+                  information populated by load().  More precisely, given two
+                  scan_image_custom objects S1 and S2, the following sequence of
+                  instructions should always result in both of them having the exact same
+                  state:
+                    S2.copy_configuration(S1);
+                    S1.load(img);
+                    S2.load(img);
+        !*/
+
+        long get_num_dimensions (
+        ) const;
+        /*!
+            ensures
+                - returns the number of dimensions in the feature vector for a candidate
+                  object location.  That is, this function returns get_feature_extractor().get_num_dimensions().
+        !*/
+
+        void detect (
+            const feature_vector_type& w,
+            std::vector<std::pair<double, rectangle> >& dets,
+            const double thresh
+        ) const;
+        /*!
+            requires
+                - w.size() >= get_num_dimensions()
+                - is_loaded_with_image() == true
+            ensures
+                - Scans over all the candidate object locations produced by the feature
+                  extractor during image loading and stores all detections into #dets.
+                - for all valid i:
+                    - #dets[i].second == The candidate object location which produced this
+                      detection.  This rectangle gives the location of the detection.  
+                    - #dets[i].first == The score for this detection.  This value is equal
+                      to dot(w, feature vector for this candidate object location).
+                    - #dets[i].first >= thresh
+                - #dets will be sorted in descending order. 
+                  (i.e.  #dets[i].first >= #dets[j].first for all i, and j>i)
+                - Elements of w beyond index get_num_dimensions()-1 are ignored.  I.e. only
+                  the first get_num_dimensions() are used.
+                - Note that no form of non-max suppression is performed.  If a location
+                  has a score >= thresh then it is reported in #dets.
+        !*/
+
+        void get_feature_vector (
+            const full_object_detection& obj,
+            feature_vector_type& psi
+        ) const;
+        /*!
+            requires
+                - obj.num_parts() == 0 
+                - is_loaded_with_image() == true
+                - psi.size() >= get_num_dimensions()
+                  (i.e. psi must have preallocated its memory before this function is called)
+            ensures
+                - This function allows you to determine the feature vector used for a
+                  candidate object location output from detect().  Note that this vector is
+                  added to psi.  Note also that you must use get_full_object_detection() to
+                  convert a rectangle from detect() into the needed full_object_detection.
+                - The dimensionality of the vector added to psi is get_num_dimensions().  This
+                  means that elements of psi after psi(get_num_dimensions()-1) are not modified.
+                - Since scan_image_custom only searches a limited set of object locations,
+                  not all possible rectangles can be output by detect().  So in the case
+                  where obj.get_rect() could not arise from a call to detect(), this
+                  function will map obj.get_rect() to the nearest possible rectangle and
+                  then add the feature vector for the mapped rectangle into #psi.
+                - get_best_matching_rect(obj.get_rect()) == the rectangle obj.get_rect()
+                  gets mapped to for feature extraction.
+        !*/
+
+        full_object_detection get_full_object_detection (
+            const rectangle& rect,
+            const feature_vector_type& w
+        ) const;
+        /*!
+            ensures
+                - returns full_object_detection(rect)
+                  (This function is here only for compatibility with the scan_image_pyramid
+                  object.  The w argument is not used.)
+        !*/
+
+        const rectangle get_best_matching_rect (
+            const rectangle& rect
+        ) const;
+        /*!
+            requires
+                - is_loaded_with_image() == true
+            ensures
+                - Since scan_image_custom only searches a limited set of object locations,
+                  not all possible rectangles can be represented.  Therefore, this function
+                  allows you to supply a rectangle and obtain the nearest possible
+                  candidate object location rectangle.
+                - In the implementation in scan_image_custom.h, "nearest" means the
+                  candidate rectangle R with the largest value of
+                  rect.intersect(R).area()/(rect+R).area().  If there are no candidate
+                  rectangles then a default constructed rectangle is returned.
+        !*/
+
+        unsigned long get_num_detection_templates (
+        ) const { return 1; }
+        /*!
+            ensures
+                - returns 1.  Note that this function is here only for compatibility with
+                  the scan_image_pyramid object.  Notionally, its return value indicates
+                  that a scan_image_custom object is always ready to detect objects once an
+                  image has been loaded.
+        !*/
+
+        unsigned long get_num_movable_components_per_detection_template (
+        ) const { return 0; }
+        /*!
+            ensures
+                - returns 0.  Note that this function is here only for compatibility with
+                  the scan_image_pyramid object.  Its return value means that this object
+                  does not support using movable part models.
+        !*/
+
+    };
+
+// ----------------------------------------------------------------------------------------
+
+    template <typename T>
+    void serialize (
+        const scan_image_custom<T>& item,
+        std::ostream& out
+    );
+    /*!
+        provides serialization support.  The implementation in scan_image_custom.h writes
+        a format version number, the feature extractor, the candidate object locations,
+        the is_loaded_with_image() flag, and the value of get_num_dimensions().
+    !*/
+
+    template <typename T>
+    void deserialize (
+        scan_image_custom<T>& item,
+        std::istream& in 
+    );
+    /*!
+        provides deserialization support.  The implementation in scan_image_custom.h
+        throws serialization_error if the stored format version is not supported, or if
+        the number of dimensions recorded in the stream does not match
+        item.get_num_dimensions() after the feature extractor has been deserialized.
+    !*/
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_SCAN_IMAGE_CuSTOM_ABSTRACT_H__
+