mirror of
https://github.com/davisking/dlib.git
synced 2024-11-01 10:14:53 +08:00
Simplified code slightly and filled out the spec
This commit is contained in:
parent
94b0344532
commit
7847038b33
@ -29,10 +29,6 @@ namespace dlib
|
||||
rects.clear();
|
||||
find_candidate_object_locations(img, rects);
|
||||
}
|
||||
|
||||
void copy_configuration (
|
||||
const default_box_generator&
|
||||
){}
|
||||
};
|
||||
|
||||
inline void serialize(const default_box_generator&, std::ostream& ) {}
|
||||
@ -341,7 +337,7 @@ namespace dlib
|
||||
const box_generator& bg
|
||||
)
|
||||
{
|
||||
detect_boxes.copy_configuration(bg);
|
||||
detect_boxes = bg;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
@ -356,7 +352,7 @@ namespace dlib
|
||||
)
|
||||
{
|
||||
feats.copy_configuration(item.feats);
|
||||
detect_boxes.copy_configuration(item.detect_boxes);
|
||||
detect_boxes = item.detect_boxes;
|
||||
num_spatial_pyramid_levels = item.num_spatial_pyramid_levels;
|
||||
}
|
||||
|
||||
|
@ -1,2 +1,379 @@
|
||||
// Copyright (C) 2013 Davis E. King (davis@dlib.net)
|
||||
// License: Boost Software License See LICENSE.txt for the full license.
|
||||
#undef DLIB_SCAN_IMAGE_bOXES_ABSTRACT_H__
|
||||
#ifdef DLIB_SCAN_IMAGE_bOXES_ABSTRACT_H__
|
||||
|
||||
#include "../matrix.h"
|
||||
#include "../geometry.h"
|
||||
#include "../image_processing.h"
|
||||
#include "../array2d.h"
|
||||
#include "full_object_detection.h"
|
||||
#include "../image_transforms/segment_image_abstract.h"
|
||||
#include <vector>
|
||||
|
||||
namespace dlib
|
||||
{
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
class default_box_generator
|
||||
{
|
||||
/*!
|
||||
WHAT THIS OBJECT REPRESENTS
|
||||
This is a function object that takes in an image and outputs a set of
|
||||
candidate object locations. It is also the default box generator used by
|
||||
the scan_image_boxes object defined below.
|
||||
!*/
|
||||
|
||||
public:
|
||||
|
||||
template <typename image_type>
|
||||
void operator() (
|
||||
const image_type& img,
|
||||
std::vector<rectangle>& rects
|
||||
) const
|
||||
/*!
|
||||
ensures
|
||||
- #rects == the set of candidate object locations which should be searched
|
||||
inside img. That is, these are the rectangles which might contain
|
||||
objects of interest within the given image.
|
||||
!*/
|
||||
{
|
||||
rects.clear();
|
||||
find_candidate_object_locations(img, rects);
|
||||
}
|
||||
};
|
||||
|
||||
inline void serialize (const default_box_generator&, std::ostream& ) {}
|
||||
inline void deserialize( default_box_generator&, std::istream& ) {}
|
||||
/*!
|
||||
ensures
|
||||
- provides serialization support.
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename Feature_extractor_type,
|
||||
typename Box_generator = default_box_generator
|
||||
>
|
||||
class scan_image_boxes : noncopyable
|
||||
{
|
||||
/*!
|
||||
REQUIREMENTS ON Feature_extractor_type
|
||||
- must be an object with an interface compatible with the hashed_feature_image
|
||||
object defined in dlib/image_keypoint/hashed_feature_image_abstract.h or
|
||||
with the nearest_neighbor_feature_image object defined in
|
||||
dlib/image_keypoint/nearest_neighbor_feature_image_abstract.h
|
||||
|
||||
REQUIREMENTS ON Box_generator
|
||||
- must be an object with an interface compatible with the
|
||||
default_box_generator object defined at the top of this file.
|
||||
|
||||
INITIAL VALUE
|
||||
- get_num_spatial_pyramid_levels() == 3
|
||||
- is_loaded_with_image() == false
|
||||
|
||||
WHAT THIS OBJECT REPRESENTS
|
||||
This object is a tool for running a classifier over an image with the goal
|
||||
of localizing each object present. The localization is in the form of the
|
||||
bounding box around each object of interest.
|
||||
|
||||
Unlike the scan_image_pyramid object which scans a fixed sized window over
|
||||
an image pyramid, the scan_image_boxes tool allows you to define your own
|
||||
list of "candidate object locations" which should be evaluated. This is
|
||||
simply a list of rectangle objects which might contain objects of interest.
|
||||
The scan_image_boxes object will then evaluate the classifier at each of
|
||||
these locations and return the subset of rectangles which appear to have
|
||||
objects in them. The candidate object location generation is provided by
|
||||
the Box_generator that is passed in as a template argument.
|
||||
|
||||
This object can also be understood as a general tool for implementing the
|
||||
spatial pyramid models described in the paper:
|
||||
Beyond Bags of Features: Spatial Pyramid Matching for Recognizing
|
||||
Natural Scene Categories by Svetlana Lazebnik, Cordelia Schmid,
|
||||
and Jean Ponce
|
||||
|
||||
|
||||
The classifiers used by this object have three parts:
|
||||
1. The underlying feature extraction provided by Feature_extractor_type
|
||||
objects, which associate a vector with each location in an image.
|
||||
|
||||
2. A rule for extracting a feature vector from a candidate object
|
||||
location. In this object we use the spatial pyramid matching method.
|
||||
This means we cut an object's detection window into a set of "feature
|
||||
extraction regions" and extract a bag-of-words vector from each
|
||||
before finally concatenating them to form the final feature vector
|
||||
representing the entire object window. The set of feature extraction
|
||||
regions can be configured by the user by calling
|
||||
set_num_spatial_pyramid_levels(). To be a little more precise, the
|
||||
feature vector for a candidate object window is defined as follows:
|
||||
- Let N denote the number of feature extraction zones.
|
||||
- Let M denote the dimensionality of the vectors output by
|
||||
Feature_extractor_type objects.
|
||||
- Let F(i) == the M dimensional vector which is the sum of all
|
||||
vectors given by our Feature_extractor_type object inside the
|
||||
i-th feature extraction zone. So this is notionally a
|
||||
bag-of-words vector from the i-th zone.
|
||||
- Then the feature vector for an object window is an M*N
|
||||
dimensional vector [F(1) F(2) F(3) ... F(N)] (i.e. it is a
|
||||
concatenation of the N vectors). This feature vector can be
|
||||
thought of as a collection of N bags-of-words, each bag coming
|
||||
from a spatial location determined by one of the feature
|
||||
extraction zones.
|
||||
|
||||
3. A weight vector and a threshold value. The dot product between the
|
||||
weight vector and the feature vector for a candidate object location
|
||||
gives the score of the location. If this score is greater than the
|
||||
threshold value then the candidate object location is output as a
|
||||
detection.
|
||||
|
||||
THREAD SAFETY
|
||||
Concurrent access to an instance of this object is not safe and should be
|
||||
protected by a mutex lock except for the case where you are copying the
|
||||
configuration (via copy_configuration()) of a scan_image_boxes object to
|
||||
many other threads. In this case, it is safe to copy the configuration of
|
||||
a shared object so long as no other operations are performed on it.
|
||||
!*/
|
||||
|
||||
public:
|
||||
|
||||
typedef matrix<double,0,1> feature_vector_type;
|
||||
|
||||
typedef Feature_extractor_type feature_extractor_type;
|
||||
typedef Box_generator box_generator;
|
||||
|
||||
scan_image_boxes (
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- this object is properly initialized
|
||||
!*/
|
||||
|
||||
template <
|
||||
typename image_type
|
||||
>
|
||||
void load (
|
||||
const image_type& img
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- image_type must be a type with the following properties:
|
||||
- image_type objects can be loaded into Feature_extractor_type
|
||||
objects via Feature_extractor_type::load().
|
||||
- image_type objects can be passed to the first argument of
|
||||
Box_generator::operator()
|
||||
ensures
|
||||
- #is_loaded_with_image() == true
|
||||
- This object is ready to run a classifier over img to detect object
|
||||
locations. Call detect() to do this.
|
||||
!*/
|
||||
|
||||
bool is_loaded_with_image (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns true if this object has been loaded with an image to process and
|
||||
false otherwise.
|
||||
!*/
|
||||
|
||||
void copy_configuration(
|
||||
const feature_extractor_type& fe
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- Let BASE_FE denote the feature_extractor_type object used internally for
|
||||
local feature extraction. Then this function performs
|
||||
BASE_FE.copy_configuration(fe) (i.e. this function allows you to
|
||||
configure the parameters of the underlying feature extractor used by a
|
||||
scan_image_boxes object)
|
||||
!*/
|
||||
|
||||
void copy_configuration(
|
||||
const box_generator& bg
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- Let BASE_BG denote the box_generator object used internally for candidate
|
||||
box generation. Then this function performs:
|
||||
BASE_BG = bg;
|
||||
(i.e. this function allows you to configure the parameters of the
|
||||
underlying box generator used by a scan_image_boxes object)
|
||||
!*/
|
||||
|
||||
void copy_configuration (
|
||||
const scan_image_boxes& item
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- Copies all the state information of item into *this, except for state
|
||||
information populated by load(). More precisely, given two scan_image_boxes
|
||||
objects S1 and S2, the following sequence of instructions should always
|
||||
result in both of them having the exact same state:
|
||||
S2.copy_configuration(S1);
|
||||
S1.load(img);
|
||||
S2.load(img);
|
||||
!*/
|
||||
|
||||
long get_num_dimensions (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns the number of dimensions in the feature vector for a candidate
|
||||
object location. This value is the dimensionality of the underlying
|
||||
feature vectors produced by Feature_extractor_type times the number of
|
||||
feature extraction regions used. Note that the number of feature
|
||||
extraction regions used is a function of
|
||||
get_num_spatial_pyramid_levels().
|
||||
!*/
|
||||
|
||||
unsigned long get_num_spatial_pyramid_levels (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns the number of layers in the spatial pyramid. For example, if
|
||||
this function returns 1 then it means we use a simple bag-of-words
|
||||
representation over the whole object window. If it returns 2 then it
|
||||
means the feature representation is the concatenation of 5 bag-of-words
|
||||
vectors, one from the entire object window and 4 others from 4 different
|
||||
parts of the object window. If it returns 3 then there are 1+4+16
|
||||
bag-of-words vectors concatenated together in the feature representation,
|
||||
and so on.
|
||||
!*/
|
||||
|
||||
void set_num_spatial_pyramid_levels (
|
||||
unsigned long levels
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- levels > 0
|
||||
ensures
|
||||
- #get_num_spatial_pyramid_levels() == levels
|
||||
!*/
|
||||
|
||||
void detect (
|
||||
const feature_vector_type& w,
|
||||
std::vector<std::pair<double, rectangle> >& dets,
|
||||
const double thresh
|
||||
) const;
|
||||
/*!
|
||||
requires
|
||||
- w.size() >= get_num_dimensions()
|
||||
- is_loaded_with_image() == true
|
||||
ensures
|
||||
- Scans over all the candidate object locations as discussed in the WHAT
|
||||
THIS OBJECT REPRESENTS section and stores all detections into #dets.
|
||||
- for all valid i:
|
||||
- #dets[i].second == The candidate object location which produced this
|
||||
detection. This rectangle gives the location of the detection.
|
||||
- #dets[i].first == The score for this detection. This value is equal
|
||||
to dot(w, feature vector for this candidate object location).
|
||||
- #dets[i].first >= thresh
|
||||
- #dets will be sorted in descending order.
|
||||
(i.e. #dets[i].first >= #dets[j].first for all i, and j>i)
|
||||
- Elements of w beyond index get_num_dimensions()-1 are ignored. I.e. only
|
||||
the first get_num_dimensions() elements of w are used.
|
||||
- Note that no form of non-max suppression is performed. If a location
|
||||
has a score >= thresh then it is reported in #dets.
|
||||
!*/
|
||||
|
||||
void get_feature_vector (
|
||||
const full_object_detection& obj,
|
||||
feature_vector_type& psi
|
||||
) const;
|
||||
/*!
|
||||
requires
|
||||
- obj.num_parts() == 0
|
||||
- is_loaded_with_image() == true
|
||||
- psi.size() >= get_num_dimensions()
|
||||
(i.e. psi must have preallocated its memory before this function is called)
|
||||
ensures
|
||||
- This function allows you to determine the feature vector used for a
|
||||
candidate object location output from detect(). Note that this vector is
|
||||
added to psi. Note also that you must use get_full_object_detection() to
|
||||
convert a rectangle from detect() into the needed full_object_detection.
|
||||
- Since scan_image_boxes only searches a limited set of object locations,
|
||||
not all possible rectangles can be output by detect(). So in the case
|
||||
where obj.get_rect() could not arise from a call to detect(), this
|
||||
function will map obj.get_rect() to the nearest possible rectangle and
|
||||
then add the feature vector for the mapped rectangle into #psi.
|
||||
- get_best_matching_rect(obj.get_rect()) == the rectangle obj.get_rect()
|
||||
gets mapped to for feature extraction.
|
||||
!*/
|
||||
|
||||
full_object_detection get_full_object_detection (
|
||||
const rectangle& rect,
|
||||
const feature_vector_type& w
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- returns full_object_detection(rect)
|
||||
(This function is here only for compatibility with the scan_image_pyramid
|
||||
object)
|
||||
!*/
|
||||
|
||||
const rectangle get_best_matching_rect (
|
||||
const rectangle& rect
|
||||
) const;
|
||||
/*!
|
||||
requires
|
||||
- is_loaded_with_image() == true
|
||||
ensures
|
||||
- Since scan_image_boxes only searches a limited set of object locations,
|
||||
not all possible rectangles can be represented. Therefore, this function
|
||||
allows you to supply a rectangle and obtain the nearest possible
|
||||
candidate object location rectangle.
|
||||
!*/
|
||||
|
||||
unsigned long get_num_detection_templates (
|
||||
) const { return 1; }
|
||||
/*!
|
||||
ensures
|
||||
- returns 1. Note that this function is here only for compatibility with
|
||||
the scan_image_pyramid object. Notionally, its return value indicates
|
||||
that a scan_image_boxes object is always ready to detect objects once
|
||||
an image has been loaded.
|
||||
!*/
|
||||
|
||||
unsigned long get_num_movable_components_per_detection_template (
|
||||
) const { return 0; }
|
||||
/*!
|
||||
ensures
|
||||
- returns 0. Note that this function is here only for compatibility with
|
||||
the scan_image_pyramid object. Its return value means that this object
|
||||
does not support using movable part models.
|
||||
!*/
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename Feature_extractor_type,
|
||||
typename Box_generator
|
||||
>
|
||||
void serialize (
|
||||
const scan_image_boxes<Feature_extractor_type,Box_generator>& item,
|
||||
std::ostream& out
|
||||
);
|
||||
/*!
|
||||
provides serialization support
|
||||
!*/
|
||||
|
||||
template <
|
||||
typename Feature_extractor_type,
|
||||
typename Box_generator
|
||||
>
|
||||
void deserialize (
|
||||
scan_image_boxes<Feature_extractor_type,Box_generator>& item,
|
||||
std::istream& in
|
||||
);
|
||||
/*!
|
||||
provides deserialization support
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
}
|
||||
|
||||
#endif // DLIB_SCAN_IMAGE_bOXES_ABSTRACT_H__
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user