Simplified code slightly and filled out the spec

This commit is contained in:
Davis King 2013-03-24 15:18:12 -04:00
parent 94b0344532
commit 7847038b33
2 changed files with 379 additions and 6 deletions

View File

@ -29,10 +29,6 @@ namespace dlib
rects.clear();
find_candidate_object_locations(img, rects);
}
void copy_configuration (
const default_box_generator&
){}
};
inline void serialize(const default_box_generator&, std::ostream& ) {}
@ -341,7 +337,7 @@ namespace dlib
const box_generator& bg
)
{
detect_boxes.copy_configuration(bg);
detect_boxes = bg;
}
// ----------------------------------------------------------------------------------------
@ -356,7 +352,7 @@ namespace dlib
)
{
feats.copy_configuration(item.feats);
detect_boxes.copy_configuration(item.detect_boxes);
detect_boxes = item.detect_boxes;
num_spatial_pyramid_levels = item.num_spatial_pyramid_levels;
}

View File

@ -1,2 +1,379 @@
// Copyright (C) 2013 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_SCAN_IMAGE_bOXES_ABSTRACT_H__
#ifdef DLIB_SCAN_IMAGE_bOXES_ABSTRACT_H__
#include "../matrix.h"
#include "../geometry.h"
#include "../image_processing.h"
#include "../array2d.h"
#include "full_object_detection.h"
#include "../image_transforms/segment_image_abstract.h"
#include <vector>
namespace dlib
{
// ----------------------------------------------------------------------------------------
class default_box_generator
{
    /*!
        WHAT THIS OBJECT REPRESENTS
            A function object that maps an image to a list of candidate object
            locations.  When no Box_generator template argument is supplied, the
            scan_image_boxes object declared later in this file uses this type.
    !*/

public:

    template <typename image_type>
    void operator() (const image_type& img, std::vector<rectangle>& rects) const
    /*!
        ensures
            - #rects == the rectangles inside img that should be searched, i.e. the
              places in the given image that might hold an object of interest.
            - Whatever rects held on entry is discarded.
    !*/
    {
        // Drop any previous contents before the candidates are generated.
        rects.clear();
        find_candidate_object_locations(img, rects);
    }
};
inline void serialize (
    const default_box_generator&,
    std::ostream&
)
{
}

inline void deserialize (
    default_box_generator&,
    std::istream&
)
{
}
/*!
    ensures
        - provides serialization support for default_box_generator.  Both bodies
          are empty, so nothing is written to or read from the stream.
!*/
// ----------------------------------------------------------------------------------------
template <
typename Feature_extractor_type,
typename Box_generator = default_box_generator
>
class scan_image_boxes : noncopyable
{
/*!
REQUIREMENTS ON Feature_extractor_type
- must be an object with an interface compatible with the hashed_feature_image
object defined in dlib/image_keypoint/hashed_feature_image_abstract.h or
with the nearest_neighbor_feature_image object defined in
dlib/image_keypoint/nearest_neighbor_feature_image_abstract.h
REQUIREMENTS ON Box_generator
- must be an object with an interface compatible with the
default_box_generator object defined at the top of this file.
INITIAL VALUE
- get_num_spatial_pyramid_levels() == 3
- is_loaded_with_image() == false
WHAT THIS OBJECT REPRESENTS
This object is a tool for running a classifier over an image with the goal
of localizing each object present. The localization is in the form of the
bounding box around each object of interest.
Unlike the scan_image_pyramid object which scans a fixed sized window over
an image pyramid, the scan_image_boxes tool allows you to define your own
list of "candidate object locations" which should be evaluated. This is
simply a list of rectangle objects which might contain objects of interest.
The scan_image_boxes object will then evaluate the classifier at each of
these locations and return the subset of rectangles which appear to have
objects in them. The candidate object location generation is provided by
the Box_generator that is passed in as a template argument.
This object can also be understood as a general tool for implementing the
spatial pyramid models described in the paper:
Beyond Bags of Features: Spatial Pyramid Matching for Recognizing
Natural Scene Categories by Svetlana Lazebnik, Cordelia Schmid,
and Jean Ponce
The classifiers used by this object have three parts:
1. The underlying feature extraction provided by Feature_extractor_type
objects, which associate a vector with each location in an image.
2. A rule for extracting a feature vector from a candidate object
location. In this object we use the spatial pyramid matching method.
This means we cut an object's detection window into a set of "feature
extraction regions" and extract a bag-of-words vector from each
before finally concatenating them to form the final feature vector
representing the entire object window. The set of feature extraction
regions can be configured by the user by calling
set_num_spatial_pyramid_levels(). To be a little more precise, the
feature vector for a candidate object window is defined as follows:
- Let N denote the number of feature extraction zones.
- Let M denote the dimensionality of the vectors output by
Feature_extractor_type objects.
- Let F(i) == the M dimensional vector which is the sum of all
vectors given by our Feature_extractor_type object inside the
i-th feature extraction zone. So this is notionally a
bag-of-words vector from the i-th zone.
- Then the feature vector for an object window is an M*N
dimensional vector [F(1) F(2) F(3) ... F(N)] (i.e. it is a
concatenation of the N vectors). This feature vector can be
thought of as a collection of N bags-of-words, each bag coming
from a spatial location determined by one of the feature
extraction zones.
3. A weight vector and a threshold value. The dot product between the
weight vector and the feature vector for a candidate object location
gives the score of the location. If this score is greater than the
threshold value then the candidate object location is output as a
detection.
THREAD SAFETY
Concurrent access to an instance of this object is not safe and should be
protected by a mutex lock except for the case where you are copying the
configuration (via copy_configuration()) of a scan_image_boxes object to
many other threads. In this case, it is safe to copy the configuration of
a shared object so long as no other operations are performed on it.
!*/
public:
typedef matrix<double,0,1> feature_vector_type;
typedef Feature_extractor_type feature_extractor_type;
typedef Box_generator box_generator;
scan_image_boxes (
);
/*!
ensures
- this object is properly initialized
!*/
template <
typename image_type
>
void load (
const image_type& img
);
/*!
requires
- image_type must be a type with the following properties:
- image_type objects can be loaded into Feature_extractor_type
objects via Feature_extractor_type::load().
- image_type objects can be passed to the first argument of
Box_generator::operator()
ensures
- #is_loaded_with_image() == true
- This object is ready to run a classifier over img to detect object
locations. Call detect() to do this.
!*/
bool is_loaded_with_image (
) const;
/*!
ensures
- returns true if this object has been loaded with an image to process and
false otherwise.
!*/
void copy_configuration(
const feature_extractor_type& fe
);
/*!
ensures
- Let BASE_FE denote the feature_extractor_type object used internally for
local feature extraction. Then this function performs
BASE_FE.copy_configuration(fe) (i.e. this function allows you to
configure the parameters of the underlying feature extractor used by a
scan_image_boxes object)
!*/
void copy_configuration(
const box_generator& bg
);
/*!
ensures
- Let BASE_BG denote the box_generator object used internally for candidate
box generation. Then this function performs:
BASE_BG = bg;
(i.e. this function allows you to configure the parameters of the
underlying box generator used by a scan_image_boxes object)
!*/
void copy_configuration (
const scan_image_boxes& item
);
/*!
ensures
- Copies all the state information of item into *this, except for state
information populated by load(). More precisely, given two scan_image_boxes
objects S1 and S2, the following sequence of instructions should always
result in both of them having the exact same state:
S2.copy_configuration(S1);
S1.load(img);
S2.load(img);
!*/
long get_num_dimensions (
) const;
/*!
ensures
- returns the number of dimensions in the feature vector for a candidate
object location. This value is the dimensionality of the underlying
feature vectors produced by Feature_extractor_type times the number of
feature extraction regions used. Note that the number of feature
extraction regions used is a function of
get_num_spatial_pyramid_levels().
!*/
unsigned long get_num_spatial_pyramid_levels (
) const;
/*!
ensures
- returns the number of layers in the spatial pyramid. For example, if
this function returns 1 then it means we use a simple bag-of-words
representation over the whole object window. If it returns 2 then it
means the feature representation is the concatenation of 5 bag-of-words
vectors, one from the entire object window and 4 others from 4 different
parts of the object window. If it returns 3 then there are 1+4+16
bag-of-words vectors concatenated together in the feature representation,
and so on.
!*/
void set_num_spatial_pyramid_levels (
unsigned long levels
);
/*!
requires
- levels > 0
ensures
- #get_num_spatial_pyramid_levels() == levels
!*/
void detect (
const feature_vector_type& w,
std::vector<std::pair<double, rectangle> >& dets,
const double thresh
) const;
/*!
requires
- w.size() >= get_num_dimensions()
- is_loaded_with_image() == true
ensures
- Scans over all the candidate object locations as discussed in the WHAT
THIS OBJECT REPRESENTS section and stores all detections into #dets.
- for all valid i:
- #dets[i].second == The candidate object location which produced this
detection. This rectangle gives the location of the detection.
- #dets[i].first == The score for this detection. This value is equal
to dot(w, feature vector for this candidate object location).
- #dets[i].first >= thresh
- #dets will be sorted in descending order.
(i.e. #dets[i].first >= #dets[j].first for all i, and j>i)
- Elements of w beyond index get_num_dimensions()-1 are ignored. I.e. only
the first get_num_dimensions() elements are used.
- Note that no form of non-max suppression is performed. If a location
has a score >= thresh then it is reported in #dets.
!*/
void get_feature_vector (
const full_object_detection& obj,
feature_vector_type& psi
) const;
/*!
requires
- obj.num_parts() == 0
- is_loaded_with_image() == true
- psi.size() >= get_num_dimensions()
(i.e. psi must have preallocated its memory before this function is called)
ensures
- This function allows you to determine the feature vector used for a
candidate object location output from detect(). Note that this vector is
added to psi. Note also that you must use get_full_object_detection() to
convert a rectangle from detect() into the needed full_object_detection.
- Since scan_image_boxes only searches a limited set of object locations,
not all possible rectangles can be output by detect(). So in the case
where obj.get_rect() could not arise from a call to detect(), this
function will map obj.get_rect() to the nearest possible rectangle and
then add the feature vector for the mapped rectangle into #psi.
- get_best_matching_rect(obj.get_rect()) == the rectangle obj.get_rect()
gets mapped to for feature extraction.
!*/
full_object_detection get_full_object_detection (
const rectangle& rect,
const feature_vector_type& w
) const;
/*!
ensures
- returns full_object_detection(rect)
(This function is here only for compatibility with the scan_image_pyramid
object)
!*/
const rectangle get_best_matching_rect (
const rectangle& rect
) const;
/*!
requires
- is_loaded_with_image() == true
ensures
- Since scan_image_boxes only searches a limited set of object locations,
not all possible rectangles can be represented. Therefore, this function
allows you to supply a rectangle and obtain the nearest possible
candidate object location rectangle.
!*/
unsigned long get_num_detection_templates (
) const { return 1; }
/*!
ensures
- returns 1. Note that this function is here only for compatibility with
the scan_image_pyramid object. Notionally, its return value indicates
that a scan_image_boxes object is always ready to detect objects once
an image has been loaded.
!*/
unsigned long get_num_movable_components_per_detection_template (
) const { return 0; }
/*!
ensures
- returns 0. Note that this function is here only for compatibility with
the scan_image_pyramid object. Its return value means that this object
does not support using movable part models.
!*/
};
// ----------------------------------------------------------------------------------------
template <
typename Feature_extractor_type,
typename Box_generator
>
void serialize (
const scan_image_boxes<Feature_extractor_type,Box_generator>& item,
std::ostream& out
);
/*!
provides serialization support
(item is the scan_image_boxes object to save and out is the destination stream)
!*/
template <
typename Feature_extractor_type,
typename Box_generator
>
void deserialize (
scan_image_boxes<Feature_extractor_type,Box_generator>& item,
std::istream& in
);
/*!
provides deserialization support
(item is the scan_image_boxes object to restore and in is the source stream)
!*/
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_SCAN_IMAGE_bOXES_ABSTRACT_H__