Refactored the image pyramid code. Now there is just one templated object,

pyramid_down, and you give it the downsampling amount as a template argument.
This commit is contained in:
Davis King 2013-10-03 21:36:51 -04:00
parent 39ed906c39
commit e0a6e30581
7 changed files with 653 additions and 2262 deletions

File diff suppressed because it is too large Load Diff

View File

@ -10,11 +10,18 @@
namespace dlib
{
template <
unsigned int N
>
class pyramid_down : noncopyable
{
/*!
REQUIREMENTS ON N
N > 1
WHAT THIS OBJECT REPRESENTS
This is a simple functor to help create image pyramids.
This is a simple functor to help create image pyramids. In particular, it
downsamples images at a ratio of N to N-1.
WARNING, when mapping rectangles from one layer of a pyramid
@ -43,18 +50,14 @@ namespace dlib
- pixel_traits<typename in_image_type::type>::has_alpha == false
- pixel_traits<typename out_image_type::type>::has_alpha == false
ensures
- #down will contain an image that is roughly half the size of the original
image. To be specific, this function performs the following steps:
- 1. Applies a 5x5 Gaussian filter to the original image to smooth it a little.
- 2. Every other row and column is discarded to create an image half the size
of the original. This smaller image is stored in #down.
- if both input and output images contain RGB pixels then the downsampled image will
- #down will contain an image that is roughly (N-1)/N times the size of the
original image.
- If both input and output images contain RGB pixels then the downsampled image will
be in color. Otherwise, the downsampling will be performed in a grayscale mode.
- The location of a point P in the original image will show up at point point_down(P)
in the #down image.
- Note that some points on the border of the original image will correspond to
points outside the #down image. This is because the 5x5 filter is not applied
at the borders.
- Note that some points on the border of the original image might correspond to
points outside the #down image.
!*/
// -------------------------------
@ -151,61 +154,6 @@ namespace dlib
};
// ----------------------------------------------------------------------------------------
class pyramid_down_3_2 : noncopyable
{
/*!
WHAT THIS OBJECT REPRESENTS
This is a function object with an interface identical to pyramid_down (defined
at the top of this file) except that it downsamples images at a ratio of 3 to 2
instead of 2 to 1.
!*/
};
// ----------------------------------------------------------------------------------------
class pyramid_down_4_3 : noncopyable
{
/*!
WHAT THIS OBJECT REPRESENTS
This is a function object with an interface identical to pyramid_down (defined
at the top of this file) except that it downsamples images at a ratio of 4 to 3
instead of 2 to 1.
!*/
};
// ----------------------------------------------------------------------------------------
class pyramid_down_5_4 : noncopyable
{
/*!
WHAT THIS OBJECT REPRESENTS
This is a function object with an interface identical to pyramid_down (defined
at the top of this file) except that it downsamples images at a ratio of 5 to 4
instead of 2 to 1.
!*/
};
// ----------------------------------------------------------------------------------------
template <
unsigned int N
>
class pyramid_down_generic : noncopyable
{
/*!
REQUIREMENTS ON N
N > 1
WHAT THIS OBJECT REPRESENTS
This is a function object with an interface identical to pyramid_down
(defined at the top of this file) except that it downsamples images at a
ratio of N to N-1 instead of 2 to 1.
!*/
};
// ----------------------------------------------------------------------------------------
class pyramid_disable : noncopyable

View File

@ -379,7 +379,7 @@ namespace
make_simple_test_data(images, object_locations);
typedef hashed_feature_image<hog_image<3,3,1,4,hog_signed_gradient,hog_full_interpolation> > feature_extractor_type;
typedef scan_image_pyramid<pyramid_down, feature_extractor_type> image_scanner_type;
typedef scan_image_pyramid<pyramid_down<2>, feature_extractor_type> image_scanner_type;
image_scanner_type scanner;
const rectangle object_box = compute_box_dimensions(1,35*35);
scanner.add_detection_template(object_box, create_grid_detection_template(object_box,2,2));
@ -463,7 +463,7 @@ namespace
make_simple_test_data(images, object_locations);
typedef hashed_feature_image<hog_image<3,3,1,4,hog_signed_gradient,hog_full_interpolation> > feature_extractor_type;
typedef scan_image_pyramid<pyramid_down, feature_extractor_type> image_scanner_type;
typedef scan_image_pyramid<pyramid_down<2>, feature_extractor_type> image_scanner_type;
image_scanner_type scanner;
const rectangle object_box = compute_box_dimensions(1,35*35);
std::vector<rectangle> mboxes;
@ -512,7 +512,7 @@ namespace
make_simple_test_data(images, object_locations);
typedef hashed_feature_image<fine_hog_image<3,3,2,4,hog_signed_gradient> > feature_extractor_type;
typedef scan_image_pyramid<pyramid_down, feature_extractor_type> image_scanner_type;
typedef scan_image_pyramid<pyramid_down<2>, feature_extractor_type> image_scanner_type;
image_scanner_type scanner;
const rectangle object_box = compute_box_dimensions(1,35*35);
scanner.add_detection_template(object_box, create_grid_detection_template(object_box,2,2));
@ -555,7 +555,7 @@ namespace
make_simple_test_data(images, object_locations);
typedef hashed_feature_image<poly_image<2> > feature_extractor_type;
typedef scan_image_pyramid<pyramid_down, feature_extractor_type> image_scanner_type;
typedef scan_image_pyramid<pyramid_down<2>, feature_extractor_type> image_scanner_type;
image_scanner_type scanner;
const rectangle object_box = compute_box_dimensions(1,35*35);
scanner.add_detection_template(object_box, create_grid_detection_template(object_box,2,2));
@ -598,7 +598,7 @@ namespace
make_simple_test_data(images, object_locations);
typedef hashed_feature_image<poly_image<2> > feature_extractor_type;
typedef scan_image_pyramid<pyramid_down_3_2, feature_extractor_type> image_scanner_type;
typedef scan_image_pyramid<pyramid_down<3>, feature_extractor_type> image_scanner_type;
image_scanner_type scanner;
const rectangle object_box = compute_box_dimensions(1,35*35);
std::vector<rectangle> mboxes;
@ -647,12 +647,12 @@ namespace
make_simple_test_data(images, object_locations);
typedef nearest_neighbor_feature_image<poly_image<5> > feature_extractor_type;
typedef scan_image_pyramid<pyramid_down, feature_extractor_type> image_scanner_type;
typedef scan_image_pyramid<pyramid_down<2>, feature_extractor_type> image_scanner_type;
image_scanner_type scanner;
setup_grid_detection_templates(scanner, object_locations, 2, 2);
feature_extractor_type nnfe;
pyramid_down pyr_down;
pyramid_down<2> pyr_down;
poly_image<5> polyi;
nnfe.set_basis(randomly_sample_image_features(images, pyr_down, polyi, 80));
scanner.copy_configuration(nnfe);
@ -697,7 +697,7 @@ namespace
image_scanner_type scanner;
feature_extractor_type nnfe;
pyramid_down pyr_down;
pyramid_down<2> pyr_down;
poly_image<5> polyi;
nnfe.set_basis(randomly_sample_image_features(images, pyr_down, polyi, 80));
scanner.copy_configuration(nnfe);
@ -737,7 +737,7 @@ namespace
std::vector<std::vector<rectangle> > object_locations;
make_simple_test_data(images, object_locations);
typedef scan_image_pyramid<pyramid_down_5_4, very_simple_feature_extractor> image_scanner_type;
typedef scan_image_pyramid<pyramid_down<5>, very_simple_feature_extractor> image_scanner_type;
image_scanner_type scanner;
const rectangle object_box = compute_box_dimensions(1,70*70);
scanner.add_detection_template(object_box, create_grid_detection_template(object_box,2,2));
@ -771,7 +771,7 @@ namespace
class pyramid_down_funny : noncopyable
{
pyramid_down pyr;
pyramid_down<2> pyr;
public:
template <typename T>

View File

@ -23,7 +23,7 @@ namespace
void test_pyramid_down_grayscale()
{
array2d<unsigned char> img, down;
pyramid_down pyr;
pyramid_down<2> pyr;
img.set_size(300,264);
@ -52,7 +52,7 @@ void test_pyramid_down_rgb()
{
array2d<rgb_pixel> img;
array2d<bgr_pixel> down;
pyramid_down pyr;
pyramid_down<2> pyr;
img.set_size(231, 351);
@ -325,59 +325,59 @@ void test_pyramid_down_small_sizes()
test_pyramid_down_rgb();
print_spinner();
dlog << LINFO << "call test_pyramid_down_small_sizes<pyramid_down>();";
test_pyramid_down_small_sizes<pyramid_down>();
dlog << LINFO << "call test_pyramid_down_small_sizes<pyramid_down_3_2>();";
test_pyramid_down_small_sizes<pyramid_down_3_2>();
dlog << LINFO << "call test_pyramid_down_small_sizes<pyramid_down_4_3>();";
test_pyramid_down_small_sizes<pyramid_down_4_3>();
dlog << LINFO << "call test_pyramid_down_small_sizes<pyramid_down_5_4>();";
test_pyramid_down_small_sizes<pyramid_down_5_4>();
dlog << LINFO << "call test_pyramid_down_small_sizes<pyramid_down<2> >();";
test_pyramid_down_small_sizes<pyramid_down<2> >();
dlog << LINFO << "call test_pyramid_down_small_sizes<pyramid_down<3> >();";
test_pyramid_down_small_sizes<pyramid_down<3> >();
dlog << LINFO << "call test_pyramid_down_small_sizes<pyramid_down<4> >();";
test_pyramid_down_small_sizes<pyramid_down<4> >();
dlog << LINFO << "call test_pyramid_down_small_sizes<pyramid_down<5> >();";
test_pyramid_down_small_sizes<pyramid_down<5> >();
dlog << LINFO << "call test_pyramid_down_small_sizes<pyramid_disable>();";
test_pyramid_down_small_sizes<pyramid_disable>();
dlog << LINFO << "call test_pyramid_down_small_sizes<pyramid_down_generic<3> >();";
test_pyramid_down_small_sizes<pyramid_down_generic<3> >();
dlog << LINFO << "call test_pyramid_down_small_sizes<pyramid_down<9> >();";
test_pyramid_down_small_sizes<pyramid_down<9> >();
print_spinner();
dlog << LINFO << "call test_pyramid_down_rgb2<pyramid_down>();";
test_pyramid_down_rgb2<pyramid_down>();
dlog << LINFO << "call test_pyramid_down_rgb2<pyramid_down<2> >();";
test_pyramid_down_rgb2<pyramid_down<2> >();
print_spinner();
dlog << LINFO << "call test_pyramid_down_rgb2<pyramid_down_3_2>();";
test_pyramid_down_rgb2<pyramid_down_3_2>();
dlog << LINFO << "call test_pyramid_down_rgb2<pyramid_down<3> >();";
test_pyramid_down_rgb2<pyramid_down<3> >();
print_spinner();
dlog << LINFO << "call test_pyramid_down_rgb2<pyramid_down_4_3>();";
test_pyramid_down_rgb2<pyramid_down_4_3>();
dlog << LINFO << "call test_pyramid_down_rgb2<pyramid_down<4> >();";
test_pyramid_down_rgb2<pyramid_down<4> >();
print_spinner();
dlog << LINFO << "call test_pyramid_down_rgb2<pyramid_down_5_4>();";
test_pyramid_down_rgb2<pyramid_down_5_4>();
dlog << LINFO << "call test_pyramid_down_rgb2<pyramid_down<5> >();";
test_pyramid_down_rgb2<pyramid_down<5> >();
print_spinner();
dlog << LINFO << "call test_pyramid_down_rgb2<pyramid_down_generic<5> >();";
test_pyramid_down_rgb2<pyramid_down_generic<5> >();
dlog << LINFO << "call test_pyramid_down_rgb2<pyramid_down<8> >();";
test_pyramid_down_rgb2<pyramid_down<8> >();
print_spinner();
dlog << LINFO << "call test_pyramid_down_grayscale2<pyramid_down>();";
test_pyramid_down_grayscale2<pyramid_down>();
dlog << LINFO << "call test_pyramid_down_grayscale2<pyramid_down<2> >();";
test_pyramid_down_grayscale2<pyramid_down<2> >();
print_spinner();
dlog << LINFO << "call test_pyramid_down_grayscale2<pyramid_down_3_2>();";
test_pyramid_down_grayscale2<pyramid_down_3_2>();
dlog << LINFO << "call test_pyramid_down_grayscale2<pyramid_down<3> >();";
test_pyramid_down_grayscale2<pyramid_down<3> >();
print_spinner();
dlog << LINFO << "call test_pyramid_down_grayscale2<pyramid_down_4_3>();";
test_pyramid_down_grayscale2<pyramid_down_4_3>();
dlog << LINFO << "call test_pyramid_down_grayscale2<pyramid_down<4> >();";
test_pyramid_down_grayscale2<pyramid_down<4> >();
print_spinner();
dlog << LINFO << "call test_pyramid_down_grayscale2<pyramid_down_5_4>();";
test_pyramid_down_grayscale2<pyramid_down_5_4>();
dlog << LINFO << "call test_pyramid_down_grayscale2<pyramid_down<5> >();";
test_pyramid_down_grayscale2<pyramid_down<5> >();
print_spinner();
dlog << LINFO << "call test_pyramid_down_grayscale2<pyramid_down_generic<6> >();";
test_pyramid_down_grayscale2<pyramid_down_generic<6> >();
dlog << LINFO << "call test_pyramid_down_grayscale2<pyramid_down<6> >();";
test_pyramid_down_grayscale2<pyramid_down<6> >();
}
} a;

View File

@ -196,7 +196,7 @@ int main()
make_simple_test_data(images, object_locations);
typedef scan_image_pyramid<pyramid_down_5_4, very_simple_feature_extractor> image_scanner_type;
typedef scan_image_pyramid<pyramid_down<5>, very_simple_feature_extractor> image_scanner_type;
image_scanner_type scanner;
// Instead of using setup_grid_detection_templates() like in object_detector_ex.cpp, let's manually
// setup the sliding window box. We use a window with the same shape as the white boxes we

View File

@ -145,11 +145,12 @@ int main()
parameters yourself. They are automatically populated by the
structural_object_detection_trainer.
The sliding window classifiers described above are applied to every level of an image
pyramid. So you need to tell scan_image_pyramid what kind of pyramid you want to
use. In this case we are using pyramid_down which downsamples each pyramid layer by
half (dlib also contains other version of pyramid_down which result in finer grained
pyramids).
The sliding window classifiers described above are applied to every level of an
image pyramid. So you need to tell scan_image_pyramid what kind of pyramid you want
to use. In this case we are using pyramid_down<2>, which downsamples each pyramid
layer by half. If you want to use a finer image pyramid then just change the
template argument to a larger value. For example, using pyramid_down<5> would
downsample each layer by a ratio of 5 to 4.
Finally, some of the feature extraction zones are allowed to move freely within the
object box. This means that when we are sliding the classifier over an image, some
@ -168,7 +169,7 @@ int main()
feature extraction regions.
*/
typedef hashed_feature_image<hog_image<3,3,1,4,hog_signed_gradient,hog_full_interpolation> > feature_extractor_type;
typedef scan_image_pyramid<pyramid_down, feature_extractor_type> image_scanner_type;
typedef scan_image_pyramid<pyramid_down<2>, feature_extractor_type> image_scanner_type;
image_scanner_type scanner;
// The hashed_feature_image in the scanner needs to be supplied with a hash function capable

View File

@ -133,7 +133,7 @@ int main(int argc, char** argv)
typedef hashed_feature_image<hog_image<4,4,1,9,hog_signed_gradient,hog_full_interpolation> > feature_extractor_type;
typedef scan_image_pyramid<pyramid_down_3_2, feature_extractor_type> image_scanner_type;
typedef scan_image_pyramid<pyramid_down<3>, feature_extractor_type> image_scanner_type;
if (parser.option("t") || parser.option("cross-validate"))
{