mirror of
https://github.com/davisking/dlib.git
synced 2024-11-01 10:14:53 +08:00
Refactored the image pyramid code. Now there is just one templated object,
pyramid_down, and you give it the downsampling amount as a template argument.
This commit is contained in:
parent
39ed906c39
commit
e0a6e30581
File diff suppressed because it is too large
Load Diff
@ -10,11 +10,18 @@
|
||||
namespace dlib
|
||||
{
|
||||
|
||||
template <
|
||||
unsigned int N
|
||||
>
|
||||
class pyramid_down : noncopyable
|
||||
{
|
||||
/*!
|
||||
REQUIREMENTS ON N
|
||||
N > 1
|
||||
|
||||
WHAT THIS OBJECT REPRESENTS
|
||||
This is a simple functor to help create image pyramids.
|
||||
This is a simple functor to help create image pyramids. In particular, it
|
||||
downsamples images at a ratio of N to N-1.
|
||||
|
||||
|
||||
WARNING, when mapping rectangles from one layer of a pyramid
|
||||
@ -43,18 +50,14 @@ namespace dlib
|
||||
- pixel_traits<typename in_image_type::type>::has_alpha == false
|
||||
- pixel_traits<typename out_image_type::type>::has_alpha == false
|
||||
ensures
|
||||
- #down will contain an image that is roughly half the size of the original
|
||||
image. To be specific, this function performs the following steps:
|
||||
- 1. Applies a 5x5 Gaussian filter to the original image to smooth it a little.
|
||||
- 2. Every other row and column is discarded to create an image half the size
|
||||
of the original. This smaller image is stored in #down.
|
||||
- if both input and output images contain RGB pixels then the downsampled image will
|
||||
- #down will contain an image that is roughly (N-1)/N times the size of the
|
||||
original image.
|
||||
- If both input and output images contain RGB pixels then the downsampled image will
|
||||
be in color. Otherwise, the downsampling will be performed in a grayscale mode.
|
||||
- The location of a point P in the original image will show up at point point_down(P)
|
||||
in the #down image.
|
||||
- Note that some points on the border of the original image will correspond to
|
||||
points outside the #down image. This is because the 5x5 filter is not applied
|
||||
at the borders.
|
||||
- Note that some points on the border of the original image might correspond to
|
||||
points outside the #down image.
|
||||
!*/
|
||||
|
||||
// -------------------------------
|
||||
@ -151,61 +154,6 @@ namespace dlib
|
||||
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
class pyramid_down_3_2 : noncopyable
|
||||
{
|
||||
/*!
|
||||
WHAT THIS OBJECT REPRESENTS
|
||||
This is a function object with an interface identical to pyramid_down (defined
|
||||
at the top of this file) except that it downsamples images at a ratio of 3 to 2
|
||||
instead of 2 to 1.
|
||||
!*/
|
||||
};
|
||||
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
class pyramid_down_4_3 : noncopyable
|
||||
{
|
||||
/*!
|
||||
WHAT THIS OBJECT REPRESENTS
|
||||
This is a function object with an interface identical to pyramid_down (defined
|
||||
at the top of this file) except that it downsamples images at a ratio of 4 to 3
|
||||
instead of 2 to 1.
|
||||
!*/
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
class pyramid_down_5_4 : noncopyable
|
||||
{
|
||||
/*!
|
||||
WHAT THIS OBJECT REPRESENTS
|
||||
This is a function object with an interface identical to pyramid_down (defined
|
||||
at the top of this file) except that it downsamples images at a ratio of 5 to 4
|
||||
instead of 2 to 1.
|
||||
!*/
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
unsigned int N
|
||||
>
|
||||
class pyramid_down_generic : noncopyable
|
||||
{
|
||||
/*!
|
||||
REQUIREMENTS ON N
|
||||
N > 1
|
||||
|
||||
WHAT THIS OBJECT REPRESENTS
|
||||
This is a function object with an interface identical to pyramid_down
|
||||
(defined at the top of this file) except that it downsamples images at a
|
||||
ratio of N to N-1 instead of 2 to 1.
|
||||
!*/
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
class pyramid_disable : noncopyable
|
||||
|
@ -379,7 +379,7 @@ namespace
|
||||
make_simple_test_data(images, object_locations);
|
||||
|
||||
typedef hashed_feature_image<hog_image<3,3,1,4,hog_signed_gradient,hog_full_interpolation> > feature_extractor_type;
|
||||
typedef scan_image_pyramid<pyramid_down, feature_extractor_type> image_scanner_type;
|
||||
typedef scan_image_pyramid<pyramid_down<2>, feature_extractor_type> image_scanner_type;
|
||||
image_scanner_type scanner;
|
||||
const rectangle object_box = compute_box_dimensions(1,35*35);
|
||||
scanner.add_detection_template(object_box, create_grid_detection_template(object_box,2,2));
|
||||
@ -463,7 +463,7 @@ namespace
|
||||
make_simple_test_data(images, object_locations);
|
||||
|
||||
typedef hashed_feature_image<hog_image<3,3,1,4,hog_signed_gradient,hog_full_interpolation> > feature_extractor_type;
|
||||
typedef scan_image_pyramid<pyramid_down, feature_extractor_type> image_scanner_type;
|
||||
typedef scan_image_pyramid<pyramid_down<2>, feature_extractor_type> image_scanner_type;
|
||||
image_scanner_type scanner;
|
||||
const rectangle object_box = compute_box_dimensions(1,35*35);
|
||||
std::vector<rectangle> mboxes;
|
||||
@ -512,7 +512,7 @@ namespace
|
||||
make_simple_test_data(images, object_locations);
|
||||
|
||||
typedef hashed_feature_image<fine_hog_image<3,3,2,4,hog_signed_gradient> > feature_extractor_type;
|
||||
typedef scan_image_pyramid<pyramid_down, feature_extractor_type> image_scanner_type;
|
||||
typedef scan_image_pyramid<pyramid_down<2>, feature_extractor_type> image_scanner_type;
|
||||
image_scanner_type scanner;
|
||||
const rectangle object_box = compute_box_dimensions(1,35*35);
|
||||
scanner.add_detection_template(object_box, create_grid_detection_template(object_box,2,2));
|
||||
@ -555,7 +555,7 @@ namespace
|
||||
make_simple_test_data(images, object_locations);
|
||||
|
||||
typedef hashed_feature_image<poly_image<2> > feature_extractor_type;
|
||||
typedef scan_image_pyramid<pyramid_down, feature_extractor_type> image_scanner_type;
|
||||
typedef scan_image_pyramid<pyramid_down<2>, feature_extractor_type> image_scanner_type;
|
||||
image_scanner_type scanner;
|
||||
const rectangle object_box = compute_box_dimensions(1,35*35);
|
||||
scanner.add_detection_template(object_box, create_grid_detection_template(object_box,2,2));
|
||||
@ -598,7 +598,7 @@ namespace
|
||||
make_simple_test_data(images, object_locations);
|
||||
|
||||
typedef hashed_feature_image<poly_image<2> > feature_extractor_type;
|
||||
typedef scan_image_pyramid<pyramid_down_3_2, feature_extractor_type> image_scanner_type;
|
||||
typedef scan_image_pyramid<pyramid_down<3>, feature_extractor_type> image_scanner_type;
|
||||
image_scanner_type scanner;
|
||||
const rectangle object_box = compute_box_dimensions(1,35*35);
|
||||
std::vector<rectangle> mboxes;
|
||||
@ -647,12 +647,12 @@ namespace
|
||||
make_simple_test_data(images, object_locations);
|
||||
|
||||
typedef nearest_neighbor_feature_image<poly_image<5> > feature_extractor_type;
|
||||
typedef scan_image_pyramid<pyramid_down, feature_extractor_type> image_scanner_type;
|
||||
typedef scan_image_pyramid<pyramid_down<2>, feature_extractor_type> image_scanner_type;
|
||||
image_scanner_type scanner;
|
||||
|
||||
setup_grid_detection_templates(scanner, object_locations, 2, 2);
|
||||
feature_extractor_type nnfe;
|
||||
pyramid_down pyr_down;
|
||||
pyramid_down<2> pyr_down;
|
||||
poly_image<5> polyi;
|
||||
nnfe.set_basis(randomly_sample_image_features(images, pyr_down, polyi, 80));
|
||||
scanner.copy_configuration(nnfe);
|
||||
@ -697,7 +697,7 @@ namespace
|
||||
image_scanner_type scanner;
|
||||
|
||||
feature_extractor_type nnfe;
|
||||
pyramid_down pyr_down;
|
||||
pyramid_down<2> pyr_down;
|
||||
poly_image<5> polyi;
|
||||
nnfe.set_basis(randomly_sample_image_features(images, pyr_down, polyi, 80));
|
||||
scanner.copy_configuration(nnfe);
|
||||
@ -737,7 +737,7 @@ namespace
|
||||
std::vector<std::vector<rectangle> > object_locations;
|
||||
make_simple_test_data(images, object_locations);
|
||||
|
||||
typedef scan_image_pyramid<pyramid_down_5_4, very_simple_feature_extractor> image_scanner_type;
|
||||
typedef scan_image_pyramid<pyramid_down<5>, very_simple_feature_extractor> image_scanner_type;
|
||||
image_scanner_type scanner;
|
||||
const rectangle object_box = compute_box_dimensions(1,70*70);
|
||||
scanner.add_detection_template(object_box, create_grid_detection_template(object_box,2,2));
|
||||
@ -771,7 +771,7 @@ namespace
|
||||
|
||||
class pyramid_down_funny : noncopyable
|
||||
{
|
||||
pyramid_down pyr;
|
||||
pyramid_down<2> pyr;
|
||||
public:
|
||||
|
||||
template <typename T>
|
||||
|
@ -23,7 +23,7 @@ namespace
|
||||
void test_pyramid_down_grayscale()
|
||||
{
|
||||
array2d<unsigned char> img, down;
|
||||
pyramid_down pyr;
|
||||
pyramid_down<2> pyr;
|
||||
|
||||
img.set_size(300,264);
|
||||
|
||||
@ -52,7 +52,7 @@ void test_pyramid_down_rgb()
|
||||
{
|
||||
array2d<rgb_pixel> img;
|
||||
array2d<bgr_pixel> down;
|
||||
pyramid_down pyr;
|
||||
pyramid_down<2> pyr;
|
||||
|
||||
img.set_size(231, 351);
|
||||
|
||||
@ -325,59 +325,59 @@ void test_pyramid_down_small_sizes()
|
||||
test_pyramid_down_rgb();
|
||||
|
||||
print_spinner();
|
||||
dlog << LINFO << "call test_pyramid_down_small_sizes<pyramid_down>();";
|
||||
test_pyramid_down_small_sizes<pyramid_down>();
|
||||
dlog << LINFO << "call test_pyramid_down_small_sizes<pyramid_down_3_2>();";
|
||||
test_pyramid_down_small_sizes<pyramid_down_3_2>();
|
||||
dlog << LINFO << "call test_pyramid_down_small_sizes<pyramid_down_4_3>();";
|
||||
test_pyramid_down_small_sizes<pyramid_down_4_3>();
|
||||
dlog << LINFO << "call test_pyramid_down_small_sizes<pyramid_down_5_4>();";
|
||||
test_pyramid_down_small_sizes<pyramid_down_5_4>();
|
||||
dlog << LINFO << "call test_pyramid_down_small_sizes<pyramid_down<2> >();";
|
||||
test_pyramid_down_small_sizes<pyramid_down<2> >();
|
||||
dlog << LINFO << "call test_pyramid_down_small_sizes<pyramid_down<3> >();";
|
||||
test_pyramid_down_small_sizes<pyramid_down<3> >();
|
||||
dlog << LINFO << "call test_pyramid_down_small_sizes<pyramid_down<4> >();";
|
||||
test_pyramid_down_small_sizes<pyramid_down<4> >();
|
||||
dlog << LINFO << "call test_pyramid_down_small_sizes<pyramid_down<5> >();";
|
||||
test_pyramid_down_small_sizes<pyramid_down<5> >();
|
||||
dlog << LINFO << "call test_pyramid_down_small_sizes<pyramid_disable>();";
|
||||
test_pyramid_down_small_sizes<pyramid_disable>();
|
||||
dlog << LINFO << "call test_pyramid_down_small_sizes<pyramid_down_generic<3> >();";
|
||||
test_pyramid_down_small_sizes<pyramid_down_generic<3> >();
|
||||
dlog << LINFO << "call test_pyramid_down_small_sizes<pyramid_down<9> >();";
|
||||
test_pyramid_down_small_sizes<pyramid_down<9> >();
|
||||
|
||||
print_spinner();
|
||||
dlog << LINFO << "call test_pyramid_down_rgb2<pyramid_down>();";
|
||||
test_pyramid_down_rgb2<pyramid_down>();
|
||||
dlog << LINFO << "call test_pyramid_down_rgb2<pyramid_down<2> >();";
|
||||
test_pyramid_down_rgb2<pyramid_down<2> >();
|
||||
|
||||
print_spinner();
|
||||
dlog << LINFO << "call test_pyramid_down_rgb2<pyramid_down_3_2>();";
|
||||
test_pyramid_down_rgb2<pyramid_down_3_2>();
|
||||
dlog << LINFO << "call test_pyramid_down_rgb2<pyramid_down<3> >();";
|
||||
test_pyramid_down_rgb2<pyramid_down<3> >();
|
||||
|
||||
print_spinner();
|
||||
dlog << LINFO << "call test_pyramid_down_rgb2<pyramid_down_4_3>();";
|
||||
test_pyramid_down_rgb2<pyramid_down_4_3>();
|
||||
dlog << LINFO << "call test_pyramid_down_rgb2<pyramid_down<4> >();";
|
||||
test_pyramid_down_rgb2<pyramid_down<4> >();
|
||||
|
||||
print_spinner();
|
||||
dlog << LINFO << "call test_pyramid_down_rgb2<pyramid_down_5_4>();";
|
||||
test_pyramid_down_rgb2<pyramid_down_5_4>();
|
||||
dlog << LINFO << "call test_pyramid_down_rgb2<pyramid_down<5> >();";
|
||||
test_pyramid_down_rgb2<pyramid_down<5> >();
|
||||
|
||||
print_spinner();
|
||||
dlog << LINFO << "call test_pyramid_down_rgb2<pyramid_down_generic<5> >();";
|
||||
test_pyramid_down_rgb2<pyramid_down_generic<5> >();
|
||||
dlog << LINFO << "call test_pyramid_down_rgb2<pyramid_down<8> >();";
|
||||
test_pyramid_down_rgb2<pyramid_down<8> >();
|
||||
|
||||
|
||||
print_spinner();
|
||||
dlog << LINFO << "call test_pyramid_down_grayscale2<pyramid_down>();";
|
||||
test_pyramid_down_grayscale2<pyramid_down>();
|
||||
dlog << LINFO << "call test_pyramid_down_grayscale2<pyramid_down<2> >();";
|
||||
test_pyramid_down_grayscale2<pyramid_down<2> >();
|
||||
|
||||
print_spinner();
|
||||
dlog << LINFO << "call test_pyramid_down_grayscale2<pyramid_down_3_2>();";
|
||||
test_pyramid_down_grayscale2<pyramid_down_3_2>();
|
||||
dlog << LINFO << "call test_pyramid_down_grayscale2<pyramid_down<3> >();";
|
||||
test_pyramid_down_grayscale2<pyramid_down<3> >();
|
||||
|
||||
print_spinner();
|
||||
dlog << LINFO << "call test_pyramid_down_grayscale2<pyramid_down_4_3>();";
|
||||
test_pyramid_down_grayscale2<pyramid_down_4_3>();
|
||||
dlog << LINFO << "call test_pyramid_down_grayscale2<pyramid_down<4> >();";
|
||||
test_pyramid_down_grayscale2<pyramid_down<4> >();
|
||||
|
||||
print_spinner();
|
||||
dlog << LINFO << "call test_pyramid_down_grayscale2<pyramid_down_5_4>();";
|
||||
test_pyramid_down_grayscale2<pyramid_down_5_4>();
|
||||
dlog << LINFO << "call test_pyramid_down_grayscale2<pyramid_down<5> >();";
|
||||
test_pyramid_down_grayscale2<pyramid_down<5> >();
|
||||
|
||||
print_spinner();
|
||||
dlog << LINFO << "call test_pyramid_down_grayscale2<pyramid_down_generic<6> >();";
|
||||
test_pyramid_down_grayscale2<pyramid_down_generic<6> >();
|
||||
dlog << LINFO << "call test_pyramid_down_grayscale2<pyramid_down<6> >();";
|
||||
test_pyramid_down_grayscale2<pyramid_down<6> >();
|
||||
}
|
||||
} a;
|
||||
|
||||
|
@ -196,7 +196,7 @@ int main()
|
||||
make_simple_test_data(images, object_locations);
|
||||
|
||||
|
||||
typedef scan_image_pyramid<pyramid_down_5_4, very_simple_feature_extractor> image_scanner_type;
|
||||
typedef scan_image_pyramid<pyramid_down<5>, very_simple_feature_extractor> image_scanner_type;
|
||||
image_scanner_type scanner;
|
||||
// Instead of using setup_grid_detection_templates() like in object_detector_ex.cpp, let's manually
|
||||
// setup the sliding window box. We use a window with the same shape as the white boxes we
|
||||
|
@ -145,11 +145,12 @@ int main()
|
||||
parameters yourself. They are automatically populated by the
|
||||
structural_object_detection_trainer.
|
||||
|
||||
The sliding window classifiers described above are applied to every level of an image
|
||||
pyramid. So you need to tell scan_image_pyramid what kind of pyramid you want to
|
||||
use. In this case we are using pyramid_down which downsamples each pyramid layer by
|
||||
half (dlib also contains other versions of pyramid_down which result in finer grained
|
||||
pyramids).
|
||||
The sliding window classifiers described above are applied to every level of an
|
||||
image pyramid. So you need to tell scan_image_pyramid what kind of pyramid you want
|
||||
to use. In this case we are using pyramid_down<2> which downsamples each pyramid
|
||||
layer by half (if you want to use a finer image pyramid then just change the
|
||||
template argument to a larger value. For example, using pyramid_down<5> would
|
||||
downsample each layer by a ratio of 5 to 4).
|
||||
|
||||
Finally, some of the feature extraction zones are allowed to move freely within the
|
||||
object box. This means that when we are sliding the classifier over an image, some
|
||||
@ -168,7 +169,7 @@ int main()
|
||||
feature extraction regions.
|
||||
*/
|
||||
typedef hashed_feature_image<hog_image<3,3,1,4,hog_signed_gradient,hog_full_interpolation> > feature_extractor_type;
|
||||
typedef scan_image_pyramid<pyramid_down, feature_extractor_type> image_scanner_type;
|
||||
typedef scan_image_pyramid<pyramid_down<2>, feature_extractor_type> image_scanner_type;
|
||||
image_scanner_type scanner;
|
||||
|
||||
// The hashed_feature_image in the scanner needs to be supplied with a hash function capable
|
||||
|
@ -133,7 +133,7 @@ int main(int argc, char** argv)
|
||||
|
||||
|
||||
typedef hashed_feature_image<hog_image<4,4,1,9,hog_signed_gradient,hog_full_interpolation> > feature_extractor_type;
|
||||
typedef scan_image_pyramid<pyramid_down_3_2, feature_extractor_type> image_scanner_type;
|
||||
typedef scan_image_pyramid<pyramid_down<3>, feature_extractor_type> image_scanner_type;
|
||||
|
||||
if (parser.option("t") || parser.option("cross-validate"))
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user