From e338bf02e0b6965a3e511104daecac0ec8833f06 Mon Sep 17 00:00:00 2001 From: Davis King Date: Tue, 24 Oct 2017 22:10:02 -0400 Subject: [PATCH] Changed the random_cropper's set_min_object_size() routine to take min box dimensions in the same format as the mmod_options object (i.e. two lengths measured in pixels). This should make defining random_cropping strategies that are consistent with MMOD settings much more straightforward since you can just take the mmod_options settings and give them to the random_cropper and it will do the right thing. --- dlib/image_transforms/random_cropper.h | 82 +++++++++++++------ .../random_cropper_abstract.h | 36 +++++--- examples/dnn_mmod_ex.cpp | 4 +- examples/dnn_mmod_train_find_cars_ex.cpp | 4 +- examples/random_cropper_ex.cpp | 8 +- 5 files changed, 93 insertions(+), 41 deletions(-) diff --git a/dlib/image_transforms/random_cropper.h b/dlib/image_transforms/random_cropper.h index 3a2378575..a29bdd3be 100644 --- a/dlib/image_transforms/random_cropper.h +++ b/dlib/image_transforms/random_cropper.h @@ -18,7 +18,8 @@ namespace dlib chip_dims dims = chip_dims(300,300); bool randomly_flip = true; double max_rotation_degrees = 30; - double min_object_size = 0.25; // cropped object will be at least this fraction of the size of the image. + long min_object_length_long_dim = 75; // cropped object will be at least this many pixels along its longest edge. + long min_object_length_short_dim = 30; // cropped object will be at least this many pixels along its shortest edge. double max_object_size = 0.7; // cropped object will be at most this fraction of the size of the image. 
double background_crops_fraction = 0.5; double translate_amount = 0.10; @@ -78,14 +79,19 @@ namespace dlib double value ) { max_rotation_degrees = std::abs(value); } - double get_min_object_size ( - ) const { return min_object_size; } + long get_min_object_length_long_dim ( + ) const { return min_object_length_long_dim; } + long get_min_object_length_short_dim ( + ) const { return min_object_length_short_dim; } + void set_min_object_size ( - double value + long long_dim, + long short_dim ) { - DLIB_CASSERT(0 < value); - min_object_size = value; + DLIB_CASSERT(0 < short_dim && short_dim <= long_dim); + min_object_length_long_dim = long_dim; + min_object_length_short_dim = short_dim; } double get_max_object_size ( @@ -175,9 +181,6 @@ namespace dlib extract_image_chip(img, crop_plan, crop); const rectangle_transform tform = get_mapping_to_chip(crop_plan); - const unsigned long min_object_size_absolute_rows = std::round(min_object_size*crop_plan.rows); - const unsigned long min_object_size_absolute_cols = std::round(min_object_size*crop_plan.cols); - // copy rects into crop_rects and set ones that are outside the crop to ignore or // drop entirely as appropriate. crop_rects.clear(); @@ -190,8 +193,12 @@ namespace dlib if (get_rect(crop).intersect(rect.rect).area() != 0) { // set to ignore if not totally in the crop or if too small. 
- if (!get_rect(crop).contains(rect.rect) || (rect.rect.height() < min_object_size_absolute_rows && rect.rect.width() < min_object_size_absolute_cols)) + if (!get_rect(crop).contains(rect.rect) || + (rect.rect.height() < min_object_length_long_dim && rect.rect.width() < min_object_length_long_dim) || + (rect.rect.height() < min_object_length_short_dim || rect.rect.width() < min_object_length_short_dim)) + { rect.ignore = true; + } crop_rects.push_back(rect); } @@ -223,15 +230,39 @@ namespace dlib if (has_non_ignored_box(rects) && rnd.get_random_double() >= background_crops_fraction) { auto rect = rects[randomly_pick_rect(rects)].rect; - // perturb the location of the crop by a small fraction of the object's size. - const point rand_translate = dpoint(rnd.get_double_in_range(-translate_amount,translate_amount)*rect.width(), - rnd.get_double_in_range(-translate_amount,translate_amount)*std::max(rect.height(),rect.width())); - // perturb the scale of the crop by a fraction of the object's size - const double rand_scale_perturb = rnd.get_double_in_range(min_object_size, max_object_size); + // perturb the location of the crop by a small fraction of the object's size. + const point rand_translate = dpoint(rnd.get_double_in_range(-translate_amount,translate_amount)*std::max(rect.height(),rect.width()), + rnd.get_double_in_range(-translate_amount,translate_amount)*std::max(rect.height(),rect.width())); + + // We are going to grow rect into the cropping rect. First, we grow it a + // little so that it has the desired minimum border around it. + drectangle drect = centered_drect(center(rect)+rand_translate, rect.width()/max_object_size, rect.height()/max_object_size); + + // Now make rect have the same aspect ratio as dims so that there won't be + // any funny stretching when we crop it. We do this by growing it along + // whichever dimension is too short. 
+ const double target_aspect = dims.cols/(double)dims.rows; + if (drect.width()/drect.height() < target_aspect) + drect = centered_drect(drect, target_aspect*drect.height(), drect.height()); + else + drect = centered_drect(drect, drect.width(), drect.width()/target_aspect); + + // Now perturb the scale of the crop. We do this by shrinking it, but not + // so much that it gets smaller than the min object sizes require. + double current_width = dims.cols*rect.width()/drect.width(); + double current_height = dims.rows*rect.height()/drect.height(); + + // never make any dimension smaller than the short dim. + double min_scale1 = std::max(min_object_length_short_dim/current_width, min_object_length_short_dim/current_height); + // at least one dimension needs to be longer than the long dim. + double min_scale2 = std::min(min_object_length_long_dim/current_width, min_object_length_long_dim/current_height); + double min_scale = std::max(min_scale1, min_scale2); + + const double rand_scale_perturb = 1.0/rnd.get_double_in_range(min_scale, 1); + crop_rect = centered_drect(drect, drect.width()*rand_scale_perturb, drect.height()*rand_scale_perturb); + DLIB_CASSERT(crop_rect.width() == crop_rect.height()); - const long box_size = std::max(rect.height(),rect.width())/rand_scale_perturb; - crop_rect = centered_rect(center(rect)+rand_translate, box_size, box_size); } else { @@ -298,14 +329,15 @@ namespace dlib { using std::endl; out << "random_cropper details: " << endl; - out << " chip_dims.rows: " << item.get_chip_dims().rows << endl; - out << " chip_dims.cols: " << item.get_chip_dims().cols << endl; - out << " randomly_flip: " << std::boolalpha << item.get_randomly_flip() << endl; - out << " max_rotation_degrees: " << item.get_max_rotation_degrees() << endl; - out << " min_object_size: " << item.get_min_object_size() << endl; - out << " max_object_size: " << item.get_max_object_size() << endl; - out << " background_crops_fraction: " << item.get_background_crops_fraction() << 
endl; - out << " translate_amount: " << item.get_translate_amount() << endl; + out << " chip_dims.rows: " << item.get_chip_dims().rows << endl; + out << " chip_dims.cols: " << item.get_chip_dims().cols << endl; + out << " randomly_flip: " << std::boolalpha << item.get_randomly_flip() << endl; + out << " max_rotation_degrees: " << item.get_max_rotation_degrees() << endl; + out << " min_object_length_long_dim: " << item.get_min_object_length_long_dim() << endl; + out << " min_object_length_short_dim: " << item.get_min_object_length_short_dim() << endl; + out << " max_object_size: " << item.get_max_object_size() << endl; + out << " background_crops_fraction: " << item.get_background_crops_fraction() << endl; + out << " translate_amount: " << item.get_translate_amount() << endl; return out; } diff --git a/dlib/image_transforms/random_cropper_abstract.h b/dlib/image_transforms/random_cropper_abstract.h index c3ac81ecc..9c252ab67 100644 --- a/dlib/image_transforms/random_cropper_abstract.h +++ b/dlib/image_transforms/random_cropper_abstract.h @@ -35,7 +35,8 @@ namespace dlib - #get_chip_dims() == chip_dims(300,300) - #get_randomly_flip() == true - #get_max_rotation_degrees() == 30 - - #get_min_object_size() == 0.25 + - #get_min_object_length_long_dim() == 75 + - #get_min_object_length_short_dim() == 30 - #get_max_object_size() == 0.7 - #get_background_crops_fraction() == 0.5 - #get_translate_amount() == 0.1 @@ -143,27 +144,38 @@ namespace dlib - #get_max_rotation_degrees() == std::abs(value) !*/ - double get_min_object_size ( + long get_min_object_length_long_dim ( + ) const; + /*! + ensures + - When a chip is extracted around an object, the chip will be sized so that + the longest edge of the object (i.e. either its height or width, + whichever is longer) is at least #get_min_object_length_long_dim() pixels + in length. When we say "object" here we are referring specifically to + the rectangle in the mmod_rect output by the cropper. 
+ !*/ + + long get_min_object_length_short_dim ( ) const; /*! ensures - When a chip is extracted around an object, the chip will be sized so that - at least one of the object's height or width are >= get_min_object_size() * - the chip's height and width, respectively. E.g. if the chip is 640x480 - pixels in size then the object will be at least 480*get_min_object_size() - pixels tall or 640*get_min_object_size() pixels wide. This also means - that if get_min_object_size() >1 then the object will only be partially - visible in the crop since it will be too big to fit. + the shortest edge of the object (i.e. either its height or width, + whichever is shorter) is at least #get_min_object_length_short_dim() + pixels in length. When we say "object" here we are referring + specifically to the rectangle in the mmod_rect output by the cropper. !*/ void set_min_object_size ( - double value - ); + long long_dim, + long short_dim + ); /*! requires - - 0 < value + - 0 < short_dim <= long_dim ensures - - #get_min_object_size() == value + - #get_min_object_length_short_dim() == short_dim + - #get_min_object_length_long_dim() == long_dim !*/ double get_max_object_size ( diff --git a/examples/dnn_mmod_ex.cpp b/examples/dnn_mmod_ex.cpp index 666b87132..9565d514c 100644 --- a/examples/dnn_mmod_ex.cpp +++ b/examples/dnn_mmod_ex.cpp @@ -159,7 +159,9 @@ int main(int argc, char** argv) try std::vector> mini_batch_labels; random_cropper cropper; cropper.set_chip_dims(200, 200); - cropper.set_min_object_size(0.2); + // Usually you want to give the cropper whatever min sizes you passed to the + // mmod_options constructor, which is what we do here. + cropper.set_min_object_size(40,40); dlib::rand rnd; // Run the trainer until the learning rate gets small. This will probably take several // hours. 
diff --git a/examples/dnn_mmod_train_find_cars_ex.cpp b/examples/dnn_mmod_train_find_cars_ex.cpp index 804160136..16419477a 100644 --- a/examples/dnn_mmod_train_find_cars_ex.cpp +++ b/examples/dnn_mmod_train_find_cars_ex.cpp @@ -310,7 +310,9 @@ int main(int argc, char** argv) try random_cropper cropper; cropper.set_seed(time(0)); cropper.set_chip_dims(350, 350); - cropper.set_min_object_size(0.20); + // Usually you want to give the cropper whatever min sizes you passed to the + // mmod_options constructor, or very slightly smaller sizes, which is what we do here. + cropper.set_min_object_size(69,28); cropper.set_max_rotation_degrees(2); dlib::rand rnd; diff --git a/examples/random_cropper_ex.cpp b/examples/random_cropper_ex.cpp index 01a2ac5e5..6b020058d 100644 --- a/examples/random_cropper_ex.cpp +++ b/examples/random_cropper_ex.cpp @@ -44,9 +44,13 @@ int main(int argc, char** argv) try // You can tell it how much scale jittering you would like by saying "please // make the objects in the crops have a min and max size of such and such". // You do that by calling these two functions. Here we are saying we want the - // objects in our crops to be between 0.2*400 and 0.8*400 pixels in height. - cropper.set_min_object_size(0.2); + // objects in our crops to be no more than 0.8*400 pixels in height and width. cropper.set_max_object_size(0.8); + // And also that they shouldn't be too small. Specifically, each object's smallest + // dimension (i.e. height or width) should be at least 60 pixels and at least one of + // the dimensions must be at least 80 pixels. So the smallest objects the cropper will + // output will be either 80x60 or 60x80. + cropper.set_min_object_size(80,60); // The cropper can also randomly mirror and rotate crops, which we ask it to // perform as well. cropper.set_randomly_flip(true);