Changed the random_cropper's set_min_object_size() routine to take min box
dimensions in the same format as the mmod_options object (i.e. two lengths
measured in pixels).  This should make defining random_cropping strategies that
are consistent with MMOD settings much more straightforward since you can just
take the mmod_options settings and give them to the random_cropper and it will
do the right thing.
This commit is contained in:
Davis King 2017-10-24 22:10:02 -04:00
parent 1c664eeac5
commit e338bf02e0
5 changed files with 92 additions and 40 deletions

View File

@ -18,7 +18,8 @@ namespace dlib
chip_dims dims = chip_dims(300,300); chip_dims dims = chip_dims(300,300);
bool randomly_flip = true; bool randomly_flip = true;
double max_rotation_degrees = 30; double max_rotation_degrees = 30;
double min_object_size = 0.25; // cropped object will be at least this fraction of the size of the image. long min_object_length_long_dim = 75; // cropped object will be at least this many pixels along its longest edge.
long min_object_length_short_dim = 30; // cropped object will be at least this many pixels along its shortest edge.
double max_object_size = 0.7; // cropped object will be at most this fraction of the size of the image. double max_object_size = 0.7; // cropped object will be at most this fraction of the size of the image.
double background_crops_fraction = 0.5; double background_crops_fraction = 0.5;
double translate_amount = 0.10; double translate_amount = 0.10;
@ -78,14 +79,19 @@ namespace dlib
double value double value
) { max_rotation_degrees = std::abs(value); } ) { max_rotation_degrees = std::abs(value); }
double get_min_object_size ( long get_min_object_length_long_dim (
) const { return min_object_size; } ) const { return min_object_length_long_dim; }
long get_min_object_length_short_dim (
) const { return min_object_length_short_dim; }
void set_min_object_size ( void set_min_object_size (
double value long long_dim,
long short_dim
) )
{ {
DLIB_CASSERT(0 < value); DLIB_CASSERT(0 < short_dim && short_dim <= long_dim);
min_object_size = value; min_object_length_long_dim = long_dim;
min_object_length_short_dim = short_dim;
} }
double get_max_object_size ( double get_max_object_size (
@ -175,9 +181,6 @@ namespace dlib
extract_image_chip(img, crop_plan, crop); extract_image_chip(img, crop_plan, crop);
const rectangle_transform tform = get_mapping_to_chip(crop_plan); const rectangle_transform tform = get_mapping_to_chip(crop_plan);
const unsigned long min_object_size_absolute_rows = std::round(min_object_size*crop_plan.rows);
const unsigned long min_object_size_absolute_cols = std::round(min_object_size*crop_plan.cols);
// copy rects into crop_rects and set ones that are outside the crop to ignore or // copy rects into crop_rects and set ones that are outside the crop to ignore or
// drop entirely as appropriate. // drop entirely as appropriate.
crop_rects.clear(); crop_rects.clear();
@ -190,8 +193,12 @@ namespace dlib
if (get_rect(crop).intersect(rect.rect).area() != 0) if (get_rect(crop).intersect(rect.rect).area() != 0)
{ {
// set to ignore if not totally in the crop or if too small. // set to ignore if not totally in the crop or if too small.
if (!get_rect(crop).contains(rect.rect) || (rect.rect.height() < min_object_size_absolute_rows && rect.rect.width() < min_object_size_absolute_cols)) if (!get_rect(crop).contains(rect.rect) ||
(rect.rect.height() < min_object_length_long_dim && rect.rect.width() < min_object_length_long_dim) ||
(rect.rect.height() < min_object_length_short_dim || rect.rect.width() < min_object_length_short_dim))
{
rect.ignore = true; rect.ignore = true;
}
crop_rects.push_back(rect); crop_rects.push_back(rect);
} }
@ -223,15 +230,39 @@ namespace dlib
if (has_non_ignored_box(rects) && rnd.get_random_double() >= background_crops_fraction) if (has_non_ignored_box(rects) && rnd.get_random_double() >= background_crops_fraction)
{ {
auto rect = rects[randomly_pick_rect(rects)].rect; auto rect = rects[randomly_pick_rect(rects)].rect;
// perturb the location of the crop by a small fraction of the object's size. // perturb the location of the crop by a small fraction of the object's size.
const point rand_translate = dpoint(rnd.get_double_in_range(-translate_amount,translate_amount)*rect.width(), const point rand_translate = dpoint(rnd.get_double_in_range(-translate_amount,translate_amount)*std::max(rect.height(),rect.width()),
rnd.get_double_in_range(-translate_amount,translate_amount)*std::max(rect.height(),rect.width())); rnd.get_double_in_range(-translate_amount,translate_amount)*std::max(rect.height(),rect.width()));
// perturb the scale of the crop by a fraction of the object's size // We are going to grow rect into the cropping rect. First, we grow it a
const double rand_scale_perturb = rnd.get_double_in_range(min_object_size, max_object_size); // little so that it has the desired minimum border around it.
drectangle drect = centered_drect(center(rect)+rand_translate, rect.width()/max_object_size, rect.height()/max_object_size);
// Now make drect have the same aspect ratio as dims so that there won't be
// any funny stretching when we crop it. We do this by growing it along
// whichever dimension is too short.
const double target_aspect = dims.cols/(double)dims.rows;
if (drect.width()/drect.height() < target_aspect)
drect = centered_drect(drect, target_aspect*drect.height(), drect.height());
else
drect = centered_drect(drect, drect.width(), drect.width()/target_aspect);
// Now perturb the scale of the crop. We do this by shrinking it, but not
// so much that it gets smaller than the min object sizes require.
double current_width = dims.cols*rect.width()/drect.width();
double current_height = dims.rows*rect.height()/drect.height();
// never make any dimension smaller than the short dim.
double min_scale1 = std::max(min_object_length_short_dim/current_width, min_object_length_short_dim/current_height);
// at least one dimension needs to be longer than the long dim.
double min_scale2 = std::min(min_object_length_long_dim/current_width, min_object_length_long_dim/current_height);
double min_scale = std::max(min_scale1, min_scale2);
const double rand_scale_perturb = 1.0/rnd.get_double_in_range(min_scale, 1);
crop_rect = centered_drect(drect, drect.width()*rand_scale_perturb, drect.height()*rand_scale_perturb);
DLIB_CASSERT(crop_rect.width() == crop_rect.height());
const long box_size = std::max(rect.height(),rect.width())/rand_scale_perturb;
crop_rect = centered_rect(center(rect)+rand_translate, box_size, box_size);
} }
else else
{ {
@ -302,7 +333,8 @@ namespace dlib
out << " chip_dims.cols: " << item.get_chip_dims().cols << endl; out << " chip_dims.cols: " << item.get_chip_dims().cols << endl;
out << " randomly_flip: " << std::boolalpha << item.get_randomly_flip() << endl; out << " randomly_flip: " << std::boolalpha << item.get_randomly_flip() << endl;
out << " max_rotation_degrees: " << item.get_max_rotation_degrees() << endl; out << " max_rotation_degrees: " << item.get_max_rotation_degrees() << endl;
out << " min_object_size: " << item.get_min_object_size() << endl; out << " min_object_length_long_dim: " << item.get_min_object_length_long_dim() << endl;
out << " min_object_length_short_dim: " << item.get_min_object_length_short_dim() << endl;
out << " max_object_size: " << item.get_max_object_size() << endl; out << " max_object_size: " << item.get_max_object_size() << endl;
out << " background_crops_fraction: " << item.get_background_crops_fraction() << endl; out << " background_crops_fraction: " << item.get_background_crops_fraction() << endl;
out << " translate_amount: " << item.get_translate_amount() << endl; out << " translate_amount: " << item.get_translate_amount() << endl;

View File

@ -35,7 +35,8 @@ namespace dlib
- #get_chip_dims() == chip_dims(300,300) - #get_chip_dims() == chip_dims(300,300)
- #get_randomly_flip() == true - #get_randomly_flip() == true
- #get_max_rotation_degrees() == 30 - #get_max_rotation_degrees() == 30
- #get_min_object_size() == 0.25 - #get_min_object_length_long_dim() == 75
- #get_min_object_length_short_dim() == 30
- #get_max_object_size() == 0.7 - #get_max_object_size() == 0.7
- #get_background_crops_fraction() == 0.5 - #get_background_crops_fraction() == 0.5
- #get_translate_amount() == 0.1 - #get_translate_amount() == 0.1
@ -143,27 +144,38 @@ namespace dlib
- #get_max_rotation_degrees() == std::abs(value) - #get_max_rotation_degrees() == std::abs(value)
!*/ !*/
double get_min_object_size ( long get_min_object_length_long_dim (
) const; ) const;
/*! /*!
ensures ensures
- When a chip is extracted around an object, the chip will be sized so that - When a chip is extracted around an object, the chip will be sized so that
at least one of the object's height or width are >= get_min_object_size() * the longest edge of the object (i.e. either its height or width,
the chip's height and width, respectively. E.g. if the chip is 640x480 whichever is longer) is at least #get_min_object_length_long_dim() pixels
pixels in size then the object will be at least 480*get_min_object_size() in length. When we say "object" here we are referring specifically to
pixels tall or 640*get_min_object_size() pixels wide. This also means the rectangle in the mmod_rect output by the cropper.
that if get_min_object_size() >1 then the object will only be partially !*/
visible in the crop since it will be too big to fit.
long get_min_object_length_short_dim (
) const;
/*!
ensures
- When a chip is extracted around an object, the chip will be sized so that
the shortest edge of the object (i.e. either its height or width,
whichever is shorter) is at least #get_min_object_length_short_dim()
pixels in length. When we say "object" here we are referring
specifically to the rectangle in the mmod_rect output by the cropper.
!*/ !*/
void set_min_object_size ( void set_min_object_size (
double value long long_dim,
long short_dim
); );
/*! /*!
requires requires
- 0 < value - 0 < short_dim <= long_dim
ensures ensures
- #get_min_object_size() == value - #get_min_object_length_short_dim() == short_dim
- #get_min_object_length_long_dim() == long_dim
!*/ !*/
double get_max_object_size ( double get_max_object_size (

View File

@ -159,7 +159,9 @@ int main(int argc, char** argv) try
std::vector<std::vector<mmod_rect>> mini_batch_labels; std::vector<std::vector<mmod_rect>> mini_batch_labels;
random_cropper cropper; random_cropper cropper;
cropper.set_chip_dims(200, 200); cropper.set_chip_dims(200, 200);
cropper.set_min_object_size(0.2); // Usually you want to give the cropper whatever min sizes you passed to the
// mmod_options constructor, which is what we do here.
cropper.set_min_object_size(40,40);
dlib::rand rnd; dlib::rand rnd;
// Run the trainer until the learning rate gets small. This will probably take several // Run the trainer until the learning rate gets small. This will probably take several
// hours. // hours.

View File

@ -310,7 +310,9 @@ int main(int argc, char** argv) try
random_cropper cropper; random_cropper cropper;
cropper.set_seed(time(0)); cropper.set_seed(time(0));
cropper.set_chip_dims(350, 350); cropper.set_chip_dims(350, 350);
cropper.set_min_object_size(0.20); // Usually you want to give the cropper whatever min sizes you passed to the
// mmod_options constructor, or very slightly smaller sizes, which is what we do here.
cropper.set_min_object_size(69,28);
cropper.set_max_rotation_degrees(2); cropper.set_max_rotation_degrees(2);
dlib::rand rnd; dlib::rand rnd;

View File

@ -44,9 +44,13 @@ int main(int argc, char** argv) try
// You can tell it how much scale jittering you would like by saying "please // You can tell it how much scale jittering you would like by saying "please
// make the objects in the crops have a min and max size of such and such". // make the objects in the crops have a min and max size of such and such".
// You do that by calling these two functions. Here we are saying we want the // You do that by calling these two functions. Here we are saying we want the
// objects in our crops to be between 0.2*400 and 0.8*400 pixels in height. // objects in our crops to be no more than 0.8*400 pixels in height and width.
cropper.set_min_object_size(0.2);
cropper.set_max_object_size(0.8); cropper.set_max_object_size(0.8);
// And also that they shouldn't be too small. Specifically, each object's smallest
// dimension (i.e. height or width) should be at least 60 pixels and at least one of
// the dimensions must be at least 80 pixels. So the smallest objects the cropper will
// output will be either 80x60 or 60x80.
cropper.set_min_object_size(80,60);
// The cropper can also randomly mirror and rotate crops, which we ask it to // The cropper can also randomly mirror and rotate crops, which we ask it to
// perform as well. // perform as well.
cropper.set_randomly_flip(true); cropper.set_randomly_flip(true);