Changed the random_cropper's interface so that instead of talking in terms of
min and max object height, it's now min and max object size.  This way, if you
have objects that are short and wide (i.e. objects where the relevant dimension
is width rather than height) you will get sensible behavior out of the random
cropper.
This commit is contained in:
Davis King 2017-06-17 12:34:26 -04:00
parent d2b80bfe6f
commit 17b48b97bb
5 changed files with 38 additions and 35 deletions

View File

@ -18,8 +18,8 @@ namespace dlib
chip_dims dims = chip_dims(300,300); chip_dims dims = chip_dims(300,300);
bool randomly_flip = true; bool randomly_flip = true;
double max_rotation_degrees = 30; double max_rotation_degrees = 30;
double min_object_height = 0.25; // cropped object will be at least this fraction of the height of the image. double min_object_size = 0.25; // cropped object will be at least this fraction of the size of the image.
double max_object_height = 0.7; // cropped object will be at most this fraction of the height of the image. double max_object_size = 0.7; // cropped object will be at most this fraction of the size of the image.
double background_crops_fraction = 0.5; double background_crops_fraction = 0.5;
double translate_amount = 0.10; double translate_amount = 0.10;
@ -78,24 +78,24 @@ namespace dlib
double value double value
) { max_rotation_degrees = std::abs(value); } ) { max_rotation_degrees = std::abs(value); }
double get_min_object_height ( double get_min_object_size (
) const { return min_object_height; } ) const { return min_object_size; }
void set_min_object_height ( void set_min_object_size (
double value double value
) )
{ {
DLIB_CASSERT(0 < value); DLIB_CASSERT(0 < value);
min_object_height = value; min_object_size = value;
} }
double get_max_object_height ( double get_max_object_size (
) const { return max_object_height; } ) const { return max_object_size; }
void set_max_object_height ( void set_max_object_size (
double value double value
) )
{ {
DLIB_CASSERT(0 < value); DLIB_CASSERT(0 < value);
max_object_height = value; max_object_size = value;
} }
template < template <
@ -175,7 +175,8 @@ namespace dlib
extract_image_chip(img, crop_plan, crop); extract_image_chip(img, crop_plan, crop);
const rectangle_transform tform = get_mapping_to_chip(crop_plan); const rectangle_transform tform = get_mapping_to_chip(crop_plan);
const unsigned long min_object_height_absolute = std::round(min_object_height*crop_plan.rows); const unsigned long min_object_size_absolute_rows = std::round(min_object_size*crop_plan.rows);
const unsigned long min_object_size_absolute_cols = std::round(min_object_size*crop_plan.cols);
// copy rects into crop_rects and set ones that are outside the crop to ignore or // copy rects into crop_rects and set ones that are outside the crop to ignore or
// drop entirely as appropriate. // drop entirely as appropriate.
@ -189,7 +190,7 @@ namespace dlib
if (get_rect(crop).intersect(rect.rect).area() != 0) if (get_rect(crop).intersect(rect.rect).area() != 0)
{ {
// set to ignore if not totally in the crop or if too small. // set to ignore if not totally in the crop or if too small.
if (!get_rect(crop).contains(rect.rect) || rect.rect.height() < min_object_height_absolute) if (!get_rect(crop).contains(rect.rect) || (rect.rect.height() < min_object_size_absolute_rows && rect.rect.width() < min_object_size_absolute_cols))
rect.ignore = true; rect.ignore = true;
crop_rects.push_back(rect); crop_rects.push_back(rect);
@ -224,12 +225,12 @@ namespace dlib
auto rect = rects[randomly_pick_rect(rects)].rect; auto rect = rects[randomly_pick_rect(rects)].rect;
// perturb the location of the crop by a small fraction of the object's size. // perturb the location of the crop by a small fraction of the object's size.
const point rand_translate = dpoint(rnd.get_double_in_range(-translate_amount,translate_amount)*rect.width(), const point rand_translate = dpoint(rnd.get_double_in_range(-translate_amount,translate_amount)*rect.width(),
rnd.get_double_in_range(-translate_amount,translate_amount)*rect.height()); rnd.get_double_in_range(-translate_amount,translate_amount)*std::max(rect.height(),rect.width()));
// perturb the scale of the crop by a fraction of the object's size // perturb the scale of the crop by a fraction of the object's size
const double rand_scale_perturb = rnd.get_double_in_range(min_object_height, max_object_height); const double rand_scale_perturb = rnd.get_double_in_range(min_object_size, max_object_size);
const long box_size = rect.height()/rand_scale_perturb; const long box_size = std::max(rect.height(),rect.width())/rand_scale_perturb;
crop_rect = centered_rect(center(rect)+rand_translate, box_size, box_size); crop_rect = centered_rect(center(rect)+rand_translate, box_size, box_size);
} }
else else

View File

@ -35,8 +35,8 @@ namespace dlib
- #get_chip_dims() == chip_dims(300,300) - #get_chip_dims() == chip_dims(300,300)
- #get_randomly_flip() == true - #get_randomly_flip() == true
- #get_max_rotation_degrees() == 30 - #get_max_rotation_degrees() == 30
- #get_min_object_height() == 0.25 - #get_min_object_size() == 0.25
- #get_max_object_height() == 0.7 - #get_max_object_size() == 0.7
- #get_background_crops_fraction() == 0.5 - #get_background_crops_fraction() == 0.5
- #get_translate_amount() == 0.1 - #get_translate_amount() == 0.1
!*/ !*/
@ -143,46 +143,48 @@ namespace dlib
- #get_max_rotation_degrees() == std::abs(value) - #get_max_rotation_degrees() == std::abs(value)
!*/ !*/
double get_min_object_height ( double get_min_object_size (
) const; ) const;
/*! /*!
ensures ensures
- When a chip is extracted around an object, the chip will be sized so that - When a chip is extracted around an object, the chip will be sized so that
the object's height is at least get_min_object_height() * 100 percent of the at least one of the object's height or width is >= get_min_object_size() *
chip height. E.g. if the chip is HEIGHT pixels tall then the object will the chip's height and width, respectively. E.g. if the chip is 640x480
be at least HEIGHT*get_min_object_height() pixels tall. This also means pixels in size then the object will be at least 480*get_min_object_size()
that if get_min_object_height() >1 then the object will be only partially pixels tall or 640*get_min_object_size() pixels wide. This also means
that if get_min_object_size() >1 then the object will only be partially
visible in the crop since it will be too big to fit. visible in the crop since it will be too big to fit.
!*/ !*/
void set_min_object_height ( void set_min_object_size (
double value double value
); );
/*! /*!
requires requires
- 0 < value - 0 < value
ensures ensures
- #get_min_object_height() == value - #get_min_object_size() == value
!*/ !*/
double get_max_object_height ( double get_max_object_size (
) const; ) const;
/*! /*!
ensures ensures
- When a chip is extracted around an object, the chip will be sized so that - When a chip is extracted around an object, the chip will be sized so that
the object's height is at most get_max_object_height() * 100 percent of the both the object's height and width are at most get_max_object_size() *
chip height. E.g. if the chip is HEIGHT pixels tall then the object will the chip's height and width, respectively. E.g. if the chip is 640x480
be at most HEIGHT*get_max_object_height() pixels tall. pixels in size then the object will be at most 480*get_max_object_size()
pixels tall and 640*get_max_object_size() pixels wide.
!*/ !*/
void set_max_object_height ( void set_max_object_size (
double value double value
); );
/*! /*!
requires requires
- 0 < value - 0 < value
ensures ensures
- #get_max_object_height() == value - #get_max_object_size() == value
!*/ !*/
template < template <

View File

@ -210,9 +210,9 @@ std::vector<matrix<rgb_pixel>> jitter_image(
thread_local random_cropper cropper; thread_local random_cropper cropper;
cropper.set_chip_dims(150,150); cropper.set_chip_dims(150,150);
cropper.set_randomly_flip(true); cropper.set_randomly_flip(true);
cropper.set_max_object_height(0.99999); cropper.set_max_object_size(0.99999);
cropper.set_background_crops_fraction(0); cropper.set_background_crops_fraction(0);
cropper.set_min_object_height(0.97); cropper.set_min_object_size(0.97);
cropper.set_translate_amount(0.02); cropper.set_translate_amount(0.02);
cropper.set_max_rotation_degrees(3); cropper.set_max_rotation_degrees(3);

View File

@ -152,7 +152,7 @@ int main(int argc, char** argv) try
std::vector<std::vector<mmod_rect>> mini_batch_labels; std::vector<std::vector<mmod_rect>> mini_batch_labels;
random_cropper cropper; random_cropper cropper;
cropper.set_chip_dims(200, 200); cropper.set_chip_dims(200, 200);
cropper.set_min_object_height(0.2); cropper.set_min_object_size(0.2);
dlib::rand rnd; dlib::rand rnd;
// Run the trainer until the learning rate gets small. This will probably take several // Run the trainer until the learning rate gets small. This will probably take several
// hours. // hours.

View File

@ -45,8 +45,8 @@ int main(int argc, char** argv) try
// make the objects in the crops have a min and max size of such and such". // make the objects in the crops have a min and max size of such and such".
// You do that by calling these two functions. Here we are saying we want the // You do that by calling these two functions. Here we are saying we want the
// objects in our crops to be between 0.2*400 and 0.8*400 pixels in height. // objects in our crops to be between 0.2*400 and 0.8*400 pixels in height.
cropper.set_min_object_height(0.2); cropper.set_min_object_size(0.2);
cropper.set_max_object_height(0.8); cropper.set_max_object_size(0.8);
// The cropper can also randomly mirror and rotate crops, which we ask it to // The cropper can also randomly mirror and rotate crops, which we ask it to
// perform as well. // perform as well.
cropper.set_randomly_flip(true); cropper.set_randomly_flip(true);