diff --git a/dlib/image_transforms/interpolation.h b/dlib/image_transforms/interpolation.h
index f064e311c..c4ce83553 100644
--- a/dlib/image_transforms/interpolation.h
+++ b/dlib/image_transforms/interpolation.h
@@ -11,6 +11,7 @@
 #include "../simd.h"
 #include "../image_processing/full_object_detection.h"
 #include
+#include "../rand.h"
 
 namespace dlib
 {
@@ -2103,6 +2104,56 @@ namespace dlib
         return res;
     }
 
+// ----------------------------------------------------------------------------------------
+
+    // Returns a randomly jittered copy of img with the same dimensions as img.  A
+    // square crop of img is slightly translated, zoomed, and rotated, then mirrored
+    // left to right with probability 0.5.  All random draws come from rnd.
+    // img must be non-empty and square (enforced by the DLIB_CASSERTs below).
+    template <
+        typename image_type
+        >
+    image_type jitter_image(
+        const image_type& img,
+        dlib::rand& rnd
+    )
+    {
+        DLIB_CASSERT(num_rows(img)*num_columns(img) != 0);
+        DLIB_CASSERT(num_rows(img)==num_columns(img));
+
+        const double max_rotation_degrees = 3;
+        const double min_object_height = 0.97;
+        const double max_object_height = 0.99999;
+        const double translate_amount = 0.02;
+
+        const auto rect = shrink_rect(get_rect(img),3);
+
+        // Perturb the location of the crop by a small fraction of the object's size.
+        // Each offset is drawn in its own statement because the evaluation order of
+        // function arguments is unspecified; drawing both inside the dpoint(...) call
+        // would let the same seed produce different jitter on different compilers.
+        const double dx = rnd.get_double_in_range(-translate_amount,translate_amount)*rect.width();
+        const double dy = rnd.get_double_in_range(-translate_amount,translate_amount)*rect.height();
+        const point rand_translate = dpoint(dx, dy);
+
+        // Perturb the scale of the crop by a fraction of the object's size.
+        const double rand_scale_perturb = rnd.get_double_in_range(min_object_height, max_object_height);
+
+        // Same truncation as the implicit double->long conversion; the cast only makes it explicit.
+        const long box_size = static_cast<long>(rect.height()/rand_scale_perturb);
+        const auto crop_rect = centered_rect(center(rect)+rand_translate, box_size, box_size);
+        const double angle = rnd.get_double_in_range(-max_rotation_degrees, max_rotation_degrees)*pi/180;
+
+        // Use the generic image interface (num_rows/num_columns) rather than member
+        // functions so any generic image type works, matching the asserts above.
+        image_type crop;
+        extract_image_chip(img, chip_details(crop_rect, chip_dims(num_rows(img),num_columns(img)), angle), crop);
+        if (rnd.get_random_double() > 0.5)
+            flip_image_left_right(crop);
+
+        return crop;
+    }
+
 // ----------------------------------------------------------------------------------------
 
 }
diff --git a/dlib/image_transforms/interpolation_abstract.h b/dlib/image_transforms/interpolation_abstract.h
index a31520bc3..a7b9d9ae3 100644
--- a/dlib/image_transforms/interpolation_abstract.h
+++ b/dlib/image_transforms/interpolation_abstract.h
@@ -1411,6 +1411,29 @@ namespace dlib
     !*/
 
 // ----------------------------------------------------------------------------------------
+
+    template <
+        typename image_type
+        >
+    image_type jitter_image(
+        const image_type& img,
+        dlib::rand& rnd
+    );
+    /*!
+        requires
+            - image_type == an image object that implements the interface defined in
+              dlib/image_processing/generic_image.h
+            - pixel_traits<typename image_traits<image_type>::pixel_type>::has_alpha == false
+            - img.size() > 0
+            - img.nr() == img.nc()
+        ensures
+            - Randomly jitters the image a little bit and returns this new jittered image.
+              To be specific, the returned image has the same size as img and will look
+              generally similar. The difference is that the returned image will have been
+              slightly rotated, zoomed, and translated. There is also a 50% chance it will
+              be mirrored left to right.
+    !*/
+
 // ----------------------------------------------------------------------------------------
 
 }
diff --git a/examples/dnn_face_recognition_ex.cpp b/examples/dnn_face_recognition_ex.cpp
index 7012c0d79..4ade3862f 100644
--- a/examples/dnn_face_recognition_ex.cpp
+++ b/examples/dnn_face_recognition_ex.cpp
@@ -40,9 +40,9 @@ using namespace std;
 // The dlib_face_recognition_resnet_model_v1 model used by this example was trained using
 // essentially the code shown in dnn_metric_learning_on_images_ex.cpp except the
 // mini-batches were made larger (35x15 instead of 5x5), the iterations without progress
-// was set to 10000, the jittering you can see below in jitter_image() was used during
-// training, and the training dataset consisted of about 3 million images instead of 55.
-// Also, the input layer was locked to images of size 150.
+// was set to 10000, dlib::jitter_image() was used during training, and the training
+// dataset consisted of about 3 million images instead of 55. Also, the input layer was
+// locked to images of size 150.
 template