Cleaned up jitter_image() code and moved it into dlib proper.

2024-11-01 10:14:53 +08:00 · 2017-10-24 08:02:44 -04:00 · 2017-10-24 08:02:44 -04:00 · 369f2b32e8
commit 369f2b32e8
parent 782f4f4825
3 changed files with 71 additions and 20 deletions
--- a/dlib/image_transforms/interpolation.h
+++ b/dlib/image_transforms/interpolation.h
@ -11,6 +11,7 @@
 #include "../simd.h"
 #include "../image_processing/full_object_detection.h"
 #include <limits>
+#include "../rand.h"

 namespace dlib
 {
@ -2103,6 +2104,45 @@ namespace dlib
        return res;
    }

+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename image_type
+        >
+    image_type jitter_image(
+        const image_type& img,
+        dlib::rand& rnd
+    )
+    {
+        DLIB_CASSERT(num_rows(img)*num_columns(img) != 0);
+        DLIB_CASSERT(num_rows(img)==num_columns(img));
+
+        const double max_rotation_degrees = 3;
+        const double min_object_height = 0.97;
+        const double max_object_height = 0.99999;
+        const double translate_amount = 0.02;
+
+        // The crop is built around the image's rectangle after shrink_rect(...,3).
+        const auto rect = shrink_rect(get_rect(img),3);
+
+        // perturb the location of the crop by a small fraction of the object's size.
+        const point rand_translate = dpoint(rnd.get_double_in_range(-translate_amount,translate_amount)*rect.width(),
+            rnd.get_double_in_range(-translate_amount,translate_amount)*rect.height());
+
+        // perturb the scale of the crop by a fraction of the object's size
+        const double rand_scale_perturb = rnd.get_double_in_range(min_object_height, max_object_height);
+        // The side length is deliberately truncated to a whole number of pixels.
+        const long box_size = static_cast<long>(rect.height()/rand_scale_perturb);
+        const auto crop_rect = centered_rect(center(rect)+rand_translate, box_size, box_size);
+        const double angle = rnd.get_double_in_range(-max_rotation_degrees, max_rotation_degrees)*pi/180;
+        image_type crop;
+        // Size the chip via num_rows()/num_columns(), like the asserts above, not .nr()/.nc().
+        extract_image_chip(img, chip_details(crop_rect, chip_dims(num_rows(img),num_columns(img)), angle), crop);
+        if (rnd.get_random_double() > 0.5)
+            flip_image_left_right(crop);
+        return crop;
+    }
+
 // ----------------------------------------------------------------------------------------

 }
--- a/dlib/image_transforms/interpolation_abstract.h
+++ b/dlib/image_transforms/interpolation_abstract.h
@ -1411,6 +1411,29 @@ namespace dlib
    !*/

 // ----------------------------------------------------------------------------------------
+
+    template <
+        typename image_type
+        >
+    image_type jitter_image(
+        const image_type& img,
+        dlib::rand& rnd
+    );
+    /*!
+        requires
+            - image_type == an image object that implements the interface defined in
+              dlib/image_processing/generic_image.h
+            - pixel_traits<typename image_traits<image_type>::pixel_type>::has_alpha == false
+            - img.size() > 0
+            - img.nr() == img.nc()
+        ensures
+            - Returns a randomly jittered copy of img.  The returned image has the same
+              size as img and looks generally similar, except that it has been slightly
+              rotated, zoomed, and translated.  There is also a 50% chance it will be
+              mirrored left to right.
+            - All of the random choices described above are drawn from rnd.
+    !*/
+    
 // ----------------------------------------------------------------------------------------

 }
--- a/examples/dnn_face_recognition_ex.cpp
+++ b/examples/dnn_face_recognition_ex.cpp
@ -40,9 +40,9 @@ using namespace std;
 // The dlib_face_recognition_resnet_model_v1 model used by this example was trained using
 // essentially the code shown in dnn_metric_learning_on_images_ex.cpp except the
 // mini-batches were made larger (35x15 instead of 5x5), the iterations without progress
-// was set to 10000, the jittering you can see below in jitter_image() was used during
-// training, and the training dataset consisted of about 3 million images instead of 55.
-// Also, the input layer was locked to images of size 150.
+// was set to 10000, dlib::jitter_image() was used during training, and the training
+// dataset consisted of about 3 million images instead of 55.  Also, the input layer was
+// locked to images of size 150.
 template <template <int,template<typename>class,int,typename> class block, int N, template<typename>class BN, typename SUBNET>
 using residual = add_prev1<block<N,BN,1,tag1<SUBNET>>>;

@ -206,26 +206,14 @@ std::vector<matrix<rgb_pixel>> jitter_image(
 )
 {
    // All this function does is make 100 copies of img, all slightly jittered by being
-    // zoomed, rotated, and translated a little bit differently.
-    thread_local random_cropper cropper;
-    cropper.set_chip_dims(150,150);
-    cropper.set_randomly_flip(true);
-    cropper.set_max_object_size(0.99999);
-    cropper.set_background_crops_fraction(0);
-    cropper.set_min_object_size(0.97);
-    cropper.set_translate_amount(0.02);
-    cropper.set_max_rotation_degrees(3);
+    // zoomed, rotated, and translated a little bit differently. They are also randomly
+    // mirrored left to right.
+    thread_local dlib::rand rnd;

-    std::vector<mmod_rect> raw_boxes(1), ignored_crop_boxes;
-    raw_boxes[0] = shrink_rect(get_rect(img),3);
    std::vector<matrix<rgb_pixel>> crops; 
-
-    matrix<rgb_pixel> temp; 
    for (int i = 0; i < 100; ++i)
-    {
-        cropper(img, raw_boxes, temp, ignored_crop_boxes);
-        crops.push_back(move(temp));
-    }
+        crops.push_back(jitter_image(img,rnd));
+
    return crops;
 }