Cleaned up jitter_image() code and moved it into dlib proper.

This commit is contained in:
Davis King 2017-10-24 08:02:44 -04:00
parent 782f4f4825
commit 369f2b32e8
3 changed files with 71 additions and 20 deletions

View File

@ -11,6 +11,7 @@
#include "../simd.h"
#include "../image_processing/full_object_detection.h"
#include <limits>
#include "../rand.h"
namespace dlib
{
@ -2103,6 +2104,45 @@ namespace dlib
return res;
}
// ----------------------------------------------------------------------------------------
// Returns a randomly perturbed ("jittered") copy of img.  The copy has the same
// dimensions as img but has been slightly translated, zoomed, rotated, and, with
// probability 0.5, mirrored left to right.  img must be non-empty and square.  All
// randomness is drawn from rnd.
template <
    typename image_type
    >
image_type jitter_image(
    const image_type& img,
    dlib::rand& rnd
)
{
    DLIB_CASSERT(num_rows(img)*num_columns(img) != 0);
    DLIB_CASSERT(num_rows(img)==num_columns(img));

    // Bounds on the random perturbations applied below.
    const double max_rotation_degrees = 3;
    const double min_object_height = 0.97;
    const double max_object_height = 0.99999;
    const double translate_amount = 0.02;

    // The nominal crop region is the image's rectangle shrunk by 3 pixels.
    const auto rect = shrink_rect(get_rect(img),3);

    // perturb the location of the crop by a small fraction of the object's size.
    const point rand_translate = dpoint(rnd.get_double_in_range(-translate_amount,translate_amount)*rect.width(),
                                        rnd.get_double_in_range(-translate_amount,translate_amount)*rect.height());

    // perturb the scale of the crop by a fraction of the object's size
    const double rand_scale_perturb = rnd.get_double_in_range(min_object_height, max_object_height);

    // Dividing by a value just below 1 makes the crop box slightly larger than rect.
    // The result is deliberately truncated to a whole number of pixels.
    const long box_size = static_cast<long>(rect.height()/rand_scale_perturb);
    const auto crop_rect = centered_rect(center(rect)+rand_translate, box_size, box_size);

    // Rotation angle, converted from degrees to radians.
    const double angle = rnd.get_double_in_range(-max_rotation_degrees, max_rotation_degrees)*pi/180;

    // Use the generic image accessors (as the asserts above do) rather than the
    // .nr()/.nc() members so that any type implementing the generic image interface
    // works here.  The output chip has the same dimensions as the input image.
    image_type crop;
    extract_image_chip(img, chip_details(crop_rect, chip_dims(num_rows(img),num_columns(img)), angle), crop);

    // Mirror the result left to right half of the time.
    if (rnd.get_random_double() > 0.5)
        flip_image_left_right(crop);

    return crop;
}
// ----------------------------------------------------------------------------------------
}

View File

@ -1411,6 +1411,29 @@ namespace dlib
!*/
// ----------------------------------------------------------------------------------------
template <
typename image_type
>
image_type jitter_image(
const image_type& img,
dlib::rand& rnd
);
/*!
    requires
        - image_type == an image object that implements the interface defined in
          dlib/image_processing/generic_image.h
        - pixel_traits<typename image_traits<image_type>::pixel_type>::has_alpha == false
        - img.size() > 0
        - img.nr() == img.nc()
          (i.e. img must be square)
    ensures
        - Randomly jitters the image a little bit and returns this new jittered image.
          img itself is not modified.
        - To be specific, the returned image has the same size as img and will look
          generally similar.  The difference is that the returned image will have been
          slightly rotated (by at most 3 degrees in either direction), slightly zoomed,
          and slightly translated (by at most about 2% of the image's size along each
          axis).  There is also a 50% chance it will be mirrored left to right.
        - rnd is the source of all the randomness used by this function.
!*/
// ----------------------------------------------------------------------------------------
}

View File

@ -40,9 +40,9 @@ using namespace std;
// The dlib_face_recognition_resnet_model_v1 model used by this example was trained using
// essentially the code shown in dnn_metric_learning_on_images_ex.cpp except the
// mini-batches were made larger (35x15 instead of 5x5), the iterations without progress
// was set to 10000, the jittering you can see below in jitter_image() was used during
// training, and the training dataset consisted of about 3 million images instead of 55.
// Also, the input layer was locked to images of size 150.
// was set to 10000, dlib::jitter_image() was used during training, and the training
// dataset consisted of about 3 million images instead of 55. Also, the input layer was
// locked to images of size 150.
template <template <int,template<typename>class,int,typename> class block, int N, template<typename>class BN, typename SUBNET>
using residual = add_prev1<block<N,BN,1,tag1<SUBNET>>>;
@ -206,26 +206,14 @@ std::vector<matrix<rgb_pixel>> jitter_image(
)
{
// All this function does is make 100 copies of img, all slightly jittered by being
// zoomed, rotated, and translated a little bit differently.
thread_local random_cropper cropper;
cropper.set_chip_dims(150,150);
cropper.set_randomly_flip(true);
cropper.set_max_object_size(0.99999);
cropper.set_background_crops_fraction(0);
cropper.set_min_object_size(0.97);
cropper.set_translate_amount(0.02);
cropper.set_max_rotation_degrees(3);
// zoomed, rotated, and translated a little bit differently. They are also randomly
// mirrored left to right.
thread_local dlib::rand rnd;
std::vector<mmod_rect> raw_boxes(1), ignored_crop_boxes;
raw_boxes[0] = shrink_rect(get_rect(img),3);
std::vector<matrix<rgb_pixel>> crops;
matrix<rgb_pixel> temp;
for (int i = 0; i < 100; ++i)
{
cropper(img, raw_boxes, temp, ignored_crop_boxes);
crops.push_back(move(temp));
}
crops.push_back(jitter_image(img,rnd));
return crops;
}