mirror of
https://github.com/davisking/dlib.git
synced 2024-11-01 10:14:53 +08:00
Cleaned up jitter_image() code and moved it into dlib proper.
This commit is contained in:
parent
782f4f4825
commit
369f2b32e8
@ -11,6 +11,7 @@
|
||||
#include "../simd.h"
|
||||
#include "../image_processing/full_object_detection.h"
|
||||
#include <limits>
|
||||
#include "../rand.h"
|
||||
|
||||
namespace dlib
|
||||
{
|
||||
@ -2103,6 +2104,45 @@ namespace dlib
|
||||
return res;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename image_type
|
||||
>
|
||||
image_type jitter_image(
|
||||
const image_type& img,
|
||||
dlib::rand& rnd
|
||||
)
|
||||
{
|
||||
DLIB_CASSERT(num_rows(img)*num_columns(img) != 0);
|
||||
DLIB_CASSERT(num_rows(img)==num_columns(img));
|
||||
|
||||
const double max_rotation_degrees = 3;
|
||||
const double min_object_height = 0.97;
|
||||
const double max_object_height = 0.99999;
|
||||
const double translate_amount = 0.02;
|
||||
|
||||
|
||||
const auto rect = shrink_rect(get_rect(img),3);
|
||||
|
||||
// perturb the location of the crop by a small fraction of the object's size.
|
||||
const point rand_translate = dpoint(rnd.get_double_in_range(-translate_amount,translate_amount)*rect.width(),
|
||||
rnd.get_double_in_range(-translate_amount,translate_amount)*rect.height());
|
||||
|
||||
// perturb the scale of the crop by a fraction of the object's size
|
||||
const double rand_scale_perturb = rnd.get_double_in_range(min_object_height, max_object_height);
|
||||
|
||||
const long box_size = rect.height()/rand_scale_perturb;
|
||||
const auto crop_rect = centered_rect(center(rect)+rand_translate, box_size, box_size);
|
||||
const double angle = rnd.get_double_in_range(-max_rotation_degrees, max_rotation_degrees)*pi/180;
|
||||
image_type crop;
|
||||
extract_image_chip(img, chip_details(crop_rect, chip_dims(img.nr(),img.nc()), angle), crop);
|
||||
if (rnd.get_random_double() > 0.5)
|
||||
flip_image_left_right(crop);
|
||||
|
||||
return crop;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
}
|
||||
|
@ -1411,6 +1411,29 @@ namespace dlib
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename image_type
|
||||
>
|
||||
image_type jitter_image(
|
||||
const image_type& img,
|
||||
dlib::rand& rnd
|
||||
);
|
||||
/*!
    requires
        - image_type == an image object that implements the interface defined in
          dlib/image_processing/generic_image.h
        - pixel_traits<typename image_traits<image_type>::pixel_type>::has_alpha == false
        - img.size() > 0
        - img.nr() == img.nc()
    ensures
        - Randomly jitters the image a little bit and returns this new jittered image.
          To be specific, the returned image has the same size as img and will look
          generally similar.  The difference is that the returned image will have been
          slightly rotated, zoomed, and translated.  There is also a 50% chance it will
          be mirrored left to right.
        - All random numbers used to choose the jitter are drawn from rnd.
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
}
|
||||
|
@ -40,9 +40,9 @@ using namespace std;
|
||||
// The dlib_face_recognition_resnet_model_v1 model used by this example was trained using
|
||||
// essentially the code shown in dnn_metric_learning_on_images_ex.cpp except the
|
||||
// mini-batches were made larger (35x15 instead of 5x5), the iterations without progress
|
||||
// was set to 10000, the jittering you can see below in jitter_image() was used during
|
||||
// training, and the training dataset consisted of about 3 million images instead of 55.
|
||||
// Also, the input layer was locked to images of size 150.
|
||||
// was set to 10000, dlib::jitter_image() was used during training, and the training
|
||||
// dataset consisted of about 3 million images instead of 55. Also, the input layer was
|
||||
// locked to images of size 150.
|
||||
template <template <int,template<typename>class,int,typename> class block, int N, template<typename>class BN, typename SUBNET>
|
||||
using residual = add_prev1<block<N,BN,1,tag1<SUBNET>>>;
|
||||
|
||||
@ -206,26 +206,14 @@ std::vector<matrix<rgb_pixel>> jitter_image(
|
||||
)
|
||||
{
|
||||
// All this function does is make 100 copies of img, all slightly jittered by being
|
||||
// zoomed, rotated, and translated a little bit differently.
|
||||
thread_local random_cropper cropper;
|
||||
cropper.set_chip_dims(150,150);
|
||||
cropper.set_randomly_flip(true);
|
||||
cropper.set_max_object_size(0.99999);
|
||||
cropper.set_background_crops_fraction(0);
|
||||
cropper.set_min_object_size(0.97);
|
||||
cropper.set_translate_amount(0.02);
|
||||
cropper.set_max_rotation_degrees(3);
|
||||
// zoomed, rotated, and translated a little bit differently. They are also randomly
|
||||
// mirrored left to right.
|
||||
thread_local dlib::rand rnd;
|
||||
|
||||
std::vector<mmod_rect> raw_boxes(1), ignored_crop_boxes;
|
||||
raw_boxes[0] = shrink_rect(get_rect(img),3);
|
||||
std::vector<matrix<rgb_pixel>> crops;
|
||||
|
||||
matrix<rgb_pixel> temp;
|
||||
for (int i = 0; i < 100; ++i)
|
||||
{
|
||||
cropper(img, raw_boxes, temp, ignored_crop_boxes);
|
||||
crops.push_back(move(temp));
|
||||
}
|
||||
crops.push_back(jitter_image(img,rnd));
|
||||
|
||||
return crops;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user