mirror of
https://github.com/davisking/dlib.git
synced 2024-11-01 10:14:53 +08:00
Added comments
This commit is contained in:
parent
d53d49ebb8
commit
56f4e19afa
@ -1,3 +1,47 @@
|
||||
// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
|
||||
/*
|
||||
This example shows how to run a CNN based dog face detector using dlib. The
|
||||
example loads a pretrained model and uses it to find dog faces in images.
|
||||
We also use the dlib::shape_predictor to find the location of the eyes and
|
||||
nose and then draw glasses and a mustache onto each dog found :)
|
||||
|
||||
|
||||
Users who are just learning about dlib's deep learning API should read the
|
||||
dnn_introduction_ex.cpp and dnn_introduction2_ex.cpp examples to learn how
|
||||
the API works. For an introduction to the object detection method you
|
||||
should read dnn_mmod_ex.cpp.
|
||||
|
||||
|
||||
|
||||
TRAINING THE MODEL
|
||||
Finally, users interested in how the dog face detector was trained should
|
||||
read the dnn_mmod_ex.cpp example program. It should be noted that the
|
||||
dog face detector used in this example uses a bigger training dataset and
|
||||
larger CNN architecture than what is shown in dnn_mmod_ex.cpp, but
|
||||
otherwise training is the same. If you compare the net_type statements
|
||||
in this file and dnn_mmod_ex.cpp you will see that they are very similar
|
||||
except that the number of parameters has been increased.
|
||||
|
||||
Additionally, the following training parameters were different during
|
||||
training: The following lines in dnn_mmod_ex.cpp were changed from
|
||||
mmod_options options(face_boxes_train, 40*40);
|
||||
trainer.set_iterations_without_progress_threshold(300);
|
||||
to the following when training the model used in this example:
|
||||
mmod_options options(face_boxes_train, 80*80);
|
||||
trainer.set_iterations_without_progress_threshold(8000);
|
||||
|
||||
Also, the random_cropper was left at its default settings, so we didn't
|
||||
call these functions:
|
||||
cropper.set_chip_dims(200, 200);
|
||||
cropper.set_min_object_height(0.2);
|
||||
|
||||
The training data used to create the model is also available at
|
||||
http://dlib.net/files/data/CU_dogs_fully_labeled.tar.gz
|
||||
|
||||
Lastly, the shape_predictor was trained with default settings except we
|
||||
used the following non-default settings: cascade depth=20, tree
|
||||
depth=5, padding=0.2
|
||||
*/
|
||||
|
||||
|
||||
#include <iostream>
|
||||
@ -10,30 +54,6 @@
|
||||
using namespace std;
|
||||
using namespace dlib;
|
||||
|
||||
|
||||
/*
|
||||
Training differences with dnn_mmod_ex.cpp
|
||||
|
||||
A slightly bigger network architecture. Also, to train you must replace the affine layers with bn_con layers.
|
||||
|
||||
mmod_options options(training_labels, 80*80);
|
||||
instead of
|
||||
mmod_options options(face_boxes_train, 40*40);
|
||||
|
||||
trainer.set_iterations_without_progress_threshold(8000);
|
||||
instead of
|
||||
trainer.set_iterations_without_progress_threshold(300);
|
||||
|
||||
random cropper was left at its default settings, So we didn't call these functions:
|
||||
cropper.set_chip_dims(200, 200);
|
||||
cropper.set_min_object_height(0.2);
|
||||
|
||||
|
||||
|
||||
// shape predictor was trained with these settings: tree cascade depth=20, tree depth=5, padding=0.2
|
||||
*/
|
||||
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <long num_filters, typename SUBNET> using con5d = con<num_filters,5,5,2,2,SUBNET>;
|
||||
@ -46,20 +66,19 @@ using net_type = loss_mmod<con<1,9,9,1,1,rcon5<rcon5<rcon5<downsampler<input_rgb
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
|
||||
int main(int argc, char** argv) try
|
||||
{
|
||||
if (argc < 3)
|
||||
{
|
||||
cout << "Give the path to the examples/faces directory as the argument to this" << endl;
|
||||
cout << "program. For example, if you are in the examples folder then execute " << endl;
|
||||
cout << "this program by running: " << endl;
|
||||
cout << " ./fhog_object_detector_ex faces" << endl;
|
||||
cout << endl;
|
||||
cout << "Call this program like this:" << endl;
|
||||
cout << "./dnn_mmod_dog_hipsterizer mmod_dog_hipsterizer.dat faces/dogs.jpg" << endl;
|
||||
cout << "\nYou can get the mmod_dog_hipsterizer.dat file from:\n";
|
||||
cout << "http://dlib.net/files/mmod_dog_hipsterizer.dat.bz2" << endl;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
// load the models as well as glasses and mustache.
|
||||
net_type net;
|
||||
shape_predictor sp;
|
||||
matrix<rgb_alpha_pixel> glasses, mustache;
|
||||
@ -67,11 +86,12 @@ int main(int argc, char** argv) try
|
||||
pyramid_up(glasses);
|
||||
pyramid_up(mustache);
|
||||
|
||||
// right eye (59,35), left eye (176,36)
|
||||
image_window win1(glasses);
|
||||
image_window win2(mustache);
|
||||
|
||||
image_window win_wireframe, win_hipster;
|
||||
|
||||
// Now process each image, find dogs, and hipsterize them by drawing glasses and a
|
||||
// mustache on each dog :)
|
||||
for (int i = 2; i < argc; ++i)
|
||||
{
|
||||
matrix<rgb_pixel> img;
|
||||
@ -84,9 +104,12 @@ int main(int argc, char** argv) try
|
||||
auto dets = net(img);
|
||||
win_wireframe.clear_overlay();
|
||||
win_wireframe.set_image(img);
|
||||
// We will also draw a wireframe on each dog's face so you can see where the
|
||||
// shape_predictor is identifying face landmarks.
|
||||
std::vector<image_window::overlay_line> lines;
|
||||
for (auto&& d : dets)
|
||||
{
|
||||
// get the landmarks for this dog's face
|
||||
auto shape = sp(img, d.rect);
|
||||
|
||||
const rgb_pixel color(0,255,0);
|
||||
@ -97,9 +120,11 @@ int main(int argc, char** argv) try
|
||||
auto rear = shape.part(4);
|
||||
auto reye = shape.part(5);
|
||||
|
||||
// The locations of the left and right ends of the mustache.
|
||||
auto lmustache = 1.3*(leye-reye)/2 + nose;
|
||||
auto rmustache = 1.3*(reye-leye)/2 + nose;
|
||||
|
||||
// Draw the glasses onto the image.
|
||||
std::vector<point> from = {2*point(176,36), 2*point(59,35)}, to = {leye, reye};
|
||||
auto tform = find_similarity_transform(from, to);
|
||||
for (long r = 0; r < glasses.nr(); ++r)
|
||||
@ -111,6 +136,8 @@ int main(int argc, char** argv) try
|
||||
assign_pixel(img(p.y(),p.x()), glasses(r,c));
|
||||
}
|
||||
}
|
||||
|
||||
// Draw the mustache onto the image right under the dog's nose.
|
||||
auto mrect = get_rect(mustache);
|
||||
from = {mrect.tl_corner(), mrect.tr_corner()};
|
||||
to = {rmustache, lmustache};
|
||||
@ -126,6 +153,7 @@ int main(int argc, char** argv) try
|
||||
}
|
||||
|
||||
|
||||
// Record the lines needed for the face wire frame.
|
||||
lines.push_back(image_window::overlay_line(leye, nose, color));
|
||||
lines.push_back(image_window::overlay_line(nose, reye, color));
|
||||
lines.push_back(image_window::overlay_line(reye, leye, color));
|
||||
@ -138,6 +166,7 @@ int main(int argc, char** argv) try
|
||||
win_wireframe.add_overlay(lines);
|
||||
win_hipster.set_image(img);
|
||||
|
||||
cout << "Hit enter to process the next image." << endl;
|
||||
cin.get();
|
||||
}
|
||||
}
|
||||
|
@ -1,3 +1,45 @@
|
||||
// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
|
||||
/*
|
||||
This example shows how to run a CNN based face detector using dlib. The
|
||||
example loads a pretrained model and uses it to find faces in images. The
|
||||
CNN model is much more accurate than the HOG based model shown in the
|
||||
face_detection_ex.cpp example, but takes much more computational power to
|
||||
run, and is meant to be executed on a GPU to attain reasonable speed. For
|
||||
example, on an NVIDIA Titan X GPU, this example program processes images at
|
||||
about the same speed as face_detection_ex.cpp.
|
||||
|
||||
Also, users who are just learning about dlib's deep learning API should read
|
||||
the dnn_introduction_ex.cpp and dnn_introduction2_ex.cpp examples to learn
|
||||
how the API works. For an introduction to the object detection method you
|
||||
should read dnn_mmod_ex.cpp.
|
||||
|
||||
|
||||
|
||||
TRAINING THE MODEL
|
||||
Finally, users interested in how the face detector was trained should
|
||||
read the dnn_mmod_ex.cpp example program. It should be noted that the
|
||||
face detector used in this example uses a bigger training dataset and
|
||||
larger CNN architecture than what is shown in dnn_mmod_ex.cpp, but
|
||||
otherwise training is the same. If you compare the net_type statements
|
||||
in this file and dnn_mmod_ex.cpp you will see that they are very similar
|
||||
except that the number of parameters has been increased.
|
||||
|
||||
Additionally, the following training parameters were different during
|
||||
training: The following lines in dnn_mmod_ex.cpp were changed from
|
||||
mmod_options options(face_boxes_train, 40*40);
|
||||
trainer.set_iterations_without_progress_threshold(300);
|
||||
to the following when training the model used in this example:
|
||||
mmod_options options(face_boxes_train, 80*80);
|
||||
trainer.set_iterations_without_progress_threshold(8000);
|
||||
|
||||
Also, the random_cropper was left at its default settings, so we didn't
|
||||
call these functions:
|
||||
cropper.set_chip_dims(200, 200);
|
||||
cropper.set_min_object_height(0.2);
|
||||
|
||||
The training data used to create the model is also available at
|
||||
http://dlib.net/files/data/dlib_face_detection_dataset-2016-09-30.tar.gz
|
||||
*/
|
||||
|
||||
|
||||
#include <iostream>
|
||||
@ -10,26 +52,6 @@
|
||||
using namespace std;
|
||||
using namespace dlib;
|
||||
|
||||
|
||||
/*
|
||||
Training differences with dnn_mmod_ex.cpp
|
||||
|
||||
A slightly bigger network architecture. Also, to train you must replace the affine layers with bn_con layers.
|
||||
|
||||
mmod_options options(training_labels, 80*80);
|
||||
instead of
|
||||
mmod_options options(face_boxes_train, 40*40);
|
||||
|
||||
trainer.set_iterations_without_progress_threshold(8000);
|
||||
instead of
|
||||
trainer.set_iterations_without_progress_threshold(300);
|
||||
|
||||
random cropper was left at its default settings, So we didn't call these functions:
|
||||
cropper.set_chip_dims(200, 200);
|
||||
cropper.set_min_object_height(0.2);
|
||||
*/
|
||||
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <long num_filters, typename SUBNET> using con5d = con<num_filters,5,5,2,2,SUBNET>;
|
||||
@ -45,13 +67,12 @@ using net_type = loss_mmod<con<1,9,9,1,1,rcon5<rcon5<rcon5<downsampler<input_rgb
|
||||
|
||||
int main(int argc, char** argv) try
|
||||
{
|
||||
if (argc < 3)
|
||||
if (argc == 1)
|
||||
{
|
||||
cout << "Give the path to the examples/faces directory as the argument to this" << endl;
|
||||
cout << "program. For example, if you are in the examples folder then execute " << endl;
|
||||
cout << "this program by running: " << endl;
|
||||
cout << " ./fhog_object_detector_ex faces" << endl;
|
||||
cout << endl;
|
||||
cout << "Call this program like this:" << endl;
|
||||
cout << "./dnn_mmod_face_detection_ex mmod_human_face_detector.dat faces/*.jpg" << endl;
|
||||
cout << "\nYou can get the mmod_human_face_detector.dat file from:\n";
|
||||
cout << "http://dlib.net/files/mmod_human_face_detector.dat.bz2" << endl;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -71,15 +92,17 @@ int main(int argc, char** argv) try
|
||||
pyramid_up(img);
|
||||
|
||||
// Note that you can process a bunch of images in a std::vector at once and it runs
|
||||
// faster, since this will form mini-batches of images and therefore get better
|
||||
// parallelism out of your GPU hardware. However, all the images must be the same
|
||||
// size. To avoid this requirement on images being the same size we process them
|
||||
// individually in this example.
|
||||
// much faster, since this will form mini-batches of images and therefore get
|
||||
// better parallelism out of your GPU hardware. However, all the images must be
|
||||
// the same size. To avoid this requirement on images being the same size we
|
||||
// process them individually in this example.
|
||||
auto dets = net(img);
|
||||
win.clear_overlay();
|
||||
win.set_image(img);
|
||||
for (auto&& d : dets)
|
||||
win.add_overlay(d);
|
||||
|
||||
cout << "Hit enter to process the next image." << endl;
|
||||
cin.get();
|
||||
}
|
||||
}
|
||||
@ -89,5 +112,3 @@ catch(std::exception& e)
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user