mirror of https://github.com/davisking/dlib.git (synced 2024-11-01 10:14:53 +08:00)

improved examples

parent 07541b4255
commit 5e4aaf2e53
examples/face_landmark_detection_ex.cpp

@@ -16,6 +16,9 @@
        One Millisecond Face Alignment with an Ensemble of Regression Trees by
        Vahid Kazemi and Josephine Sullivan, CVPR 2014
    and was trained on the iBUG 300-W face landmark dataset.
+
+   Also, note that you can train your own models using dlib's machine learning
+   tools.  See train_shape_predictor_ex.cpp to see an example.
@@ -67,8 +70,10 @@ int main(int argc, char** argv)
         // We need a face detector.  We will use this to get bounding boxes for
         // each face in an image.
         frontal_face_detector detector = get_frontal_face_detector();
-        // And we also need a shape_predictor.  This takes as input an image and bounding
-        // box and outputs a fully landmarked face shape.
+        // And we also need a shape_predictor.  This is the tool that will predict face
+        // landmark positions given an image and face bounding box.  Here we are just
+        // loading the model from the shape_predictor_68_face_landmarks.dat file you gave
+        // as a command line argument.
         shape_predictor sp;
         deserialize(argv[1]) >> sp;
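An aside for readers following along: here is a minimal sketch, not part of the commit, of how the detector and shape_predictor loaded above work together end to end. The input file name "face.jpg" is a placeholder, not a file from the repo.

    #include <dlib/image_processing/frontal_face_detector.h>
    #include <dlib/image_processing.h>
    #include <dlib/image_io.h>
    #include <iostream>
    using namespace dlib;

    int main(int argc, char** argv)
    {
        // Load the detector and the landmark model, as in the hunk above.
        frontal_face_detector detector = get_frontal_face_detector();
        shape_predictor sp;
        deserialize(argv[1]) >> sp;

        // "face.jpg" is a placeholder input image.
        array2d<rgb_pixel> img;
        load_image(img, "face.jpg");

        // Detect each face, then predict its 68 landmark positions.
        for (const rectangle& face : detector(img))
        {
            full_object_detection shape = sp(img, face);
            std::cout << "parts: " << shape.num_parts()
                      << ", first landmark: " << shape.part(0) << std::endl;
        }
    }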
examples/train_shape_predictor_ex.cpp

@@ -1,12 +1,19 @@
 // The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
 /*
-    The pose estimator was created by using dlib's implementation of the paper:
+    This example program shows how to use dlib's implementation of the paper:
        One Millisecond Face Alignment with an Ensemble of Regression Trees by
        Vahid Kazemi and Josephine Sullivan, CVPR 2014
 
+    In particular, we will train a face landmarking model based on a small dataset
+    and then evaluate it.  If you want to visualize the output of the trained
+    model on some images then you can run the face_landmark_detection_ex.cpp
+    example program with sp.dat as the input model.
+
+    It should also be noted that this kind of model, while often used for face
+    landmarking, is quite general and can be used for a variety of shape
+    prediction tasks.  But here we demonstrate it only on a simple face
+    landmarking task.
 */
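A usage note, not part of the commit: going by the headers of these two example programs, a typical session with the small faces/ dataset bundled in dlib's examples directory looks roughly like this (paths assume the examples are already built):

    ./train_shape_predictor_ex faces
    ./face_landmark_detection_ex sp.dat faces/*.jpg

The first command trains on the small dataset and writes sp.dat; the second visualizes that model's output, as the comment above suggests.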
@@ -22,6 +29,12 @@ using namespace std;
 std::vector<std::vector<double> > get_interocular_distances (
     const std::vector<std::vector<full_object_detection> >& objects
 );
+/*!
+    ensures
+        - returns an object D such that:
+            - D[i][j] == the distance, in pixels, between the eyes for the face
+              represented by objects[i][j].
+!*/
 
 // ----------------------------------------------------------------------------------------
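The hunk above only adds the contract; one straightforward way to implement it, sketched here under the assumption of the iBUG 300-W 68-point layout (parts 36-41 outline one eye, parts 42-47 the other):

    #include <dlib/image_processing.h>
    #include <vector>
    using namespace dlib;

    // Average the six landmarks around each eye to get the eye centers,
    // then measure the distance between those centers.
    double interocular_distance (
        const full_object_detection& det
    )
    {
        dlib::vector<double,2> l, r;
        for (unsigned long i = 36; i < 42; ++i)
            l += det.part(i);
        for (unsigned long i = 42; i < 48; ++i)
            r += det.part(i);
        l /= 6;
        r /= 6;
        return length(l - r);
    }

    std::vector<std::vector<double> > get_interocular_distances (
        const std::vector<std::vector<full_object_detection> >& objects
    )
    {
        // D[i][j] == interocular distance for the face objects[i][j].
        std::vector<std::vector<double> > temp(objects.size());
        for (unsigned long i = 0; i < objects.size(); ++i)
            for (unsigned long j = 0; j < objects[i].size(); ++j)
                temp[i].push_back(interocular_distance(objects[i][j]));
        return temp;
    }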
@@ -58,28 +71,66 @@ int main(int argc, char** argv)
         // running it on the testing images.
         //
         // So here we create the variables that will hold our dataset.
-        // images_train will hold the 4 training images and face_boxes_train
-        // holds the locations of the faces in the training images.  So for
+        // images_train will hold the 4 training images and faces_train holds
+        // the locations and poses of each face in the training images.  So for
         // example, the image images_train[0] has the faces given by the
-        // full_object_detections in face_boxes_train[0].
+        // full_object_detections in faces_train[0].
         dlib::array<array2d<unsigned char> > images_train, images_test;
         std::vector<std::vector<full_object_detection> > faces_train, faces_test;
 
         // Now we load the data.  These XML files list the images in each
-        // dataset and also contain the positions of the face boxes and landmark
-        // (called parts in the XML file).  Obviously you can use any kind of
-        // input format you like so long as you store the data into images_train
-        // and faces_train.
+        // dataset and also contain the positions of the face boxes and
+        // landmarks (called parts in the XML file).  Obviously you can use any
+        // kind of input format you like so long as you store the data into
+        // images_train and faces_train.
         load_image_dataset(images_train, faces_train, faces_directory+"/training_with_face_landmarks.xml");
         load_image_dataset(images_test, faces_test, faces_directory+"/testing_with_face_landmarks.xml");
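To make the XML comment concrete: these dataset files are in the format written by dlib's imglab tool. An abridged, hand-written sketch of one entry (the file name and all coordinates are invented for illustration):

    <?xml version='1.0' encoding='ISO-8859-1'?>
    <dataset>
        <images>
            <image file='example_face.jpg'>
                <box top='74' left='35' width='156' height='141'>
                    <part name='00' x='55' y='141'/>
                    <part name='01' x='59' y='161'/>
                    <!-- ...one part per landmark, up through name='67'... -->
                </box>
            </image>
        </images>
    </dataset>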
         // Now make the object responsible for training the model.
         shape_predictor_trainer trainer;
+        // This algorithm has a bunch of parameters you can mess with.  The
+        // documentation for the shape_predictor_trainer explains all of them.
+        // You should also read the Kazemi paper, which explains all the parameters
+        // in great detail.  However, here I'm just setting three of them
+        // differently than their default values.  I'm doing this because we
+        // have a very small dataset.  In particular, setting the oversampling
+        // to a high amount (300) effectively boosts the training set size, so
+        // that helps this example.
         trainer.set_oversampling_amount(300);
+        // I'm also reducing the capacity of the model by explicitly increasing
+        // the regularization (making nu smaller) and by using trees with
+        // smaller depths.
         trainer.set_nu(0.05);
         trainer.set_tree_depth(2);
 
         // Tell the trainer to print status messages to the console so we can
         // see how long the training will take.
         trainer.be_verbose();
 
         // Now finally generate the shape model
         shape_predictor sp = trainer.train(images_train, faces_train);
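A small aside, not in the commit: shape_predictor_trainer also exposes matching getters, so before kicking off a long training run you can print the configuration that will actually be used. A minimal sketch:

    #include <dlib/image_processing.h>
    #include <iostream>
    using namespace dlib;

    int main()
    {
        shape_predictor_trainer trainer;
        trainer.set_oversampling_amount(300);
        trainer.set_nu(0.05);
        trainer.set_tree_depth(2);

        // The remaining parameters keep their library defaults; printing a
        // few of them documents exactly what this run will use.
        std::cout << "cascade depth: " << trainer.get_cascade_depth() << "\n"
                  << "tree depth:    " << trainer.get_tree_depth() << "\n"
                  << "nu:            " << trainer.get_nu() << std::endl;
    }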
cout << "mean training error: "<< test_shape_predictor(sp, images_train, faces_train, get_interocular_distances(faces_train)) << endl;
|
||||
cout << "mean testing error: "<< test_shape_predictor(sp, images_test, faces_test, get_interocular_distances(faces_test)) << endl;
|
||||
// Now that we have a model we can test it. This function measures the
|
||||
// average distance between a face landmark output by the
|
||||
// shape_predictor and where it should be according to the truth data.
|
||||
// Note that there is an optional 4th argument that lets us rescale the
|
||||
// distances. Here we are causing the output to scale each face's
|
||||
// distances by the interocular distance, as is customary when
|
||||
// evaluating face landmarking systems.
|
||||
cout << "mean training error: "<<
|
||||
test_shape_predictor(sp, images_train, faces_train, get_interocular_distances(faces_train)) << endl;
|
||||
|
||||
// The real test is to see how well it does on data it wasn't trained
|
||||
// on. We trained it on a very small dataset so the accuracy is not
|
||||
// extremely high, but it's still doing quite good. Moreover, if you
|
||||
// train it on one of the large face landmarking datasets you will
|
||||
// obtain state-of-the-art results, as shown in the Kazemi paper.
|
||||
cout << "mean testing error: "<<
|
||||
test_shape_predictor(sp, images_test, faces_test, get_interocular_distances(faces_test)) << endl;
|
||||
|
||||
// Finally, we save the model to disk so we can use it later.
|
||||
serialize("sp.dat") << sp;
|
||||
}
|
||||
catch (exception& e)
|
||||
|
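For intuition about the numbers test_shape_predictor prints: with the 4th argument supplied, each landmark's pixel error is divided by that face's scale value before averaging. A sketch of that idea for a single face (the helper name is ours, and the function's exact definition is in dlib's documentation, not reproduced here):

    #include <dlib/image_processing.h>

    // Hypothetical helper: the mean, over all landmarks of one face, of the
    // pixel distance to ground truth divided by that face's scale value,
    // here the interocular distance.
    double normalized_error (
        const dlib::full_object_detection& truth,
        const dlib::full_object_detection& pred,
        double interocular  // e.g. one entry from get_interocular_distances()
    )
    {
        double sum = 0;
        for (unsigned long i = 0; i < truth.num_parts(); ++i)
            sum += dlib::length(truth.part(i) - pred.part(i)) / interocular;
        return sum / truth.num_parts();
    }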