diff --git a/tools/python/CMakeLists.txt b/tools/python/CMakeLists.txt
index f091bde88..dd8aaa114 100644
--- a/tools/python/CMakeLists.txt
+++ b/tools/python/CMakeLists.txt
@@ -23,6 +23,7 @@ add_python_module(dlib
    src/svm_struct.cpp
    src/image.cpp
    src/object_detection.cpp
+   src/shape_predictor.cpp
 )
 
 # When you run "make install" we will copy the compiled dlib.so (or dlib.pyd)
diff --git a/tools/python/src/dlib.cpp b/tools/python/src/dlib.cpp
index 3617b443e..4a100607d 100644
--- a/tools/python/src/dlib.cpp
+++ b/tools/python/src/dlib.cpp
@@ -15,6 +15,7 @@ void bind_sequence_segmenter();
 void bind_svm_struct();
 void bind_image_classes();
 void bind_object_detection();
+void bind_shape_predictors();
 
 
 BOOST_PYTHON_MODULE(dlib)
@@ -35,5 +36,6 @@ BOOST_PYTHON_MODULE(dlib)
     bind_svm_struct();
     bind_image_classes();
     bind_object_detection();
+    bind_shape_predictors();
 }
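A minimal Python sketch of how the new shape_predictor binding is meant to be used, based on the docstrings added below; the model file, the image file, and the use of scikit-image for loading are placeholders, not part of the patch:

    import dlib
    from skimage import io

    img = io.imread("face.jpg")                           # 8bit grayscale or RGB numpy array
    predictor = dlib.shape_predictor("predictor.dat")     # file written by train_shape_predictor()
    box = dlib.rectangle(0, 0, img.shape[1], img.shape[0])  # region believed to contain the object
    shape = predictor(img, box)                           # returns a dlib.full_object_detection
    print(shape.num_parts, [shape.part(i) for i in range(shape.num_parts)])

Note that num_parts and rect are exposed as properties (no parentheses), while part(idx) and parts() are methods.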
diff --git a/tools/python/src/shape_predictor.cpp b/tools/python/src/shape_predictor.cpp
new file mode 100644
index 000000000..18fae11dd
--- /dev/null
+++ b/tools/python/src/shape_predictor.cpp
@@ -0,0 +1,320 @@
+// Copyright (C) 2014 Davis E. King (davis@dlib.net)
+// License: Boost Software License  See LICENSE.txt for the full license.
+
+#include <dlib/python.h>
+#include <dlib/matrix.h>
+#include <boost/python/args.hpp>
+#include <dlib/geometry.h>
+#include "shape_predictor.h"
+#include "conversion.h"
+
+using namespace dlib;
+using namespace std;
+using namespace boost::python;
+
+// ----------------------------------------------------------------------------------------
+
+full_object_detection run_predictor (
+    shape_predictor& predictor,
+    object img,
+    object rect
+)
+{
+    rectangle box = extract<rectangle>(rect);
+    if (is_gray_python_image(img))
+    {
+        return predictor(numpy_gray_image(img), box);
+    }
+    else if (is_rgb_python_image(img))
+    {
+        return predictor(numpy_rgb_image(img), box);
+    }
+    else
+    {
+        throw dlib::error("Unsupported image type, must be 8bit gray or RGB image.");
+    }
+}
+
+// ----------------------------------------------------------------------------------------
+
+rectangle full_obj_det_get_rect (const full_object_detection& detection)
+{ return detection.get_rect(); }
+
+unsigned long full_obj_det_num_parts (const full_object_detection& detection)
+{ return detection.num_parts(); }
+
+point full_obj_det_part (const full_object_detection& detection, const unsigned long idx)
+{
+    if (idx >= detection.num_parts())
+    {
+        PyErr_SetString(PyExc_IndexError, "Index out of range");
+        boost::python::throw_error_already_set();
+    }
+    return detection.part(idx);
+}
+
+std::vector<point> full_obj_det_parts (const full_object_detection& detection)
+{
+    const unsigned long num_parts = detection.num_parts();
+    std::vector<point> parts(num_parts);
+    for (unsigned long j = 0; j < num_parts; ++j)
+        parts[j] = detection.part(j);
+    return parts;
+}
+
+boost::shared_ptr<full_object_detection> full_obj_det_init(object& pyrect, object& pyparts)
+{
+    const unsigned long num_parts = len(pyparts);
+    std::vector<point> parts(num_parts);
+    rectangle rect = extract<rectangle>(pyrect);
+
+    for (unsigned long j = 0; j < num_parts; ++j)
+        parts[j] = extract<point>(pyparts[j]);
+
+    return boost::shared_ptr<full_object_detection>(new full_object_detection(rect, parts));
+}
+
+// ----------------------------------------------------------------------------------------
+
+inline void train_shape_predictor_on_images_py (
+    const object& pyimages,
+    const object& pydetections,
+    const std::string& predictor_output_filename,
+    const shape_predictor_training_options& options
+)
+{
+    const unsigned long num_images = len(pyimages);
+    if (num_images != len(pydetections))
+        throw dlib::error("The length of the detections list must match the length of the images list.");
+
+    std::vector<std::vector<full_object_detection> > detections(num_images);
+    dlib::array<array2d<unsigned char> > images(num_images);
+    images_and_nested_params_to_dlib(pyimages, pydetections, images, detections);
+
+    train_shape_predictor_on_images("", images, detections, predictor_output_filename, options);
+}
+
+
+inline double test_shape_predictor_with_images_py (
+    const object& pyimages,
+    const object& pydetections,
+    const object& pyscales,
+    const std::string& predictor_filename
+)
+{
+    const unsigned long num_images = len(pyimages);
+    const unsigned long num_scales = len(pyscales);
+    if (num_images != len(pydetections))
+        throw dlib::error("The length of the detections list must match the length of the images list.");
+
+    if (num_scales > 0 && num_scales != num_images)
+        throw dlib::error("The length of the scales list must match the length of the detections list.");
+
+    std::vector<std::vector<full_object_detection> > detections(num_images);
+    std::vector<std::vector<double> > scales;
+    if (num_scales > 0)
+        scales.resize(num_scales);
+    dlib::array<array2d<unsigned char> > images(num_images);
+
+    // Now copy the data into dlib based objects so we can call the trainer.
+    for (unsigned long i = 0; i < num_images; ++i)
+    {
+        const unsigned long num_boxes = len(pydetections[i]);
+        for (unsigned long j = 0; j < num_boxes; ++j)
+            detections[i].push_back(extract<full_object_detection>(pydetections[i][j]));
+
+        pyimage_to_dlib_image(pyimages[i], images[i]);
+        if (num_scales > 0)
+        {
+            if (num_boxes != len(pyscales[i]))
+                throw dlib::error("The length of the scales list must match the length of the detections list.");
+            for (unsigned long j = 0; j < num_boxes; ++j)
+                scales[i].push_back(extract<double>(pyscales[i][j]));
+        }
+    }
+
+    return test_shape_predictor_with_images(images, detections, scales, predictor_filename);
+}
+
+inline double test_shape_predictor_with_images_no_scales_py (
+    const object& pyimages,
+    const object& pydetections,
+    const std::string& predictor_filename
+)
+{
+    boost::python::list pyscales;
+    return test_shape_predictor_with_images_py(pyimages, pydetections, pyscales, predictor_filename);
+}
+
+// ----------------------------------------------------------------------------------------
+
+void bind_shape_predictors()
+{
+    using boost::python::arg;
+    {
+    typedef full_object_detection type;
+    class_<type>("full_object_detection",
+        "This object represents the location of an object in an image along with the \
+        positions of each of its constituent parts.")
+        .def("__init__", make_constructor(&full_obj_det_init),
+"requires \n\
+    - rect: dlib rectangle \n\
+    - parts: list of dlib points")
+        .add_property("rect", &full_obj_det_get_rect, "The bounding box of the parts.")
+        .add_property("num_parts", &full_obj_det_num_parts, "The number of parts of the object.")
+        .def("part", &full_obj_det_part, (arg("idx")), "A single part of the object as a dlib point.")
+        .def("parts", &full_obj_det_parts, "A vector of dlib points representing all of the parts.")
+        .def_pickle(serialize_pickle<type>());
+    }
+    {
+    typedef shape_predictor_training_options type;
+    class_<type>("shape_predictor_training_options",
+        "This object is a container for the options to the train_shape_predictor() routine.")
+        .add_property("be_verbose", &type::be_verbose,
+                                    &type::be_verbose,
+            "If true, train_shape_predictor() will print out a lot of information to stdout while training.")
+        .add_property("cascade_depth", &type::cascade_depth,
+                                       &type::cascade_depth,
+            "The number of cascades created to train the model with.")
+        .add_property("tree_depth", &type::tree_depth,
+                                    &type::tree_depth,
+            "The depth of the trees used in each cascade.  There are pow(2, tree_depth) leaves in each tree.")
+        .add_property("num_trees_per_cascade_level", &type::num_trees_per_cascade_level,
+                                                     &type::num_trees_per_cascade_level,
+            "The number of trees created for each cascade.")
+        .add_property("nu", &type::nu,
+                            &type::nu,
+            "The regularization parameter.  Larger values of this parameter \
+             will cause the algorithm to fit the training data better but may also \
+             cause overfitting.")
+        .add_property("oversampling_amount", &type::oversampling_amount,
+                                             &type::oversampling_amount,
+            "The number of randomly selected initial starting points sampled for each training example.")
+        .add_property("feature_pool_size", &type::feature_pool_size,
+                                           &type::feature_pool_size,
+            "Number of pixels used to generate features for the random trees.")
+        .add_property("lambda", &type::lambda,
+                                &type::lambda,
+            "Controls how tight the feature sampling should be.  Lower values enforce closer features.")
+        .add_property("num_test_splits", &type::num_test_splits,
+                                         &type::num_test_splits,
+            "Number of split features sampled at each node.  The one that gives the best split is chosen.")
+        .add_property("feature_pool_region_padding", &type::feature_pool_region_padding,
+                                                     &type::feature_pool_region_padding,
+            "Size of the region within which to sample features for the feature pool, \
+             e.g. a padding of 0.5 would cause the algorithm to sample pixels from a box that was 2x2 pixels")
+        .add_property("random_seed", &type::random_seed,
+                                     &type::random_seed,
+            "The random seed used by the internal random number generator.");
+    }
+    {
+    typedef shape_predictor type;
+    class_<type>("shape_predictor",
+"This object is a tool that takes in an image region containing some object and \
+outputs a set of point locations that define the pose of the object.  The classic \
+example of this is human face pose prediction, where you take an image of a human \
+face as input and are expected to identify the locations of important facial \
+landmarks such as the corners of the mouth and eyes, tip of the nose, and so forth.")
+        .def("__init__", make_constructor(&load_object_from_file<type>),
+"Loads a shape_predictor from a file that contains the output of the \n\
+train_shape_predictor() routine.")
+        .def("__call__", &run_predictor, (arg("image"), arg("box")),
+"requires \n\
+    - image is a numpy ndarray containing either an 8bit grayscale or RGB \n\
+      image. \n\
+    - box is the bounding box to begin the shape prediction inside. \n\
+ensures \n\
+    - This function runs the shape predictor on the input image and returns \n\
+      a single full_object_detection.");
+    }
+    {
+    def("train_shape_predictor", train_shape_predictor_on_images_py,
+        (arg("images"), arg("object_detections"), arg("predictor_output_filename"), arg("options")),
+"requires \n\
+    - options.lambda > 0 \n\
+    - options.nu > 0 \n\
+    - options.feature_pool_region_padding >= 0 \n\
+    - len(images) == len(object_detections) \n\
+    - images should be a list of numpy matrices that represent images, either RGB or grayscale. \n\
+    - object_detections should be a list of lists of dlib.full_object_detection objects. \
+      Each dlib.full_object_detection contains the bounding box and the list of points that make up the object parts.\n\
+ensures \n\
+    - Uses the shape_predictor_trainer to train a \n\
+      shape_predictor based on the provided labeled images and full object detections.\n\
+    - This function will apply a reasonable set of default parameters and \n\
+      preprocessing techniques to the training procedure for shape_predictor \n\
+      objects.  So the point of this function is to provide you with a very easy \n\
+      way to train a basic shape predictor. \n\
+    - The trained shape predictor is serialized to the file predictor_output_filename.");
+
+    def("train_shape_predictor", train_shape_predictor,
+        (arg("dataset_filename"), arg("predictor_output_filename"), arg("options")),
+"requires \n\
+    - options.lambda > 0 \n\
+    - options.nu > 0 \n\
+    - options.feature_pool_region_padding >= 0 \n\
+ensures \n\
+    - Uses the shape_predictor_trainer to train a \n\
+      shape_predictor based on the labeled images in the XML file \n\
+      dataset_filename.  This function assumes the file dataset_filename is in the \n\
+      XML format produced by dlib's save_image_dataset_metadata() routine. \n\
+    - This function will apply a reasonable set of default parameters and \n\
+      preprocessing techniques to the training procedure for shape_predictor \n\
+      objects.  So the point of this function is to provide you with a very easy \n\
+      way to train a basic shape predictor. \n\
+    - The trained shape predictor is serialized to the file predictor_output_filename.");
+
+    def("test_shape_predictor", test_shape_predictor_py,
+        (arg("dataset_filename"), arg("predictor_filename")),
+"ensures \n\
+    - Loads an image dataset from dataset_filename.  We assume dataset_filename is \n\
+      a file using the XML format written by save_image_dataset_metadata(). \n\
+    - Loads a shape_predictor from the file predictor_filename.  This means \n\
+      predictor_filename should be a file produced by the train_shape_predictor() \n\
+      routine. \n\
+    - This function tests the predictor against the dataset and returns the \n\
+      mean average error of the predictor.  In fact, the \n\
+      return value of this function is identical to that of dlib's \n\
+      shape_predictor_trainer() routine.  Therefore, see the documentation \n\
+      for shape_predictor_trainer() for a detailed definition of the mean average error.");
+
+    def("test_shape_predictor", test_shape_predictor_with_images_no_scales_py,
+        (arg("images"), arg("detections"), arg("predictor_filename")),
+"requires \n\
+    - len(images) == len(detections) \n\
+    - images should be a list of numpy matrices that represent images, either RGB or grayscale. \n\
+    - detections should be a list of lists of dlib.full_object_detection objects. \
+      Each dlib.full_object_detection contains the bounding box and the list of points that make up the object parts.\n\
+ensures \n\
+    - Loads a shape_predictor from the file predictor_filename.  This means \n\
+      predictor_filename should be a file produced by the train_shape_predictor() \n\
+      routine. \n\
+    - This function tests the predictor against the dataset and returns the \n\
+      mean average error of the predictor.  In fact, the \n\
+      return value of this function is identical to that of dlib's \n\
+      shape_predictor_trainer() routine.  Therefore, see the documentation \n\
+      for shape_predictor_trainer() for a detailed definition of the mean average error.");
+
+
+    def("test_shape_predictor", test_shape_predictor_with_images_py,
+        (arg("images"), arg("detections"), arg("scales"), arg("predictor_filename")),
+"requires \n\
+    - len(images) == len(detections) \n\
+    - len(detections) == len(scales) \n\
+    - for every sublist in detections: len(detections[i]) == len(scales[i]) \n\
+    - scales is a list of floating point scales that each predicted part location \
+      should be divided by.  Useful for normalization. \n\
+    - images should be a list of numpy matrices that represent images, either RGB or grayscale. \n\
+    - detections should be a list of lists of dlib.full_object_detection objects. \
+      Each dlib.full_object_detection contains the bounding box and the list of points that make up the object parts.\n\
+ensures \n\
+    - Loads a shape_predictor from the file predictor_filename.  This means \n\
+      predictor_filename should be a file produced by the train_shape_predictor() \n\
+      routine. \n\
+    - This function tests the predictor against the dataset and returns the \n\
+      mean average error of the predictor.  In fact, the \n\
+      return value of this function is identical to that of dlib's \n\
+      shape_predictor_trainer() routine.  Therefore, see the documentation \n\
+      for shape_predictor_trainer() for a detailed definition of the mean average error.");
+    }
+}
diff --git a/tools/python/src/shape_predictor.h b/tools/python/src/shape_predictor.h
new file mode 100644
index 000000000..e06bb491d
--- /dev/null
+++ b/tools/python/src/shape_predictor.h
@@ -0,0 +1,191 @@
+// Copyright (C) 2014 Davis E. King (davis@dlib.net)
+// License: Boost Software License  See LICENSE.txt for the full license.
+#ifndef DLIB_SHAPE_PREDICTOR_DETECTOR_H__
+#define DLIB_SHAPE_PREDICTOR_DETECTOR_H__
+
+#include "dlib/string.h"
+#include "dlib/geometry.h"
+#include "dlib/data_io/load_image_dataset.h"
+#include "dlib/image_processing.h"
+
+using namespace std;
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+    struct shape_predictor_training_options
+    {
+        shape_predictor_training_options()
+        {
+            be_verbose = false;
+            cascade_depth = 10;
+            tree_depth = 4;
+            num_trees_per_cascade_level = 500;
+            nu = 0.1;
+            oversampling_amount = 20;
+            feature_pool_size = 400;
+            lambda = 0.1;
+            num_test_splits = 20;
+            feature_pool_region_padding = 0;
+            random_seed = "";
+        }
+
+        bool be_verbose;
+        unsigned long cascade_depth;
+        unsigned long tree_depth;
+        unsigned long num_trees_per_cascade_level;
+        double nu;
+        unsigned long oversampling_amount;
+        unsigned long feature_pool_size;
+        double lambda;
+        unsigned long num_test_splits;
+        double feature_pool_region_padding;
+        std::string random_seed;
+    };
+
+// ----------------------------------------------------------------------------------------
+
+    namespace impl
+    {
+        inline bool contains_any_detections (
+            const std::vector<std::vector<full_object_detection> >& detections
+        )
+        {
+            for (unsigned long i = 0; i < detections.size(); ++i)
+            {
+                if (detections[i].size() != 0)
+                    return true;
+            }
+            return false;
+        }
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    template <typename image_array>
+    inline void train_shape_predictor_on_images (
+        const std::string& dataset_filename, // can be "" if it's not applicable
+        image_array& images,
+        std::vector<std::vector<full_object_detection> >& detections,
+        const std::string& predictor_output_filename,
+        const shape_predictor_training_options& options
+    )
+    {
+        if (options.lambda <= 0)
+            throw error("Invalid lambda value given to train_shape_predictor(), lambda must be > 0.");
+        if (options.nu <= 0)
+            throw error("Invalid nu value given to train_shape_predictor(), nu must be > 0.");
+        if (options.feature_pool_region_padding < 0)
+            throw error("Invalid feature_pool_region_padding value given to train_shape_predictor(), feature_pool_region_padding must be >= 0.");
+
+        if (images.size() != detections.size())
+            throw error("The list of images must have the same length as the list of detections.");
+
+        if (!impl::contains_any_detections(detections))
+            throw error("Error, the training dataset does not have any labeled object detections in it.");
+
+        shape_predictor_trainer trainer;
+
+        trainer.set_cascade_depth(options.cascade_depth);
+        trainer.set_tree_depth(options.tree_depth);
+        trainer.set_num_trees_per_cascade_level(options.num_trees_per_cascade_level);
+        trainer.set_nu(options.nu);
+        trainer.set_random_seed(options.random_seed);
+        trainer.set_oversampling_amount(options.oversampling_amount);
+        trainer.set_feature_pool_size(options.feature_pool_size);
+        trainer.set_feature_pool_region_padding(options.feature_pool_region_padding);
+        trainer.set_lambda(options.lambda);
+        trainer.set_num_test_splits(options.num_test_splits);
+
+        if (options.be_verbose)
+        {
+            std::cout << "Training with cascade depth: " << options.cascade_depth << std::endl;
+            std::cout << "Training with tree depth: " << options.tree_depth << std::endl;
+            std::cout << "Training with " << options.num_trees_per_cascade_level << " trees per cascade level."<< std::endl;
+            std::cout << "Training with nu: " << options.nu << std::endl;
+            std::cout << "Training with random seed: " << options.random_seed << std::endl;
+            std::cout << "Training with oversampling amount: " << options.oversampling_amount << std::endl;
+            std::cout << "Training with feature pool size: " << options.feature_pool_size << std::endl;
+            std::cout << "Training with feature pool region padding: " << options.feature_pool_region_padding << std::endl;
+            std::cout << "Training with lambda: " << options.lambda << std::endl;
+            std::cout << "Training with " << options.num_test_splits << " split tests."<< std::endl;
+            trainer.be_verbose();
+        }
+
+        shape_predictor predictor = trainer.train(images, detections);
+
+        std::ofstream fout(predictor_output_filename.c_str(), std::ios::binary);
+        int version = 1;
+        serialize(predictor, fout);
+        serialize(version, fout);
+
+        if (options.be_verbose)
+            std::cout << "Training complete, saved predictor to file " << predictor_output_filename << std::endl;
+    }
+
+    inline void train_shape_predictor (
+        const std::string& dataset_filename,
+        const std::string& predictor_output_filename,
+        const shape_predictor_training_options& options
+    )
+    {
+        dlib::array<array2d<unsigned char> > images;
+        std::vector<std::vector<full_object_detection> > objects;
+        load_image_dataset(images, objects, dataset_filename);
+
+        train_shape_predictor_on_images(dataset_filename, images, objects, predictor_output_filename, options);
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    template <typename image_array>
+    inline double test_shape_predictor_with_images (
+        image_array& images,
+        std::vector<std::vector<full_object_detection> >& detections,
+        std::vector<std::vector<double> >& scales,
+        const std::string& predictor_filename
+    )
+    {
+        if (images.size() != detections.size())
+            throw error("The list of images must have the same length as the list of detections.");
+        if (scales.size() > 0 && scales.size() != images.size())
+            throw error("The list of scales must have the same length as the list of detections.");
+
+        shape_predictor predictor;
+        int version = 0;
+        std::ifstream fin(predictor_filename.c_str(), std::ios::binary);
+        if (!fin)
+            throw error("Unable to open file " + predictor_filename);
+        deserialize(predictor, fin);
+        deserialize(version, fin);
+        if (version != 1)
+            throw error("Unknown shape_predictor format.");
+
+        if (scales.size() > 0)
+            return test_shape_predictor(predictor, images, detections, scales);
+        else
+            return test_shape_predictor(predictor, images, detections);
+    }
+
+    inline double test_shape_predictor_py (
+        const std::string& dataset_filename,
+        const std::string& predictor_filename
+    )
+    {
+        dlib::array<array2d<unsigned char> > images;
+        // This interface cannot take the scales parameter.
+        std::vector<std::vector<double> > scales;
+        std::vector<std::vector<full_object_detection> > objects;
+        load_image_dataset(images, objects, dataset_filename);
+
+        return test_shape_predictor_with_images(images, objects, scales, predictor_filename);
+    }
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_SHAPE_PREDICTOR_DETECTOR_H__
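A short sketch of the XML-dataset overloads bound above (the ones that take a dataset_filename); "faces.xml" is a placeholder for a file in the format written by dlib's save_image_dataset_metadata() routine:

    import dlib

    options = dlib.shape_predictor_training_options()
    options.oversampling_amount = 300   # helps when the dataset is small
    options.be_verbose = True

    # Train from the XML dataset and serialize the model to predictor.dat.
    dlib.train_shape_predictor("faces.xml", "predictor.dat", options)

    # Mean average error of the trained predictor over the same XML dataset.
    print(dlib.test_shape_predictor("faces.xml", "predictor.dat"))

Note that the model file stores the predictor followed by a version integer, and loading rejects anything other than version 1, so files written by this patch are only readable by the matching test/predict code paths.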