Add wrappers for the shape predictors

This includes the full_object_detection, a new struct in the same vein as the simple_object_detector_training_options and of course, the shape predictor classes themselves. All of training, fitting and testing are wrapped.
2024-11-01 10:14:53 +08:00 · 2014-12-10 17:25:20 +00:00 · 2014-12-10 17:25:20 +00:00 · e3aee32f34
commit e3aee32f34
parent 315a2b1cb1
4 changed files with 514 additions and 0 deletions
--- a/tools/python/CMakeLists.txt
+++ b/tools/python/CMakeLists.txt
@ -23,6 +23,7 @@ add_python_module(dlib
   src/svm_struct.cpp
   src/image.cpp
   src/object_detection.cpp
   src/shape_predictor.cpp
   )
 # When you run "make install" we will copy the compiled dlib.so (or dlib.pyd) 
--- a/tools/python/src/dlib.cpp
+++ b/tools/python/src/dlib.cpp
@ -15,6 +15,7 @@ void bind_sequence_segmenter();
 void bind_svm_struct();
 void bind_image_classes();
 void bind_object_detection();
 void bind_shape_predictors();
 BOOST_PYTHON_MODULE(dlib)
@ -35,5 +36,6 @@ BOOST_PYTHON_MODULE(dlib)
    bind_svm_struct();
    bind_image_classes();
    bind_object_detection();
    bind_shape_predictors();
 }
--- a/tools/python/src/shape_predictor.cpp
+++ b/tools/python/src/shape_predictor.cpp
@ -0,0 +1,320 @@
 // Copyright (C) 2014  Davis E. King (davis@dlib.net)
 // License: Boost Software License   See LICENSE.txt for the full license.
 #include <dlib/python.h>
 #include <dlib/geometry.h>
 #include <boost/python/args.hpp>
 #include <dlib/image_processing.h>
 #include "shape_predictor.h"
 #include "conversion.h"
 using namespace dlib;
 using namespace std;
 using namespace boost::python;
 // ----------------------------------------------------------------------------------------
 full_object_detection run_predictor (
        shape_predictor& predictor,
        object img,
        object rect
 )
 {
    rectangle box = extract<rectangle>(rect);
    if (is_gray_python_image(img))
    {
        return predictor(numpy_gray_image(img), box);
    }
    else if (is_rgb_python_image(img))
    {
        return predictor(numpy_rgb_image(img), box);
    }
    else
    {
        throw dlib::error("Unsupported image type, must be 8bit gray or RGB image.");
    }
 }
 // ----------------------------------------------------------------------------------------
 rectangle full_obj_det_get_rect (const full_object_detection& detection)
 { return detection.get_rect(); }
 unsigned long full_obj_det_num_parts (const full_object_detection& detection)
 { return detection.num_parts(); }
 point full_obj_det_part (const full_object_detection& detection, const unsigned long idx)
 {
    if (idx < 0 || idx >= detection.num_parts())
    {
        PyErr_SetString(PyExc_IndexError, "Index out of range");
        boost::python::throw_error_already_set();
    }
    return detection.part(idx);
 }
 std::vector<point> full_obj_det_parts (const full_object_detection& detection)
 {
    const unsigned long num_parts = detection.num_parts();
    std::vector<point> parts(num_parts);
    for (unsigned long j = 0; j < num_parts; ++j)
        parts[j] = detection.part(j);
    return parts;
 }
 boost::shared_ptr<full_object_detection> full_obj_det_init(object& pyrect, object& pyparts)
 {
    const unsigned long num_parts = len(pyparts);
    std::vector<point> parts(num_parts);
    rectangle rect = extract<rectangle>(pyrect);
    for (unsigned long j = 0; j < num_parts; ++j)
        parts[j] = extract<point>(pyparts[j]);
    return boost::shared_ptr<full_object_detection>(new full_object_detection(rect, parts));
 }
 // ----------------------------------------------------------------------------------------
 inline void train_shape_predictor_on_images_py (
        const object& pyimages,
        const object& pydetections,
        const std::string& predictor_output_filename,
        const shape_predictor_training_options& options
 )
 {
    const unsigned long num_images = len(pyimages);
    if (num_images != len(pydetections))
        throw dlib::error("The length of the detections list must match the length of the images list.");
    std::vector<std::vector<full_object_detection> > detections(num_images);
    dlib::array<array2d<rgb_pixel> > images(num_images);
    images_and_nested_params_to_dlib(pyimages, pydetections, images, detections);
    train_shape_predictor_on_images("", images, detections, predictor_output_filename, options);
 }
 inline double test_shape_predictor_with_images_py (
        const object& pyimages,
        const object& pydetections,
        const object& pyscales,
        const std::string& predictor_filename
 )
 {
    const unsigned long num_images = len(pyimages);
    const unsigned long num_scales = len(pyscales);
    if (num_images != len(pydetections))
        throw dlib::error("The length of the detections list must match the length of the images list.");
    if (num_scales > 0 && num_scales != num_images)
        throw dlib::error("The length of the scales list must match the length of the detections list.");
    std::vector<std::vector<full_object_detection> > detections(num_images);
    std::vector<std::vector<double> > scales;
    if (num_scales > 0)
        scales.resize(num_scales);
    dlib::array<array2d<rgb_pixel> > images(num_images);
    // Now copy the data into dlib based objects so we can call the trainer.
    for (unsigned long i = 0; i < num_images; ++i)
    {
        const unsigned long num_boxes = len(pydetections[i]);
        for (unsigned long j = 0; j < num_boxes; ++j)
            detections[i].push_back(extract<full_object_detection>(pydetections[i][j]));
        pyimage_to_dlib_image(pyimages[i], images[i]);
        if (num_scales > 0)
        {
            if (num_boxes != len(pyscales[i]))
                throw dlib::error("The length of the scales list must match the length of the detections list.");
            for (unsigned long j = 0; j < num_boxes; ++j)
                scales[i].push_back(extract<double>(pyscales[i][j]));
        }
    }
    return test_shape_predictor_with_images(images, detections, scales, predictor_filename);
 }
 inline double test_shape_predictor_with_images_no_scales_py (
        const object& pyimages,
        const object& pydetections,
        const std::string& predictor_filename
 )
 {
    boost::python::list pyscales;
    return test_shape_predictor_with_images_py(pyimages, pydetections, pyscales, predictor_filename);
 }
 // ----------------------------------------------------------------------------------------
 void bind_shape_predictors()
 {
    using boost::python::arg;
    {
    typedef full_object_detection type;
    class_<type>("full_object_detection",
    "This object represents the location of an object in an image along with the \
    positions of each of its constituent parts.")
        .def("__init__", make_constructor(&full_obj_det_init),
 "requires \n\
    - rect: dlib rectangle \n\
    - parts: list of dlib points")
        .add_property("rect", &full_obj_det_get_rect, "The bounding box of the parts.")
        .add_property("num_parts", &full_obj_det_num_parts, "The number of parts of the object.")
        .def("part", &full_obj_det_part, (arg("idx")), "A single part of the object as a dlib point.")
        .def("parts", &full_obj_det_parts, "A vector of dlib points representing all of the parts.")
        .def_pickle(serialize_pickle<type>());
    }
    {
    typedef shape_predictor_training_options type;
    class_<type>("shape_predictor_training_options",
        "This object is a container for the options to the train_shape_predictor() routine.")
        .add_property("be_verbose", &type::be_verbose,
                                    &type::be_verbose,
                      "If true, train_shape_predictor() will print out a lot of information to stdout while training.")
        .add_property("cascade_depth", &type::cascade_depth,
                                       &type::cascade_depth,
                      "The number of cascades created to train the model with.")
        .add_property("tree_depth", &type::tree_depth,
                                    &type::tree_depth,
                      "The depth of the trees used in each cascade. There are pow(2, get_tree_depth()) leaves in each tree")
        .add_property("num_trees_per_cascade_level", &type::num_trees_per_cascade_level,
                                                     &type::num_trees_per_cascade_level,
                      "The number of trees created for each cascade.")
        .add_property("nu", &type::nu,
                            &type::nu,
                      "The regularization parameter.  Larger values of this parameter \
                       will cause the algorithm to fit the training data better but may also \
                       cause overfitting.")
        .add_property("oversampling_amount", &type::oversampling_amount,
                                             &type::oversampling_amount,
                      "The number of randomly selected initial starting points sampled for each training example")
        .add_property("feature_pool_size", &type::feature_pool_size,
                                           &type::feature_pool_size,
                      "Number of pixels used to generate features for the random trees.")
        .add_property("lambda", &type::lambda,
                                &type::lambda,
                      "Controls how tight the feature sampling should be. Lower values enforce closer features.")
        .add_property("num_test_splits", &type::num_test_splits,
                                         &type::num_test_splits,
                      "Number of split features at each node to sample. The one that gives the best split is chosen.")
        .add_property("feature_pool_region_padding", &type::feature_pool_region_padding,
                                                     &type::feature_pool_region_padding,
                      "Size of region within which to sample features for the feature pool, \
                      e.g a padding of 0.5 would cause the algorithm to sample pixels from a box that was 2x2 pixels")
        .add_property("random_seed", &type::random_seed,
                                     &type::random_seed,
                      "The random seed used by the internal random number generator");
    }
    {
    typedef shape_predictor type;
    class_<type>("shape_predictor",
 "This object is a tool that takes in an image region containing some object and \
 outputs a set of point locations that define the pose of the object. The classic \
 example of this is human face pose prediction, where you take an image of a human \
 face as input and are expected to identify the locations of important facial \
 landmarks such as the corners of the mouth and eyes, tip of the nose, and so forth.")
        .def("__init__", make_constructor(&load_object_from_file<type>),
 "Loads a shape_predictor from a file that contains the output of the \n\
 train_shape_predictor() routine.")
        .def("__call__", &run_predictor, (arg("image"), arg("box")),
 "requires \n\
    - image is a numpy ndarray containing either an 8bit grayscale or RGB \n\
      image. \n\
    - box is the bounding box to begin the shape prediction inside. \n\
 ensures \n\
    - This function runs the shape predictor on the input image and returns \n\
      a single full object detection.");
    }
    {
    def("train_shape_predictor", train_shape_predictor_on_images_py,
        (arg("images"), arg("object_detections"), arg("detector_filename"), arg("options")),
 "requires \n\
    - options.lambda > 0 \n\
    - options.nu > 0 \n\
    - options.feature_pool_region_padding >= 0 \n\
    - len(images) == len(object_detections) \n\
    - images should be a list of numpy matrices that represent images, either RGB or grayscale. \n\
    - object_detections should be a list of lists of dlib.full_object_detection objects. \
      Each dlib.full_object_detection contains the bounding box and the lists of points that make up the object parts.\n\
 ensures \n\
    - Uses the shape_predictor_trainer to train a \n\
      shape_predictor based on the provided labeled images and full object detections.\n\
    - This function will apply a reasonable set of default parameters and \n\
      preprocessing techniques to the training procedure for shape_predictors \n\
      objects.  So the point of this function is to provide you with a very easy \n\
      way to train a basic shape predictor.   \n\
    - The trained shape predictor is serialized to the file predictor_output_filename.");
    def("train_shape_predictor", train_shape_predictor,
        (arg("dataset_filename"), arg("predictor_output_filename"), arg("options")),
 "requires \n\
    - options.lambda > 0 \n\
    - options.nu > 0 \n\
    - options.feature_pool_region_padding >= 0 \n\
 ensures \n\
    - Uses the shape_predictor_trainer to train a \n\
      shape_predictor based on the labeled images in the XML file \n\
      dataset_filename.  This function assumes the file dataset_filename is in the \n\
      XML format produced by dlib's save_image_dataset_metadata() routine. \n\
    - This function will apply a reasonable set of default parameters and \n\
      preprocessing techniques to the training procedure for shape_predictors \n\
      objects.  So the point of this function is to provide you with a very easy \n\
      way to train a basic shape predictor.   \n\
    - The trained shape predictor is serialized to the file predictor_output_filename.");
    def("test_shape_predictor", test_shape_predictor_py,
        (arg("dataset_filename"), arg("predictor_filename")),
 "ensures \n\
    - Loads an image dataset from dataset_filename.  We assume dataset_filename is \n\
      a file using the XML format written by save_image_dataset_metadata(). \n\
    - Loads a shape_predictor from the file predictor_filename.  This means \n\
      predictor_filename should be a file produced by the train_shape_predictor() \n\
      routine. \n\
    - This function tests the predictor against the dataset and returns the \n\
      mean average error of the detector.  In fact, The \n\
      return value of this function is identical to that of dlib's \n\
      shape_predictor_trainer() routine.  Therefore, see the documentation \n\
      for shape_predictor_trainer() for a detailed definition of the mean average error.");
    def("test_shape_predictor", test_shape_predictor_with_images_no_scales_py,
            (arg("images"), arg("detections"), arg("predictor_filename")),
 "requires \n\
    - len(images) == len(object_detections) \n\
    - images should be a list of numpy matrices that represent images, either RGB or grayscale. \n\
    - object_detections should be a list of lists of dlib.full_object_detection objects. \
      Each dlib.full_object_detection contains the bounding box and the lists of points that make up the object parts.\n\
 ensures \n\
    - Loads a shape_predictor from the file predictor_filename.  This means \n\
      predictor_filename should be a file produced by the train_shape_predictor()  \n\
      routine. \n\
    - This function tests the predictor against the dataset and returns the \n\
      mean average error of the detector.  In fact, The \n\
      return value of this function is identical to that of dlib's \n\
      shape_predictor_trainer() routine.  Therefore, see the documentation \n\
      for shape_predictor_trainer() for a detailed definition of the mean average error.");
    def("test_shape_predictor", test_shape_predictor_with_images_py,
            (arg("images"), arg("detections"), arg("scales"), arg("predictor_filename")),
 "requires \n\
    - len(images) == len(object_detections) \n\
    - len(object_detections) == len(scales) \n\
    - for every sublist in object_detections: len(object_detections[i]) == len(scales[i]) \n\
    - scales is a list of floating point scales that each predicted part location \
      should be divided by. Useful for normalization. \n\
    - images should be a list of numpy matrices that represent images, either RGB or grayscale. \n\
    - object_detections should be a list of lists of dlib.full_object_detection objects. \
      Each dlib.full_object_detection contains the bounding box and the lists of points that make up the object parts.\n\
 ensures \n\
    - Loads a shape_predictor from the file predictor_filename.  This means \n\
      predictor_filename should be a file produced by the train_shape_predictor()  \n\
      routine. \n\
    - This function tests the predictor against the dataset and returns the \n\
      mean average error of the detector.  In fact, The \n\
      return value of this function is identical to that of dlib's \n\
      shape_predictor_trainer() routine.  Therefore, see the documentation \n\
      for shape_predictor_trainer() for a detailed definition of the mean average error.");
    }
 }
--- a/tools/python/src/shape_predictor.h
+++ b/tools/python/src/shape_predictor.h
@ -0,0 +1,191 @@
 // Copyright (C) 2014  Davis E. King (davis@dlib.net)
 // License: Boost Software License   See LICENSE.txt for the full license.
 #ifndef DLIB_SHAPE_PREDICTOR_DETECTOR_H__
 #define DLIB_SHAPE_PREDICTOR_DETECTOR_H__
 #include "dlib/string.h"
 #include "dlib/geometry.h"
 #include "dlib/data_io/load_image_dataset.h"
 #include "dlib/image_processing.h"
 using namespace std;
 namespace dlib
 {
 // ----------------------------------------------------------------------------------------
    struct shape_predictor_training_options
    {
        shape_predictor_training_options()
        {
            be_verbose = false;
            cascade_depth = 10;
            tree_depth = 4;
            num_trees_per_cascade_level = 500;
            nu = 0.1;
            oversampling_amount = 20;
            feature_pool_size = 400;
            lambda = 0.1;
            num_test_splits = 20;
            feature_pool_region_padding = 0;
            random_seed = "";
        }
        bool be_verbose;
        unsigned long cascade_depth;
        unsigned long tree_depth;
        unsigned long num_trees_per_cascade_level;
        double nu;
        unsigned long oversampling_amount;
        unsigned long feature_pool_size;
        double lambda;
        unsigned long num_test_splits;
        double feature_pool_region_padding;
        std::string random_seed;
    };
 // ----------------------------------------------------------------------------------------
    namespace impl
    {
        inline bool contains_any_detections (
            const std::vector<std::vector<full_object_detection> >& detections
        )
        {
            for (unsigned long i = 0; i < detections.size(); ++i)
            {
                if (detections[i].size() != 0)
                    return true;
            }
            return false;
        }
    }
 // ----------------------------------------------------------------------------------------
    template <typename image_array>
    inline void train_shape_predictor_on_images (
        const std::string& dataset_filename, // can be "" if it's not applicable
        image_array& images,
        std::vector<std::vector<full_object_detection> >& detections,
        const std::string& predictor_output_filename,
        const shape_predictor_training_options& options
    )
    {
        if (options.lambda <= 0)
            throw error("Invalid lambda value given to train_shape_predictor(), lambda must be > 0.");
        if (options.nu <= 0)
            throw error("Invalid nu value given to train_shape_predictor(), nu must be > 0.");
        if (options.feature_pool_region_padding < 0)
            throw error("Invalid feature_pool_region_padding value given to train_shape_predictor(), feature_pool_region_padding must be >= 0.");
        if (images.size() != detections.size())
            throw error("The list of images must have the same length as the list of detections.");
        if (!impl::contains_any_detections(detections))
            throw error("Error, the training dataset does not have any labeled object detections in it.");
        shape_predictor_trainer trainer;
        trainer.set_cascade_depth(options.cascade_depth);
        trainer.set_tree_depth(options.tree_depth);
        trainer.set_num_trees_per_cascade_level(options.num_trees_per_cascade_level);
        trainer.set_nu(options.nu);
        trainer.set_random_seed(options.random_seed);
        trainer.set_oversampling_amount(options.oversampling_amount);
        trainer.set_feature_pool_size(options.feature_pool_size);
        trainer.set_feature_pool_region_padding(options.feature_pool_region_padding);
        trainer.set_lambda(options.lambda);
        trainer.set_num_test_splits(options.num_test_splits);
        if (options.be_verbose)
        {
            std::cout << "Training with cascade depth: " << options.cascade_depth << std::endl;
            std::cout << "Training with tree depth: " << options.tree_depth << std::endl;
            std::cout << "Training with " << options.num_trees_per_cascade_level << " trees per cascade level."<< std::endl;
            std::cout << "Training with nu: " << options.nu << std::endl;
            std::cout << "Training with random seed: " << options.random_seed << std::endl;
            std::cout << "Training with oversampling amount: " << options.oversampling_amount << std::endl;
            std::cout << "Training with feature pool size: " << options.feature_pool_size << std::endl;
            std::cout << "Training with feature pool region padding: " << options.feature_pool_region_padding << std::endl;
            std::cout << "Training with lambda: " << options.lambda << std::endl;
            std::cout << "Training with " << options.num_test_splits << " split tests."<< std::endl;
            trainer.be_verbose();
        }
        shape_predictor predictor = trainer.train(images, detections);
        std::ofstream fout(predictor_output_filename.c_str(), std::ios::binary);
        int version = 1;
        serialize(predictor, fout);
        serialize(version, fout);
        if (options.be_verbose)
            std::cout << "Training complete, saved predictor to file " << predictor_output_filename << std::endl;
    }
    inline void train_shape_predictor (
        const std::string& dataset_filename,
        const std::string& predictor_output_filename,
        const shape_predictor_training_options& options
    )
    {
        dlib::array<array2d<rgb_pixel> > images;
        std::vector<std::vector<full_object_detection> > objects;
        load_image_dataset(images, objects, dataset_filename);
        train_shape_predictor_on_images(dataset_filename, images, objects, predictor_output_filename, options);
    }
 // ----------------------------------------------------------------------------------------
    template <typename image_array>
    inline double test_shape_predictor_with_images (
            image_array& images,
            std::vector<std::vector<full_object_detection> >& detections,
            std::vector<std::vector<double> >& scales,
            const std::string& predictor_filename
    )
    {
        if (images.size() != detections.size())
            throw error("The list of images must have the same length as the list of detections.");
        if (scales.size() > 0  && scales.size() != images.size())
            throw error("The list of scales must have the same length as the list of detections.");
        shape_predictor predictor;
        int version = 0;
        std::ifstream fin(predictor_filename.c_str(), std::ios::binary);
        if (!fin)
            throw error("Unable to open file " + predictor_filename);
        deserialize(predictor, fin);
        deserialize(version, fin);
        if (version != 1)
            throw error("Unknown shape_predictor format.");
        if (scales.size() > 0)
            return test_shape_predictor(predictor, images, detections, scales);
        else
            return test_shape_predictor(predictor, images, detections);
    }
    inline double test_shape_predictor_py (
        const std::string& dataset_filename,
        const std::string& predictor_filename
    )
    {
        dlib::array<array2d<rgb_pixel> > images;
        // This interface cannot take the scales parameter.
        std::vector<std::vector<double> > scales;
        std::vector<std::vector<full_object_detection> > objects;
        load_image_dataset(images, objects, dataset_filename);
        return test_shape_predictor_with_images(images, objects, scales, predictor_filename);
    }
 // ----------------------------------------------------------------------------------------
 }
 #endif // DLIB_SHAPE_PREDICTOR_DETECTOR_H__