Clarified a few comments and simplified the serialization code a bit.

Also just cleaned up a few minor details.
2024-11-01 10:14:53 +08:00 · 2014-12-27 15:30:56 -05:00 · 2014-12-27 15:30:56 -05:00 · 1ab3482597
commit 1ab3482597
parent 773fe59a34
11 changed files with 116 additions and 117 deletions
--- a/python_examples/face_landmark_detection.py
+++ b/python_examples/face_landmark_detection.py
@ -59,16 +59,20 @@ for f in glob.glob(os.path.join(faces_folder_path, "*.jpg")):
    win.clear_overlay()
    win.set_image(img)
    # Ask the detector to find the bounding boxes of each face. The 1 in the
    # second argument indicates that we should upsample the image 1 time. This
    # will make everything bigger and allow us to detect more faces.
    dets = detector(img, 1)
    print("Number of faces detected: {}".format(len(dets)))
    for k, d in enumerate(dets):
        print("Detection {}: Left: {} Top: {} Right: {} Bottom: {}".format(
            k, d.left(), d.top(), d.right(), d.bottom()))
-        shapes = predictor(img, d)
+        # Get the landmarks/parts for the face in box d.
-        print("Part 0: {}, Part 1: {} ...".format(shapes.part(0),
+        shape = predictor(img, d)
-                                                  shapes.part(1)))
+        print("Part 0: {}, Part 1: {} ...".format(shape.part(0),
-        # Add all facial landmarks one at a time
+                                                  shape.part(1)))
-        win.add_overlay(shapes)
+        # Draw the face landmarks on the screen.
        win.add_overlay(shape)
    win.add_overlay(dets)
    raw_input("Hit enter to continue")
--- a/python_examples/max_cost_assignment.py
+++ b/python_examples/max_cost_assignment.py
@ -2,9 +2,8 @@
 # The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
 # 
 # This simple example shows how to call dlib's optimal linear assignment
-#   problem solver.
+# problem solver.  It is an implementation of the famous Hungarian algorithm
-#   It is an implementation of the famous Hungarian algorithm and is quite fast,
+# and is quite fast, operating in O(N^3) time.
 #   operating in O(N^3) time.
 #
 # COMPILING THE DLIB PYTHON INTERFACE
 #   Dlib comes with a compiled python interface for python 2.7 on MS Windows. If
--- a/python_examples/sequence_segmenter.py
+++ b/python_examples/sequence_segmenter.py
@ -83,45 +83,47 @@ def print_segment(sentence, names):
        sys.stdout.write("\n")
 # Now let's make some training data.  Each example is a sentence as well as a
 # set of ranges which indicate the locations of any names.   
 names = dlib.ranges()     # make an array of dlib.range objects.
 segments = dlib.rangess() # make an array of arrays of dlib.range objects.
-sentences = ["The other day I saw a man named Jim Smith",
+sentences = []
             "Davis King is the main author of the dlib Library",
             "Bob Jones is a name and so is George Clinton",
             "My dog is named Bob Barker",
             "ABC is an acronym but John James Smith is a name",
             "No names in this sentence at all"]
 sentences.append("The other day I saw a man named Jim Smith")
 # We want to detect person names.  So we note that the name is located within
 # the range [8, 10).  Note that we use half open ranges to identify segments.
 # So in this case, the segment identifies the string "Jim Smith".
 names.append(dlib.range(8, 10))
 segments.append(names)
-# make names empty for use again below
+names.clear() # make names empty for use again below
 names.clear()
 sentences.append("Davis King is the main author of the dlib Library")
 names.append(dlib.range(0, 2))
 segments.append(names)
 names.clear()
 sentences.append("Bob Jones is a name and so is George Clinton")
 names.append(dlib.range(0, 2))
 names.append(dlib.range(8, 10))
 segments.append(names)
 names.clear()
 sentences.append("My dog is named Bob Barker")
 names.append(dlib.range(4, 6))
 segments.append(names)
 names.clear()
 sentences.append("ABC is an acronym but John James Smith is a name")
 names.append(dlib.range(5, 8))
 segments.append(names)
 names.clear()
 sentences.append("No names in this sentence at all")
 segments.append(names)
 names.clear()
 # Now before we can pass these training sentences to the dlib tools we need to
 # convert them into arrays of vectors as discussed above.  We can use either a
 # sparse or dense representation depending on our needs.  In this example, we
--- a/python_examples/train_object_detector.py
+++ b/python_examples/train_object_detector.py
@ -36,6 +36,7 @@ if len(sys.argv) != 2:
    exit()
 faces_folder = sys.argv[1]
 # Now let's do the training.  The train_simple_object_detector() function has a
 # bunch of options, all of which come with reasonable default values.  The next
 # few lines go over some of these options.
@ -55,6 +56,9 @@ options.C = 5
 options.num_threads = 4
 options.be_verbose = True
 training_xml_path = os.path.join(faces_folder, "training.xml")
 testing_xml_path = os.path.join(faces_folder, "testing.xml")
 # This function does the actual training.  It will save the final detector to
 # detector.svm.  The input is an XML file that lists the images in the training
 # dataset and also contains the positions of the face boxes.  To create your
@ -63,11 +67,10 @@ options.be_verbose = True
 # images with boxes.  To see how to use it read the tools/imglab/README.txt
 # file.  But for this example, we just use the training.xml file included with
 # dlib.
 training_xml_path = os.path.join(faces_folder, "training.xml")
 testing_xml_path = os.path.join(faces_folder, "testing.xml")
 dlib.train_simple_object_detector(training_xml_path, "detector.svm", options)
 # Now that we have a face detector we can test it.  The first statement tests
 # it on the training data.  It will print the precision, recall, and then
 # average precision.
@ -80,6 +83,10 @@ print("Training accuracy: {}".format(
 print("Testing accuracy: {}".format(
    dlib.test_simple_object_detector(testing_xml_path, "detector.svm")))
 # Now let's use the detector as you would in a normal application.  First we
 # will load it from disk.
 detector = dlib.simple_object_detector("detector.svm")
@ -106,6 +113,12 @@ for f in glob.glob(os.path.join(faces_folder, "*.jpg")):
    win.add_overlay(dets)
    raw_input("Hit enter to continue")
 # Finally, note that you don't have to use the XML based input to
 # train_simple_object_detector().  If you have already loaded your training
 # images and bounding boxes for the objects then you can call it as shown
@ -126,10 +139,10 @@ boxes_img2 = ([dlib.rectangle(left=154, top=46, right=228, bottom=121),
 boxes = [boxes_img1, boxes_img2]
 detector2 = dlib.train_simple_object_detector(images, boxes, options)
-# We could save this detector by uncommenting the following
+# We could save this detector to disk by uncommenting the following.
 #detector2.save('detector2.svm')
-# Now let's load the trained detector and look at its HOG filter!
+# Now let's look at its HOG filter!
 win_det.set_image(detector2)
 raw_input("Hit enter to continue")
--- a/python_examples/train_shape_predictor.py
+++ b/python_examples/train_shape_predictor.py
@ -8,7 +8,7 @@
 #   In particular, we will train a face landmarking model based on a small
 #   dataset and then evaluate it.  If you want to visualize the output of the
 #   trained model on some images then you can run the
-#   face_landmark_detection.py example program with sp.dat as the input
+#   face_landmark_detection.py example program with predictor.dat as the input
 #   model.
 #
 #   It should also be noted that this kind of model, while often used for face
@ -49,7 +49,7 @@ options = dlib.shape_predictor_training_options()
 # Now make the object responsible for training the model.
 # This algorithm has a bunch of parameters you can mess with.  The
 # documentation for the shape_predictor_trainer explains all of them.
-# You should also read Kazemi paper which explains all the parameters
+# You should also read Kazemi's paper which explains all the parameters
 # in great detail.  However, here I'm just setting three of them
 # differently than their default values.  I'm doing this because we
 # have a very small dataset.  In particular, setting the oversampling
@ -63,33 +63,35 @@ options.nu = 0.05
 options.tree_depth = 2
 options.be_verbose = True
-# This function does the actual training.  It will save the final predictor to
+# dlib.train_shape_predictor() does the actual training.  It will save the
-# predictor.dat.  The input is an XML file that lists the images in the training
+# final predictor to predictor.dat.  The input is an XML file that lists the
-# dataset and also contains the positions of the face parts.
+# images in the training dataset and also contains the positions of the face
 # parts.
 training_xml_path = os.path.join(faces_folder, "training_with_face_landmarks.xml")
 testing_xml_path = os.path.join(faces_folder, "testing_with_face_landmarks.xml")
 dlib.train_shape_predictor(training_xml_path, "predictor.dat", options)
-# Now that we have a facial landmark predictor we can test it.  The first
+# Now that we have a model we can test it.  dlib.test_shape_predictor()
-# statement tests it on the training data.  It will print the mean average error
+# measures the average distance between a face landmark output by the
-print("")  # Print blank line to create gap from previous output
+# shape_predictor and where it should be according to the truth data.
-print("Training accuracy: {}".format(
+print("\nTraining accuracy: {}".format(
    dlib.test_shape_predictor(training_xml_path, "predictor.dat")))
-# However, to get an idea if it really worked without overfitting we need to
+# The real test is to see how well it does on data it wasn't trained on.  We
-# run it on images it wasn't trained on.  The next line does this.  Happily, we
+# trained it on a very small dataset so the accuracy is not extremely high, but
-# see that the object detector works perfectly on the testing images.
+# it's still doing quite well.  Moreover, if you train it on one of the large
 # face landmarking datasets you will obtain state-of-the-art results, as shown
 # in the Kazemi paper.
 testing_xml_path = os.path.join(faces_folder, "testing_with_face_landmarks.xml")
 print("Testing accuracy: {}".format(
    dlib.test_shape_predictor(testing_xml_path, "predictor.dat")))
-# Now let's use the detector as you would in a normal application.  First we
+# Now let's use it as you would in a normal application.  First we will load it
-# will load it from disk. We also need to load a face detector to provide the
+# from disk. We also need to load a face detector to provide the initial
-# initial estimate of the facial location
+# estimate of the facial location.
 detector = dlib.get_frontal_face_detector()
 predictor = dlib.shape_predictor("predictor.dat")
 detector = dlib.get_frontal_face_detector()
-# Now let's run the detector and predictor over the images in the faces folder
+# Now let's run the detector and shape_predictor over the images in the faces
-# and display the results.
+# folder and display the results.
 print("Showing detections and predictions on the images in the faces folder...")
 win = dlib.image_window()
 for f in glob.glob(os.path.join(faces_folder, "*.jpg")):
@ -99,21 +101,21 @@ for f in glob.glob(os.path.join(faces_folder, "*.jpg")):
    win.clear_overlay()
    win.set_image(img)
    # Ask the detector to find the bounding boxes of each face. The 1 in the
    # second argument indicates that we should upsample the image 1 time. This
    # will make everything bigger and allow us to detect more faces.
    dets = detector(img, 1)
    print("Number of faces detected: {}".format(len(dets)))
    for k, d in enumerate(dets):
        print("Detection {}: Left: {} Top: {} Right: {} Bottom: {}".format(
            k, d.left(), d.top(), d.right(), d.bottom()))
-        shapes = predictor(img, d)
+        # Get the landmarks/parts for the face in box d.
-        print("Part 0: {}, Part 1: {} ...".format(shapes.part(0),
+        shape = predictor(img, d)
-                                                  shapes.part(1)))
+        print("Part 0: {}, Part 1: {} ...".format(shape.part(0),
-        # Add all facial landmarks one at a time
+                                                  shape.part(1)))
-        win.add_overlay(shapes)
+        # Draw the face landmarks on the screen.
        win.add_overlay(shape)
    win.add_overlay(dets)
    raw_input("Hit enter to continue")
 # Finally, note that you don't have to use the XML based input to
 # train_shape_predictor().  If you have already loaded your training
 # images and full_object_detections for the objects then you can call it with
 # the existing objects.
--- a/tools/python/src/gui.cpp
+++ b/tools/python/src/gui.cpp
@ -51,9 +51,7 @@ void add_overlay_rect (
    const rgb_pixel& color
 )
 {
-    std::vector<rectangle> rects;
+    win.add_overlay(rect, color);
    rects.push_back(rect);
    win.add_overlay(rects, color);
 }
 void add_overlay_parts (
@ -62,9 +60,7 @@ void add_overlay_parts (
    const rgb_pixel& color
 )
 {
-    std::vector<full_object_detection> detections;
+    win.add_overlay(render_face_detections(detection, color));
    detections.push_back(detection);
    win.add_overlay(render_face_detections(detections, color));
 }
 boost::shared_ptr<image_window> make_image_window_from_image(object img)
--- a/tools/python/src/object_detection.cpp
+++ b/tools/python/src/object_detection.cpp
@ -257,8 +257,9 @@ ensures \n\
    class_<type>("fhog_object_detector",
        "This object represents a sliding window histogram-of-oriented-gradients based object detector.")
        .def("__init__", make_constructor(&load_object_from_file<type>),  
-"Loads a simple_object_detector from a file that contains the output of the \n\
+"Loads an object detector from a file that contains the output of the \n\
-train_simple_object_detector() routine.")
+train_simple_object_detector() routine or a serialized C++ object of type\n\
 object_detector<scan_fhog_pyramid<pyramid_down<6>>>.")
        .def("__call__", run_detector_with_upscale, (arg("image"), arg("upsample_num_times")=0),
 "requires \n\
    - image is a numpy ndarray containing either an 8bit grayscale or RGB \n\
--- a/tools/python/src/serialize_object_detector.h
+++ b/tools/python/src/serialize_object_detector.h
@ -39,9 +39,10 @@ namespace dlib
    inline void save_simple_object_detector(const simple_object_detector& detector, const std::string& detector_output_filename)
    {
        std::ofstream fout(detector_output_filename.c_str(), std::ios::binary);
        int version = 1;
        serialize(detector, fout);
-        serialize(version, fout);
+        // Don't need to save the version or upsampling amount because we want to write out the
        // object detector just like the C++ code that serializes an object_detector would.
        // We also don't know the upsampling amount in this case anyway.
    }
 }
--- a/tools/python/src/shape_predictor.cpp
+++ b/tools/python/src/shape_predictor.cpp
@ -38,9 +38,7 @@ full_object_detection run_predictor (
 void save_shape_predictor(const shape_predictor& predictor, const std::string& predictor_output_filename)
 {
    std::ofstream fout(predictor_output_filename.c_str(), std::ios::binary);
    int version = 1;
    serialize(predictor, fout);
    serialize(version, fout);
 }
 // ----------------------------------------------------------------------------------------
@ -95,7 +93,7 @@ inline shape_predictor train_shape_predictor_on_images_py (
        throw dlib::error("The length of the detections list must match the length of the images list.");
    std::vector<std::vector<full_object_detection> > detections(num_images);
-    dlib::array<array2d<rgb_pixel> > images(num_images);
+    dlib::array<array2d<unsigned char> > images(num_images);
    images_and_nested_params_to_dlib(pyimages, pydetections, images, detections);
    return train_shape_predictor_on_images(images, detections, options);
@ -121,9 +119,9 @@ inline double test_shape_predictor_with_images_py (
    std::vector<std::vector<double> > scales;
    if (num_scales > 0)
        scales.resize(num_scales);
-    dlib::array<array2d<rgb_pixel> > images(num_images);
+    dlib::array<array2d<unsigned char> > images(num_images);
-    // Now copy the data into dlib based objects so we can call the trainer.
+    // Now copy the data into dlib based objects so we can call the testing routine.
    for (unsigned long i = 0; i < num_images; ++i)
    {
        const unsigned long num_boxes = len(pydetections[i]);
@ -193,7 +191,7 @@ void bind_shape_predictors()
                            &type::nu,
                      "The regularization parameter.  Larger values of this parameter \
                       will cause the algorithm to fit the training data better but may also \
-                       cause overfitting.")
+                       cause overfitting.  The value must be in the range (0, 1].")
        .add_property("oversampling_amount", &type::oversampling_amount,
                                             &type::oversampling_amount,
                      "The number of randomly selected initial starting points sampled for each training example")
@ -232,7 +230,7 @@ train_shape_predictor() routine.")
    - box is the bounding box to begin the shape prediction inside. \n\
 ensures \n\
    - This function runs the shape predictor on the input image and returns \n\
-      a single full object detection.")
+      a single full_object_detection.")
        .def("save", save_shape_predictor, (arg("predictor_output_filename")), "Save a shape_predictor to the provided path.")
        .def_pickle(serialize_pickle<type>());
    }
@ -241,36 +239,28 @@ ensures \n\
        (arg("images"), arg("object_detections"), arg("options")),
 "requires \n\
    - options.lambda > 0 \n\
-    - options.nu > 0 \n\
+    - 0 < options.nu <= 1 \n\
    - options.feature_pool_region_padding >= 0 \n\
    - len(images) == len(object_detections) \n\
    - images should be a list of numpy matrices that represent images, either RGB or grayscale. \n\
    - object_detections should be a list of lists of dlib.full_object_detection objects. \
      Each dlib.full_object_detection contains the bounding box and the lists of points that make up the object parts.\n\
 ensures \n\
-    - Uses the shape_predictor_trainer to train a \n\
+    - Uses dlib's shape_predictor_trainer object to train a \n\
-      shape_predictor based on the provided labeled images and full object detections.\n\
+      shape_predictor based on the provided labeled images, full_object_detections, and options.\n\
    - This function will apply a reasonable set of default parameters and \n\
      preprocessing techniques to the training procedure for shape_predictors \n\
      objects.  So the point of this function is to provide you with a very easy \n\
      way to train a basic shape predictor. \n\
    - The trained shape_predictor is returned");
    def("train_shape_predictor", train_shape_predictor,
        (arg("dataset_filename"), arg("predictor_output_filename"), arg("options")),
 "requires \n\
    - options.lambda > 0 \n\
-    - options.nu > 0 \n\
+    - 0 < options.nu <= 1 \n\
    - options.feature_pool_region_padding >= 0 \n\
 ensures \n\
-    - Uses the shape_predictor_trainer to train a \n\
+    - Uses dlib's shape_predictor_trainer to train a \n\
      shape_predictor based on the labeled images in the XML file \n\
-      dataset_filename.  This function assumes the file dataset_filename is in the \n\
+      dataset_filename and the provided options.  This function assumes the file dataset_filename is in the \n\
      XML format produced by dlib's save_image_dataset_metadata() routine. \n\
    - This function will apply a reasonable set of default parameters and \n\
      preprocessing techniques to the training procedure for shape_predictors \n\
      objects.  So the point of this function is to provide you with a very easy \n\
      way to train a basic shape predictor.   \n\
    - The trained shape predictor is serialized to the file predictor_output_filename.");
    def("test_shape_predictor", test_shape_predictor_py,
--- a/tools/python/src/shape_predictor.h
+++ b/tools/python/src/shape_predictor.h
@ -73,8 +73,8 @@ namespace dlib
    {
        if (options.lambda <= 0)
            throw error("Invalid lambda value given to train_shape_predictor(), lambda must be > 0.");
-        if (options.nu <= 0)
+        if (!(0 < options.nu && options.nu <= 1))
-            throw error("Invalid nu value given to train_shape_predictor(), nu must be > 0.");
+            throw error("Invalid nu value given to train_shape_predictor(). It is required that 0 < nu <= 1.");
        if (options.feature_pool_region_padding < 0)
            throw error("Invalid feature_pool_region_padding value given to train_shape_predictor(), feature_pool_region_padding must be >= 0.");
@ -123,16 +123,13 @@ namespace dlib
        const shape_predictor_training_options& options
    )
    {
-        dlib::array<array2d<rgb_pixel> > images;
+        dlib::array<array2d<unsigned char> > images;
        std::vector<std::vector<full_object_detection> > objects;
        load_image_dataset(images, objects, dataset_filename);
        shape_predictor predictor = train_shape_predictor_on_images(images, objects, options);
-        std::ofstream fout(predictor_output_filename.c_str(), std::ios::binary);
+        serialize(predictor_output_filename) << predictor;
        int version = 1;
        serialize(predictor, fout);
        serialize(version, fout);
        if (options.be_verbose)
            std::cout << "Training complete, saved predictor to file " << predictor_output_filename << std::endl;
@ -165,7 +162,7 @@ namespace dlib
    )
    {
        // Load the images, no scales can be provided
-        dlib::array<array2d<rgb_pixel> > images;
+        dlib::array<array2d<unsigned char> > images;
        // This interface cannot take the scales parameter.
        std::vector<std::vector<double> > scales;
        std::vector<std::vector<full_object_detection> > objects;
@ -173,14 +170,7 @@ namespace dlib
        // Load the shape predictor
        shape_predictor predictor;
-        int version = 0;
+        deserialize(predictor_filename) >> predictor;
        std::ifstream fin(predictor_filename.c_str(), std::ios::binary);
        if (!fin)
            throw error("Unable to open file " + predictor_filename);
        deserialize(predictor, fin);
        deserialize(version, fin);
        if (version != 1)
            throw error("Unknown shape_predictor format.");
        return test_shape_predictor_with_images(images, objects, scales, predictor);
    }
--- a/tools/python/src/simple_object_detector.h
+++ b/tools/python/src/simple_object_detector.h
@ -276,32 +276,33 @@ namespace dlib
        // Load the detector off disk (We have to use the explicit serialization here
        // so that we have an open file stream)
        simple_object_detector detector;
        int version = 0;
        std::ifstream fin(detector_filename.c_str(), std::ios::binary);
        if (!fin)
            throw error("Unable to open file " + detector_filename);
        deserialize(detector, fin);
-        deserialize(version, fin);
+
        if (version != 1)
            throw error("Unknown simple_object_detector format.");
        /*  Here we need a little hack to deal with whether we are going to be loading a
         *  simple_object_detector (possibly trained outside of Python) or a
-         *  simple_object_detector_py (definitely trained from Python). In order to do
+         *  simple_object_detector_py (definitely trained from Python). In order to do this
-         *  this we peek into the filestream to see if there is more data after the
+         *  we peek into the filestream to see if there is more data after the object
-         *  version number. If there is, it will be the upsampling amount. Therefore,
+         *  detector. If there is, it will be the version and upsampling amount. Therefore,
-         *  by default we set the upsampling amount to -1 so that we can catch when
+         *  by default we set the upsampling amount to -1 so that we can catch when no
-         *  no upsampling amount has been passed (numbers less than 0). If -1 is
+         *  upsampling amount has been passed (numbers less than 0). If -1 is passed, we
-         *  passed, we assume no upsampling and use 0. If a number > 0 is passed,
+         *  assume no upsampling and use 0. If a number > 0 is passed, we use that, else we
-         *  we use that, else we use the upsampling amount cached with the detector
+         *  use the upsampling amount saved in the detector file (if it exists).
         *  (if it exists).
         */
        unsigned int final_upsampling_amount = 0;
-        const unsigned int cached_upsample_amount = fin.peek();
+        if (fin.peek() != EOF)
        {
            int version = 0;
            deserialize(version, fin);
            if (version != 1)
                throw error("Unknown simple_object_detector format.");
            deserialize(final_upsampling_amount, fin);
        }
        if (upsample_amount >= 0)
            final_upsampling_amount = upsample_amount;
        else if (cached_upsample_amount != std::char_traits<wchar_t>::eof())  // peek() returns EOF if no more data
            deserialize(final_upsampling_amount, fin);
        return test_simple_object_detector_with_images(images, final_upsampling_amount, boxes, ignore, detector);
    }