mirror of https://github.com/davisking/dlib.git
Clarified a few comments and simplified the serialization code a bit.
Also just cleaned up a few minor details.
parent 773fe59a34
commit 1ab3482597

@@ -59,16 +59,20 @@ for f in glob.glob(os.path.join(faces_folder_path, "*.jpg")):
     win.clear_overlay()
     win.set_image(img)
 
     # Ask the detector to find the bounding boxes of each face. The 1 in the
     # second argument indicates that we should upsample the image 1 time. This
     # will make everything bigger and allow us to detect more faces.
     dets = detector(img, 1)
     print("Number of faces detected: {}".format(len(dets)))
     for k, d in enumerate(dets):
         print("Detection {}: Left: {} Top: {} Right: {} Bottom: {}".format(
             k, d.left(), d.top(), d.right(), d.bottom()))
-        shapes = predictor(img, d)
-        print("Part 0: {}, Part 1: {} ...".format(shapes.part(0),
-                                                  shapes.part(1)))
-        # Add all facial landmarks one at a time
-        win.add_overlay(shapes)
+        # Get the landmarks/parts for the face in box d.
+        shape = predictor(img, d)
+        print("Part 0: {}, Part 1: {} ...".format(shape.part(0),
+                                                  shape.part(1)))
+        # Draw the face landmarks on the screen.
+        win.add_overlay(shape)
 
     win.add_overlay(dets)
     raw_input("Hit enter to continue")
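
For context: the shape object the new lines produce is a dlib full_object_detection. A minimal sketch of pulling every landmark out as plain (x, y) tuples, assuming the standard dlib Python API and an illustrative image path:

    import dlib
    from skimage import io

    img = io.imread("faces/2007_007763.jpg")           # illustrative path
    detector = dlib.get_frontal_face_detector()
    predictor = dlib.shape_predictor("predictor.dat")  # model file assumed, as in the examples

    for d in detector(img, 1):
        shape = predictor(img, d)
        # full_object_detection exposes num_parts and part(i); collect plain tuples.
        points = [(shape.part(i).x, shape.part(i).y) for i in range(shape.num_parts)]
        print("{} landmarks, first at {}".format(len(points), points[0]))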
@@ -2,9 +2,8 @@
 # The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
 #
 # This simple example shows how to call dlib's optimal linear assignment
-# problem solver.
-# It is an implementation of the famous Hungarian algorithm and is quite fast,
-# operating in O(N^3) time.
+# problem solver.  It is an implementation of the famous Hungarian algorithm
+# and is quite fast, operating in O(N^3) time.
 #
 # COMPILING THE DLIB PYTHON INTERFACE
 #   Dlib comes with a compiled python interface for python 2.7 on MS Windows. If
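
Since this hunk only reworks a comment, here is roughly what calling the solver looks like, sketched from the rest of this example file (the 3x3 cost matrix is illustrative):

    import dlib

    # cost[i][j] is the value of assigning worker i to job j; the solver maximizes it.
    cost = dlib.matrix([[1, 2, 6],
                        [5, 3, 6],
                        [4, 5, 0]])
    assignment = dlib.max_cost_assignment(cost)
    print("Optimal assignment: {}".format(assignment))
    print("Optimal cost: {}".format(dlib.assignment_cost(cost, assignment)))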
@@ -83,45 +83,47 @@ def print_segment(sentence, names):
     sys.stdout.write("\n")
 
 
 # Now let's make some training data. Each example is a sentence as well as a
 # set of ranges which indicate the locations of any names.
 names = dlib.ranges()     # make an array of dlib.range objects.
 segments = dlib.rangess() # make an array of arrays of dlib.range objects.
-sentences = ["The other day I saw a man named Jim Smith",
-             "Davis King is the main author of the dlib Library",
-             "Bob Jones is a name and so is George Clinton",
-             "My dog is named Bob Barker",
-             "ABC is an acronym but John James Smith is a name",
-             "No names in this sentence at all"]
+sentences = []
+
+sentences.append("The other day I saw a man named Jim Smith")
 # We want to detect person names.  So we note that the name is located within
 # the range [8, 10).  Note that we use half open ranges to identify segments.
 # So in this case, the segment identifies the string "Jim Smith".
 names.append(dlib.range(8, 10))
 segments.append(names)
-# make names empty for use again below
-names.clear()
+names.clear() # make names empty for use again below
 
+sentences.append("Davis King is the main author of the dlib Library")
 names.append(dlib.range(0, 2))
 segments.append(names)
 names.clear()
 
+sentences.append("Bob Jones is a name and so is George Clinton")
 names.append(dlib.range(0, 2))
 names.append(dlib.range(8, 10))
 segments.append(names)
 names.clear()
 
+sentences.append("My dog is named Bob Barker")
 names.append(dlib.range(4, 6))
 segments.append(names)
 names.clear()
 
+sentences.append("ABC is an acronym but John James Smith is a name")
 names.append(dlib.range(5, 8))
 segments.append(names)
 names.clear()
 
+sentences.append("No names in this sentence at all")
 segments.append(names)
 names.clear()
 
 
 # Now before we can pass these training sentences to the dlib tools we need to
 # convert them into arrays of vectors as discussed above.  We can use either a
 # sparse or dense representation depending on our needs.  In this example, we
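
A quick way to check the half-open ranges above: dlib indexes the whitespace-split tokens, so [8, 10) selects tokens 8 and 9 but not 10. A small sketch:

    sentence = "The other day I saw a man named Jim Smith"
    tokens = sentence.split()
    # The half-open range [8, 10) covers tokens[8] and tokens[9] only.
    print(" ".join(tokens[8:10]))  # prints: Jim Smith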
@@ -36,6 +36,7 @@ if len(sys.argv) != 2:
     exit()
faces_folder = sys.argv[1]
 
+
 # Now let's do the training.  The train_simple_object_detector() function has a
 # bunch of options, all of which come with reasonable default values.  The next
 # few lines go over some of these options.
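
For reference, the options object these comments describe is created like this; a sketch whose specific values echo the ones set later in this diff (the image-flip flag is an assumption taken from the full example file):

    import dlib

    options = dlib.simple_object_detector_training_options()
    # Faces are left/right symmetric, so mirrored training images come for free.
    options.add_left_right_image_flips = True
    options.C = 5            # SVM C parameter, set further down in this diff
    options.num_threads = 4
    options.be_verbose = True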
@@ -55,6 +56,9 @@ options.C = 5
 options.num_threads = 4
 options.be_verbose = True
 
+
+training_xml_path = os.path.join(faces_folder, "training.xml")
+testing_xml_path = os.path.join(faces_folder, "testing.xml")
 # This function does the actual training.  It will save the final detector to
 # detector.svm.  The input is an XML file that lists the images in the training
 # dataset and also contains the positions of the face boxes.  To create your
@@ -63,11 +67,10 @@ options.be_verbose = True
 # images with boxes.  To see how to use it read the tools/imglab/README.txt
 # file.  But for this example, we just use the training.xml file included with
 # dlib.
-training_xml_path = os.path.join(faces_folder, "training.xml")
-testing_xml_path = os.path.join(faces_folder, "testing.xml")
-
 dlib.train_simple_object_detector(training_xml_path, "detector.svm", options)
 
 
 
 # Now that we have a face detector we can test it.  The first statement tests
 # it on the training data.  It will print(the precision, recall, and then)
 # average precision.
@@ -80,6 +83,10 @@ print("Training accuracy: {}".format(
 print("Testing accuracy: {}".format(
     dlib.test_simple_object_detector(testing_xml_path, "detector.svm")))
 
+
+
+
+
 # Now let's use the detector as you would in a normal application. First we
 # will load it from disk.
 detector = dlib.simple_object_detector("detector.svm")
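
A hedged sketch of what "use the detector as you would in a normal application" amounts to, assuming an image loaded with scikit-image as elsewhere in the example:

    import dlib
    from skimage import io

    detector = dlib.simple_object_detector("detector.svm")
    img = io.imread("faces/2008_002506.jpg")   # illustrative path
    for k, d in enumerate(detector(img)):
        print("Detection {}: Left: {} Top: {} Right: {} Bottom: {}".format(
            k, d.left(), d.top(), d.right(), d.bottom()))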
@@ -106,6 +113,12 @@ for f in glob.glob(os.path.join(faces_folder, "*.jpg")):
     win.add_overlay(dets)
     raw_input("Hit enter to continue")
 
+
+
+
+
+
+
 # Finally, note that you don't have to use the XML based input to
 # train_simple_object_detector().  If you have already loaded your training
 # images and bounding boxes for the objects then you can call it as shown
@@ -126,10 +139,10 @@ boxes_img2 = ([dlib.rectangle(left=154, top=46, right=228, bottom=121),
 boxes = [boxes_img1, boxes_img2]
 
 detector2 = dlib.train_simple_object_detector(images, boxes, options)
-# We could save this detector by uncommenting the following
+# We could save this detector to disk by uncommenting the following.
 #detector2.save('detector2.svm')
 
-# Now let's load the trained detector and look at its HOG filter!
+# Now let's look at its HOG filter!
 win_det.set_image(detector2)
 raw_input("Hit enter to continue")
@@ -8,7 +8,7 @@
 #   In particular, we will train a face landmarking model based on a small
 #   dataset and then evaluate it.  If you want to visualize the output of the
 #   trained model on some images then you can run the
-#   face_landmark_detection.py example program with sp.dat as the input
+#   face_landmark_detection.py example program with predictor.dat as the input
 #   model.
 #
 #   It should also be noted that this kind of model, while often used for face
@@ -49,7 +49,7 @@ options = dlib.shape_predictor_training_options()
 # Now make the object responsible for training the model.
 # This algorithm has a bunch of parameters you can mess with.  The
 # documentation for the shape_predictor_trainer explains all of them.
-# You should also read Kazemi paper which explains all the parameters
+# You should also read Kazemi's paper which explains all the parameters
 # in great detail.  However, here I'm just setting three of them
 # differently than their default values.  I'm doing this because we
 # have a very small dataset.  In particular, setting the oversampling
@@ -63,33 +63,35 @@ options.nu = 0.05
 options.tree_depth = 2
 options.be_verbose = True
 
-# This function does the actual training.  It will save the final predictor to
-# predictor.dat.  The input is an XML file that lists the images in the training
-# dataset and also contains the positions of the face parts.
+# dlib.train_shape_predictor() does the actual training.  It will save the
+# final predictor to predictor.dat.  The input is an XML file that lists the
+# images in the training dataset and also contains the positions of the face
+# parts.
 training_xml_path = os.path.join(faces_folder, "training_with_face_landmarks.xml")
-testing_xml_path = os.path.join(faces_folder, "testing_with_face_landmarks.xml")
-
 dlib.train_shape_predictor(training_xml_path, "predictor.dat", options)
 
-# Now that we have a facial landmark predictor we can test it.  The first
-# statement tests it on the training data.  It will print the mean average error
-print("")
-print("Training accuracy: {}".format(
+# Now that we have a model we can test it.  dlib.test_shape_predictor()
+# measures the average distance between a face landmark output by the
+# shape_predictor and where it should be according to the truth data.
+print("\nTraining accuracy: {}".format(
     dlib.test_shape_predictor(training_xml_path, "predictor.dat")))
-# However, to get an idea if it really worked without overfitting we need to
-# run it on images it wasn't trained on.  The next line does this.  Happily, we
-# see that the object detector works perfectly on the testing images.
+# The real test is to see how well it does on data it wasn't trained on.  We
+# trained it on a very small dataset so the accuracy is not extremely high, but
+# it's still doing quite well.  Moreover, if you train it on one of the large
+# face landmarking datasets you will obtain state-of-the-art results, as shown
+# in the Kazemi paper.
+testing_xml_path = os.path.join(faces_folder, "testing_with_face_landmarks.xml")
 print("Testing accuracy: {}".format(
     dlib.test_shape_predictor(testing_xml_path, "predictor.dat")))
 
-# Now let's use the detector as you would in a normal application. First we
-# will load it from disk. We also need to load a face detector to provide the
-# initial estimate of the facial location
-detector = dlib.get_frontal_face_detector()
+# Now let's use it as you would in a normal application.  First we will load it
+# from disk.  We also need to load a face detector to provide the initial
+# estimate of the facial location.
+predictor = dlib.shape_predictor("predictor.dat")
+detector = dlib.get_frontal_face_detector()
 
-# Now let's run the detector and predictor over the images in the faces folder
-# and display the results.
+# Now let's run the detector and shape_predictor over the images in the faces
+# folder and display the results.
 print("Showing detections and predictions on the images in the faces folder...")
 win = dlib.image_window()
 for f in glob.glob(os.path.join(faces_folder, "*.jpg")):
@@ -99,21 +101,21 @@ for f in glob.glob(os.path.join(faces_folder, "*.jpg")):
     win.clear_overlay()
     win.set_image(img)
 
     # Ask the detector to find the bounding boxes of each face. The 1 in the
     # second argument indicates that we should upsample the image 1 time. This
     # will make everything bigger and allow us to detect more faces.
     dets = detector(img, 1)
     print("Number of faces detected: {}".format(len(dets)))
     for k, d in enumerate(dets):
         print("Detection {}: Left: {} Top: {} Right: {} Bottom: {}".format(
             k, d.left(), d.top(), d.right(), d.bottom()))
-        shapes = predictor(img, d)
-        print("Part 0: {}, Part 1: {} ...".format(shapes.part(0),
-                                                  shapes.part(1)))
-        # Add all facial landmarks one at a time
-        win.add_overlay(shapes)
+        # Get the landmarks/parts for the face in box d.
+        shape = predictor(img, d)
+        print("Part 0: {}, Part 1: {} ...".format(shape.part(0),
+                                                  shape.part(1)))
+        # Draw the face landmarks on the screen.
+        win.add_overlay(shape)
 
     win.add_overlay(dets)
     raw_input("Hit enter to continue")
 
 # Finally, note that you don't have to use the XML based input to
 # train_shape_predictor().  If you have already loaded your training
 # images and full_object_detections for the objects then you can call it with
 # the existing objects.
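
The overload referred to here is the (images, object_detections, options) form bound later in this commit. A hypothetical sketch of calling it, assuming the inputs were already built in memory:

    # images:     a list of numpy arrays (RGB or grayscale), per the docstring below
    # detections: a list of lists of dlib.full_object_detection, one list per image
    # options:    a dlib.shape_predictor_training_options() configured as above
    predictor2 = dlib.train_shape_predictor(images, detections, options)
    predictor2.save("predictor2.dat")   # the .save() method added by this commit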
@@ -51,9 +51,7 @@ void add_overlay_rect (
     const rgb_pixel& color
 )
 {
-    std::vector<rectangle> rects;
-    rects.push_back(rect);
-    win.add_overlay(rects, color);
+    win.add_overlay(rect, color);
 }
 
 void add_overlay_parts (
@@ -62,9 +60,7 @@ void add_overlay_parts (
     const rgb_pixel& color
 )
 {
-    std::vector<full_object_detection> detections;
-    detections.push_back(detection);
-    win.add_overlay(render_face_detections(detections, color));
+    win.add_overlay(render_face_detections(detection, color));
 }
 
 boost::shared_ptr<image_window> make_image_window_from_image(object img)
@@ -257,8 +257,9 @@ ensures \n\
     class_<type>("fhog_object_detector",
 "This object represents a sliding window histogram-of-oriented-gradients based object detector.")
         .def("__init__", make_constructor(&load_object_from_file<type>),
-"Loads a simple_object_detector from a file that contains the output of the \n\
-train_simple_object_detector() routine.")
+"Loads an object detector from a file that contains the output of the \n\
+train_simple_object_detector() routine or a serialized C++ object of type\n\
+object_detector<scan_fhog_pyramid<pyramid_down<6>>>.")
         .def("__call__", run_detector_with_upscale, (arg("image"), arg("upsample_num_times")=0),
 "requires \n\
     - image is a numpy ndarray containing either an 8bit grayscale or RGB \n\
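
From Python, the constructor and __call__ documented in this hunk look roughly like this; that the class is reachable as dlib.fhog_object_detector is an assumption based on the class_ name above:

    import dlib
    from skimage import io

    # Loads either a train_simple_object_detector() output or a detector
    # serialized from C++ as object_detector<scan_fhog_pyramid<pyramid_down<6>>>.
    det = dlib.fhog_object_detector("detector.svm")
    img = io.imread("faces/2007_007763.jpg")   # illustrative path
    rects = det(img, upsample_num_times=1)     # upsampling once helps find smaller faces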
@@ -39,9 +39,10 @@ namespace dlib
     inline void save_simple_object_detector(const simple_object_detector& detector, const std::string& detector_output_filename)
     {
         std::ofstream fout(detector_output_filename.c_str(), std::ios::binary);
-        int version = 1;
         serialize(detector, fout);
-        serialize(version, fout);
+        // Don't need to save the version or upsampling amount because we want to write
+        // out the object detector just like the C++ code that serializes an
+        // object_detector would.  We also don't know the upsampling amount anyway.
     }
 }
@@ -38,9 +38,7 @@ full_object_detection run_predictor (
 void save_shape_predictor(const shape_predictor& predictor, const std::string& predictor_output_filename)
 {
     std::ofstream fout(predictor_output_filename.c_str(), std::ios::binary);
-    int version = 1;
     serialize(predictor, fout);
-    serialize(version, fout);
 }
 
 // ----------------------------------------------------------------------------------------
@@ -95,7 +93,7 @@ inline shape_predictor train_shape_predictor_on_images_py (
         throw dlib::error("The length of the detections list must match the length of the images list.");
 
     std::vector<std::vector<full_object_detection> > detections(num_images);
-    dlib::array<array2d<rgb_pixel> > images(num_images);
+    dlib::array<array2d<unsigned char> > images(num_images);
     images_and_nested_params_to_dlib(pyimages, pydetections, images, detections);
 
     return train_shape_predictor_on_images(images, detections, options);
@@ -121,9 +119,9 @@ inline double test_shape_predictor_with_images_py (
     std::vector<std::vector<double> > scales;
     if (num_scales > 0)
         scales.resize(num_scales);
-    dlib::array<array2d<rgb_pixel> > images(num_images);
+    dlib::array<array2d<unsigned char> > images(num_images);
 
-    // Now copy the data into dlib based objects so we can call the trainer.
+    // Now copy the data into dlib based objects so we can call the testing routine.
     for (unsigned long i = 0; i < num_images; ++i)
     {
         const unsigned long num_boxes = len(pydetections[i]);
@@ -193,7 +191,7 @@ void bind_shape_predictors()
             &type::nu,
 "The regularization parameter.  Larger values of this parameter \
 will cause the algorithm to fit the training data better but may also \
-cause overfitting.")
+cause overfitting.  The value must be in the range (0, 1].")
         .add_property("oversampling_amount", &type::oversampling_amount,
             &type::oversampling_amount,
 "The number of randomly selected initial starting points sampled for each training example")
@@ -232,7 +230,7 @@ train_shape_predictor() routine.")
     - box is the bounding box to begin the shape prediction inside. \n\
 ensures \n\
     - This function runs the shape predictor on the input image and returns \n\
-      a single full object detection.")
+      a single full_object_detection.")
         .def("save", save_shape_predictor, (arg("predictor_output_filename")), "Save a shape_predictor to the provided path.")
         .def_pickle(serialize_pickle<type>());
 }
@@ -241,36 +239,28 @@ ensures \n\
         (arg("images"), arg("object_detections"), arg("options")),
 "requires \n\
     - options.lambda > 0 \n\
-    - options.nu > 0 \n\
+    - 0 < options.nu <= 1 \n\
     - options.feature_pool_region_padding >= 0 \n\
     - len(images) == len(object_detections) \n\
     - images should be a list of numpy matrices that represent images, either RGB or grayscale. \n\
     - object_detections should be a list of lists of dlib.full_object_detection objects. \
       Each dlib.full_object_detection contains the bounding box and the lists of points that make up the object parts.\n\
 ensures \n\
-    - Uses the shape_predictor_trainer to train a \n\
-      shape_predictor based on the provided labeled images and full object detections.\n\
-    - This function will apply a reasonable set of default parameters and \n\
-      preprocessing techniques to the training procedure for shape_predictors \n\
-      objects.  So the point of this function is to provide you with a very easy \n\
-      way to train a basic shape predictor. \n\
+    - Uses dlib's shape_predictor_trainer object to train a \n\
+      shape_predictor based on the provided labeled images, full_object_detections, and options.\n\
     - The trained shape_predictor is returned");
 
     def("train_shape_predictor", train_shape_predictor,
         (arg("dataset_filename"), arg("predictor_output_filename"), arg("options")),
 "requires \n\
     - options.lambda > 0 \n\
-    - options.nu > 0 \n\
+    - 0 < options.nu <= 1 \n\
     - options.feature_pool_region_padding >= 0 \n\
 ensures \n\
-    - Uses the shape_predictor_trainer to train a \n\
+    - Uses dlib's shape_predictor_trainer to train a \n\
       shape_predictor based on the labeled images in the XML file \n\
-      dataset_filename.  This function assumes the file dataset_filename is in the \n\
+      dataset_filename and the provided options.  This function assumes the file dataset_filename is in the \n\
       XML format produced by dlib's save_image_dataset_metadata() routine. \n\
-    - This function will apply a reasonable set of default parameters and \n\
-      preprocessing techniques to the training procedure for shape_predictors \n\
-      objects.  So the point of this function is to provide you with a very easy \n\
-      way to train a basic shape predictor. \n\
     - The trained shape predictor is serialized to the file predictor_output_filename.");
 
     def("test_shape_predictor", test_shape_predictor_py,
@@ -73,8 +73,8 @@ namespace dlib
     {
         if (options.lambda <= 0)
             throw error("Invalid lambda value given to train_shape_predictor(), lambda must be > 0.");
-        if (options.nu <= 0)
-            throw error("Invalid nu value given to train_shape_predictor(), nu must be > 0.");
+        if (!(0 < options.nu && options.nu <= 1))
+            throw error("Invalid nu value given to train_shape_predictor().  It is required that 0 < nu <= 1.");
         if (options.feature_pool_region_padding < 0)
             throw error("Invalid feature_pool_region_padding value given to train_shape_predictor(), feature_pool_region_padding must be >= 0.");
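
From Python, the tightened check surfaces as an exception when nu falls outside (0, 1]. A hedged sketch, assuming the training XML from the earlier example exists so execution reaches the validation:

    import dlib

    options = dlib.shape_predictor_training_options()
    options.nu = 1.5   # out of range: the trainer rejects it
    try:
        dlib.train_shape_predictor("training_with_face_landmarks.xml",
                                   "predictor.dat", options)
    except Exception as e:
        print(e)   # Invalid nu value ... It is required that 0 < nu <= 1.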
@@ -123,16 +123,13 @@ namespace dlib
         const shape_predictor_training_options& options
     )
     {
-        dlib::array<array2d<rgb_pixel> > images;
+        dlib::array<array2d<unsigned char> > images;
         std::vector<std::vector<full_object_detection> > objects;
         load_image_dataset(images, objects, dataset_filename);
 
         shape_predictor predictor = train_shape_predictor_on_images(images, objects, options);
 
-        std::ofstream fout(predictor_output_filename.c_str(), std::ios::binary);
-        int version = 1;
-        serialize(predictor, fout);
-        serialize(version, fout);
+        serialize(predictor_output_filename) << predictor;
 
         if (options.be_verbose)
             std::cout << "Training complete, saved predictor to file " << predictor_output_filename << std::endl;
@@ -165,7 +162,7 @@ namespace dlib
     )
     {
-        // Load the images, no scales can be provided
-        dlib::array<array2d<rgb_pixel> > images;
+        dlib::array<array2d<unsigned char> > images;
+        // This interface cannot take the scales parameter.
         std::vector<std::vector<double> > scales;
         std::vector<std::vector<full_object_detection> > objects;
@@ -173,14 +170,7 @@ namespace dlib
 
         // Load the shape predictor
         shape_predictor predictor;
-        int version = 0;
-        std::ifstream fin(predictor_filename.c_str(), std::ios::binary);
-        if (!fin)
-            throw error("Unable to open file " + predictor_filename);
-        deserialize(predictor, fin);
-        deserialize(version, fin);
-        if (version != 1)
-            throw error("Unknown shape_predictor format.");
+        deserialize(predictor_filename) >> predictor;
 
         return test_shape_predictor_with_images(images, objects, scales, predictor);
     }
@@ -276,32 +276,33 @@ namespace dlib
         // Load the detector off disk (We have to use the explicit serialization here
         // so that we have an open file stream)
         simple_object_detector detector;
-        int version = 0;
         std::ifstream fin(detector_filename.c_str(), std::ios::binary);
         if (!fin)
             throw error("Unable to open file " + detector_filename);
         deserialize(detector, fin);
-        deserialize(version, fin);
-        if (version != 1)
-            throw error("Unknown simple_object_detector format.");
 
 
         /* Here we need a little hack to deal with whether we are going to be loading a
          * simple_object_detector (possibly trained outside of Python) or a
-         * simple_object_detector_py (definitely trained from Python). In order to do
-         * this we peek into the filestream to see if there is more data after the
-         * version number. If there is, it will be the upsampling amount. Therefore,
-         * by default we set the upsampling amount to -1 so that we can catch when
-         * no upsampling amount has been passed (numbers less than 0). If -1 is
-         * passed, we assume no upsampling and use 0. If a number > 0 is passed,
-         * we use that, else we use the upsampling amount cached with the detector
-         * (if it exists).
+         * simple_object_detector_py (definitely trained from Python). In order to do this
+         * we peek into the filestream to see if there is more data after the object
+         * detector. If there is, it will be the version and upsampling amount. Therefore,
+         * by default we set the upsampling amount to -1 so that we can catch when no
+         * upsampling amount has been passed (numbers less than 0). If -1 is passed, we
+         * assume no upsampling and use 0. If a number > 0 is passed, we use that, else we
+         * use the upsampling amount saved in the detector file (if it exists).
          */
         unsigned int final_upsampling_amount = 0;
-        const unsigned int cached_upsample_amount = fin.peek();
+        if (fin.peek() != EOF)
+        {
+            int version = 0;
+            deserialize(version, fin);
+            if (version != 1)
+                throw error("Unknown simple_object_detector format.");
+            deserialize(final_upsampling_amount, fin);
+        }
         if (upsample_amount >= 0)
             final_upsampling_amount = upsample_amount;
-        else if (cached_upsample_amount != std::char_traits<wchar_t>::eof()) // peek() returns EOF if no more data
-            deserialize(final_upsampling_amount, fin);
 
         return test_simple_object_detector_with_images(images, final_upsampling_amount, boxes, ignore, detector);
     }
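
The peek logic above distinguishes two on-disk layouts: a bare object_detector written by C++, and the Python variant that appends a version and an upsampling amount. A small Python analogue of the same read-the-mandatory-part-then-peek pattern (the byte layout here is illustrative, not dlib's actual serialization format):

    import io

    def read_detector_record(stream):
        payload = stream.read(4)   # stands in for the serialized object_detector
        upsample = 0
        if stream.peek(1):         # trailing data: a version byte then an upsample byte
            version = ord(stream.read(1))
            if version != 1:
                raise ValueError("unknown simple_object_detector format")
            upsample = ord(stream.read(1))
        return payload, upsample

    stream = io.BufferedReader(io.BytesIO(b"DET!\x01\x02"))
    print(read_detector_record(stream))   # (b'DET!', 2)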