Merge branch 'visionworkz-master'

2024-11-01 10:14:53 +08:00 · 2017-09-16 14:54:56 -04:00 · 2017-09-16 14:54:56 -04:00 · de32c75c15
commit de32c75c15
parent b4bd6f8dee 532552627a
2 changed files with 210 additions and 0 deletions
--- a/python_examples/face_clustering.py
+++ b/python_examples/face_clustering.py
@ -0,0 +1,126 @@
 #!/usr/bin/python
 # The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
 #
 #   This example shows how to use dlib's face recognition tool for clustering using chinese_whispers.
 #   This is useful when you have a collection of photographs which you know are linked to
 #   a particular person, but the person may be photographed with multiple other people.
 #   In this example, we assume the largest cluster will contain photos of the common person in the
 #   collection of photographs. Then, we save extracted images of the face in the largest cluster in
 #   a 150x150 px format which is suitable for jittering and loading to perform metric learning (as shown
 #   in the dnn_metric_learning_on_images_ex.cpp example).
 #   https://github.com/davisking/dlib/blob/master/examples/dnn_metric_learning_on_images_ex.cpp
 #
 # COMPILING/INSTALLING THE DLIB PYTHON INTERFACE
 #   You can install dlib using the command:
 #       pip install dlib
 #
 #   Alternatively, if you want to compile dlib yourself then go into the dlib
 #   root folder and run:
 #       python setup.py install
 #   or
 #       python setup.py install --yes USE_AVX_INSTRUCTIONS
 #   if you have a CPU that supports AVX instructions, since this makes some
 #   things run faster.  This code will also use CUDA if you have CUDA and cuDNN
 #   installed.
 #
 #   Compiling dlib should work on any operating system so long as you have
 #   CMake and boost-python installed.  On Ubuntu, this can be done easily by
 #   running the command:
 #       sudo apt-get install libboost-python-dev cmake
 #
 #   Also note that this example requires scikit-image which can be installed
 #   via the command:
 #       pip install scikit-image
 #   Or downloaded from http://scikit-image.org/download.html. 
 import sys
 import os
 import dlib
 import glob
 from skimage import io
 # The script needs exactly four arguments: the shape predictor file, the face
 # recognition model file, the folder of input images and the output folder.
 # With any other argument count, print the usage text and stop.
 if len(sys.argv) != 5:
    print(
        "Call this program like this:\n"
        "   ./face_clustering.py shape_predictor_5_face_landmarks.dat dlib_face_recognition_resnet_model_v1.dat ../examples/faces output_folder\n"
        "You can download a trained facial shape predictor and recognition model from:\n"
        "    http://dlib.net/files/shape_predictor_5_face_landmarks.dat.bz2\n"
        "    http://dlib.net/files/dlib_face_recognition_resnet_model_v1.dat.bz2")
    # Use sys.exit() instead of the exit() helper: the helper is injected by the
    # site module for interactive use and is not available when Python runs
    # without it.  A non-zero status tells calling scripts the run did not happen.
    sys.exit(1)
 # Unpack the four positional command line arguments in one step.
 (predictor_path,
  face_rec_model_path,
  faces_folder_path,
  output_folder_path) = sys.argv[1:5]
 # Three models are required: a detector that finds face bounding boxes, a shape
 # predictor that places landmarks inside each box so the face can be localized
 # precisely, and the face recognition model itself.
 detector = dlib.get_frontal_face_detector()
 sp = dlib.shape_predictor(predictor_path)
 facerec = dlib.face_recognition_model_v1(face_rec_model_path)
 # Visit every .jpg in the input folder, detect its faces and keep one 128D
 # descriptor per face.  `images` stores, at the same position, the image and
 # landmarks each descriptor was computed from.
 descriptors = []
 images = []
 for image_file in glob.glob(os.path.join(faces_folder_path, "*.jpg")):
    print("Processing file: {}".format(image_file))
    img = io.imread(image_file)
    # The second argument (1) upsamples the image once before detection, which
    # makes everything bigger and lets the detector find more faces.
    dets = detector(img, 1)
    print("Number of faces detected: {}".format(len(dets)))
    for det in dets:
        # Landmarks for the face inside this bounding box.
        shape = sp(img, det)
        # The descriptor is computed from the image together with the landmarks.
        descriptors.append(facerec.compute_face_descriptor(img, shape))
        images.append((img, shape))
 # Group the descriptors into clusters; 0.5 is the threshold handed to the
 # clustering routine.
 labels = dlib.chinese_whispers_clustering(descriptors, 0.5)
 num_classes = len(set(labels))
 print("Number of clusters: {}".format(num_classes))
 # Scan the labels 0 .. num_classes-1 for the one with the most members.  The
 # comparison is strict, so on a tie the lowest label is kept.
 biggest_class = None
 biggest_class_length = 0
 for candidate in range(num_classes):
    member_count = sum(1 for label in labels if label == candidate)
    if member_count > biggest_class_length:
        biggest_class, biggest_class_length = candidate, member_count
 print("Biggest cluster id number: {}".format(biggest_class))
 print("Number of faces in biggest cluster: {}".format(biggest_class_length))
 # Positions (into `labels`, and therefore into `images`) of every face that
 # was assigned to the biggest cluster.
 indices = [position for position, label in enumerate(labels) if label == biggest_class]
 print("Indices of images in the biggest cluster: {}".format(str(indices)))
 # Create the output directory if it is not there yet.
 if not os.path.isdir(output_folder_path):
    os.makedirs(output_folder_path)
 # Write one face chip per member of the biggest cluster.  Each file name
 # prefix is "face_<n>", numbered in the order of `indices`.
 print("Saving faces in largest cluster to output folder...")
 for chip_number, face_index in enumerate(indices):
    face_image, face_shape = images[face_index]
    chip_prefix = os.path.join(output_folder_path, "face_{}".format(chip_number))
    dlib.save_face_chip(face_image, face_shape, chip_prefix)
--- a/tools/python/src/face_recognition.cpp
+++ b/tools/python/src/face_recognition.cpp
@ -9,6 +9,8 @@
 #include <dlib/dnn.h>
 #include <dlib/image_transforms.h>
 #include "indexing.h"
 #include <dlib/image_io.h>
 #include <dlib/clustering.h>
 using namespace dlib;
@ -140,6 +142,78 @@ private:
    anet_type net;
 };
 // ----------------------------------------------------------------------------------------
 // Clusters descriptors with dlib::chinese_whispers.
 //
 //   descriptors - Python list whose items are extracted as matrix<double,0,1>.
 //   threshold   - two descriptors are connected in the graph when the length of
 //                 their difference is strictly less than this value.
 //
 // Returns a Python list with one cluster label per descriptor, in input order.
 boost::python::list chinese_whispers_clustering(boost::python::list descriptors, float threshold)
 {
    boost::python::list clusters;
    const size_t num_descriptors = len(descriptors);
    // Build a graph with one node per descriptor and an edge between every pair
    // that is closer than the threshold, then let the Chinese whispers graph
    // clustering algorithm decide which nodes belong to which cluster.
    std::vector<sample_pair> edges;
    std::vector<unsigned long> labels;
    for (size_t i = 0; i < num_descriptors; ++i)
    {
        // chinese_whispers() sizes its output from the largest node index that
        // appears in the edge list.  Without this self edge a descriptor that has
        // no close neighbour could be left out, making the returned list shorter
        // than the input (or empty when no pair is close enough).
        edges.push_back(sample_pair(i,i));

        // The outer descriptor does not change inside the inner loop, so it is
        // extracted once per row instead of once per pair.
        matrix<double,0,1>& first_descriptor = boost::python::extract<matrix<double,0,1>&>(descriptors[i]);
        for (size_t j = i+1; j < num_descriptors; ++j)
        {
            matrix<double,0,1>& second_descriptor = boost::python::extract<matrix<double,0,1>&>(descriptors[j]);
            if (length(first_descriptor-second_descriptor) < threshold)
                edges.push_back(sample_pair(i,j));
        }
    }
    // The cluster count returned by chinese_whispers() is not needed here.
    chinese_whispers(edges, labels);
    for (size_t i = 0; i < labels.size(); ++i)
    {
        clusters.append(labels[i]);
    }
    return clusters;
 }
 // Extracts one face chip per entry of `faces` from `img` (chip details are
 // requested with size 150 and padding 0.25) and saves each chip as a JPEG.
 // When `faces` holds more than one entry the files are named
 // "<chip_filename>_1.jpg", "<chip_filename>_2.jpg", ...; otherwise the single
 // chip is written to "<chip_filename>.jpg".
 void save_face_chips (
    object img,
    const std::vector<full_object_detection>& faces,
    const std::string& chip_filename
 )
 {
    std::vector<chip_details> chip_locations;
    for (const auto& face : faces)
        chip_locations.push_back(get_face_chip_details(face, 150, 0.25));

    dlib::array<matrix<rgb_pixel>> chips;
    extract_image_chips(numpy_rgb_image(img), chip_locations, chips);

    // Numbered suffixes are only used when several faces were passed in.
    const bool number_the_files = faces.size() > 1;
    int chip_number = 0;
    for (auto& chip : chips)
    {
        ++chip_number;
        const std::string suffix = number_the_files ? "_" + std::to_string(chip_number) : std::string();
        save_jpeg(chip, chip_filename + suffix + ".jpg");
    }
 }
 // Single-face convenience wrapper: puts `face` into a one-element vector and
 // forwards to save_face_chips() with the same image and file name prefix.
 void save_face_chip (
    object img,
    const full_object_detection& face,
    const std::string& chip_filename
 )
 {
    std::vector<full_object_detection> single_face;
    single_face.push_back(face);
    save_face_chips(img, single_face, chip_filename);
 }
 // ----------------------------------------------------------------------------------------
@ -158,6 +232,16 @@ void bind_face_recognition()
            );
    }
    def("save_face_chip", &save_face_chip, (arg("img"),arg("face"),arg("chip_filename")),
        "Takes an image and a full_object_detection that references a face in that image and saves the face with the specified file name prefix.  The face will be rotated upright and scaled to 150x150 pixels."
        );
    def("save_face_chips", &save_face_chips, (arg("img"),arg("faces"),arg("chip_filename")),
        "Takes an image and a full_object_detections object that reference faces in that image and saves the faces with the specified file name prefix.  The faces will be rotated upright and scaled to 150x150 pixels."
        );
    def("chinese_whispers_clustering", &chinese_whispers_clustering, (arg("descriptors"), arg("threshold")),
        "Takes a list of descriptors and returns a list that contains a label for each descriptor. Clustering is done using dlib::chinese_whispers."
        );
    {
    typedef std::vector<full_object_detection> type;
    class_<type>("full_object_detections", "An array of full_object_detection objects.")