Added face clustering example to Python API

2024-11-01 10:14:53 +08:00 · 2017-09-06 02:05:42 +08:00 · 2017-09-06 02:05:42 +08:00 · 8b48c09c7b
commit 8b48c09c7b
parent f5a68ded86
2 changed files with 226 additions and 0 deletions
--- a/python_examples/face_clustering.py
+++ b/python_examples/face_clustering.py
@ -0,0 +1,142 @@
+#!/usr/bin/python
+# The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
+#
+#   This example shows how to use dlib's face recognition tool for clustering using chinese_whispers.
+#   This is useful when you have a collection of photographs which you know are linked to
+#   a particular person, but the person may be photographed with multiple other people.
+#   In this example, we assume the largest cluster will contain photos of the common person in the
+#   collection of photographs. Then, we save extracted images of the face in the largest cluster in
+#   a 150x150 px format which is suitable for jittering and loading to perform metric learning (as shown
+#   in the dnn_metric_learning_on_images_ex.cpp example).
+#   https://github.com/davisking/dlib/blob/master/examples/dnn_metric_learning_on_images_ex.cpp
+#
+# COMPILING/INSTALLING THE DLIB PYTHON INTERFACE
+#   You can install dlib using the command:
+#       pip install dlib
+#
+#   Alternatively, if you want to compile dlib yourself then go into the dlib
+#   root folder and run:
+#       python setup.py install
+#   or
+#       python setup.py install --yes USE_AVX_INSTRUCTIONS
+#   if you have a CPU that supports AVX instructions, since this makes some
+#   things run faster.  This code will also use CUDA if you have CUDA and cuDNN
+#   installed.
+#
+#   Compiling dlib should work on any operating system so long as you have
+#   CMake and boost-python installed.  On Ubuntu, this can be done easily by
+#   running the command:
+#       sudo apt-get install libboost-python-dev cmake
+#
+#   Also note that this example requires scikit-image which can be installed
+#   via the command:
+#       pip install scikit-image
+#   Or downloaded from http://scikit-image.org/download.html. 
+
+import sys
+import os
+import dlib
+import glob
+from skimage import io
+
+if len(sys.argv) != 5:
+    print(
+        "Call this program like this:\n"
+        "   ./face_clustering.py shape_predictor_68_face_landmarks.dat dlib_face_recognition_resnet_model_v1.dat ../examples/faces output_folder\n"
+        "You can download a trained facial shape predictor and recognition model from:\n"
+        "    http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2\n"
+        "    http://dlib.net/files/dlib_face_recognition_resnet_model_v1.dat.bz2")
+    exit()
+
+predictor_path = sys.argv[1]
+face_rec_model_path = sys.argv[2]
+faces_folder_path = sys.argv[3]
+output_folder_path = sys.argv[4]
+
+# Load all the models we need: a detector to find the faces, a shape predictor
+# to find face landmarks so we can precisely localize the face, and finally the
+# face recognition model.
+detector = dlib.get_frontal_face_detector()
+sp = dlib.shape_predictor(predictor_path)
+facerec = dlib.face_recognition_model_v1(face_rec_model_path)
+
+descriptors = []
+images = []
+
+# Now process all the images
+for f in glob.glob(os.path.join(faces_folder_path, "*.jpg")):
+    print("Processing file: {}".format(f))
+    img = io.imread(f)
+
+    # Ask the detector to find the bounding boxes of each face. The 1 in the
+    # second argument indicates that we should upsample the image 1 time. This
+    # will make everything bigger and allow us to detect more faces.
+    dets = detector(img, 1)
+    print("Number of faces detected: {}".format(len(dets)))
+
+    # Now process each face we found.
+    for k, d in enumerate(dets):
+        # Get the landmarks/parts for the face in box d.
+        shape = sp(img, d)
+        # Draw the face landmarks on the screen so we can see what face is currently being processed.
+
+        # Compute the 128D vector that describes the face in img identified by
+        # shape.  In general, if two face descriptor vectors have a Euclidean
+        # distance between them less than 0.6 then they are from the same
+        # person, otherwise they are from different people. Here we just print
+        # the vector to the screen.
+        face_descriptor = facerec.compute_face_descriptor(img, shape)
+        descriptors.append(face_descriptor)
+        images.append((img, shape))
+        # It should also be noted that you can also call this function like this:
+        #  face_descriptor = facerec.compute_face_descriptor(img, shape, 100)
+        # The version of the call without the 100 gets 99.13% accuracy on LFW
+        # while the version with 100 gets 99.38%.  However, the 100 makes the
+        # call 100x slower to execute, so choose whatever version you like.  To
+        # explain a little, the 3rd argument tells the code how many times to
+        # jitter/resample the image.  When you set it to 100 it executes the
+        # face descriptor extraction 100 times on slightly modified versions of
+        # the face and returns the average result.  You could also pick a more
+        # middle value, such as 10, which is only 10x slower but still gets an
+        # LFW accuracy of 99.3%.
+
+labels = facerec.cluster(descriptors)
+label_classes = list(set(labels))
+label_classes.sort()
+num_classes = len(label_classes)
+print("Number of clusters: {}".format(num_classes))
+print("Labels classes: {}".format(str(label_classes)))
+
+# Find biggest class
+biggest_class = None
+biggest_class_length = 0
+for i in range(0, num_classes):
+    class_length = len([label for label in labels if label == i])
+    if class_length > biggest_class_length:
+        biggest_class_length = class_length
+        biggest_class = i
+
+print("Biggest class: {}".format(biggest_class))
+print("Biggest class length: {}".format(biggest_class_length))
+
+# Find the indices for the biggest class
+indices = []
+for i, label in enumerate(labels):
+    if label == biggest_class:
+        indices.append(i)
+
+print("Biggest class indices: {}".format(str(indices)))
+
+# Ensure output directory exists
+if not os.path.isdir(output_folder_path):
+    os.makedirs(output_folder_path)
+
+# Save the extracted faces
+for i, index in enumerate(indices):
+    img, shape = images[index]
+    file_path = os.path.join(output_folder_path, "face_" + str(i))
+    facerec.save_image_chip(img, shape, file_path)
+    
+    
+
+
--- a/tools/python/src/face_recognition.cpp
+++ b/tools/python/src/face_recognition.cpp
@ -9,6 +9,8 @@
 #include <dlib/dnn.h>
 #include <dlib/image_transforms.h>
 #include "indexing.h"
+#include <dlib/image_io.h>
+#include <dlib/clustering.h>


 using namespace dlib;
@ -37,6 +39,79 @@ public:
        cropper->set_max_rotation_degrees(3);
    }

+    // Groups face descriptors by identity with the Chinese whispers graph
+    // clustering algorithm.
+    //
+    // descriptors: Python list whose items can each be extracted as a
+    //              matrix<double,0,1> (e.g. the values returned by
+    //              compute_face_descriptor).
+    // returns:     Python list with exactly one cluster label per input
+    //              descriptor, in the same order as the input.
+    boost::python::list cluster(boost::python::list descriptors)
+    {
+        const size_t num_descriptors = len(descriptors);
+
+        // Convert every Python object exactly once up front instead of
+        // re-extracting it for each of the O(n^2) pair comparisons below.
+        std::vector<matrix<double,0,1>> faces;
+        faces.reserve(num_descriptors);
+        for (size_t i = 0; i < num_descriptors; ++i)
+            faces.push_back(boost::python::extract<matrix<double,0,1>>(descriptors[i])());
+
+        // Build a graph of connected faces and then use the Chinese whispers graph
+        // clustering algorithm to identify how many people there are and which faces
+        // belong to whom.
+        std::vector<sample_pair> edges;
+        for (size_t i = 0; i < num_descriptors; ++i)
+        {
+            // Always give each face an edge to itself.  chinese_whispers() sizes its
+            // label vector from the largest node index that appears in the edges, so
+            // without this a face that is not close to any other face could be left
+            // without a label and the result would be shorter than the input.
+            edges.push_back(sample_pair(i,i));
+
+            for (size_t j = i+1; j < num_descriptors; ++j)
+            {
+                // Faces are connected in the graph if they are close enough.  Here we check if
+                // the distance between two face descriptors is less than 0.6, which is the
+                // decision threshold the network was trained to use.
+                if (length(faces[i]-faces[j]) < 0.6)
+                    edges.push_back(sample_pair(i,j));
+            }
+        }
+
+        // The return value (the number of clusters) is not needed here; callers can
+        // derive it from the labels.
+        std::vector<unsigned long> labels;
+        chinese_whispers(edges, labels);
+
+        boost::python::list clusters;
+        for (size_t i = 0; i < labels.size(); ++i)
+        {
+            clusters.append(labels[i]);
+        }
+        return clusters;
+    }
+
+    // Saves the aligned chip of a single face by forwarding to
+    // save_image_chips() with a one-element list of detections.
+    //
+    // img:           image the detection refers to.
+    // face:          landmarks of the face to extract.
+    // chip_filename: output file name prefix (without extension).
+    void save_image_chip (
+        object img,
+        const full_object_detection& face,
+        const std::string& chip_filename
+    )
+    {
+        const std::vector<full_object_detection> single_face{face};
+        save_image_chips(img, single_face, chip_filename);
+    }
+
+    // Extracts one aligned chip per face from img and writes each as a JPEG.
+    //
+    // img:           image the detections refer to.
+    // faces:         landmarks of the faces to extract.
+    // chip_filename: output file name prefix.  With more than one face the
+    //                files are named <prefix>_1.jpg, <prefix>_2.jpg, ...;
+    //                otherwise the single chip is written to <prefix>.jpg.
+    void save_image_chips (
+        object img,
+        const std::vector<full_object_detection>& faces,
+        const std::string& chip_filename
+    )
+    {
+        // Describe where each chip is located: 150 and 0.25 are the size and
+        // padding arguments handed to get_face_chip_details().
+        std::vector<chip_details> chip_locations;
+        chip_locations.reserve(faces.size());
+        for (const auto& face : faces)
+            chip_locations.push_back(get_face_chip_details(face, 150, 0.25));
+
+        dlib::array<matrix<rgb_pixel>> face_chips;
+        extract_image_chips(numpy_rgb_image(img), chip_locations, face_chips);
+
+        // A numeric suffix is only added when several faces were requested.
+        const bool add_suffix = faces.size() > 1;
+        int chip_number = 0;
+        for (auto& chip : face_chips)
+        {
+            ++chip_number;  // suffixes are 1-based
+            std::string file_name = chip_filename;
+            if (add_suffix)
+                file_name += "_" + std::to_string(chip_number);
+            file_name += ".jpg";
+            save_jpeg(chip, file_name);
+        }
+    }
+
    matrix<double,0,1> compute_face_descriptor (
        object img,
        const full_object_detection& face,
@ -155,6 +230,15 @@ void bind_face_recognition()
        .def("compute_face_descriptor", &face_recognition_model_v1::compute_face_descriptors, (arg("img"),arg("faces"),arg("num_jitters")=0),
            "Takes an image and an array of full_object_detections that reference faces in that image and converts them into 128D face descriptors.  "
            "If num_jitters>1 then each face will be randomly jittered slightly num_jitters times, each run through the 128D projection, and the average used as the face descriptor."
+            )
+        .def("save_image_chip", &face_recognition_model_v1::save_image_chip, (arg("img"),arg("face"),arg("chip_filename")),
+            "Takes an image and a full_object_detection that references a face in that image and saves the face with the specified file name prefix"
+            )
+        .def("save_image_chips", &face_recognition_model_v1::save_image_chips, (arg("img"),arg("faces"),arg("chip_filename")),
+            "Takes an image and a full_object_detections object that reference faces in that image and saves the faces with the specified file name prefix"
+            )
+        .def("cluster", &face_recognition_model_v1::cluster, (arg("descriptors")),
+            "Takes a list of descriptors and returns a list that contains a label for each descriptor. Clustering is done using chinese_whispers."
            );
    }