diff --git a/python_examples/face_clustering.py b/python_examples/face_clustering.py
new file mode 100755
index 000000000..9cfe2ca03
--- /dev/null
+++ b/python_examples/face_clustering.py
@@ -0,0 +1,138 @@
+#!/usr/bin/python
+# The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
+#
+#   This example shows how to use dlib's face recognition tool for clustering using chinese_whispers.
+#   This is useful when you have a collection of photographs which you know are linked to
+#   a particular person, but the person may be photographed with multiple other people.
+#   In this example, we assume the largest cluster will contain photos of the common person in the
+#   collection of photographs. Then, we save extracted images of the face in the largest cluster in
+#   a 150x150 px format which is suitable for jittering and loading to perform metric learning (as shown
+#   in the dnn_metric_learning_on_images_ex.cpp example).
+#   https://github.com/davisking/dlib/blob/master/examples/dnn_metric_learning_on_images_ex.cpp
+#
+# COMPILING/INSTALLING THE DLIB PYTHON INTERFACE
+#   You can install dlib using the command:
+#       pip install dlib
+#
+#   Alternatively, if you want to compile dlib yourself then go into the dlib
+#   root folder and run:
+#       python setup.py install
+#   or
+#       python setup.py install --yes USE_AVX_INSTRUCTIONS
+#   if you have a CPU that supports AVX instructions, since this makes some
+#   things run faster. This code will also use CUDA if you have CUDA and cuDNN
+#   installed.
+#
+#   Compiling dlib should work on any operating system so long as you have
+#   CMake and boost-python installed. On Ubuntu, this can be done easily by
+#   running the command:
+#       sudo apt-get install libboost-python-dev cmake
+#
+#   Also note that this example requires scikit-image which can be installed
+#   via the command:
+#       pip install scikit-image
+#   Or downloaded from http://scikit-image.org/download.html.
+
+import sys
+import os
+import dlib
+import glob
+from collections import Counter
+from skimage import io
+
+if len(sys.argv) != 5:
+    print(
+        "Call this program like this:\n"
+        " ./face_clustering.py shape_predictor_68_face_landmarks.dat dlib_face_recognition_resnet_model_v1.dat ../examples/faces output_folder\n"
+        "You can download a trained facial shape predictor and recognition model from:\n"
+        " http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2\n"
+        " http://dlib.net/files/dlib_face_recognition_resnet_model_v1.dat.bz2")
+    # sys.exit() is always available (the bare exit() helper is only injected by
+    # the site module); the non-zero status reports the usage error to the caller.
+    sys.exit(1)
+
+predictor_path = sys.argv[1]
+face_rec_model_path = sys.argv[2]
+faces_folder_path = sys.argv[3]
+output_folder_path = sys.argv[4]
+
+# Load all the models we need: a detector to find the faces, a shape predictor
+# to find face landmarks so we can precisely localize the face, and finally the
+# face recognition model.
+detector = dlib.get_frontal_face_detector()
+sp = dlib.shape_predictor(predictor_path)
+facerec = dlib.face_recognition_model_v1(face_rec_model_path)
+
+descriptors = []
+images = []
+
+# Now process all the images
+for f in glob.glob(os.path.join(faces_folder_path, "*.jpg")):
+    print("Processing file: {}".format(f))
+    img = io.imread(f)
+
+    # Ask the detector to find the bounding boxes of each face. The 1 in the
+    # second argument indicates that we should upsample the image 1 time. This
+    # will make everything bigger and allow us to detect more faces.
+    dets = detector(img, 1)
+    print("Number of faces detected: {}".format(len(dets)))
+
+    # Now process each face we found.
+    for d in dets:
+        # Get the landmarks/parts for the face in box d.
+        shape = sp(img, d)
+
+        # Compute the 128D vector that describes the face in img identified by
+        # shape. In general, if two face descriptor vectors have a Euclidean
+        # distance between them less than 0.6 then they are from the same
+        # person, otherwise they are from different people. Here we keep the
+        # descriptor, together with the image and shape it came from, so the
+        # faces can be clustered and saved below.
+        face_descriptor = facerec.compute_face_descriptor(img, shape)
+        descriptors.append(face_descriptor)
+        images.append((img, shape))
+        # It should also be noted that you can also call this function like this:
+        #  face_descriptor = facerec.compute_face_descriptor(img, shape, 100)
+        # The version of the call without the 100 gets 99.13% accuracy on LFW
+        # while the version with 100 gets 99.38%. However, the 100 makes the
+        # call 100x slower to execute, so choose whatever version you like. To
+        # explain a little, the 3rd argument tells the code how many times to
+        # jitter/resample the image. When you set it to 100 it executes the
+        # face descriptor extraction 100 times on slightly modified versions of
+        # the face and returns the average result. You could also pick a more
+        # middle value, such as 10, which is only 10x slower but still gets an
+        # LFW accuracy of 99.3%.
+
+# Cluster the descriptors; labels[i] is the cluster assigned to descriptors[i].
+labels = facerec.cluster(descriptors)
+label_classes = sorted(set(labels))
+num_classes = len(label_classes)
+print("Number of clusters: {}".format(num_classes))
+print("Labels classes: {}".format(str(label_classes)))
+
+# Find biggest class. Every label is counted in a single pass; on a tie the
+# lowest label wins because max() keeps the first maximum of the sorted list.
+biggest_class = None
+biggest_class_length = 0
+if label_classes:
+    class_lengths = Counter(labels)
+    biggest_class = max(label_classes, key=lambda label: class_lengths[label])
+    biggest_class_length = class_lengths[biggest_class]
+
+print("Biggest class: {}".format(biggest_class))
+print("Biggest class length: {}".format(biggest_class_length))
+
+# Find the indices for the biggest class
+indices = [i for i, label in enumerate(labels) if label == biggest_class]
+
+print("Biggest class indices: {}".format(str(indices)))
+
+# Ensure output directory exists
+if not os.path.isdir(output_folder_path):
+    os.makedirs(output_folder_path)
+
+# Save the extracted faces
+for i, index in enumerate(indices):
+    img, shape = images[index]
+    file_path = os.path.join(output_folder_path, "face_" + str(i))
+    facerec.save_image_chip(img, shape, file_path)
diff --git a/tools/python/src/face_recognition.cpp b/tools/python/src/face_recognition.cpp
index 59112f8e1..68d7ad34a 100644
--- a/tools/python/src/face_recognition.cpp
+++ b/tools/python/src/face_recognition.cpp
@@ -9,6 +9,8 @@
 #include <dlib/dnn.h>
 #include <dlib/image_transforms.h>
 #include "indexing.h"
+#include <dlib/image_io.h>
+#include <dlib/clustering.h>
 
 
 using namespace dlib;
@@ -37,6 +39,92 @@ public:
         cropper->set_max_rotation_degrees(3);
     }
 
+    // Clusters face descriptors with the Chinese whispers algorithm and returns a
+    // list holding one cluster label per descriptor, in input order.
+    boost::python::list cluster(boost::python::list descriptors)
+    {
+        boost::python::list clusters;
+
+        const size_t num_descriptors = len(descriptors);
+
+        // Convert each Python descriptor exactly once.  Extracting inside the
+        // pairwise loop below would repeat the conversion O(n^2) times.
+        std::vector<matrix<double,0,1>> face_descriptors;
+        face_descriptors.reserve(num_descriptors);
+        for (size_t i = 0; i < num_descriptors; ++i)
+        {
+            const matrix<double,0,1> descriptor = boost::python::extract<matrix<double,0,1>>(descriptors[i]);
+            face_descriptors.push_back(descriptor);
+        }
+
+        // In particular, one simple thing we can do is face clustering.  This next bit of code
+        // creates a graph of connected faces and then uses the Chinese whispers graph clustering
+        // algorithm to identify how many people there are and which faces belong to whom.
+        std::vector<sample_pair> edges;
+        std::vector<unsigned long> labels;
+        for (size_t i = 0; i < num_descriptors; ++i)
+        {
+            // Start at j == i so every face gets a self edge.  chinese_whispers() sizes its
+            // label vector from the largest node index found in the edge list, so without
+            // self edges a face that matches nobody could be left without a label.
+            for (size_t j = i; j < num_descriptors; ++j)
+            {
+                // Faces are connected in the graph if they are close enough.  Here we check if
+                // the distance between two face descriptors is less than 0.6, which is the
+                // decision threshold the network was trained to use.  Although you can
+                // certainly use any other threshold you find useful.
+                if (length(face_descriptors[i]-face_descriptors[j]) < 0.6)
+                    edges.push_back(sample_pair(i,j));
+            }
+        }
+        // Only the per-face labels are needed; the returned cluster count is ignored.
+        chinese_whispers(edges, labels);
+        for (size_t i = 0; i < labels.size(); ++i)
+        {
+            clusters.append(labels[i]);
+        }
+        return clusters;
+    }
+
+    // Saves the chip of a single face as chip_filename + ".jpg".
+    void save_image_chip (
+        object img,
+        const full_object_detection& face,
+        const std::string& chip_filename
+    )
+    {
+        std::vector<full_object_detection> faces(1, face);
+        save_image_chips(img, faces, chip_filename);
+    }
+
+    // Extracts one chip per face (size 150, padding 0.25) and saves each as a JPEG.
+    // A single face is written to chip_filename + ".jpg"; several faces are written
+    // to chip_filename + "_<n>.jpg" with n counting up from 1.
+    void save_image_chips (
+        object img,
+        const std::vector<full_object_detection>& faces,
+        const std::string& chip_filename
+    )
+    {
+        std::vector<chip_details> dets;
+        dets.reserve(faces.size());
+        for (const auto& f : faces)
+            dets.push_back(get_face_chip_details(f, 150, 0.25));
+        dlib::array<matrix<rgb_pixel>> face_chips;
+        extract_image_chips(numpy_rgb_image(img), dets, face_chips);
+
+        const bool number_the_files = faces.size() > 1;
+        int i = 0;
+        for (auto& chip : face_chips)
+        {
+            ++i;
+            const std::string file_name = number_the_files
+                ? chip_filename + "_" + std::to_string(i) + ".jpg"
+                : chip_filename + ".jpg";
+            save_jpeg(chip, file_name);
+        }
+    }
+
     matrix<double,0,1> compute_face_descriptor (
         object img,
         const full_object_detection& face,
@@ -155,6 +243,15 @@ void bind_face_recognition()
         .def("compute_face_descriptor", &face_recognition_model_v1::compute_face_descriptors, (arg("img"),arg("faces"),arg("num_jitters")=0),
             "Takes an image and an array of full_object_detections that reference faces in that image and converts them into 128D face descriptors. "
             "If num_jitters>1 then each face will be randomly jittered slightly num_jitters times, each run through the 128D projection, and the average used as the face descriptor."
+            )
+        .def("save_image_chip", &face_recognition_model_v1::save_image_chip, (arg("img"),arg("face"),arg("chip_filename")),
+            "Takes an image and a full_object_detection that references a face in that image and saves the face with the specified file name prefix"
+            )
+        .def("save_image_chips", &face_recognition_model_v1::save_image_chips, (arg("img"),arg("faces"),arg("chip_filename")),
+            "Takes an image and a full_object_detections object that reference faces in that image and saves the faces with the specified file name prefix"
+            )
+        .def("cluster", &face_recognition_model_v1::cluster, (arg("descriptors")),
+            "Takes a list of descriptors and returns a list that contains a label for each descriptor. Clustering is done using chinese_whispers."
             );
 }
 