diff --git a/python_examples/face_clustering.py b/python_examples/face_clustering.py index c5427cd4c..96515a780 100755 --- a/python_examples/face_clustering.py +++ b/python_examples/face_clustering.py @@ -42,9 +42,9 @@ from skimage import io if len(sys.argv) != 5: print( "Call this program like this:\n" - " ./face_clustering.py shape_predictor_68_face_landmarks.dat dlib_face_recognition_resnet_model_v1.dat ../examples/faces output_folder\n" + " ./face_clustering.py shape_predictor_5_face_landmarks.dat dlib_face_recognition_resnet_model_v1.dat ../examples/faces output_folder\n" "You can download a trained facial shape predictor and recognition model from:\n" - " http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2\n" + " http://dlib.net/files/shape_predictor_5_face_landmarks.dat.bz2\n" " http://dlib.net/files/dlib_face_recognition_resnet_model_v1.dat.bz2") exit() @@ -63,7 +63,7 @@ facerec = dlib.face_recognition_model_v1(face_rec_model_path) descriptors = [] images = [] -# Now process all the images +# Now find all the faces and compute 128D face descriptors for each face. for f in glob.glob(os.path.join(faces_folder_path, "*.jpg")): print("Processing file: {}".format(f)) img = io.imread(f) @@ -78,34 +78,17 @@ for f in glob.glob(os.path.join(faces_folder_path, "*.jpg")): for k, d in enumerate(dets): # Get the landmarks/parts for the face in box d. shape = sp(img, d) - # Draw the face landmarks on the screen so we can see what face is currently being processed. # Compute the 128D vector that describes the face in img identified by - # shape. In general, if two face descriptor vectors have a Euclidean - # distance between them less than 0.6 then they are from the same - # person, otherwise they are from different people. Here we just print - # the vector to the screen. + # shape. 
face_descriptor = facerec.compute_face_descriptor(img, shape) descriptors.append(face_descriptor) images.append((img, shape)) - # It should also be noted that you can also call this function like this: - # face_descriptor = facerec.compute_face_descriptor(img, shape, 100) - # The version of the call without the 100 gets 99.13% accuracy on LFW - # while the version with 100 gets 99.38%. However, the 100 makes the - # call 100x slower to execute, so choose whatever version you like. To - # explain a little, the 3rd argument tells the code how many times to - # jitter/resample the image. When you set it to 100 it executes the - # face descriptor extraction 100 times on slightly modified versions of - # the face and returns the average result. You could also pick a more - # middle value, such as 10, which is only 10x slower but still gets an - # LFW accuracy of 99.3%. -labels = facerec.cluster(descriptors, 0.5) -label_classes = list(set(labels)) -label_classes.sort() -num_classes = len(label_classes) +# Now let's cluster the faces. 
+labels = dlib.chinese_whispers_clustering(descriptors, 0.5) +num_classes = len(set(labels)) print("Number of clusters: {}".format(num_classes)) -print("Labels classes: {}".format(str(label_classes))) # Find biggest class biggest_class = None @@ -116,8 +99,8 @@ for i in range(0, num_classes): biggest_class_length = class_length biggest_class = i -print("Biggest class: {}".format(biggest_class)) -print("Biggest class length: {}".format(biggest_class_length)) +print("Biggest cluster id number: {}".format(biggest_class)) +print("Number of faces in biggest cluster: {}".format(biggest_class_length)) # Find the indices for the biggest class indices = [] @@ -125,17 +108,18 @@ for i, label in enumerate(labels): if label == biggest_class: indices.append(i) -print("Biggest class indices: {}".format(str(indices))) +print("Indices of images in the biggest cluster: {}".format(str(indices))) # Ensure output directory exists if not os.path.isdir(output_folder_path): os.makedirs(output_folder_path) # Save the extracted faces +print("Saving faces in largest cluster to output folder...") for i, index in enumerate(indices): img, shape = images[index] file_path = os.path.join(output_folder_path, "face_" + str(i)) - facerec.save_image_chip(img, shape, file_path) + dlib.save_face_chip(img, shape, file_path) diff --git a/tools/python/src/face_recognition.cpp b/tools/python/src/face_recognition.cpp index 27e754317..f3c8204ef 100644 --- a/tools/python/src/face_recognition.cpp +++ b/tools/python/src/face_recognition.cpp @@ -39,79 +39,6 @@ public: cropper->set_max_rotation_degrees(3); } - boost::python::list cluster(boost::python::list descriptors, float threshold) - { - boost::python::list clusters; - - size_t num_descriptors = len(descriptors); - - // In particular, one simple thing we can do is face clustering. 
This next bit of code - // creates a graph of connected faces and then uses the Chinese whispers graph clustering - // algorithm to identify how many people there are and which faces belong to whom. - std::vector edges; - std::vector labels; - for (size_t i = 0; i < num_descriptors; ++i) - { - for (size_t j = i+1; j < num_descriptors; ++j) - { - // Faces are connected in the graph if they are close enough. Here we check if - // the distance between two face descriptors is less than 0.6, which is the - // decision threshold the network was trained to use. Although you can - // certainly use any other threshold you find useful. - matrix first_descriptor = boost::python::extract>(descriptors[i]); - matrix second_descriptor = boost::python::extract>(descriptors[j]); - - if (length(first_descriptor-second_descriptor) < threshold) - edges.push_back(sample_pair(i,j)); - } - } - const auto num_clusters = chinese_whispers(edges, labels); - for (size_t i = 0; i < labels.size(); ++i) - { - clusters.append(labels[i]); - } - return clusters; - } - - void save_image_chip ( - object img, - const full_object_detection& face, - const std::string& chip_filename - ) - { - std::vector faces(1, face); - save_image_chips(img, faces, chip_filename); - return; - } - - void save_image_chips ( - object img, - const std::vector& faces, - const std::string& chip_filename - ) - { - int num_faces = faces.size(); - std::vector dets; - for (auto& f : faces) - dets.push_back(get_face_chip_details(f, 150, 0.25)); - dlib::array> face_chips; - extract_image_chips(numpy_rgb_image(img), dets, face_chips); - int i=0; - for (auto& chip : face_chips) { - i++; - if(num_faces > 1) - { - const std::string& file_name = chip_filename + "_" + std::to_string(i) + ".jpg"; - save_jpeg(chip, file_name); - } - else - { - const std::string& file_name = chip_filename + ".jpg"; - save_jpeg(chip, file_name); - } - } - } - matrix compute_face_descriptor ( object img, const full_object_detection& face, @@ -215,6 +142,78 
@@ private:
     anet_type net;
 };
+// ----------------------------------------------------------------------------------------
+
+// Groups descriptors into clusters with the Chinese whispers graph clustering algorithm.
+//   descriptors - Python list of descriptor vectors (each extracted as matrix<double,0,1>&).
+//   threshold   - two descriptors are linked in the graph when the length of their
+//                 difference is strictly less than this value.
+// Returns a Python list holding exactly one cluster label per descriptor, in input order.
+boost::python::list chinese_whispers_clustering(boost::python::list descriptors, float threshold)
+{
+    boost::python::list clusters;
+
+    const size_t num_descriptors = len(descriptors);
+
+    // Build a graph whose nodes are the descriptors and whose edges join every pair that
+    // lies closer together than threshold.
+    std::vector<sample_pair> edges;
+    for (size_t i = 0; i < num_descriptors; ++i)
+    {
+        for (size_t j = i+1; j < num_descriptors; ++j)
+        {
+            matrix<double,0,1>& first_descriptor = boost::python::extract<matrix<double,0,1>&>(descriptors[i]);
+            matrix<double,0,1>& second_descriptor = boost::python::extract<matrix<double,0,1>&>(descriptors[j]);
+
+            if (length(first_descriptor-second_descriptor) < threshold)
+                edges.push_back(sample_pair(i,j));
+        }
+    }
+
+    std::vector<unsigned long> labels;
+    unsigned long num_clusters = chinese_whispers(edges, labels);
+
+    // chinese_whispers() sizes labels from the largest node index that appears in an edge,
+    // so descriptors past that index (or all of them when no pair is close enough) would
+    // come back without a label.  Give each of those a new cluster of its own so the
+    // returned list always lines up with the input list.
+    while (labels.size() < num_descriptors)
+        labels.push_back(num_clusters++);
+
+    for (size_t i = 0; i < labels.size(); ++i)
+        clusters.append(labels[i]);
+    return clusters;
+}
+
+// Extracts a chip for every face in faces from img and writes each one as a JPEG file.
+//   img           - image object, read through numpy_rgb_image().
+//   faces         - detections locating the faces; each chip is described by
+//                   get_face_chip_details(f, 150, 0.25).
+//   chip_filename - output file name prefix.  With a single face the file is
+//                   chip_filename + ".jpg"; with several faces the files are
+//                   chip_filename + "_1.jpg", chip_filename + "_2.jpg", and so on.
+void save_face_chips (
+    object img,
+    const std::vector<full_object_detection>& faces,
+    const std::string& chip_filename
+)
+{
+    std::vector<chip_details> dets;
+    for (auto& f : faces)
+        dets.push_back(get_face_chip_details(f, 150, 0.25));
+    dlib::array<matrix<rgb_pixel>> face_chips;
+    extract_image_chips(numpy_rgb_image(img), dets, face_chips);
+
+    // A numeric suffix is only needed when there is more than one chip to tell apart.
+    const bool number_files = faces.size() > 1;
+    int i = 0;
+    for (auto& chip : face_chips)
+    {
+        ++i;
+        const std::string file_name = number_files
+            ? chip_filename + "_" + std::to_string(i) + ".jpg"
+            : chip_filename + ".jpg";
+        save_jpeg(chip, file_name);
+    }
+}
+
+// Convenience wrapper around save_face_chips() for a single face; the chip is written to
+// chip_filename + ".jpg".
+void save_face_chip (
+    object img,
+    const full_object_detection& face,
+    const std::string& chip_filename
+)
+{
+    std::vector<full_object_detection> faces(1, face);
+    save_face_chips(img, faces, chip_filename);
+}
+
 // 
---------------------------------------------------------------------------------------- @@ -230,18 +229,19 @@ void bind_face_recognition() .def("compute_face_descriptor", &face_recognition_model_v1::compute_face_descriptors, (arg("img"),arg("faces"),arg("num_jitters")=0), "Takes an image and an array of full_object_detections that reference faces in that image and converts them into 128D face descriptors. " "If num_jitters>1 then each face will be randomly jittered slightly num_jitters times, each run through the 128D projection, and the average used as the face descriptor." - ) - .def("save_image_chip", &face_recognition_model_v1::save_image_chip, (arg("img"),arg("face"),arg("chip_filename")), - "Takes an image and a full_object_detection that references a face in that image and saves the face with the specified file name prefix" - ) - .def("save_image_chips", &face_recognition_model_v1::save_image_chips, (arg("img"),arg("faces"),arg("chip_filename")), - "Takes an image and a full_object_detections object that reference faces in that image and saves the faces with the specified file name prefix" - ) - .def("cluster", &face_recognition_model_v1::cluster, (arg("descriptors"), arg("threshold")), - "Takes a list of descriptors and returns a list that contains a label for each descriptor. Clustering is done using chinese_whispers." ); } + def("save_face_chip", &save_face_chip, (arg("img"),arg("face"),arg("chip_filename")), + "Takes an image and a full_object_detection that references a face in that image and saves the face with the specified file name prefix. The face will be rotated upright and scaled to 150x150 pixels." + ); + def("save_face_chips", &save_face_chips, (arg("img"),arg("faces"),arg("chip_filename")), + "Takes an image and a full_object_detections object that reference faces in that image and saves the faces with the specified file name prefix. The faces will be rotated upright and scaled to 150x150 pixels." 
+ ); + def("chinese_whispers_clustering", &chinese_whispers_clustering, (arg("descriptors"), arg("threshold")), + "Takes a list of descriptors and returns a list that contains a label for each descriptor. Clustering is done using dlib::chinese_whispers." + ); + { typedef std::vector type; class_("full_object_detections", "An array of full_object_detection objects.")