Add a python wrapper for using the mmod face detector (#753)

2024-11-01 10:14:53 +08:00 · 2017-08-18 13:30:33 -07:00 · 2017-08-18 13:30:33 -07:00 · b6d2329c5e
commit b6d2329c5e
parent af88b0d56f
4 changed files with 171 additions and 2 deletions
--- a/python_examples/cnn_face_detector.py
+++ b/python_examples/cnn_face_detector.py
@ -0,0 +1,72 @@
+#!/usr/bin/python
+# The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
+#
+#   This example shows how to run a CNN based face detector using dlib.  The
+#   example loads a pretrained model and uses it to find faces in images.  The
+#   CNN model is much more accurate than the HOG based model shown in the
+#   face_detector.py example, but takes much more computational power to
+#   run, and is meant to be executed on a GPU to attain reasonable speed.
+#
+#   You can download the pre-trained model from:
+#       http://dlib.net/files/mmod_human_face_detector.dat.bz2
+#
+#   The examples/faces folder contains some jpg images of people.  You can run
+#   this program on them and see the detections by executing the
+#   following command:
+#       ./cnn_face_detector.py mmod_human_face_detector.dat ../examples/faces/*.jpg
+#
+#
+# COMPILING/INSTALLING THE DLIB PYTHON INTERFACE
+#   You can install dlib using the command:
+#       pip install dlib
+#
+#   Alternatively, if you want to compile dlib yourself then go into the dlib
+#   root folder and run:
+#       python setup.py install
+#   or
+#       python setup.py install --yes USE_AVX_INSTRUCTIONS --yes DLIB_USE_CUDA
+#   if you have a CPU that supports AVX instructions, an Nvidia GPU, and CUDA
+#   installed, since this makes things run *much* faster.
+#
+#   Compiling dlib should work on any operating system so long as you have
+#   CMake and boost-python installed.  On Ubuntu, this can be done easily by
+#   running the command:
+#       sudo apt-get install libboost-python-dev cmake
+#
+#   Also note that this example requires scikit-image which can be installed
+#   via the command:
+#       pip install scikit-image
+#   Or downloaded from http://scikit-image.org/download.html.
+
+import sys
+
+import dlib
+from skimage import io
+
+# Expect at least two arguments: the model file followed by one or more images.
+if len(sys.argv) < 3:
+    print(
+        "Call this program like this:\n"
+        "   ./cnn_face_detector.py mmod_human_face_detector.dat ../examples/faces/*.jpg\n"
+        "You can get the mmod_human_face_detector.dat file from:\n"
+        "    http://dlib.net/files/mmod_human_face_detector.dat.bz2")
+    # Use sys.exit() rather than the interactive-only exit() helper injected by
+    # the site module, and report the usage error with a non-zero status.
+    sys.exit(1)
+
+# Load the pretrained detection model named by the first argument and open a
+# window used to display each image together with its detections.
+cnn_face_detection_model = dlib.cnn_face_detection_model_v1(sys.argv[1])
+win = dlib.image_window()
+
+# Every remaining argument is treated as the path of an image to process.
+for f in sys.argv[2:]:
+    print("Processing file: {}".format(f))
+    img = io.imread(f)
+    # The 1 in the second argument indicates that we should upsample the image
+    # 1 time.  This will make everything bigger and allow us to detect more
+    # faces.
+    dets = cnn_face_detection_model.cnn_face_detector(img, 1)
+    print("Number of faces detected: {}".format(len(dets)))
+    for i, d in enumerate(dets):
+        print("Detection {}: Left: {} Top: {} Right: {} Bottom: {}".format(
+            i, d.left(), d.top(), d.right(), d.bottom()))
+
+    # Replace the previous image and overlays with the current results, then
+    # wait for the user before moving on to the next file.
+    win.clear_overlay()
+    win.set_image(img)
+    win.add_overlay(dets)
+    dlib.hit_enter_to_continue()
--- a/tools/python/CMakeLists.txt
+++ b/tools/python/CMakeLists.txt
@ -26,6 +26,7 @@ set(python_srcs
   src/shape_predictor.cpp
   src/correlation_tracker.cpp
   src/face_recognition.cpp
+   src/cnn_face_detector.cpp
 )

 # Only add the GUI module if requested
@ -35,6 +36,6 @@ endif(NOT ${DLIB_NO_GUI_SUPPORT})

 add_python_module(dlib ${python_srcs})

-# When you run "make install" we will copy the compiled dlib.so (or dlib.pyd) 
+# When you run "make install" we will copy the compiled dlib.so (or dlib.pyd)
 # library file to the python_examples folder.
 install_dlib_to(../../python_examples)
--- a/tools/python/src/cnn_face_detector.cpp
+++ b/tools/python/src/cnn_face_detector.cpp
@ -0,0 +1,95 @@
+// Copyright (C) 2017  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+
+#include <dlib/python.h>
+#include <boost/shared_ptr.hpp>
+#include <dlib/matrix.h>
+#include <boost/python/slice.hpp>
+#include <dlib/geometry/vector.h>
+#include <dlib/dnn.h>
+#include <dlib/image_transforms.h>
+#include "indexing.h"
+
+using namespace dlib;
+using namespace std;
+using namespace boost::python;
+
+// NOTE(review): `cv` is not referenced by any code visible in this file.
+// Presumably a runtime-length column vector of doubles kept for parity with
+// the sibling binding sources -- confirm whether it is still needed.
+typedef matrix<double,0,1> cv;
+
+
+// Wraps the MMOD CNN face detection network so it can be exposed to Python.
+// The network weights are loaded from a serialized model file at construction
+// and the object is then used to find face rectangles in images.
+class cnn_face_detection_model_v1
+{
+
+public:
+
+    // Loads the network from model_filename.  Any exception raised by
+    // deserialize() (e.g. missing or incompatible file) propagates to the caller.
+    cnn_face_detection_model_v1(const std::string& model_filename)
+    {
+        deserialize(model_filename) >> net;
+    }
+
+    // Runs the detector on pyimage and returns the detected face rectangles,
+    // expressed in the coordinate system of the original (non-upsampled) image.
+    //
+    //   pyimage            - an 8bit gray or RGB image; anything else is rejected.
+    //   upsample_num_times - how many times to upsample the image before
+    //                        running the network.  Must be >= 0.
+    //
+    // Throws dlib::error if upsample_num_times is negative or the image type is
+    // not supported.
+    std::vector<rectangle> cnn_face_detector (
+        object pyimage,
+        const int upsample_num_times
+    )
+    {
+        // A negative count would wrap to a huge unsigned value below and make
+        // the upsampling loop run (and allocate) essentially without bound, so
+        // reject it up front.
+        if (upsample_num_times < 0)
+            throw dlib::error("upsample_num_times must be >= 0.");
+        const unsigned int levels = static_cast<unsigned int>(upsample_num_times);
+
+        pyramid_down<2> pyr;
+
+        // Copy the data into dlib based objects
+        matrix<rgb_pixel> image;
+        if (is_gray_python_image(pyimage))
+            assign_image(image, numpy_gray_image(pyimage));
+        else if (is_rgb_python_image(pyimage))
+            assign_image(image, numpy_rgb_image(pyimage));
+        else
+            throw dlib::error("Unsupported image type, must be 8bit gray or RGB image.");
+
+        // Upsampling the image will allow us to detect smaller faces but will cause the
+        // program to use more RAM and run longer.
+        for (unsigned int i = 0; i < levels; ++i)
+            pyramid_up(image, pyr);
+
+        const auto dets = net(image);
+
+        // Scale the detection locations back to the original image size
+        // if the image was upscaled.
+        std::vector<rectangle> rects;
+        rects.reserve(dets.size());
+        for (const auto& d : dets)
+            rects.push_back(pyr.rect_down(d.rect, levels));
+
+        return rects;
+    }
+
+private:
+
+    // Network architecture.  It has to match the layout of the serialized model
+    // loaded in the constructor.
+    template <long num_filters, typename SUBNET> using con5d = con<num_filters,5,5,2,2,SUBNET>;
+    template <long num_filters, typename SUBNET> using con5  = con<num_filters,5,5,1,1,SUBNET>;
+
+    template <typename SUBNET> using downsampler  = relu<affine<con5d<32, relu<affine<con5d<32, relu<affine<con5d<16,SUBNET>>>>>>>>>;
+    template <typename SUBNET> using rcon5  = relu<affine<con5<45,SUBNET>>>;
+
+    using net_type = loss_mmod<con<1,9,9,1,1,rcon5<rcon5<rcon5<downsampler<input_rgb_image_pyramid<pyramid_down<6>>>>>>>>;
+
+    net_type net;
+};
+
+
+// ----------------------------------------------------------------------------------------
+
+// Registers the cnn_face_detection_model_v1 class and its cnn_face_detector
+// method with the Python module currently being initialised.
+void bind_cnn_face_detection()
+{
+    using boost::python::arg;
+
+    // Docstring attached to the Python class.
+    const char* const class_doc = "This object detects human faces in an image.  The constructor loads the face detection model from a file. You can download a pre-trained model from http://dlib.net/files/mmod_human_face_detector.dat.bz2.";
+
+    // Docstring attached to the cnn_face_detector method.  The continuation
+    // lines are part of the literal, so their leading whitespace is significant.
+    const char* const detector_doc =
+            "Find faces in an image using a deep learning model.\n\
+          - Upsamples the image upsample_num_times before running the face \n\
+            detector.";
+
+    // The class is constructible from the model filename; upsample_num_times
+    // defaults to 0 on the Python side.
+    class_<cnn_face_detection_model_v1>("cnn_face_detection_model_v1", class_doc, init<std::string>())
+        .def("cnn_face_detector",
+             &cnn_face_detection_model_v1::cnn_face_detector,
+             (arg("img"), arg("upsample_num_times")=0),
+             detector_doc);
+}
--- a/tools/python/src/dlib.cpp
+++ b/tools/python/src/dlib.cpp
@ -19,6 +19,7 @@ void bind_object_detection();
 void bind_shape_predictors();
 void bind_correlation_tracker();
 void bind_face_recognition();
+void bind_cnn_face_detection();

 #ifndef DLIB_NO_GUI_SUPPORT
 void bind_gui();
@ -51,8 +52,8 @@ BOOST_PYTHON_MODULE(dlib)
    bind_shape_predictors();
    bind_correlation_tracker();
    bind_face_recognition();
+    bind_cnn_face_detection();
 #ifndef DLIB_NO_GUI_SUPPORT
    bind_gui();
 #endif
 }
-