Add a python wrapper for using the mmod face detector (#753)

This commit is contained in:
Adam Geitgey 2017-08-18 13:30:33 -07:00 committed by Davis E. King
parent af88b0d56f
commit b6d2329c5e
4 changed files with 171 additions and 2 deletions

View File

@ -0,0 +1,72 @@
#!/usr/bin/python
# The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
#
# This example shows how to run a CNN based face detector using dlib. The
# example loads a pretrained model and uses it to find faces in images. The
# CNN model is much more accurate than the HOG based model shown in the
# face_detector.py example, but takes much more computational power to
# run, and is meant to be executed on a GPU to attain reasonable speed.
#
# You can download the pre-trained model from:
# http://dlib.net/files/mmod_human_face_detector.dat.bz2
#
# The examples/faces folder contains some jpg images of people. You can run
# this program on them and see the detections by executing the
# following command:
# ./cnn_face_detector.py mmod_human_face_detector.dat ../examples/faces/*.jpg
#
#
# COMPILING/INSTALLING THE DLIB PYTHON INTERFACE
# You can install dlib using the command:
# pip install dlib
#
# Alternatively, if you want to compile dlib yourself then go into the dlib
# root folder and run:
# python setup.py install
# or
# python setup.py install --yes USE_AVX_INSTRUCTIONS --yes DLIB_USE_CUDA
# if you have a CPU that supports AVX instructions, an Nvidia GPU, and CUDA
# installed, since this makes things run *much* faster.
#
# Compiling dlib should work on any operating system so long as you have
# CMake and boost-python installed. On Ubuntu, this can be done easily by
# running the command:
# sudo apt-get install libboost-python-dev cmake
#
# Also note that this example requires scikit-image which can be installed
# via the command:
# pip install scikit-image
# Or downloaded from http://scikit-image.org/download.html.
import sys
import dlib
from skimage import io
# Expect at least two command line arguments: the model file followed by one
# or more image files.  Otherwise print usage information and stop.
if len(sys.argv) < 3:
    print(
        "Call this program like this:\n"
        " ./cnn_face_detector.py mmod_human_face_detector.dat ../examples/faces/*.jpg\n"
        "You can get the mmod_human_face_detector.dat file from:\n"
        " http://dlib.net/files/mmod_human_face_detector.dat.bz2")
    # Use sys.exit() rather than the bare exit() builtin: exit() is injected
    # by the site module for interactive use and is not guaranteed to exist
    # (e.g. when python is run with -S).
    sys.exit()

# Load the CNN face detection model from the file given as the first argument.
cnn_face_detection_model = dlib.cnn_face_detection_model_v1(sys.argv[1])
win = dlib.image_window()

# Every remaining argument is treated as the path of an image to process.
for f in sys.argv[2:]:
    print("Processing file: {}".format(f))
    img = io.imread(f)
    # The 1 in the second argument indicates that we should upsample the image
    # 1 time.  This will make everything bigger and allow us to detect more
    # faces.
    dets = cnn_face_detection_model.cnn_face_detector(img, 1)
    print("Number of faces detected: {}".format(len(dets)))
    for i, d in enumerate(dets):
        print("Detection {}: Left: {} Top: {} Right: {} Bottom: {}".format(
            i, d.left(), d.top(), d.right(), d.bottom()))

    # Show the image with the detected rectangles drawn on top, then wait for
    # the user before moving on to the next file.
    win.clear_overlay()
    win.set_image(img)
    win.add_overlay(dets)
    dlib.hit_enter_to_continue()

View File

@ -26,6 +26,7 @@ set(python_srcs
src/shape_predictor.cpp
src/correlation_tracker.cpp
src/face_recognition.cpp
src/cnn_face_detector.cpp
)
# Only add the GUI module if requested
@ -35,6 +36,6 @@ endif(NOT ${DLIB_NO_GUI_SUPPORT})
add_python_module(dlib ${python_srcs})
# When you run "make install" we will copy the compiled dlib.so (or dlib.pyd)
# When you run "make install" we will copy the compiled dlib.so (or dlib.pyd)
# library file to the python_examples folder.
install_dlib_to(../../python_examples)

View File

@ -0,0 +1,95 @@
// Copyright (C) 2017 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#include <dlib/python.h>
#include <boost/shared_ptr.hpp>
#include <dlib/matrix.h>
#include <boost/python/slice.hpp>
#include <dlib/geometry/vector.h>
#include <dlib/dnn.h>
#include <dlib/image_transforms.h>
#include "indexing.h"
using namespace dlib;
using namespace std;
using namespace boost::python;
typedef matrix<double,0,1> cv;
class cnn_face_detection_model_v1
{
public:
cnn_face_detection_model_v1(const std::string& model_filename)
{
deserialize(model_filename) >> net;
}
std::vector<rectangle> cnn_face_detector (
object pyimage,
const int upsample_num_times
)
{
pyramid_down<2> pyr;
std::vector<rectangle> rects;
// Copy the data into dlib based objects
matrix<rgb_pixel> image;
if (is_gray_python_image(pyimage))
assign_image(image, numpy_gray_image(pyimage));
else if (is_rgb_python_image(pyimage))
assign_image(image, numpy_rgb_image(pyimage));
else
throw dlib::error("Unsupported image type, must be 8bit gray or RGB image.");
// Upsampling the image will allow us to detect smaller faces but will cause the
// program to use more RAM and run longer.
unsigned int levels = upsample_num_times;
while (levels > 0)
{
levels--;
pyramid_up(image, pyr);
}
auto dets = net(image);
// Scale the detection locations back to the original image size
// if the image was upscaled.
for (auto&& d : dets) {
d.rect = pyr.rect_down(d.rect, upsample_num_times);
rects.push_back(d.rect);
}
return rects;
}
private:
template <long num_filters, typename SUBNET> using con5d = con<num_filters,5,5,2,2,SUBNET>;
template <long num_filters, typename SUBNET> using con5 = con<num_filters,5,5,1,1,SUBNET>;
template <typename SUBNET> using downsampler = relu<affine<con5d<32, relu<affine<con5d<32, relu<affine<con5d<16,SUBNET>>>>>>>>>;
template <typename SUBNET> using rcon5 = relu<affine<con5<45,SUBNET>>>;
using net_type = loss_mmod<con<1,9,9,1,1,rcon5<rcon5<rcon5<downsampler<input_rgb_image_pyramid<pyramid_down<6>>>>>>>>;
net_type net;
};
// ----------------------------------------------------------------------------------------
void bind_cnn_face_detection()
{
using boost::python::arg;
{
class_<cnn_face_detection_model_v1>("cnn_face_detection_model_v1", "This object detects human faces in an image. The constructor loads the face detection model from a file. You can download a pre-trained model from http://dlib.net/files/mmod_human_face_detector.dat.bz2.", init<std::string>())
.def("cnn_face_detector", &cnn_face_detection_model_v1::cnn_face_detector, (arg("img"), arg("upsample_num_times")=0),
"Find faces in an image using a deep learning model.\n\
- Upsamples the image upsample_num_times before running the face \n\
detector."
);
}
}

View File

@ -19,6 +19,7 @@ void bind_object_detection();
void bind_shape_predictors();
void bind_correlation_tracker();
void bind_face_recognition();
void bind_cnn_face_detection();
#ifndef DLIB_NO_GUI_SUPPORT
void bind_gui();
@ -51,8 +52,8 @@ BOOST_PYTHON_MODULE(dlib)
bind_shape_predictors();
bind_correlation_tracker();
bind_face_recognition();
bind_cnn_face_detection();
#ifndef DLIB_NO_GUI_SUPPORT
bind_gui();
#endif
}