Exposed jitter_image in Python and added an example (#980)

* Exposed jitter_image in Python and added an example * Return Numpy array directly * Require numpy during setup * Added install of Numpy before builds * Changed pip install for user only due to security issues. * Removed malloc * Made presence of Numpy during compile optional. * Conflict * Refactored get_face_chip/get_face_chips to use Numpy as well.
2024-11-01 10:14:53 +08:00 · 2017-12-08 22:59:27 +08:00 · 2017-12-08 22:59:27 +08:00 · ac292309c1
commit ac292309c1
parent a865e63552
12 changed files with 364 additions and 83 deletions
--- a/appveyor.yml
+++ b/appveyor.yml
@ -6,6 +6,7 @@ build_script:
  # build test
  - mkdir %APPVEYOR_BUILD_FOLDER%\build_test
  - cd %APPVEYOR_BUILD_FOLDER%\build_test
+  - pip install numpy
  - cmake -G "Visual Studio 14 2015 Win64" ../dlib/test
  - cmake --build . --config %CONFIGURATION% 

--- a/dlib/travis/build-and-test.sh
+++ b/dlib/travis/build-and-test.sh
@ -16,6 +16,7 @@ if [ "$VARIANT" = "examples" ]; then
 fi

 if [ "$VARIANT" = "python-api" ]; then
+  pip install --user numpy
  ../cmake/bin/cmake ../tools/python -DCMAKE_BUILD_TYPE=Release
  ../cmake/bin/cmake --build . --target install -- -j 2

--- a/examples/faces/Tom_Cruise_avp_2014_4.jpg
+++ b/examples/faces/Tom_Cruise_avp_2014_4.jpg
--- a/python_examples/face_alignment.py
+++ b/python_examples/face_alignment.py
@ -77,15 +77,13 @@ for detection in dets:
 # images = dlib.get_face_chips(img, faces, size=160, padding=0.25)
 images = dlib.get_face_chips(img, faces, size=320)
 for image in images:
-    cv_rgb_image = np.array(image).astype(np.uint8)
-    cv_bgr_img = cv2.cvtColor(cv_rgb_image, cv2.COLOR_RGB2BGR)
+    cv_bgr_img = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    cv2.imshow('image',cv_bgr_img)
    cv2.waitKey(0)

 # It is also possible to get a single chip
 image = dlib.get_face_chip(img, faces[0])
-cv_rgb_image = np.array(image).astype(np.uint8)
-cv_bgr_img = cv2.cvtColor(cv_rgb_image, cv2.COLOR_RGB2BGR)
+cv_bgr_img = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
 cv2.imshow('image',cv_bgr_img)
 cv2.waitKey(0)

--- a/python_examples/face_jitter.py
+++ b/python_examples/face_jitter.py
@ -0,0 +1,96 @@
+#!/usr/bin/python
+# The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
+#
+#   This example shows how face images are jittered, and how dlib's disturb_colors
+#   is used for data augmentation, during the training of a face recognition model
+#   using metric learning.
+#
+# COMPILING/INSTALLING THE DLIB PYTHON INTERFACE
+#   You can install dlib using the command:
+#       pip install dlib
+#
+#   Alternatively, if you want to compile dlib yourself then go into the dlib
+#   root folder and run:
+#       python setup.py install
+#   or
+#       python setup.py install --yes USE_AVX_INSTRUCTIONS
+#   if you have a CPU that supports AVX instructions, since this makes some
+#   things run faster.  This code will also use CUDA if you have CUDA and cuDNN
+#   installed.
+#
+#   Compiling dlib should work on any operating system so long as you have
+#   CMake and boost-python installed.  On Ubuntu, this can be done easily by
+#   running the command:
+#       sudo apt-get install libboost-python-dev cmake
+#
+#   Also note that this example requires OpenCV and Numpy which can be installed
+#   via the command:
+#       pip install opencv-python numpy
+#   Or downloaded from http://opencv.org/releases.html
+#
+#   The image file used in this example is in the public domain:
+#   https://commons.wikimedia.org/wiki/File:Tom_Cruise_avp_2014_4.jpg
+import sys
+
+import dlib
+import cv2
+import numpy as np
+
+def show_jittered_images(jittered_images):
+    '''
+        Shows the specified jittered images one by one
+    '''
+    for img in jittered_images:
+        cv_bgr_img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
+        cv2.imshow('image',cv_bgr_img)
+        cv2.waitKey(0)
+
+if len(sys.argv) != 2:
+    print(
+        "Call this program like this:\n"
+        "   ./face_jitter.py shape_predictor_5_face_landmarks.dat\n"
+        "You can download a trained facial shape predictor from:\n"
+        "    http://dlib.net/files/shape_predictor_5_face_landmarks.dat.bz2\n")
+    exit()
+
+predictor_path = sys.argv[1]
+face_file_path = "../examples/faces/Tom_Cruise_avp_2014_4.jpg"
+
+# Load all the models we need: a detector to find the faces, a shape predictor
+# to find face landmarks so we can precisely localize the face
+detector = dlib.get_frontal_face_detector()
+sp = dlib.shape_predictor(predictor_path)
+
+# Load the image using OpenCV
+bgr_img = cv2.imread(face_file_path)
+if bgr_img is None:
+    print("Sorry, we could not load '{}' as an image".format(face_file_path))
+    exit()
+
+# Convert to RGB since dlib uses RGB images
+img = cv2.cvtColor(bgr_img, cv2.COLOR_BGR2RGB)
+
+# Ask the detector to find the bounding boxes of each face.
+dets = detector(img)
+
+num_faces = len(dets)
+
+# Find the 5 face landmarks we need to do the alignment.
+faces = dlib.full_object_detections()
+for detection in dets:
+    faces.append(sp(img, detection))
+
+# Get the aligned face image and show it
+image = dlib.get_face_chip(img, faces[0], size=320)
+cv_rgb_image = np.array(image).astype(np.uint8)
+cv_bgr_img = cv2.cvtColor(cv_rgb_image, cv2.COLOR_RGB2BGR)
+cv2.imshow('image',cv_bgr_img)
+cv2.waitKey(0)
+
+# Show 5 jittered images without data augmentation
+jittered_images = dlib.jitter_image(cv_rgb_image, num_jitters=5)
+show_jittered_images(jittered_images)
+
+# Show 5 jittered images with data augmentation
+jittered_images = dlib.jitter_image(cv_rgb_image, num_jitters=5, disturb_colors=True)
+show_jittered_images(jittered_images)
+cv2.destroyAllWindows()
--- a/setup.py
+++ b/setup.py
@ -53,6 +53,8 @@ import signal
 from threading import Thread
 import time
 import re
+import pkg_resources
+import textwrap


 # change directory to this module path
@ -613,6 +615,21 @@ class build_ext(_build_ext):
        # cmake will do the heavy lifting, just pick up the fruits of its labour
        pass

+def is_installed(requirement):
+    try:
+        pkg_resources.require(requirement)
+    except pkg_resources.ResolutionError:
+        return False
+    else:
+        return True
+
+# Numpy is optional at build time: when the requirement cannot be resolved only
+# a warning is written to stderr and the setup carries on.
+if not is_installed('numpy>=1.5.1'):
+    print(textwrap.dedent("""
+            Warning: Functions that return numpy arrays need Numpy (>= v1.5.1) installed!
+            You can install numpy and then run this setup again:
+            $ pip install numpy
+            """), file=sys.stderr)
+
 setup(
    name='dlib',
    version=read_version(),
--- a/tools/python/CMakeLists.txt
+++ b/tools/python/CMakeLists.txt
@ -8,6 +8,22 @@ add_definitions(-DDLIB_NO_ABORT_ON_2ND_FATAL_ERROR)

 include(../../dlib/cmake_utils/add_python_module)

+# Probe the Python interpreter for an importable numpy package
+find_package(PythonInterp)
+if(NOT PYTHONINTERP_FOUND)
+    message(FATAL_ERROR "Could not find Python interpreter")
+endif()
+
+# NUMPYRC receives the exit status of the trial import; 1 is treated as "numpy missing"
+execute_process(COMMAND ${PYTHON_EXECUTABLE} -c "import numpy"
+                OUTPUT_QUIET ERROR_QUIET
+                RESULT_VARIABLE NUMPYRC)
+if(NUMPYRC EQUAL 1)
+    message(WARNING "Numpy not found. Functions that return numpy arrays will throw exceptions!")
+else()
+    message(STATUS "Found Python with installed numpy package")
+    # Ask numpy itself where its C headers live and add them to the include path
+    execute_process(COMMAND ${PYTHON_EXECUTABLE} -c "import sys; from numpy import get_include; sys.stdout.write(get_include())"
+                    OUTPUT_VARIABLE NUMPY_INCLUDE_PATH)
+    message(STATUS "Numpy include path '${NUMPY_INCLUDE_PATH}'")
+    include_directories(${NUMPY_INCLUDE_PATH})
+endif()
+
 add_definitions(-DDLIB_VERSION=${DLIB_VERSION})

 # Tell cmake to compile all these cpp files into a dlib python module.
@ -33,6 +49,17 @@ set(python_srcs
   src/global_optimization.cpp
 )

+# Only add the Numpy returning functions if Numpy is present; otherwise build the
+# stub whose functions throw.  (NUMPYRC is the exit status of the "import numpy"
+# probe above, 1 meaning the import failed.)
+IF(NUMPYRC EQUAL 1)
+    list(APPEND python_srcs src/numpy_returns_stub.cpp)
+ELSE(NUMPYRC EQUAL 1)
+    list(APPEND python_srcs src/numpy_returns.cpp)
+ENDIF(NUMPYRC EQUAL 1)
+
 # Only add the GUI module if requested
 if(NOT ${DLIB_NO_GUI_SUPPORT})
   list(APPEND python_srcs src/gui.cpp)
--- a/tools/python/src/dlib.cpp
+++ b/tools/python/src/dlib.cpp
@ -21,6 +21,7 @@ void bind_correlation_tracker();
 void bind_face_recognition();
 void bind_cnn_face_detection();
 void bind_global_optimization();
+void bind_numpy_returns();

 #ifndef DLIB_NO_GUI_SUPPORT
 void bind_gui();
@ -55,6 +56,7 @@ BOOST_PYTHON_MODULE(dlib)
    bind_face_recognition();
    bind_cnn_face_detection();
    bind_global_optimization();
+    bind_numpy_returns();
 #ifndef DLIB_NO_GUI_SUPPORT
    bind_gui();
 #endif
--- a/tools/python/src/face_recognition.cpp
+++ b/tools/python/src/face_recognition.cpp
@ -209,76 +209,6 @@ void save_face_chip (
 BOOST_PYTHON_FUNCTION_OVERLOADS(save_face_chip_with_defaults, save_face_chip, 3, 5)
 BOOST_PYTHON_FUNCTION_OVERLOADS(save_face_chips_with_defaults, save_face_chips, 3, 5)

-// ----------------------------------------------------------------------------------------
-
-boost::python::list get_face_chips (
-    object img,
-    const std::vector<full_object_detection>& faces,
-    size_t size = 150,
-    float padding = 0.25
-)
-{
-    if (!is_rgb_python_image(img))
-        throw dlib::error("Unsupported image type, must be RGB image.");
-
-    if (faces.size() < 1) {
-        throw dlib::error("No face were specified in the faces array.");
-    }
-
-    boost::python::list chips_list;
-
-    std::vector<chip_details> dets;
-    for (auto& f : faces)
-        dets.push_back(get_face_chip_details(f, size, padding));
-    dlib::array<matrix<rgb_pixel>> face_chips;
-    extract_image_chips(numpy_rgb_image(img), dets, face_chips);
-
-    for (auto& chip : face_chips) 
-    {
-        boost::python::list img;
-        
-        for(size_t row=0; row<size; row++) {
-            boost::python::list row_list;
-            for(size_t col=0; col<size; col++) {
-                rgb_pixel pixel = chip(row, col);
-                boost::python::list item;
-                
-                item.append(pixel.red);
-                item.append(pixel.green);
-                item.append(pixel.blue);
-                row_list.append(item);
-            }
-            img.append(row_list);
-        }
-
-        chips_list.append(img);
-    }
-    return chips_list;
-}
-
-boost::python::list get_face_chip (
-    object img,
-    const full_object_detection& face,
-    size_t size = 150,
-    float padding = 0.25
-)
-{
-    std::vector<full_object_detection> faces(1, face);
-    boost::python::list result = get_face_chips(img, faces, size, padding);
-    size_t num_images = boost::python::len(result);
-    if(num_images == 1) {
-        return boost::python::extract<boost::python::list>(result[0]); 
-    } else {
-        throw dlib::error("No face chips found!");
-    }
-}
-
-BOOST_PYTHON_FUNCTION_OVERLOADS(get_face_chip_with_defaults, get_face_chip, 2, 4)
-BOOST_PYTHON_FUNCTION_OVERLOADS(get_face_chips_with_defaults, get_face_chips, 2, 4)
-
-
-// ----------------------------------------------------------------------------------------
-
 void bind_face_recognition()
 {
    using boost::python::arg;
@ -302,14 +232,6 @@ void bind_face_recognition()
 	"Takes an image and a full_object_detections object that reference faces in that image and saves the faces with the specified file name prefix.  The faces will be rotated upright and scaled to 150x150 pixels or with the optional specified size and padding.",
 	(arg("img"), arg("faces"), arg("chip_filename"), arg("size"), arg("padding"))
    ));
-    def("get_face_chip", &get_face_chip, get_face_chip_with_defaults(
-	"Takes an image and a full_object_detection that references a face in that image and returns the face as a list of lists representing the image.  The face will be rotated upright and scaled to 150x150 pixels or with the optional specified size and padding.", 
-	(arg("img"), arg("face"), arg("size"), arg("padding"))
-    ));
-    def("get_face_chips", &get_face_chips, get_face_chips_with_defaults(
-	"Takes an image and a full_object_detections object that reference faces in that image and returns the faces as a list of list of lists representing the image.  The faces will be rotated upright and scaled to 150x150 pixels or with the optional specified size and padding.",
-	(arg("img"), arg("faces"), arg("size"), arg("padding"))
-    ));
    def("chinese_whispers_clustering", &chinese_whispers_clustering, (arg("descriptors"), arg("threshold")),
        "Takes a list of descriptors and returns a list that contains a label for each descriptor. Clustering is done using dlib::chinese_whispers."
        );
--- a/tools/python/src/image.cpp
+++ b/tools/python/src/image.cpp
@ -1,6 +1,7 @@
 #include <dlib/python.h>
 #include <boost/python/args.hpp>
 #include "dlib/pixel.h"
+#include <dlib/image_transforms.h>

 using namespace dlib;
 using namespace std;
@ -25,10 +26,10 @@ string print_rgb_pixel_repr(const rgb_pixel& p)
 }

 // ----------------------------------------------------------------------------------------
+
 void bind_image_classes()
 {
    using boost::python::arg;
-
    class_<rgb_pixel>("rgb_pixel")
        .def(init<unsigned char,unsigned char,unsigned char>( (arg("red"),arg("green"),arg("blue")) ))
        .def("__str__", &print_rgb_pixel_str)
--- a/tools/python/src/numpy_returns.cpp
+++ b/tools/python/src/numpy_returns.cpp
@ -0,0 +1,149 @@
+#include <dlib/python.h>
+#include <boost/python/args.hpp>
+#include "dlib/pixel.h"
+#include <dlib/image_transforms.h>
+
+#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
+#include <numpy/ndarrayobject.h>
+
+dlib::rand rnd_jitter;
+
+using namespace dlib;
+using namespace std;
+using namespace boost::python;
+
+// ----------------------------------------------------------------------------------------
+
+// Returns a Python list with num_jitters randomly jittered copies of img, each one a
+// (rows, cols, 3) uint8 numpy array.  When disturb_colors is true dlib::disturb_colors()
+// is applied to every copy as well.  Throws dlib::error if img is not an RGB image.
+boost::python::list get_jitter_images(object img, size_t num_jitters = 1, bool disturb_colors = false)
+{
+    if (!is_rgb_python_image(img))
+        throw dlib::error("Unsupported image type, must be RGB image.");
+
+    // Convert the image to matrix<rgb_pixel> for processing
+    matrix<rgb_pixel> img_mat;
+    assign_image(img_mat, numpy_rgb_image(img));
+
+    // The top level list (containing 1 or more images) to return to python
+    boost::python::list jitter_list;
+
+    for (size_t i = 0; i < num_jitters; ++i) {
+        // Get a jittered crop
+        matrix<rgb_pixel> crop = dlib::jitter_image(img_mat, rnd_jitter);
+        // If required disturb colors of the image
+        if(disturb_colors)
+            dlib::disturb_colors(crop, rnd_jitter);
+
+        // Size the numpy array from the crop itself so that the number of bytes
+        // copied below always matches the destination buffer.
+        npy_intp dims[3] = {
+            static_cast<npy_intp>(num_rows(crop)),
+            static_cast<npy_intp>(num_columns(crop)),
+            3
+        };
+
+        // handle<> takes ownership immediately and throws error_already_set when
+        // the allocation returned NULL, so the buffer is never touched in that case.
+        boost::python::handle<> owner(PyArray_SimpleNew(3, dims, NPY_UINT8));
+        npy_uint8 *outdata = (npy_uint8 *) PyArray_DATA((PyArrayObject*) owner.get());
+        memcpy(outdata, image_data(crop), num_rows(crop) * width_step(crop));
+
+        // Append image to jittered image list
+        jitter_list.append(object(owner));
+    }
+
+    return jitter_list;
+}
+
+BOOST_PYTHON_FUNCTION_OVERLOADS(get_jitter_images_with_defaults, get_jitter_images, 1, 3)
+
+// ----------------------------------------------------------------------------------------
+
+// Extracts one aligned face chip per entry of faces from img and returns them as a
+// Python list of (rows, cols, 3) uint8 numpy arrays.  size and padding are passed on
+// to get_face_chip_details().  Throws dlib::error if img is not an RGB image or if
+// faces is empty.
+boost::python::list get_face_chips (
+    object img,
+    const std::vector<full_object_detection>& faces,
+    size_t size = 150,
+    float padding = 0.25
+)
+{
+    if (!is_rgb_python_image(img))
+        throw dlib::error("Unsupported image type, must be RGB image.");
+
+    if (faces.size() < 1) {
+        throw dlib::error("No face were specified in the faces array.");
+    }
+
+    boost::python::list chips_list;
+
+    std::vector<chip_details> dets;
+    for (auto& f : faces)
+        dets.push_back(get_face_chip_details(f, size, padding));
+    dlib::array<matrix<rgb_pixel>> face_chips;
+    extract_image_chips(numpy_rgb_image(img), dets, face_chips);
+
+    for (auto& chip : face_chips)
+    {
+        // Size of the numpy array, taken from the chip so that it always agrees
+        // with the number of bytes copied below.
+        npy_intp dims[3] = {
+            static_cast<npy_intp>(num_rows(chip)),
+            static_cast<npy_intp>(num_columns(chip)),
+            3
+        };
+
+        // handle<> takes ownership immediately and throws error_already_set when
+        // the allocation returned NULL, so the buffer is never touched in that case.
+        boost::python::handle<> owner(PyArray_SimpleNew(3, dims, NPY_UINT8));
+        npy_uint8 *outdata = (npy_uint8 *) PyArray_DATA((PyArrayObject*) owner.get());
+        memcpy(outdata, image_data(chip), num_rows(chip) * width_step(chip));
+
+        // Append image to chips list
+        chips_list.append(object(owner));
+    }
+    return chips_list;
+}
+
+// Extracts the aligned chip for a single face from img and returns it as a
+// (rows, cols, 3) uint8 numpy array.  size and padding are passed on to
+// get_face_chip_details().  Throws dlib::error if img is not an RGB image.
+object get_face_chip (
+    object img,
+    const full_object_detection& face,
+    size_t size = 150,
+    float padding = 0.25
+)
+{
+    if (!is_rgb_python_image(img))
+        throw dlib::error("Unsupported image type, must be RGB image.");
+
+    matrix<rgb_pixel> chip;
+    extract_image_chip(numpy_rgb_image(img), get_face_chip_details(face, size, padding), chip);
+
+    // Size of the numpy array
+    npy_intp dims[3] = {
+        static_cast<npy_intp>(num_rows(chip)),
+        static_cast<npy_intp>(num_columns(chip)),
+        3
+    };
+
+    // handle<> takes ownership immediately and throws error_already_set when the
+    // allocation returned NULL, so the buffer is never touched in that case.
+    boost::python::handle<> owner(PyArray_SimpleNew(3, dims, NPY_UINT8));
+    npy_uint8 *outdata = (npy_uint8 *) PyArray_DATA((PyArrayObject *) owner.get());
+    memcpy(outdata, image_data(chip), num_rows(chip) * width_step(chip));
+    return object(owner);
+}
+
+BOOST_PYTHON_FUNCTION_OVERLOADS(get_face_chip_with_defaults, get_face_chip, 2, 4)
+BOOST_PYTHON_FUNCTION_OVERLOADS(get_face_chips_with_defaults, get_face_chips, 2, 4)
+
+
+// ----------------------------------------------------------------------------------------
+
+void bind_numpy_returns()
+{
+    using boost::python::arg;
+    import_array();
+
+    def("jitter_image", &get_jitter_images, get_jitter_images_with_defaults(
+    "Takes an image and returns a list of jittered images."
+    "The returned list contains num_jitters images (default is 1)."
+    "If disturb_colors is set to True, the colors of the image are disturbed (default is False)", 
+    (arg("img"), arg("num_jitters"), arg("disturb_colors"))
+    ));
+
+    def("get_face_chip", &get_face_chip, get_face_chip_with_defaults(
+	"Takes an image and a full_object_detection that references a face in that image and returns the face as a Numpy array representing the image.  The face will be rotated upright and scaled to 150x150 pixels or with the optional specified size and padding.", 
+	(arg("img"), arg("face"), arg("size"), arg("padding"))
+    ));
+
+    def("get_face_chips", &get_face_chips, get_face_chips_with_defaults(
+	"Takes an image and a full_object_detections object that reference faces in that image and returns the faces as a list of Numpy arrays representing the image.  The faces will be rotated upright and scaled to 150x150 pixels or with the optional specified size and padding.",
+	(arg("img"), arg("faces"), arg("size"), arg("padding"))
+    ));
+}
--- a/tools/python/src/numpy_returns_stub.cpp
+++ b/tools/python/src/numpy_returns_stub.cpp
@ -0,0 +1,67 @@
+#include <dlib/python.h>
+#include <boost/python/args.hpp>
+#include "dlib/pixel.h"
+#include <dlib/image_transforms.h>
+
+using namespace dlib;
+using namespace std;
+using namespace boost::python;
+
+// ----------------------------------------------------------------------------------------
+
+// Stand-in compiled when numpy was not found at build time: it keeps the signature
+// of the real binding but every call throws an explanatory dlib::error.
+boost::python::list get_jitter_images (
+    object img,
+    size_t num_jitters = 1,
+    bool disturb_colors = false
+)
+{
+    throw dlib::error("jitter_image is only supported if you compiled dlib with numpy installed!");
+}
+
+BOOST_PYTHON_FUNCTION_OVERLOADS(get_jitter_images_with_defaults, get_jitter_images, 1, 3)
+
+// ----------------------------------------------------------------------------------------
+
+// Stand-in compiled when numpy was not found at build time: it keeps the signature
+// of the real binding but every call throws an explanatory dlib::error.
+boost::python::list get_face_chips (
+    object img,
+    const std::vector<full_object_detection>& faces,
+    size_t size = 150,
+    float padding = 0.25
+)
+{
+    throw dlib::error("get_face_chips is only supported if you compiled dlib with numpy installed!");
+}
+
+// Stand-in compiled when numpy was not found at build time: it keeps the signature
+// of the real binding but every call throws an explanatory dlib::error.
+object get_face_chip (
+    object img,
+    const full_object_detection& face,
+    size_t size = 150,
+    float padding = 0.25
+)
+{
+    throw dlib::error("get_face_chip is only supported if you compiled dlib with numpy installed!");
+}
+
+
+BOOST_PYTHON_FUNCTION_OVERLOADS(get_face_chip_with_defaults, get_face_chip, 2, 4)
+BOOST_PYTHON_FUNCTION_OVERLOADS(get_face_chips_with_defaults, get_face_chips, 2, 4)
+
+// ----------------------------------------------------------------------------------------
+
+// Registers the stub versions of jitter_image, get_face_chip and get_face_chips with
+// the Python module, so the names exist and report a clear error when called.
+void bind_numpy_returns()
+{
+    using boost::python::arg;
+
+    // Adjacent string literals are concatenated, hence the trailing spaces.
+    def("jitter_image", &get_jitter_images, get_jitter_images_with_defaults(
+        "Takes an image and returns a list of jittered images. "
+        "The returned list contains num_jitters images (default is 1). "
+        "If disturb_colors is set to True, the colors of the image are disturbed (default is False)",
+        (arg("img"), arg("num_jitters"), arg("disturb_colors"))
+    ));
+
+    def("get_face_chip", &get_face_chip, get_face_chip_with_defaults(
+        "Takes an image and a full_object_detection that references a face in that image and returns the face as a Numpy array representing the image.  The face will be rotated upright and scaled to 150x150 pixels or with the optional specified size and padding.",
+        (arg("img"), arg("face"), arg("size"), arg("padding"))
+    ));
+
+    def("get_face_chips", &get_face_chips, get_face_chips_with_defaults(
+        "Takes an image and a full_object_detections object that reference faces in that image and returns the faces as a list of Numpy arrays representing the image.  The faces will be rotated upright and scaled to 150x150 pixels or with the optional specified size and padding.",
+        (arg("img"), arg("faces"), arg("size"), arg("padding"))
+    ));
+}