From 1e830a32855a1b7b0e7b0bc132cf8e67b227e0bf Mon Sep 17 00:00:00 2001 From: Branko Kokanovic Date: Tue, 28 Aug 2018 00:37:46 +0200 Subject: [PATCH 1/2] Face recognition This change adds support to retrieve 128D face descriptor for a given landmark. Since now we have full pipeline, README.md has "general usage" section and integration test is added. Also, return from FaceLandmarkDetection is changed, so it can be given to FaceRecognition without changes. All obtained values are crosschecked to match with values from python versions (however, if num_jitters is > 1 in FaceRecognition, values don't match between PHP and Python, I suspect it is related to usage of dlib::rand, but still investigating).. --- README.md | 81 ++++++++-- config.m4 | 3 +- pdlib.cc | 36 +++++ php_pdlib.h | 16 ++ src/cnn_face_detection.cc | 4 +- src/face_landmark_detection.cc | 38 ++--- src/face_recognition.cc | 189 ++++++++++++++++++++++++ src/face_recognition.h | 58 ++++++++ tests/face_recognition_ctor_error.phpt | 15 ++ tests/integration_face_recognition.phpt | 60 ++++++++ tests/lenna.jpg | Bin 0 -> 470746 bytes 11 files changed, 468 insertions(+), 32 deletions(-) create mode 100644 src/face_recognition.cc create mode 100644 src/face_recognition.h create mode 100644 tests/face_recognition_ctor_error.phpt create mode 100644 tests/integration_face_recognition.phpt create mode 100755 tests/lenna.jpg diff --git a/README.md b/README.md index ed42ab8..05a3412 100644 --- a/README.md +++ b/README.md @@ -1,16 +1,15 @@ # PDlib - A PHP extension for Dlib -A PHP extension ## Requirements - Dlib 19.13+ - PHP 7.0+ -- C++ 11 +- C++11 -## Dependence +## Dependencies ### Dlib -Install Dlib as share library +Install Dlib as shared library ```bash git clone git@github.com:davisking/dlib.git @@ -33,23 +32,62 @@ make sudo make install ``` -## Configure +### Configure PHP installation -``` +```bash vim youpath/php.ini ``` -Write the below content into `php.ini` +Append the content below into `php.ini` ``` [pdlib] 
extension="pdlib.so" ``` +## Tests + +For tests, you will need to have bz2 extension installed. On Ubuntu, it boils down to: +```bash +sudo apt-get install php-bz2 +``` + +After you successfully compiled everything, just run: +```bash +make test +``` + ## Usage +### General Usage + +A good starting point is `tests/integration_face_recognition.phpt`. Check that first. + +Basically, if you just quickly want to get from your image to 128D descriptor of faces in image, +here is a really minimal example: + +```php +<?php +$img_path = "image.jpg"; +$fd = new CnnFaceDetection("detection_cnn_model.dat"); +$detected_faces = $fd->detect($img_path); +foreach($detected_faces as $detected_face) { + $fld = new FaceLandmarkDetection("landmark_model.dat"); + $landmarks = $fld->detect($img_path, $detected_face); + $fr = new FaceRecognition("recognition_model.dat"); + $descriptor = $fr->computeDescriptor($img_path, $landmarks); + // Optionally use descriptor later in `dlib_chinese_whispers` function +} +``` + +Where to get these models can be found on the DLib website, as well as in the `tests/integration_face_recognition.phpt` test. + +### Specific use cases #### face detection + +If you want to use HOG based approach: + ```php detect("image.jpg"); +// $detected_face is indexed array, where values are assoc arrays with "top", "bottom", "left" and "right" values +``` + +CNN model can get you slightly better results, but is much, much more demanding (CPU and memory, GPU is also preferred). + #### face landmark detection ```php @@ -68,7 +116,6 @@ var_dump($faceCount); // face landmark detection $landmarks = dlib_face_landmark_detection("~/a.jpg"); var_dump($landmarks); - ``` Additionally, you can also use class-based approach: @@ -83,6 +130,19 @@ $parts = $fld->detect("path/to/image.jpg", $rect); Note that, if you use class-based approach, you need to feed bounding box rectangle with values obtained from `dlib_face_detection`. If you use `dlib_face_landmark_detection`, everything is already done for you (and you are using HOG face detection model).
+#### face recognition (aka getting face descriptor) + +```php +<?php +$fr = new FaceRecognition("recognition_model.dat"); +$landmarks = array( + "rect" => $rect_of_faces_obtained_with_CnnFaceDetection, + "parts" => $parts_obtained_with_FaceLandmarkDetection); +$descriptor = $fr->computeDescriptor($img_path, $landmarks); +// $descriptor is 128D array +``` + #### chinese whispers Provides raw access to dlib's `chinese_whispers` function. @@ -98,13 +158,12 @@ Returned value is also numeric array, containing obtained labels. // $labels will look like [0,0,1]. $edges = [[0,0], [0,1], [1,1], [2,2]]; $labels = dlib_chinese_whispers($edges); - ``` ## Features - [x] 1.Face Detection - [x] 2.Face Landmark Detection -- [ ] 3.Deep Face Recognition +- [x] 3.Deep Face Recognition - [x] 4.Deep Learning Face Detection - [x] 5. Raw chinese_whispers diff --git a/config.m4 b/config.m4 index b8143b3..7e990f3 100644 --- a/config.m4 +++ b/config.m4 @@ -28,7 +28,8 @@ if test "$PHP_PDLIB" != "no"; then src/chinese_whispers.cc \ src/face_detection.cc \ src/face_landmark_detection.cc \ - src/cnn_face_detection.cc" + src/face_recognition.cc \ + src/cnn_face_detection.cc " AC_MSG_CHECKING(for pkg-config) if test ! 
-f "$PKG_CONFIG"; then diff --git a/pdlib.cc b/pdlib.cc index 1fa615e..76bf514 100644 --- a/pdlib.cc +++ b/pdlib.cc @@ -30,6 +30,7 @@ extern "C" { #include "php_pdlib.h" #include "src/chinese_whispers.h" #include "src/face_detection.h" +#include "src/face_recognition.h" #include "src/cnn_face_detection.h" #include "src/face_landmark_detection.h" @@ -46,6 +47,9 @@ static zend_object_handlers cnn_face_detection_obj_handlers; static zend_class_entry *face_landmark_detection_ce = nullptr; static zend_object_handlers face_landmark_detection_obj_handlers; +static zend_class_entry *face_recognition_ce = nullptr; +static zend_object_handlers face_recognition_obj_handlers; + /* {{{ PHP_INI */ /* Remove comments and fill if you need to have entries in php.ini @@ -142,6 +146,29 @@ static void php_face_landmark_detection_free(zend_object *object) zend_object_std_dtor(object); } +const zend_function_entry face_recognition_class_methods[] = { + PHP_ME(FaceRecognition, __construct, face_recognition_ctor_arginfo, ZEND_ACC_PUBLIC) + PHP_ME(FaceRecognition, computeDescriptor, face_recognition_compute_descriptor_arginfo, ZEND_ACC_PUBLIC) + PHP_FE_END +}; + +zend_object* php_face_recognition_new(zend_class_entry *class_type TSRMLS_DC) +{ + face_recognition *fr = (face_recognition*)ecalloc(1, sizeof(face_recognition)); + zend_object_std_init(&fr->std, class_type TSRMLS_CC); + object_properties_init(&fr->std, class_type); + fr->std.handlers = &face_recognition_obj_handlers; + + return &fr->std; +} + +static void php_face_recognition_free(zend_object *object) +{ + face_recognition *fr = (face_recognition*)((char*)object - XtOffsetOf(face_recognition, std)); + delete fr->net; + zend_object_std_dtor(object); +} + /* {{{ PHP_MINIT_FUNCTION */ PHP_MINIT_FUNCTION(pdlib) @@ -165,6 +192,15 @@ PHP_MINIT_FUNCTION(pdlib) face_landmark_detection_obj_handlers.offset = XtOffsetOf(face_landmark_detection, std); face_landmark_detection_obj_handlers.free_obj = php_face_landmark_detection_free; + // 
FaceRecognition class definition + // + INIT_CLASS_ENTRY(ce, "FaceRecognition", face_recognition_class_methods); + face_recognition_ce = zend_register_internal_class(&ce TSRMLS_CC); + face_recognition_ce->create_object = php_face_recognition_new; + memcpy(&face_recognition_obj_handlers, zend_get_std_object_handlers(), sizeof(zend_object_handlers)); + face_recognition_obj_handlers.offset = XtOffsetOf(face_recognition, std); + face_recognition_obj_handlers.free_obj = php_face_recognition_free; + /* If you have INI entries, uncomment these lines REGISTER_INI_ENTRIES(); */ diff --git a/php_pdlib.h b/php_pdlib.h index 27f5223..ddd3dfb 100644 --- a/php_pdlib.h +++ b/php_pdlib.h @@ -61,6 +61,22 @@ ZEND_END_MODULE_GLOBALS(pdlib) ZEND_TSRMLS_CACHE_EXTERN() #endif +#define PARSE_LONG_FROM_ARRAY(hashtable, key, error_key_missing, error_key_not_long) \ + zval* data##key; \ + /* Tries to find given key in array */ \ + data##key = zend_hash_str_find(hashtable, #key, sizeof(#key)-1); \ + if (data##key == nullptr) { \ + zend_throw_exception_ex(zend_ce_exception, 0 TSRMLS_CC, #error_key_missing); \ + return; \ + } \ + \ + /* We also need to check proper type of value in associative array */ \ + if (Z_TYPE_P(data##key) != IS_LONG) { \ + zend_throw_exception_ex(zend_ce_exception, 0 TSRMLS_CC, #error_key_not_long); \ + return; \ + } \ + zend_long key = Z_LVAL_P(data##key); \ + #endif /* PHP_PDLIB_H */ diff --git a/src/cnn_face_detection.cc b/src/cnn_face_detection.cc index c4fad7a..16f9d66 100644 --- a/src/cnn_face_detection.cc +++ b/src/cnn_face_detection.cc @@ -79,13 +79,13 @@ PHP_METHOD(CnnFaceDetection, detect) auto dets = (*pnet)(img); int rect_count = 0; array_init(return_value); - + // Scale the detection locations back to the original image size // if the image was upscaled. 
// for (auto&& d: dets) { d.rect = pyr.rect_down(d.rect, upsample_num); - // Create new assoc array with dimensions of found rectt and confidence + // Create new assoc array with dimensions of found rect and confidence // zval rect_arr; array_init(&rect_arr); diff --git a/src/face_landmark_detection.cc b/src/face_landmark_detection.cc index 3c205c7..d254c00 100644 --- a/src/face_landmark_detection.cc +++ b/src/face_landmark_detection.cc @@ -102,20 +102,8 @@ PHP_METHOD(FaceLandmarkDetection, __construct) // Helper macro to automatically have parsing of "top"/"bottom"/"left"/"right" #define PARSE_BOUNDING_BOX_EDGE(side) \ - zval* data##side; \ - /* Tries to find given key in array */ \ - data##side = zend_hash_str_find(bounding_box_hash, #side, sizeof(#side)-1); \ - if (data##side == nullptr) { \ - zend_throw_exception_ex(zend_ce_exception, 0 TSRMLS_CC, "Bounding box (second argument) is missing " #side "key"); \ - return; \ - } \ - \ - /* We also need to check proper type of value in associative array */ \ - if (Z_TYPE_P(data##side) != IS_LONG) { \ - zend_throw_exception_ex(zend_ce_exception, 0 TSRMLS_CC, "Value of bounding box's (second argument) " #side " key is not long type"); \ - return; \ - } \ - zend_long side = Z_LVAL_P(data##side); \ + PARSE_LONG_FROM_ARRAY(bounding_box_hash, side, \ + "Bounding box (second argument) is missing " #side "key", "Value of bounding box's (second argument) " #side " key is not long type") PHP_METHOD(FaceLandmarkDetection, detect) { @@ -127,15 +115,15 @@ PHP_METHOD(FaceLandmarkDetection, detect) // Parse path to image and bounding box. Bounding box is associative array of 4 elements - "top", "bottom", "left" and "right". 
// if (zend_parse_parameters(ZEND_NUM_ARGS(), "sa", &img_path, &img_path_len, &bounding_box) == FAILURE){ - zend_throw_exception_ex(zend_ce_exception, 0 TSRMLS_CC, "Unable to parse detect arguments"); + zend_throw_exception_ex(zend_ce_exception, 0 TSRMLS_CC, "Unable to parse detect arguments."); return; } // Check that bounding box have exactly 4 elements HashTable *bounding_box_hash = Z_ARRVAL_P(bounding_box); uint32_t bounding_box_num_elements = zend_hash_num_elements(bounding_box_hash); - if (bounding_box_num_elements != 4) { - zend_throw_exception_ex(zend_ce_exception, 0 TSRMLS_CC, "Bounding box (second argument) needs to have exactly 4 elements"); + if (bounding_box_num_elements < 4) { + zend_throw_exception_ex(zend_ce_exception, 0 TSRMLS_CC, "Bounding box (second argument) needs to have at least 4 elements"); return; } @@ -158,14 +146,28 @@ PHP_METHOD(FaceLandmarkDetection, detect) // Each key is one part from shape. Value of each part is associative array of keys "x" and "y". // array_init(return_value); + + zval rect_arr, parts_arr; + array_init(&rect_arr); + array_init(&parts_arr); + for (int i = 0; i < shape.num_parts(); i++) { zval part; array_init(&part); dlib::point p = shape.part(i); add_assoc_long(&part, "x", p.x()); add_assoc_long(&part, "y", p.y()); - add_next_index_zval(return_value, &part); + add_next_index_zval(&parts_arr, &part); } + + const rectangle& r = shape.get_rect(); + add_assoc_long(&rect_arr, "left", r.left()); + add_assoc_long(&rect_arr, "top", r.top()); + add_assoc_long(&rect_arr, "right", r.right()); + add_assoc_long(&rect_arr, "bottom", r.bottom()); + + add_assoc_zval(return_value, "rect", &rect_arr); + add_assoc_zval(return_value, "parts", &parts_arr); } catch (exception& e) { zend_throw_exception_ex(zend_ce_exception, 0 TSRMLS_CC, e.what()); return; diff --git a/src/face_recognition.cc b/src/face_recognition.cc new file mode 100644 index 0000000..99a3d97 --- /dev/null +++ b/src/face_recognition.cc @@ -0,0 +1,189 @@ +#include 
"../php_pdlib.h" +#include "face_recognition.h" + +#include + +#include + +using namespace std; +using namespace dlib; + +static inline face_recognition *php_face_recognition_from_obj(zend_object *obj) { + return (face_recognition*)((char*)(obj) - XtOffsetOf(face_recognition, std)); +} + +#define Z_FACE_RECOGNITION_P(zv) php_face_recognition_from_obj(Z_OBJ_P((zv))) + +PHP_METHOD(FaceRecognition, __construct) +{ + char *sz_face_recognition_model_path; + size_t face_recognition_model_path_len; + + face_recognition *fr = Z_FACE_RECOGNITION_P(getThis()); + + if (NULL == fr) { + php_error_docref(NULL TSRMLS_CC, E_ERROR, "Unable to find obj in FaceRecognition::__construct()"); + return; + } + + if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", + &sz_face_recognition_model_path, &face_recognition_model_path_len) == FAILURE){ + zend_throw_exception_ex(zend_ce_exception, 0 TSRMLS_CC, "Unable to parse face_recognition_model_path"); + return; + } + + try { + string face_recognition_model_path(sz_face_recognition_model_path, face_recognition_model_path_len); + fr->net = new anet_type; + deserialize(face_recognition_model_path) >> *(fr->net); + } catch (exception& e) { + zend_throw_exception_ex(zend_ce_exception, 0 TSRMLS_CC, e.what()); + return; + } +} + +std::vector> pdlib_jitter_image( + const matrix& img, + const int num_jitters, + dlib::rand& rnd) { + std::vector> crops; + for (int i = 0; i < num_jitters; ++i) + crops.push_back(dlib::jitter_image(img,rnd)); + return crops; +} + + +// Helper macro to automatically have parsing of "top"/"bottom"/"left"/"right" +// +#define PARSE_BOUNDING_BOX_EDGE(side) \ + PARSE_LONG_FROM_ARRAY(rect_hash, side, \ + "Shape's rect array is missing " #side "key", "Shape's rect array's " #side " key is not long type") + +// Helper macro to parse "x"/"y" +// +#define PARSE_POINT(coord) \ + PARSE_LONG_FROM_ARRAY(part_hash, coord, \ + #coord " coordinate key is missing in parts array", #coord " coordinate key is not of long type") + + 
+// FaceRecognition::computeDescriptor(string $img_path, array $shape [, int $num_jitters = 1])
+//
+// Computes the face descriptor for one face in the image stored at $img_path.
+// $shape must contain exactly two keys:
+//   "rect"  - assoc array with long "top", "bottom", "left" and "right" values,
+//   "parts" - indexed array of 5 or 68 landmarks, each an assoc array with long "x" and "y".
+// A 150px face chip (0.25 padding) is cut out around the landmarks and fed to the network.
+// When $num_jitters > 1, the result is the mean of the descriptors of that many jittered
+// copies of the chip. Returns an indexed array of doubles. Throws Exception on malformed
+// arguments and on any exception raised while loading the image or running the network.
+//
+PHP_METHOD(FaceRecognition, computeDescriptor)
+{
+    char *img_path;
+    size_t img_path_len;
+    zval *shape;
+    // The "l" specifier stores a zend_long, which is wider than long on some platforms.
+    //
+    zend_long num_jitters = 1;
+
+    if (zend_parse_parameters(ZEND_NUM_ARGS(), "sa|l", &img_path, &img_path_len, &shape, &num_jitters) == FAILURE){
+        zend_throw_exception_ex(zend_ce_exception, 0 TSRMLS_CC, "Unable to parse computeDescriptor arguments");
+        return;
+    }
+
+    HashTable *shape_hash = Z_ARRVAL_P(shape);
+    if (zend_hash_num_elements(shape_hash) != 2) {
+        zend_throw_exception_ex(zend_ce_exception, 0 TSRMLS_CC, "Shape (second argument) needs to have exactly 2 elements - keys \"rect\" and \"parts\"");
+        return;
+    }
+
+    // Bounding box of the face. The PARSE_BOUNDING_BOX_EDGE macro reads from `rect_hash`
+    // and declares a zend_long named after the key (it returns from this method on failure).
+    //
+    zval *rect_zval = zend_hash_str_find(shape_hash, "rect", sizeof("rect")-1);
+    if (rect_zval == nullptr) {
+        zend_throw_exception_ex(zend_ce_exception, 0 TSRMLS_CC, "Shape (second argument) array needs to have \"rect\" key");
+        return;
+    }
+    if (Z_TYPE_P(rect_zval) != IS_ARRAY) {
+        zend_throw_exception_ex(zend_ce_exception, 0 TSRMLS_CC, "Value of shape's key \"rect\" must be array");
+        return;
+    }
+    HashTable *rect_hash = Z_ARRVAL_P(rect_zval);
+    PARSE_BOUNDING_BOX_EDGE(top)
+    PARSE_BOUNDING_BOX_EDGE(bottom)
+    PARSE_BOUNDING_BOX_EDGE(left)
+    PARSE_BOUNDING_BOX_EDGE(right)
+    rectangle rect(left, top, right, bottom);
+
+    // Landmarks of the face
+    //
+    zval *parts_zval = zend_hash_str_find(shape_hash, "parts", sizeof("parts")-1);
+    if (parts_zval == nullptr) {
+        zend_throw_exception_ex(zend_ce_exception, 0 TSRMLS_CC, "Shape (second argument) array needs to have \"parts\" key");
+        return;
+    }
+    if (Z_TYPE_P(parts_zval) != IS_ARRAY) {
+        zend_throw_exception_ex(zend_ce_exception, 0 TSRMLS_CC, "Value of shape's key \"parts\" must be array");
+        return;
+    }
+    HashTable *parts_hash = Z_ARRVAL_P(parts_zval);
+    const uint32_t parts_count = zend_hash_num_elements(parts_hash);
+
+    if ((parts_count != 5) && (parts_count != 68)) {
+        zend_throw_exception_ex(zend_ce_exception, 0 TSRMLS_CC,
+            "The full_object_detection must use the iBUG 300W 68 point face landmark style or dlib's 5 point style");
+        return;
+    }
+
+    // Storage is created only after the count has been validated. Every key is checked to be
+    // in [0, parts_count) and array keys are unique, so once the loop finishes without error
+    // each of the parts_count slots has been written exactly once.
+    //
+    std::vector<point> parts(parts_count);
+
+    HashPosition parts_pos;
+    for (zend_hash_internal_pointer_reset_ex(parts_hash, &parts_pos);
+        zend_hash_has_more_elements_ex(parts_hash, &parts_pos) == SUCCESS;
+        zend_hash_move_forward_ex(parts_hash, &parts_pos)
+    ) {
+        zend_string* str_index = nullptr;
+        zend_ulong num_index;
+        zval *part_zval = zend_hash_get_current_data_ex(parts_hash, &parts_pos);
+        switch (zend_hash_get_current_key_ex(parts_hash, &str_index, &num_index, &parts_pos)) {
+            case HASH_KEY_IS_LONG:
+                if (Z_TYPE_P(part_zval) == IS_ARRAY)
+                {
+                    HashTable *part_hash = Z_ARRVAL_P(part_zval);
+                    PARSE_POINT(x)
+                    PARSE_POINT(y)
+                    // Valid slots are 0 .. parts_count-1, so an index equal to parts_count
+                    // must be rejected as well (negative keys arrive here as huge unsigned values).
+                    //
+                    if (num_index >= parts_count) {
+                        zend_throw_exception_ex(zend_ce_exception, 0 TSRMLS_CC, "Internal error, bad parsing of parts array");
+                        return;
+                    }
+                    parts[num_index] = point(x, y);
+                } else {
+                    zend_throw_exception_ex(zend_ce_exception, 0 TSRMLS_CC, "Values from parts array must be arrays with \"x\" and \"y\" keys");
+                    return;
+                }
+                break;
+            case HASH_KEY_IS_STRING:
+                zend_throw_exception_ex(zend_ce_exception, 0 TSRMLS_CC, "Parts array must be indexed and it contains string keys");
+                return;
+        }
+    }
+
+    try {
+        face_recognition *fr = Z_FACE_RECOGNITION_P(getThis());
+        // net is only assigned in __construct; the object storage itself is zero-filled.
+        //
+        if (fr->net == nullptr) {
+            zend_throw_exception_ex(zend_ce_exception, 0 TSRMLS_CC, "FaceRecognition model is not loaded");
+            return;
+        }
+
+        full_object_detection fod(rect, parts);
+        matrix<rgb_pixel> img;
+        load_image(img, img_path);
+
+        std::vector<chip_details> dets;
+        dets.push_back(get_face_chip_details(fod, 150, 0.25));
+        dlib::array<matrix<rgb_pixel>> face_chips;
+        extract_image_chips(img, dets, face_chips);
+
+        array_init(return_value);
+        matrix<float,0,1> face_descriptor;
+        if (num_jitters <= 1) {
+            std::vector<matrix<float,0,1>> face_descriptors = fr->net->operator()(face_chips, 16);
+            face_descriptor = face_descriptors[0];
+        } else {
+            // NOTE(review): fr->rnd is used here, but nothing visible in this patch constructs
+            // it (the object comes from ecalloc and __construct only sets net) - confirm it is
+            // initialised; this may be related to jittered results differing from Python.
+            //
+            matrix<rgb_pixel>& face_chip = face_chips[0];
+            face_descriptor = mean(mat(fr->net->operator()(
+                pdlib_jitter_image(face_chip, static_cast<int>(num_jitters), fr->rnd), 16)));
+        }
+
+        for (auto& d : face_descriptor) {
+            add_next_index_double(return_value, d);
+        }
+    } catch (const exception& e) {
+        // e.what() is data, not a format string.
+        //
+        zend_throw_exception_ex(zend_ce_exception, 0 TSRMLS_CC, "%s", e.what());
+        return;
+    }
+}
diff --git a/src/face_recognition.h b/src/face_recognition.h
new file mode 100644
index 0000000..0939d41
--- /dev/null
+++ b/src/face_recognition.h
@@ -0,0 +1,58 @@
+//
+// Created by branko at kokanovic dot org on 2018/8/26.
+//
+
+#ifndef PHP_DLIB_FACE_RECOGNITION_H
+#define PHP_DLIB_FACE_RECOGNITION_H
+
+#include
+
+using namespace dlib;
+
+template