commit
54f3b75139
@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2018 pdlib contributors
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
@ -0,0 +1,189 @@
|
||||
#include "../php_pdlib.h"
|
||||
#include "face_recognition.h"
|
||||
|
||||
#include <zend_exceptions.h>
|
||||
|
||||
#include <dlib/image_io.h>
|
||||
|
||||
using namespace std;
|
||||
using namespace dlib;
|
||||
|
||||
// Given the zend_object embedded in a face_recognition wrapper, returns a
// pointer to the wrapper itself by stepping back by the offset of its `std`
// member.
static inline face_recognition *php_face_recognition_from_obj(zend_object *obj) {
    char *wrapper_base = (char*)(obj);
    wrapper_base -= XtOffsetOf(face_recognition, std);
    return (face_recognition*)wrapper_base;
}
|
||||
|
||||
// Extracts the zend_object from the zval `zv` and maps it to its
// face_recognition wrapper via php_face_recognition_from_obj().
#define Z_FACE_RECOGNITION_P(zv) php_face_recognition_from_obj(Z_OBJ_P((zv)))
|
||||
|
||||
// FaceRecognition::__construct(string $face_recognition_model_path)
//
// Deserializes a dlib network of type anet_type from the given file and
// stores it on the object for later use.
//
// Throws a PHP Exception when:
//   - the single string argument cannot be parsed, or
//   - dlib fails while allocating/deserializing the network (the C++
//     exception message becomes the PHP exception message).
PHP_METHOD(FaceRecognition, __construct)
{
    char *sz_face_recognition_model_path;
    size_t face_recognition_model_path_len;

    face_recognition *fr = Z_FACE_RECOGNITION_P(getThis());

    // Defensive check kept from the original implementation.
    if (NULL == fr) {
        php_error_docref(NULL TSRMLS_CC, E_ERROR, "Unable to find obj in FaceRecognition::__construct()");
        return;
    }

    if (zend_parse_parameters(ZEND_NUM_ARGS(), "s",
        &sz_face_recognition_model_path, &face_recognition_model_path_len) == FAILURE){
        zend_throw_exception_ex(zend_ce_exception, 0 TSRMLS_CC, "Unable to parse face_recognition_model_path");
        return;
    }

    // Load into a local first so the object never ends up pointing at a
    // half-loaded network when deserialization fails.
    anet_type *loaded_net = nullptr;
    try {
        string face_recognition_model_path(sz_face_recognition_model_path, face_recognition_model_path_len);
        loaded_net = new anet_type;
        deserialize(face_recognition_model_path) >> *loaded_net;
    } catch (const exception& e) {
        delete loaded_net;
        // The message is passed as an argument, never as the format string:
        // it may contain '%' characters (e.g. taken from a file path).
        zend_throw_exception_ex(zend_ce_exception, 0 TSRMLS_CC, "%s", e.what());
        return;
    }

    fr->net = loaded_net;
}
|
||||
|
||||
// Produces `num_jitters` randomly jittered copies of `img` by calling
// dlib::jitter_image() once per copy with the supplied random generator.
//
// img         - source image to jitter.
// num_jitters - number of copies to generate; values <= 0 yield an empty vector.
// rnd         - random generator passed to dlib::jitter_image() on every call.
//
// Returns the jittered images in generation order.
std::vector<matrix<rgb_pixel>> pdlib_jitter_image(
    const matrix<rgb_pixel>& img,
    const int num_jitters,
    dlib::rand& rnd) {
    std::vector<matrix<rgb_pixel>> crops;
    // The final size is known up front; reserving avoids relocating image
    // matrices while the vector grows. Guarded so a negative count is never
    // converted to a huge unsigned size.
    if (num_jitters > 0)
        crops.reserve(num_jitters);
    for (int i = 0; i < num_jitters; ++i)
        crops.push_back(dlib::jitter_image(img,rnd));
    return crops;
}
|
||||
|
||||
|
||||
// Helper macro that parses one bounding box edge ("top"/"bottom"/"left"/"right")
// out of the hash table named `rect_hash`, which must be in scope where the
// macro is expanded. Parsing and error reporting are delegated to
// PARSE_LONG_FROM_ARRAY; callers in this file rely on the expansion
// introducing a local variable named after `side`.
//
#define PARSE_BOUNDING_BOX_EDGE(side) \
    PARSE_LONG_FROM_ARRAY(rect_hash, side, \
        "Shape's rect array is missing " #side " key", "Shape's rect array's " #side " key is not long type")

// Helper macro that parses one point coordinate ("x"/"y") out of the hash
// table named `part_hash`, which must be in scope where the macro is expanded.
// Parsing and error reporting are delegated to PARSE_LONG_FROM_ARRAY; callers
// in this file rely on the expansion introducing a local variable named after
// `coord`.
//
#define PARSE_POINT(coord) \
    PARSE_LONG_FROM_ARRAY(part_hash, coord, \
        #coord " coordinate key is missing in parts array", #coord " coordinate key is not of long type")
|
||||
|
||||
|
||||
// FaceRecognition::computeDescriptor(string $img_path, array $landmarks [, int $num_jitters = 1])
//
// Computes the face descriptor for one face in the image at `img_path`.
//
// $landmarks must be an array with exactly two keys:
//   "rect"  - array with integer "top", "bottom", "left" and "right" keys;
//   "parts" - integer-indexed array of 5 or 68 arrays, each with integer
//             "x" and "y" keys.
// $num_jitters - when greater than 1, the descriptor is the mean of the
//                network output over that many jittered copies of the face
//                chip; otherwise the network is run once on the face chip.
//
// Returns a PHP array of doubles (one entry per descriptor component).
// Throws a PHP Exception on malformed arguments or when dlib raises a C++
// exception (image loading, chip extraction, network evaluation).
PHP_METHOD(FaceRecognition, computeDescriptor)
{
    char *img_path;
    size_t img_path_len;
    zval *shape;
    // The "l" specifier writes a zend_long, which is not `long` on every platform.
    zend_long num_jitters = 1;

    if (zend_parse_parameters(ZEND_NUM_ARGS(), "sa|l", &img_path, &img_path_len, &shape, &num_jitters) == FAILURE){
        zend_throw_exception_ex(zend_ce_exception, 0 TSRMLS_CC, "Unable to parse computeDescriptor arguments");
        return;
    }

    HashTable *shape_hash = Z_ARRVAL_P(shape);
    uint32_t shape_hash_num_elements = zend_hash_num_elements(shape_hash);
    if (shape_hash_num_elements != 2) {
        zend_throw_exception_ex(zend_ce_exception, 0 TSRMLS_CC, "Shape (second argument) needs to have exactly 2 elements - keys \"rect\" and \"parts\"");
        return;
    }

    // --- "rect": bounding box of the face -----------------------------------
    zval *rect_zval = zend_hash_str_find(shape_hash, "rect", sizeof("rect")-1);
    if (rect_zval == nullptr) {
        zend_throw_exception_ex(zend_ce_exception, 0 TSRMLS_CC, "Shape (second argument) array needs to have \"rect\" key");
        return;
    }
    if (Z_TYPE_P(rect_zval) != IS_ARRAY) {
        zend_throw_exception_ex(zend_ce_exception, 0 TSRMLS_CC, "Value of shape's key \"rect\" must be array");
        return;
    }
    // NOTE: the PARSE_* macros refer to `rect_hash` / `part_hash` by name and
    // return from this method on failure, so these locals must keep their names.
    HashTable *rect_hash = Z_ARRVAL_P(rect_zval);
    PARSE_BOUNDING_BOX_EDGE(top)
    PARSE_BOUNDING_BOX_EDGE(bottom)
    PARSE_BOUNDING_BOX_EDGE(left)
    PARSE_BOUNDING_BOX_EDGE(right)
    rectangle rect(left, top, right, bottom);

    // --- "parts": landmark points -------------------------------------------
    zval *parts_zval = zend_hash_str_find(shape_hash, "parts", sizeof("parts")-1);
    if (parts_zval == nullptr) {
        zend_throw_exception_ex(zend_ce_exception, 0 TSRMLS_CC, "Shape (second argument) array needs to have \"parts\" key");
        return;
    }
    if (Z_TYPE_P(parts_zval) != IS_ARRAY) {
        zend_throw_exception_ex(zend_ce_exception, 0 TSRMLS_CC, "Value of shape's key \"parts\" must be array");
        return;
    }
    HashTable *parts_hash = Z_ARRVAL_P(parts_zval);
    HashPosition parts_pos;
    uint32_t parts_count = zend_hash_num_elements(parts_hash);

    // Validate the count BEFORE allocating storage sized from user input.
    if ((parts_count != 5) && (parts_count != 68)) {
        zend_throw_exception_ex(zend_ce_exception, 0 TSRMLS_CC,
            "The full_object_detection must use the iBUG 300W 68 point face landmark style or dlib's 5 point style");
        return;
    }

    // Heap-backed, bounds-known storage instead of a variable-length stack array.
    std::vector<point> parts(parts_count);

    for (zend_hash_internal_pointer_reset_ex(parts_hash, &parts_pos);
        zend_hash_has_more_elements_ex(parts_hash, &parts_pos) == SUCCESS;
        zend_hash_move_forward_ex(parts_hash, &parts_pos)
    ) {
        zend_string* str_index = {0};
        zend_ulong num_index;
        zval *part_zval = zend_hash_get_current_data_ex(parts_hash, &parts_pos);
        switch (zend_hash_get_current_key_ex(parts_hash, &str_index, &num_index, &parts_pos)) {
            case HASH_KEY_IS_LONG:
                if (Z_TYPE_P(part_zval) == IS_ARRAY)
                {
                    HashTable *part_hash = Z_ARRVAL_P(part_zval);
                    PARSE_POINT(x)
                    PARSE_POINT(y)
                    // Valid indices are 0 .. parts_count-1; an index equal to
                    // parts_count would write one element past the end.
                    if (num_index >= parts_count) {
                        zend_throw_exception_ex(zend_ce_exception, 0 TSRMLS_CC, "Internal error, bad parsing of parts array");
                        return;
                    }
                    parts[num_index] = point(x, y);
                } else {
                    zend_throw_exception_ex(zend_ce_exception, 0 TSRMLS_CC, "Values from parts array must be arrays with \"x\" and \"y\" keys");
                    return;
                }
                break;
            case HASH_KEY_IS_STRING:
                zend_throw_exception_ex(zend_ce_exception, 0 TSRMLS_CC, "Parts array must be indexed and it contains string keys");
                return;
                break;
        }
    }

    try {
        face_recognition *fr = Z_FACE_RECOGNITION_P(getThis());
        full_object_detection fod(rect, parts);
        matrix<rgb_pixel> img;
        load_image(img, img_path);

        // Extract the face chip that is fed to the network.
        std::vector<chip_details> dets;
        dets.push_back(get_face_chip_details(fod, 150, 0.25));
        dlib::array<matrix<rgb_pixel>> face_chips;
        extract_image_chips(img, dets, face_chips);

        matrix<float,0,1> face_descriptor;
        if (num_jitters <= 1) {
            std::vector<matrix<float,0,1>> face_descriptors = fr->net->operator()(face_chips, 16);
            face_descriptor = face_descriptors[0];
        } else {
            matrix<rgb_pixel>& face_chip = face_chips[0];
            face_descriptor = mean(mat(fr->net->operator()(
                pdlib_jitter_image(face_chip, static_cast<int>(num_jitters), fr->rnd), 16)));
        }

        // Build the return value only once the descriptor is available.
        array_init(return_value);
        for (auto& d : face_descriptor) {
            add_next_index_double(return_value, d);
        }
    } catch (const exception& e) {
        // Pass the message as an argument, never as the format string.
        zend_throw_exception_ex(zend_ce_exception, 0 TSRMLS_CC, "%s", e.what());
        return;
    }
}
|
@ -0,0 +1,58 @@
|
||||
//
|
||||
// Created by branko at kokanovic dot org on 2018/8/26.
|
||||
//
|
||||
|
||||
#ifndef PHP_DLIB_FACE_RECOGNITION_H
|
||||
#define PHP_DLIB_FACE_RECOGNITION_H
|
||||
|
||||
#include <dlib/dnn.h>
|
||||
|
||||
using namespace dlib;
|
||||
|
||||
template <template <int,template<typename>class,int,typename> class block, int N, template<typename>class BN, typename SUBNET>
|
||||
using residual = add_prev1<block<N,BN,1,tag1<SUBNET>>>;
|
||||
|
||||
template <template <int,template<typename>class,int,typename> class block, int N, template<typename>class BN, typename SUBNET>
|
||||
using residual_down = add_prev2<avg_pool<2,2,2,2,skip1<tag2<block<N,BN,2,tag1<SUBNET>>>>>>;
|
||||
|
||||
template <int N, template <typename> class BN, int stride, typename SUBNET>
|
||||
using block = BN<con<N,3,3,1,1,relu<BN<con<N,3,3,stride,stride,SUBNET>>>>>;
|
||||
|
||||
template <int N, typename SUBNET> using ares = relu<residual<block,N,affine,SUBNET>>;
|
||||
template <int N, typename SUBNET> using ares_down = relu<residual_down<block,N,affine,SUBNET>>;
|
||||
|
||||
template <typename SUBNET> using alevel0 = ares_down<256,SUBNET>;
|
||||
template <typename SUBNET> using alevel1 = ares<256,ares<256,ares_down<256,SUBNET>>>;
|
||||
template <typename SUBNET> using alevel2 = ares<128,ares<128,ares_down<128,SUBNET>>>;
|
||||
template <typename SUBNET> using alevel3 = ares<64,ares<64,ares<64,ares_down<64,SUBNET>>>>;
|
||||
template <typename SUBNET> using alevel4 = ares<32,ares<32,ares<32,SUBNET>>>;
|
||||
|
||||
using anet_type = loss_metric<fc_no_bias<128,avg_pool_everything<
|
||||
alevel0<
|
||||
alevel1<
|
||||
alevel2<
|
||||
alevel3<
|
||||
alevel4<
|
||||
max_pool<3,3,2,2,relu<affine<con<32,7,7,2,2,
|
||||
input_rgb_image_sized<150>
|
||||
>>>>>>>>>>>>;
|
||||
|
||||
typedef struct _face_recognition {
|
||||
anet_type *net;
|
||||
zend_object std;
|
||||
dlib::rand rnd;
|
||||
} face_recognition;
|
||||
|
||||
// FaceRecognition::__construct(face_recognition_model_path) - one required argument.
ZEND_BEGIN_ARG_INFO_EX(face_recognition_ctor_arginfo, 0, 0, 1)
    ZEND_ARG_INFO(0, face_recognition_model_path)
ZEND_END_ARG_INFO()
PHP_METHOD(FaceRecognition, __construct);

// FaceRecognition::computeDescriptor(img_path, landmarks [, num_jitters])
// Only the first two arguments are required: the implementation parses its
// parameters with "sa|l", so num_jitters is optional.
ZEND_BEGIN_ARG_INFO_EX(face_recognition_compute_descriptor_arginfo, 0, 0, 2)
    ZEND_ARG_INFO(0, img_path)
    ZEND_ARG_INFO(0, landmarks)
    ZEND_ARG_INFO(0, num_jitters)
ZEND_END_ARG_INFO()
PHP_METHOD(FaceRecognition, computeDescriptor);
|
||||
|
||||
#endif //PHP_DLIB_FACE_RECOGNITION_H
|
@ -0,0 +1,15 @@
|
||||
--TEST--
Testing FaceRecognition constructor without arguments
--SKIPIF--
<?php if (!extension_loaded("pdlib")) print "skip"; ?>
--FILE--
<?php
// Constructing without the model path must emit the parameter-count warning
// and throw an exception with the message checked below.
try {
    new FaceRecognition();
} catch (Exception $e) {
    var_dump($e->getMessage());
}
?>
--EXPECTF--
Warning: FaceRecognition::__construct() expects exactly 1 parameter, 0 given in %s on line %d
string(43) "Unable to parse face_recognition_model_path"
|
@ -0,0 +1,60 @@
|
||||
--TEST--
Full test for face recognition - download models, detect faces, landmark detection and face recognition.
--SKIPIF--
<?php if (!extension_loaded("pdlib") || !function_exists("bzopen")) print "skip"; ?>
--FILE--
<?php
// The test needs bzopen()/bzread()/bzclose() to unpack the downloaded models,
// hence the SKIPIF section skips it when those functions are unavailable.
$models = array(
    "detection" => array("uri"=>"http://dlib.net/files/mmod_human_face_detector.dat.bz2"),
    "prediction" => array("uri"=>"http://dlib.net/files/shape_predictor_5_face_landmarks.dat.bz2"),
    "recognition" => array("uri"=>"http://dlib.net/files/dlib_face_recognition_resnet_model_v1.dat.bz2")
);

// Check if there are models in local tmp. Download them if not (lazy caching).
//
foreach ($models as $modelName => $modelBag) {
    printf("Processing %s model\n", $modelName);
    // Last path segment of the URI, e.g. "mmod_human_face_detector.dat.bz2".
    $bz2_filename = array_values(array_slice(explode("/", $modelBag["uri"]), -1))[0];
    $temp_bz2_file = sys_get_temp_dir() . "/" . $bz2_filename;
    // Everything before the first dot, with ".dat" appended.
    $dat_filename = array_values(array_slice(explode(".", $bz2_filename), 0))[0] . ".dat";
    $temp_dat_file = sys_get_temp_dir() . "/" . $dat_filename;
    $models[$modelName]["local_path"] = $temp_dat_file;

    // Already unpacked by a previous run - nothing to download.
    if (file_exists($temp_dat_file)) {
        continue;
    }

    // Download the archive, then decompress it into the .dat file.
    file_put_contents($temp_bz2_file, fopen($modelBag["uri"], 'r'));
    $bz = bzopen($temp_bz2_file, "r");
    $decompressed_file = "";
    while (!feof($bz)) {
        $decompressed_file .= bzread($bz, 4096);
    }
    bzclose($bz);

    file_put_contents($temp_dat_file, $decompressed_file);
}

printf("Detection\n");
$fd = new CnnFaceDetection($models["detection"]["local_path"]);
$detected_faces = $fd->detect(__DIR__ . "/lenna.jpg");
printf("Faces found = %d\n", count($detected_faces));
foreach($detected_faces as $index => $detected_face) {
    printf("Face[%d] in bounding box (left=%d, top=%d, right=%d, bottom=%d)\n", $index,
        $detected_face["left"], $detected_face["top"], $detected_face["right"], $detected_face["bottom"]);
    // Landmarks for the detected face, then the descriptor from those landmarks.
    $fld = new FaceLandmarkDetection($models["prediction"]["local_path"]);
    $landmarks = $fld->detect(__DIR__ . "/lenna.jpg", $detected_face);
    printf("Since we used model with 5 shape predictions, we found %d landmark parts\n", count($landmarks["parts"]));
    $fr = new FaceRecognition($models["recognition"]["local_path"]);
    $descriptor = $fr->computeDescriptor(__DIR__ . "/lenna.jpg", $landmarks);
    printf("Descriptor is vector of %d dimensions\n", count($descriptor));
}
?>
--EXPECT--
Processing detection model
Processing prediction model
Processing recognition model
Detection
Faces found = 1
Face[0] in bounding box (left=187, top=186, right=357, bottom=355)
Since we used model with 5 shape predictions, we found 5 landmark parts
Descriptor is vector of 128 dimensions
|
After Width: | Height: | Size: 460 KiB |
Loading…
Reference in new issue