Re-add the cached object detector

A little bit hacky, but should be fine. Supports both fhog
detectors and the "cached" simple_object_detector. Also maintains
the upsampling parameter for testing.
This commit is contained in:
Patrick Snape 2014-12-12 16:22:57 +00:00
parent fbe597be03
commit 37af35b55e
7 changed files with 264 additions and 232 deletions

View File

@ -73,14 +73,12 @@ dlib.train_simple_object_detector(training_xml_path, "detector.svm", options)
# average precision.
print("") # Print blank line to create gap from previous output
print("Training accuracy: {}".format(
dlib.test_simple_object_detector(training_xml_path, "detector.svm",
upsample_amount=1)))
dlib.test_simple_object_detector(training_xml_path, "detector.svm")))
# However, to get an idea if it really worked without overfitting we need to
# run it on images it wasn't trained on. The next line does this. Happily, we
# see that the object detector works perfectly on the testing images.
print("Testing accuracy: {}".format(
dlib.test_simple_object_detector(testing_xml_path, "detector.svm",
upsample_amount=1)))
dlib.test_simple_object_detector(testing_xml_path, "detector.svm")))
# Now let's use the detector as you would in a normal application. First we
# will load it from disk.

View File

@ -6,6 +6,7 @@
#include <dlib/image_processing/frontal_face_detector.h>
#include <dlib/image_processing/render_face_detections.h>
#include <dlib/gui_widgets.h>
#include "simple_object_detector_py.h"
using namespace dlib;
using namespace std;
@ -13,18 +14,7 @@ using namespace boost::python;
// ----------------------------------------------------------------------------------------
// Forward declaration of the simple_object_detector
typedef object_detector<scan_fhog_pyramid<pyramid_down<6> > > simple_object_detector;
// set_image() overload for a frontal_face_detector: passes the detector to
// draw_fhog() and displays whatever that returns in the given window.
void image_window_set_image_fhog_detector (
image_window& win,
const frontal_face_detector& det
)
{
win.set_image(draw_fhog(det));
}
void image_window_set_image_simple_detector (
image_window& win,
const simple_object_detector& det
)
@ -32,6 +22,14 @@ void image_window_set_image_simple_detector (
win.set_image(draw_fhog(det));
}
// set_image() overload for the Python-side wrapper type: only the wrapped
// detector (det.detector) is drawn via draw_fhog(); the wrapper's other state
// is not used here.
void image_window_set_image_simple_detector_py (
image_window& win,
const simple_object_detector_py& det
)
{
win.set_image(draw_fhog(det.detector));
}
// ----------------------------------------------------------------------------------------
void image_window_set_image (
@ -103,7 +101,7 @@ void bind_gui()
"Make the image_window display the given image.")
.def("set_image", image_window_set_image_fhog_detector, arg("detector"),
"Make the image_window display the given HOG detector's filters.")
.def("set_image", image_window_set_image_simple_detector, arg("detector"),
.def("set_image", image_window_set_image_simple_detector_py, arg("detector"),
"Make the image_window display the given HOG detector's filters.")
.def("set_title", (set_title_funct)&type::set_title, arg("title"),
"Set the title of the window to the given value.")

View File

@ -8,6 +8,7 @@
#include <dlib/image_processing/frontal_face_detector.h>
#include "indexing.h"
#include "simple_object_detector.h"
#include "simple_object_detector_py.h"
#include "conversion.h"
using namespace dlib;
@ -48,77 +49,7 @@ string print_rectangle_repr(const rectangle& r)
// ----------------------------------------------------------------------------------------
// Runs detector on a Python image, optionally upsampling the image first.
//
// - detector:          the detector to run.
// - img:               Python image object; must satisfy is_gray_python_image()
//                      or is_rgb_python_image().
// - upsampling_amount: number of pyramid_up() passes applied before detecting.
//                      0 means the image is scanned as given.
//
// Returns the detections. When upsampling was applied, every rectangle is
// passed through pyr.rect_down(rect, upsampling_amount) before being returned.
// Throws dlib::error if img is neither an 8bit gray nor an RGB image.
std::vector<rectangle> run_detector_with_upscale (
    simple_object_detector& detector,
    object img,
    const unsigned int upsampling_amount
)
{
    pyramid_down<2> pyr;

    if (is_gray_python_image(img))
    {
        if (upsampling_amount == 0)
            return detector(numpy_gray_image(img));

        // The first pass copies into a dlib image; the remaining passes work in place.
        array2d<unsigned char> upsampled;
        pyramid_up(numpy_gray_image(img), upsampled, pyr);
        for (unsigned int pass = 1; pass < upsampling_amount; ++pass)
            pyramid_up(upsampled);

        std::vector<rectangle> dets = detector(upsampled);
        for (std::vector<rectangle>::iterator d = dets.begin(); d != dets.end(); ++d)
            *d = pyr.rect_down(*d, upsampling_amount);
        return dets;
    }

    if (is_rgb_python_image(img))
    {
        if (upsampling_amount == 0)
            return detector(numpy_rgb_image(img));

        // Same procedure as the gray branch, with an RGB working image.
        array2d<rgb_pixel> upsampled;
        pyramid_up(numpy_rgb_image(img), upsampled, pyr);
        for (unsigned int pass = 1; pass < upsampling_amount; ++pass)
            pyramid_up(upsampled);

        std::vector<rectangle> dets = detector(upsampled);
        for (std::vector<rectangle>::iterator d = dets.begin(); d != dets.end(); ++d)
            *d = pyr.rect_down(*d, upsampling_amount);
        return dets;
    }

    throw dlib::error("Unsupported image type, must be 8bit gray or RGB image.");
}
void save_simple_object_detector(const simple_object_detector& detector, const std::string& detector_output_filename)
{
std::ofstream fout(detector_output_filename.c_str(), std::ios::binary);
int version = 1;
serialize(detector, fout);
serialize(version, fout);
}
// ----------------------------------------------------------------------------------------
inline simple_object_detector train_simple_object_detector_on_images_py (
inline simple_object_detector_py train_simple_object_detector_on_images_py (
const boost::python::list& pyimages,
const boost::python::list& pyboxes,
const simple_object_detector_training_options& options
@ -140,7 +71,7 @@ inline simple_test_results test_simple_object_detector_with_images_py (
const boost::python::list& pyimages,
const boost::python::list& pyboxes,
simple_object_detector& detector,
const unsigned int unsample_amount
const unsigned int upsampling_amount
)
{
const unsigned long num_images = len(pyimages);
@ -152,7 +83,27 @@ inline simple_test_results test_simple_object_detector_with_images_py (
dlib::array<array2d<rgb_pixel> > images(num_images);
images_and_nested_params_to_dlib(pyimages, pyboxes, images, boxes);
return test_simple_object_detector_with_images(images, unsample_amount, boxes, ignore, detector);
return test_simple_object_detector_with_images(images, upsampling_amount, boxes, ignore, detector);
}
// ----------------------------------------------------------------------------------------
// Tests a simple_object_detector_py against in-memory images and boxes.
//
// - pyimages / pyboxes: forwarded unchanged to
//                       test_simple_object_detector_with_images_py().
// - detector:           wrapper whose .detector is tested and whose
//                       .upsampling_amount supplies the fallback value.
// - upsampling_amount:  a value >= 0 is used as given; any negative value
//                       means "use the amount cached on the detector".
//
// Returns whatever test_simple_object_detector_with_images_py() returns.
inline simple_test_results test_simple_object_detector_py_with_images_py (
    const boost::python::list& pyimages,
    const boost::python::list& pyboxes,
    simple_object_detector_py& detector,
    const int upsampling_amount
)
{
    // An explicit non-negative argument wins over the cached value.
    const unsigned int final_upsampling_amount = (upsampling_amount >= 0)
        ? static_cast<unsigned int>(upsampling_amount)
        : detector.upsampling_amount;

    return test_simple_object_detector_with_images_py(pyimages, pyboxes, detector.detector, final_upsampling_amount);
}
// ----------------------------------------------------------------------------------------
@ -246,9 +197,10 @@ ensures \n\
- The trained object detector is returned.");
def("test_simple_object_detector", test_simple_object_detector,
(arg("dataset_filename"), arg("detector_filename"), arg("upsample_amount")=0),
// Please see test_simple_object_detector for the reason upsampling_amount is -1
(arg("dataset_filename"), arg("detector_filename"), arg("upsampling_amount")=-1),
"requires \n\
- Optionally, take the number of times to upsample the testing images. \n\
- Optionally, take the number of times to upsample the testing images (upsampling_amount >= 0). \n\
ensures \n\
- Loads an image dataset from dataset_filename. We assume dataset_filename is \n\
a file using the XML format written by save_image_dataset_metadata(). \n\
@ -264,12 +216,31 @@ ensures \n\
);
def("test_simple_object_detector", test_simple_object_detector_with_images_py,
(arg("images"), arg("boxes"), arg("detector"), arg("upsample_amount")=0),
(arg("images"), arg("boxes"), arg("detector"), arg("upsampling_amount")=0),
"requires \n\
- len(images) == len(boxes) \n\
- images should be a list of numpy matrices that represent images, either RGB or grayscale. \n\
- boxes should be a list of lists of dlib.rectangle object. \n\
- Optionally, take the number of times to upsample the testing images (upsampling_amount >= 0). \n\
ensures \n\
- Loads a simple_object_detector from the file detector_filename. This means \n\
detector_filename should be a file produced by the train_simple_object_detector() \n\
routine. \n\
- This function tests the detector against the dataset and returns the \n\
precision, recall, and average precision of the detector. In fact, The \n\
return value of this function is identical to that of dlib's \n\
test_object_detection_function() routine. Therefore, see the documentation \n\
for test_object_detection_function() for a detailed definition of these \n\
metrics. "
);
def("test_simple_object_detector", test_simple_object_detector_py_with_images_py,
// Please see test_simple_object_detector_py_with_images_py for the reason upsampling_amount is -1
(arg("images"), arg("boxes"), arg("detector"), arg("upsampling_amount")=-1),
"requires \n\
- len(images) == len(boxes) \n\
- images should be a list of numpy matrices that represent images, either RGB or grayscale. \n\
- boxes should be a list of lists of dlib.rectangle object. \n\
- Optionally, take the number of times to upsample the testing images. \n\
ensures \n\
- Loads a simple_object_detector from the file detector_filename. This means \n\
detector_filename should be a file produced by the train_simple_object_detector() \n\
@ -283,7 +254,7 @@ ensures \n\
);
{
typedef simple_object_detector type;
class_<type>("simple_object_detector",
class_<type>("fhog_object_detector",
"This object represents a sliding window histogram-of-oriented-gradients based object detector.")
.def("__init__", make_constructor(&load_object_from_file<type>),
"Loads a simple_object_detector from a file that contains the output of the \n\
@ -304,6 +275,35 @@ ensures \n\
.def_pickle(serialize_pickle<type>());
}
{
typedef simple_object_detector_py type;
class_<type>("simple_object_detector",
"This object represents a sliding window histogram-of-oriented-gradients based object detector.")
.def("__init__", make_constructor(&load_object_from_file<type>),
"Loads a simple_object_detector from a file that contains the output of the \n\
train_simple_object_detector() routine.")
.def("__call__", &type::run_detector1, (arg("image"), arg("upsample_num_times")),
"requires \n\
- image is a numpy ndarray containing either an 8bit grayscale or RGB \n\
image. \n\
- upsample_num_times >= 0 \n\
ensures \n\
- This function runs the object detector on the input image and returns \n\
a list of detections. \n\
- Upsamples the image upsample_num_times before running the basic \n\
detector. If you don't know how many times you want to upsample then \n\
don't provide a value for upsample_num_times and an appropriate \n\
default will be used.")
.def("__call__", &type::run_detector2, (arg("image")),
"requires \n\
- image is a numpy ndarray containing either an 8bit grayscale or RGB \n\
image. \n\
ensures \n\
- This function runs the object detector on the input image and returns \n\
a list of detections.")
.def("save", save_simple_object_detector_py, (arg("detector_output_filename")), "Save a simple_object_detector to the provided path.")
.def_pickle(serialize_pickle<type>());
}
{
typedef std::vector<rectangle> type;
class_<type>("rectangles", "An array of rectangle objects.")
.def(vector_indexing_suite<type>())

View File

@ -0,0 +1,48 @@
// Copyright (C) 2014 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_SERIALIZE_OBJECT_DETECTOR_H__
#define DLIB_SERIALIZE_OBJECT_DETECTOR_H__
#include "simple_object_detector_py.h"
namespace dlib
{
// Writes item to out. The stream layout is, in order:
//   1. the wrapped detector (item.detector)
//   2. an int format version, currently 1
//   3. item.upsampling_amount
// The version is written after the detector, so the first two fields match
// what save_simple_object_detector() writes for a plain detector.
inline void serialize (const dlib::simple_object_detector_py& item, std::ostream& out)
{
int version = 1;
serialize(item.detector, out);
serialize(version, out);
serialize(item.upsampling_amount, out);
}
// Reads a simple_object_detector_py from in, expecting the fields in the
// order: detector, int version, upsampling_amount.
//
// Throws dlib::serialization_error if the stored version is not 1. Note that
// item.detector has already been overwritten by the time the version is
// checked, and item.upsampling_amount is only assigned when the check passes.
inline void deserialize (dlib::simple_object_detector_py& item, std::istream& in)
{
int version = 0;
deserialize(item.detector, in);
deserialize(version, in);
if (version != 1)
throw dlib::serialization_error("Unexpected version found while deserializing a simple_object_detector.");
deserialize(item.upsampling_amount, in);
}
inline void save_simple_object_detector_py(const simple_object_detector_py& detector, const std::string& detector_output_filename)
{
std::ofstream fout(detector_output_filename.c_str(), std::ios::binary);
int version = 1;
serialize(detector.detector, fout);
serialize(version, fout);
serialize(detector.upsampling_amount, fout);
}
// ----------------------------------------------------------------------------------------
inline void save_simple_object_detector(const simple_object_detector& detector, const std::string& detector_output_filename)
{
std::ofstream fout(detector_output_filename.c_str(), std::ios::binary);
int version = 1;
serialize(detector, fout);
serialize(version, fout);
}
}
#endif // DLIB_SERIALIZE_OBJECT_DETECTOR_H__

View File

@ -3,7 +3,6 @@
#ifndef DLIB_SIMPLE_ObJECT_DETECTOR_H__
#define DLIB_SIMPLE_ObJECT_DETECTOR_H__
#include "simple_object_detector_abstract.h"
#include "dlib/image_processing/object_detector.h"
#include "dlib/string.h"
#include "dlib/image_processing/scan_fhog_pyramid.h"
@ -11,6 +10,7 @@
#include "dlib/geometry.h"
#include "dlib/data_io/load_image_dataset.h"
#include "dlib/image_processing/remove_unobtainable_rectangles.h"
#include "serialize_object_detector.h"
namespace dlib
@ -127,7 +127,7 @@ namespace dlib
// ----------------------------------------------------------------------------------------
template <typename image_array>
inline simple_object_detector train_simple_object_detector_on_images (
inline simple_object_detector_py train_simple_object_detector_on_images (
const std::string& dataset_filename, // can be "" if it's not applicable
image_array& images,
std::vector<std::vector<rectangle> >& boxes,
@ -168,15 +168,15 @@ namespace dlib
trainer.be_verbose();
}
unsigned long upsample_amount = 0;
unsigned long upsampling_amount = 0;
// now make sure all the boxes are obtainable by the scanner. We will try and
// upsample the images at most two times to help make the boxes obtainable.
std::vector<std::vector<rectangle> > temp(boxes), removed;
removed = remove_unobtainable_rectangles(trainer, images, temp);
while (impl::contains_any_boxes(removed) && upsample_amount < 2)
while (impl::contains_any_boxes(removed) && upsampling_amount < 2)
{
++upsample_amount;
++upsampling_amount;
if (options.be_verbose)
std::cout << "Upsample images..." << std::endl;
upsample_image_dataset<pyramid_down<2> >(images, boxes, ignore);
@ -199,18 +199,18 @@ namespace dlib
std::cout << "Training with epsilon: " << options.epsilon << std::endl;
std::cout << "Trained using " << options.num_threads << " threads."<< std::endl;
std::cout << "Trained with sliding window " << width << " pixels wide by " << height << " pixels tall." << std::endl;
if (upsample_amount != 0)
if (upsampling_amount != 0)
{
// Upsampled images # time(s) to allow detection of small boxes
std::cout << "Upsampled images " << upsample_amount;
std::cout << ((upsample_amount > 1) ? " times" : " time");
std::cout << "Upsampled images " << upsampling_amount;
std::cout << ((upsampling_amount > 1) ? " times" : " time");
std::cout << " to allow detection of small boxes." << std::endl;
}
if (options.add_left_right_image_flips)
std::cout << "Trained on both left and right flipped versions of images." << std::endl;
}
return detector;
return simple_object_detector_py(detector, upsampling_amount);
}
// ----------------------------------------------------------------------------------------
@ -225,12 +225,9 @@ namespace dlib
std::vector<std::vector<rectangle> > boxes, ignore;
ignore = load_image_dataset(images, boxes, dataset_filename);
simple_object_detector detector = train_simple_object_detector_on_images(dataset_filename, images, boxes, ignore, options);
simple_object_detector_py detector = train_simple_object_detector_on_images(dataset_filename, images, boxes, ignore, options);
std::ofstream fout(detector_output_filename.c_str(), std::ios::binary);
int version = 1;
serialize(detector, fout);
serialize(version, fout);
save_simple_object_detector_py(detector, detector_output_filename);
if (options.be_verbose)
std::cout << "Saved detector to file " << detector_output_filename << std::endl;
@ -268,7 +265,7 @@ namespace dlib
inline const simple_test_results test_simple_object_detector (
const std::string& dataset_filename,
const std::string& detector_filename,
const unsigned int upsample_amount
const int upsample_amount
)
{
// Load all the testing images
@ -276,7 +273,8 @@ namespace dlib
std::vector<std::vector<rectangle> > boxes, ignore;
ignore = load_image_dataset(images, boxes, dataset_filename);
// Load the detector off disk
// Load the detector off disk (We have to use the explicit serialization here
// so that we have an open file stream)
simple_object_detector detector;
int version = 0;
std::ifstream fin(detector_filename.c_str(), std::ios::binary);
@ -287,7 +285,25 @@ namespace dlib
if (version != 1)
throw error("Unknown simple_object_detector format.");
return test_simple_object_detector_with_images(images, upsample_amount, boxes, ignore, detector);
/* The file may hold either a plain simple_object_detector (possibly trained
 * outside of Python) or a simple_object_detector_py (trained from Python).
 * The two differ only in that the latter stores an upsampling amount after
 * the version number, so we peek at the stream to see whether any data is
 * left.
 *
 * The upsampling amount that is finally used is chosen as follows:
 *   - upsample_amount >= 0: the caller's value is used as given.
 *   - upsample_amount <  0 (the Python binding defaults to -1): the amount
 *     cached in the file is used if the file contains one, otherwise 0.
 */
unsigned int final_upsampling_amount = 0;
// Keep peek()'s result in the stream's own int_type and compare it with the
// stream's own traits_type::eof(). The wchar_t traits describe a different
// character type, and their eof() value need not match a narrow stream's.
const std::ifstream::int_type next_byte = fin.peek();
if (upsample_amount >= 0)
    final_upsampling_amount = upsample_amount;
else if (next_byte != std::ifstream::traits_type::eof())
    deserialize(final_upsampling_amount, fin);
return test_simple_object_detector_with_images(images, final_upsampling_amount, boxes, ignore, detector);
}
// ----------------------------------------------------------------------------------------

View File

@ -1,121 +0,0 @@
// Copyright (C) 2014 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_SIMPLE_ObJECT_DETECTOR_ABSTRACT_H__
#ifdef DLIB_SIMPLE_ObJECT_DETECTOR_ABSTRACT_H__
#include <dlib/image_processing/object_detector_abstract.h>
#include <dlib/image_processing/scan_fhog_pyramid_abstract.h>
#include <dlib/svm/structural_object_detection_trainer_abstract.h>
#include <dlib/data_io/image_dataset_metadata.h>
#include <dlib/matrix.h>
namespace dlib
{
// ----------------------------------------------------------------------------------------
struct fhog_training_options
{
    /*!
        WHAT THIS OBJECT REPRESENTS
            Bundle of settings consumed by train_simple_object_detector().
            Each field means the following:
                - be_verbose: when true, train_simple_object_detector() prints
                  a lot of progress information to the screen during training.
                - add_left_right_image_flips: when true, the objects are
                  assumed to be left/right symmetric and mirrored copies of
                  the training images are added, doubling the size of the
                  training dataset.
                - num_threads: how many threads of execution training uses.
                  Setting it to the number of CPU cores on the machine gives
                  the fastest training speed.
                - detection_window_size: the sliding window will contain
                  roughly this many pixels.
                - C: the usual SVM C regularization parameter, passed to
                  structural_object_detection_trainer::set_c().  Larger
                  values push the trainer to fit the data more closely but
                  may lead to overfitting, so the right setting has to be
                  found experimentally.
                - epsilon: the stopping epsilon.  Smaller values make the
                  trainer's solver more accurate but may take longer to train.
    !*/

    // Defaults: quiet, no flips, 4 threads, an 80x80 pixel window, C = 1 and
    // epsilon = 0.01.
    fhog_training_options()
        : be_verbose(false),
          add_left_right_image_flips(false),
          num_threads(4),
          detection_window_size(80*80),
          C(1),
          epsilon(0.01)
    {
    }

    bool be_verbose;
    bool add_left_right_image_flips;
    unsigned long num_threads;
    unsigned long detection_window_size;
    double C;
    double epsilon;
};
// ----------------------------------------------------------------------------------------
typedef object_detector<scan_fhog_pyramid<pyramid_down<6> > > simple_object_detector;
// ----------------------------------------------------------------------------------------
void train_simple_object_detector (
const std::string& dataset_filename,
const std::string& detector_output_filename,
const fhog_training_options& options
);
/*!
requires
- options.C > 0
ensures
- Uses the structural_object_detection_trainer to train a
simple_object_detector based on the labeled images in the XML file
dataset_filename. This function assumes the file dataset_filename is in the
XML format produced by the save_image_dataset_metadata() routine.
- This function will apply a reasonable set of default parameters and
preprocessing techniques to the training procedure for simple_object_detector
objects. So the point of this function is to provide you with a very easy
way to train a basic object detector.
- The trained object detector is serialized to the file detector_output_filename.
!*/
// ----------------------------------------------------------------------------------------
// The three accuracy numbers returned by test_simple_object_detector().
// Their definitions are those of test_object_detection_function(); see that
// routine's documentation for details.
struct simple_test_results
{
double precision;
double recall;
double average_precision;
};
inline const simple_test_results test_simple_object_detector (
const std::string& dataset_filename,
const std::string& detector_filename
);
/*!
ensures
- Loads an image dataset from dataset_filename. We assume dataset_filename is
a file using the XML format written by save_image_dataset_metadata().
- Loads a simple_object_detector from the file detector_filename. This means
detector_filename should be a file produced by the train_simple_object_detector()
routine defined above.
- This function tests the detector against the dataset and returns three
numbers that tell you how well the detector does at detecting the objects in
the dataset. The return value of this function is identical to that of
test_object_detection_function(). Therefore, see the documentation for
test_object_detection_function() for an extended definition of these metrics.
!*/
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_SIMPLE_ObJECT_DETECTOR_ABSTRACT_H__

View File

@ -0,0 +1,93 @@
// Copyright (C) 2014 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_SIMPLE_OBJECT_DETECTOR_PY_H__
#define DLIB_SIMPLE_OBJECT_DETECTOR_PY_H__
#include <dlib/python.h>
#include <dlib/matrix.h>
#include <boost/python/args.hpp>
#include <dlib/geometry.h>
#include <dlib/image_processing/frontal_face_detector.h>
namespace dlib
{
typedef object_detector<scan_fhog_pyramid<pyramid_down<6> > > simple_object_detector;
inline std::vector<dlib::rectangle> run_detector_with_upscale (
dlib::simple_object_detector& detector,
boost::python::object img,
const unsigned int upsampling_amount
)
{
pyramid_down<2> pyr;
if (is_gray_python_image(img))
{
array2d<unsigned char> temp;
if (upsampling_amount == 0)
{
return detector(numpy_gray_image(img));
}
else
{
pyramid_up(numpy_gray_image(img), temp, pyr);
unsigned int levels = upsampling_amount-1;
while (levels > 0)
{
levels--;
pyramid_up(temp);
}
std::vector<rectangle> res = detector(temp);
for (unsigned long i = 0; i < res.size(); ++i)
res[i] = pyr.rect_down(res[i], upsampling_amount);
return res;
}
}
else if (is_rgb_python_image(img))
{
array2d<rgb_pixel> temp;
if (upsampling_amount == 0)
{
return detector(numpy_rgb_image(img));
}
else
{
pyramid_up(numpy_rgb_image(img), temp, pyr);
unsigned int levels = upsampling_amount-1;
while (levels > 0)
{
levels--;
pyramid_up(temp);
}
std::vector<rectangle> res = detector(temp);
for (unsigned long i = 0; i < res.size(); ++i)
res[i] = pyr.rect_down(res[i], upsampling_amount);
return res;
}
}
else
{
throw dlib::error("Unsupported image type, must be 8bit gray or RGB image.");
}
}
struct simple_object_detector_py
{
simple_object_detector detector;
unsigned int upsampling_amount;
simple_object_detector_py() {}
simple_object_detector_py(simple_object_detector& _detector, unsigned int _upsampling_amount) :
detector(_detector), upsampling_amount(_upsampling_amount) {}
std::vector<dlib::rectangle> run_detector1 (boost::python::object img, const unsigned int upsampling_amount_)
{ return run_detector_with_upscale(detector, img, upsampling_amount_); }
std::vector<dlib::rectangle> run_detector2 (boost::python::object img)
{ return run_detector_with_upscale(detector, img, upsampling_amount); }
};
}
#endif // DLIB_SIMPLE_OBJECT_DETECTOR_PY_H__