mirror of
https://github.com/davisking/dlib.git
synced 2024-11-01 10:14:53 +08:00
Clarified a few comments and simplified the serialization code a bit.
Also just cleaned up a few minor details.
This commit is contained in:
parent
773fe59a34
commit
1ab3482597
@ -59,16 +59,20 @@ for f in glob.glob(os.path.join(faces_folder_path, "*.jpg")):
|
|||||||
win.clear_overlay()
|
win.clear_overlay()
|
||||||
win.set_image(img)
|
win.set_image(img)
|
||||||
|
|
||||||
|
# Ask the detector to find the bounding boxes of each face. The 1 in the
|
||||||
|
# second argument indicates that we should upsample the image 1 time. This
|
||||||
|
# will make everything bigger and allow us to detect more faces.
|
||||||
dets = detector(img, 1)
|
dets = detector(img, 1)
|
||||||
print("Number of faces detected: {}".format(len(dets)))
|
print("Number of faces detected: {}".format(len(dets)))
|
||||||
for k, d in enumerate(dets):
|
for k, d in enumerate(dets):
|
||||||
print("Detection {}: Left: {} Top: {} Right: {} Bottom: {}".format(
|
print("Detection {}: Left: {} Top: {} Right: {} Bottom: {}".format(
|
||||||
k, d.left(), d.top(), d.right(), d.bottom()))
|
k, d.left(), d.top(), d.right(), d.bottom()))
|
||||||
shapes = predictor(img, d)
|
# Get the landmarks/parts for the face in box d.
|
||||||
print("Part 0: {}, Part 1: {} ...".format(shapes.part(0),
|
shape = predictor(img, d)
|
||||||
shapes.part(1)))
|
print("Part 0: {}, Part 1: {} ...".format(shape.part(0),
|
||||||
# Add all facial landmarks one at a time
|
shape.part(1)))
|
||||||
win.add_overlay(shapes)
|
# Draw the face landmarks on the screen.
|
||||||
|
win.add_overlay(shape)
|
||||||
|
|
||||||
win.add_overlay(dets)
|
win.add_overlay(dets)
|
||||||
raw_input("Hit enter to continue")
|
raw_input("Hit enter to continue")
|
||||||
|
@ -2,9 +2,8 @@
|
|||||||
# The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
|
# The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
|
||||||
#
|
#
|
||||||
# This simple example shows how to call dlib's optimal linear assignment
|
# This simple example shows how to call dlib's optimal linear assignment
|
||||||
# problem solver.
|
# problem solver. It is an implementation of the famous Hungarian algorithm
|
||||||
# It is an implementation of the famous Hungarian algorithm and is quite fast,
|
# and is quite fast, operating in O(N^3) time.
|
||||||
# operating in O(N^3) time.
|
|
||||||
#
|
#
|
||||||
# COMPILING THE DLIB PYTHON INTERFACE
|
# COMPILING THE DLIB PYTHON INTERFACE
|
||||||
# Dlib comes with a compiled python interface for python 2.7 on MS Windows. If
|
# Dlib comes with a compiled python interface for python 2.7 on MS Windows. If
|
||||||
|
@ -83,45 +83,47 @@ def print_segment(sentence, names):
|
|||||||
sys.stdout.write("\n")
|
sys.stdout.write("\n")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Now let's make some training data. Each example is a sentence as well as a
|
# Now let's make some training data. Each example is a sentence as well as a
|
||||||
# set of ranges which indicate the locations of any names.
|
# set of ranges which indicate the locations of any names.
|
||||||
names = dlib.ranges() # make an array of dlib.range objects.
|
names = dlib.ranges() # make an array of dlib.range objects.
|
||||||
segments = dlib.rangess() # make an array of arrays of dlib.range objects.
|
segments = dlib.rangess() # make an array of arrays of dlib.range objects.
|
||||||
sentences = ["The other day I saw a man named Jim Smith",
|
sentences = []
|
||||||
"Davis King is the main author of the dlib Library",
|
|
||||||
"Bob Jones is a name and so is George Clinton",
|
|
||||||
"My dog is named Bob Barker",
|
|
||||||
"ABC is an acronym but John James Smith is a name",
|
|
||||||
"No names in this sentence at all"]
|
|
||||||
|
|
||||||
|
sentences.append("The other day I saw a man named Jim Smith")
|
||||||
# We want to detect person names. So we note that the name is located within
|
# We want to detect person names. So we note that the name is located within
|
||||||
# the range [8, 10). Note that we use half open ranges to identify segments.
|
# the range [8, 10). Note that we use half open ranges to identify segments.
|
||||||
# So in this case, the segment identifies the string "Jim Smith".
|
# So in this case, the segment identifies the string "Jim Smith".
|
||||||
names.append(dlib.range(8, 10))
|
names.append(dlib.range(8, 10))
|
||||||
segments.append(names)
|
segments.append(names)
|
||||||
# make names empty for use again below
|
names.clear() # make names empty for use again below
|
||||||
names.clear()
|
|
||||||
|
|
||||||
|
sentences.append("Davis King is the main author of the dlib Library")
|
||||||
names.append(dlib.range(0, 2))
|
names.append(dlib.range(0, 2))
|
||||||
segments.append(names)
|
segments.append(names)
|
||||||
names.clear()
|
names.clear()
|
||||||
|
|
||||||
|
sentences.append("Bob Jones is a name and so is George Clinton")
|
||||||
names.append(dlib.range(0, 2))
|
names.append(dlib.range(0, 2))
|
||||||
names.append(dlib.range(8, 10))
|
names.append(dlib.range(8, 10))
|
||||||
segments.append(names)
|
segments.append(names)
|
||||||
names.clear()
|
names.clear()
|
||||||
|
|
||||||
|
sentences.append("My dog is named Bob Barker")
|
||||||
names.append(dlib.range(4, 6))
|
names.append(dlib.range(4, 6))
|
||||||
segments.append(names)
|
segments.append(names)
|
||||||
names.clear()
|
names.clear()
|
||||||
|
|
||||||
|
sentences.append("ABC is an acronym but John James Smith is a name")
|
||||||
names.append(dlib.range(5, 8))
|
names.append(dlib.range(5, 8))
|
||||||
segments.append(names)
|
segments.append(names)
|
||||||
names.clear()
|
names.clear()
|
||||||
|
|
||||||
|
sentences.append("No names in this sentence at all")
|
||||||
segments.append(names)
|
segments.append(names)
|
||||||
names.clear()
|
names.clear()
|
||||||
|
|
||||||
|
|
||||||
# Now before we can pass these training sentences to the dlib tools we need to
|
# Now before we can pass these training sentences to the dlib tools we need to
|
||||||
# convert them into arrays of vectors as discussed above. We can use either a
|
# convert them into arrays of vectors as discussed above. We can use either a
|
||||||
# sparse or dense representation depending on our needs. In this example, we
|
# sparse or dense representation depending on our needs. In this example, we
|
||||||
|
@ -36,6 +36,7 @@ if len(sys.argv) != 2:
|
|||||||
exit()
|
exit()
|
||||||
faces_folder = sys.argv[1]
|
faces_folder = sys.argv[1]
|
||||||
|
|
||||||
|
|
||||||
# Now let's do the training. The train_simple_object_detector() function has a
|
# Now let's do the training. The train_simple_object_detector() function has a
|
||||||
# bunch of options, all of which come with reasonable default values. The next
|
# bunch of options, all of which come with reasonable default values. The next
|
||||||
# few lines go over some of these options.
|
# few lines go over some of these options.
|
||||||
@ -55,6 +56,9 @@ options.C = 5
|
|||||||
options.num_threads = 4
|
options.num_threads = 4
|
||||||
options.be_verbose = True
|
options.be_verbose = True
|
||||||
|
|
||||||
|
|
||||||
|
training_xml_path = os.path.join(faces_folder, "training.xml")
|
||||||
|
testing_xml_path = os.path.join(faces_folder, "testing.xml")
|
||||||
# This function does the actual training. It will save the final detector to
|
# This function does the actual training. It will save the final detector to
|
||||||
# detector.svm. The input is an XML file that lists the images in the training
|
# detector.svm. The input is an XML file that lists the images in the training
|
||||||
# dataset and also contains the positions of the face boxes. To create your
|
# dataset and also contains the positions of the face boxes. To create your
|
||||||
@ -63,11 +67,10 @@ options.be_verbose = True
|
|||||||
# images with boxes. To see how to use it read the tools/imglab/README.txt
|
# images with boxes. To see how to use it read the tools/imglab/README.txt
|
||||||
# file. But for this example, we just use the training.xml file included with
|
# file. But for this example, we just use the training.xml file included with
|
||||||
# dlib.
|
# dlib.
|
||||||
training_xml_path = os.path.join(faces_folder, "training.xml")
|
|
||||||
testing_xml_path = os.path.join(faces_folder, "testing.xml")
|
|
||||||
|
|
||||||
dlib.train_simple_object_detector(training_xml_path, "detector.svm", options)
|
dlib.train_simple_object_detector(training_xml_path, "detector.svm", options)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Now that we have a face detector we can test it. The first statement tests
|
# Now that we have a face detector we can test it. The first statement tests
|
||||||
# it on the training data. It will print the precision, recall, and then
|
# it on the training data. It will print the precision, recall, and then
|
||||||
# average precision.
|
# average precision.
|
||||||
@ -80,6 +83,10 @@ print("Training accuracy: {}".format(
|
|||||||
print("Testing accuracy: {}".format(
|
print("Testing accuracy: {}".format(
|
||||||
dlib.test_simple_object_detector(testing_xml_path, "detector.svm")))
|
dlib.test_simple_object_detector(testing_xml_path, "detector.svm")))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Now let's use the detector as you would in a normal application. First we
|
# Now let's use the detector as you would in a normal application. First we
|
||||||
# will load it from disk.
|
# will load it from disk.
|
||||||
detector = dlib.simple_object_detector("detector.svm")
|
detector = dlib.simple_object_detector("detector.svm")
|
||||||
@ -106,6 +113,12 @@ for f in glob.glob(os.path.join(faces_folder, "*.jpg")):
|
|||||||
win.add_overlay(dets)
|
win.add_overlay(dets)
|
||||||
raw_input("Hit enter to continue")
|
raw_input("Hit enter to continue")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Finally, note that you don't have to use the XML based input to
|
# Finally, note that you don't have to use the XML based input to
|
||||||
# train_simple_object_detector(). If you have already loaded your training
|
# train_simple_object_detector(). If you have already loaded your training
|
||||||
# images and bounding boxes for the objects then you can call it as shown
|
# images and bounding boxes for the objects then you can call it as shown
|
||||||
@ -126,10 +139,10 @@ boxes_img2 = ([dlib.rectangle(left=154, top=46, right=228, bottom=121),
|
|||||||
boxes = [boxes_img1, boxes_img2]
|
boxes = [boxes_img1, boxes_img2]
|
||||||
|
|
||||||
detector2 = dlib.train_simple_object_detector(images, boxes, options)
|
detector2 = dlib.train_simple_object_detector(images, boxes, options)
|
||||||
# We could save this detector by uncommenting the following
|
# We could save this detector to disk by uncommenting the following.
|
||||||
#detector2.save('detector2.svm')
|
#detector2.save('detector2.svm')
|
||||||
|
|
||||||
# Now let's load the trained detector and look at its HOG filter!
|
# Now let's look at its HOG filter!
|
||||||
win_det.set_image(detector2)
|
win_det.set_image(detector2)
|
||||||
raw_input("Hit enter to continue")
|
raw_input("Hit enter to continue")
|
||||||
|
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
# In particular, we will train a face landmarking model based on a small
|
# In particular, we will train a face landmarking model based on a small
|
||||||
# dataset and then evaluate it. If you want to visualize the output of the
|
# dataset and then evaluate it. If you want to visualize the output of the
|
||||||
# trained model on some images then you can run the
|
# trained model on some images then you can run the
|
||||||
# face_landmark_detection.py example program with sp.dat as the input
|
# face_landmark_detection.py example program with predictor.dat as the input
|
||||||
# model.
|
# model.
|
||||||
#
|
#
|
||||||
# It should also be noted that this kind of model, while often used for face
|
# It should also be noted that this kind of model, while often used for face
|
||||||
@ -49,7 +49,7 @@ options = dlib.shape_predictor_training_options()
|
|||||||
# Now make the object responsible for training the model.
|
# Now make the object responsible for training the model.
|
||||||
# This algorithm has a bunch of parameters you can mess with. The
|
# This algorithm has a bunch of parameters you can mess with. The
|
||||||
# documentation for the shape_predictor_trainer explains all of them.
|
# documentation for the shape_predictor_trainer explains all of them.
|
||||||
# You should also read Kazemi paper which explains all the parameters
|
# You should also read Kazemi's paper which explains all the parameters
|
||||||
# in great detail. However, here I'm just setting three of them
|
# in great detail. However, here I'm just setting three of them
|
||||||
# differently than their default values. I'm doing this because we
|
# differently than their default values. I'm doing this because we
|
||||||
# have a very small dataset. In particular, setting the oversampling
|
# have a very small dataset. In particular, setting the oversampling
|
||||||
@ -63,33 +63,35 @@ options.nu = 0.05
|
|||||||
options.tree_depth = 2
|
options.tree_depth = 2
|
||||||
options.be_verbose = True
|
options.be_verbose = True
|
||||||
|
|
||||||
# This function does the actual training. It will save the final predictor to
|
# dlib.train_shape_predictor() does the actual training. It will save the
|
||||||
# predictor.dat. The input is an XML file that lists the images in the training
|
# final predictor to predictor.dat. The input is an XML file that lists the
|
||||||
# dataset and also contains the positions of the face parts.
|
# images in the training dataset and also contains the positions of the face
|
||||||
|
# parts.
|
||||||
training_xml_path = os.path.join(faces_folder, "training_with_face_landmarks.xml")
|
training_xml_path = os.path.join(faces_folder, "training_with_face_landmarks.xml")
|
||||||
testing_xml_path = os.path.join(faces_folder, "testing_with_face_landmarks.xml")
|
|
||||||
|
|
||||||
dlib.train_shape_predictor(training_xml_path, "predictor.dat", options)
|
dlib.train_shape_predictor(training_xml_path, "predictor.dat", options)
|
||||||
|
|
||||||
# Now that we have a facial landmark predictor we can test it. The first
|
# Now that we have a model we can test it. dlib.test_shape_predictor()
|
||||||
# statement tests it on the training data. It will print the mean average error
|
# measures the average distance between a face landmark output by the
|
||||||
print("") # Print blank line to create gap from previous output
|
# shape_predictor and where it should be according to the truth data.
|
||||||
print("Training accuracy: {}".format(
|
print("\nTraining accuracy: {}".format(
|
||||||
dlib.test_shape_predictor(training_xml_path, "predictor.dat")))
|
dlib.test_shape_predictor(training_xml_path, "predictor.dat")))
|
||||||
# However, to get an idea if it really worked without overfitting we need to
|
# The real test is to see how well it does on data it wasn't trained on. We
|
||||||
# run it on images it wasn't trained on. The next line does this. Happily, we
|
# trained it on a very small dataset so the accuracy is not extremely high, but
|
||||||
# see that the object detector works perfectly on the testing images.
|
# it's still doing quite well. Moreover, if you train it on one of the large
|
||||||
|
# face landmarking datasets you will obtain state-of-the-art results, as shown
|
||||||
|
# in the Kazemi paper.
|
||||||
|
testing_xml_path = os.path.join(faces_folder, "testing_with_face_landmarks.xml")
|
||||||
print("Testing accuracy: {}".format(
|
print("Testing accuracy: {}".format(
|
||||||
dlib.test_shape_predictor(testing_xml_path, "predictor.dat")))
|
dlib.test_shape_predictor(testing_xml_path, "predictor.dat")))
|
||||||
|
|
||||||
# Now let's use the detector as you would in a normal application. First we
|
# Now let's use it as you would in a normal application. First we will load it
|
||||||
# will load it from disk. We also need to load a face detector to provide the
|
# from disk. We also need to load a face detector to provide the initial
|
||||||
# initial estimate of the facial location
|
# estimate of the facial location.
|
||||||
detector = dlib.get_frontal_face_detector()
|
|
||||||
predictor = dlib.shape_predictor("predictor.dat")
|
predictor = dlib.shape_predictor("predictor.dat")
|
||||||
|
detector = dlib.get_frontal_face_detector()
|
||||||
|
|
||||||
# Now let's run the detector and predictor over the images in the faces folder
|
# Now let's run the detector and shape_predictor over the images in the faces
|
||||||
# and display the results.
|
# folder and display the results.
|
||||||
print("Showing detections and predictions on the images in the faces folder...")
|
print("Showing detections and predictions on the images in the faces folder...")
|
||||||
win = dlib.image_window()
|
win = dlib.image_window()
|
||||||
for f in glob.glob(os.path.join(faces_folder, "*.jpg")):
|
for f in glob.glob(os.path.join(faces_folder, "*.jpg")):
|
||||||
@ -99,21 +101,21 @@ for f in glob.glob(os.path.join(faces_folder, "*.jpg")):
|
|||||||
win.clear_overlay()
|
win.clear_overlay()
|
||||||
win.set_image(img)
|
win.set_image(img)
|
||||||
|
|
||||||
|
# Ask the detector to find the bounding boxes of each face. The 1 in the
|
||||||
|
# second argument indicates that we should upsample the image 1 time. This
|
||||||
|
# will make everything bigger and allow us to detect more faces.
|
||||||
dets = detector(img, 1)
|
dets = detector(img, 1)
|
||||||
print("Number of faces detected: {}".format(len(dets)))
|
print("Number of faces detected: {}".format(len(dets)))
|
||||||
for k, d in enumerate(dets):
|
for k, d in enumerate(dets):
|
||||||
print("Detection {}: Left: {} Top: {} Right: {} Bottom: {}".format(
|
print("Detection {}: Left: {} Top: {} Right: {} Bottom: {}".format(
|
||||||
k, d.left(), d.top(), d.right(), d.bottom()))
|
k, d.left(), d.top(), d.right(), d.bottom()))
|
||||||
shapes = predictor(img, d)
|
# Get the landmarks/parts for the face in box d.
|
||||||
print("Part 0: {}, Part 1: {} ...".format(shapes.part(0),
|
shape = predictor(img, d)
|
||||||
shapes.part(1)))
|
print("Part 0: {}, Part 1: {} ...".format(shape.part(0),
|
||||||
# Add all facial landmarks one at a time
|
shape.part(1)))
|
||||||
win.add_overlay(shapes)
|
# Draw the face landmarks on the screen.
|
||||||
|
win.add_overlay(shape)
|
||||||
|
|
||||||
win.add_overlay(dets)
|
win.add_overlay(dets)
|
||||||
raw_input("Hit enter to continue")
|
raw_input("Hit enter to continue")
|
||||||
|
|
||||||
# Finally, note that you don't have to use the XML based input to
|
|
||||||
# train_shape_predictor(). If you have already loaded your training
|
|
||||||
# images and fll_object_detections for the objects then you can call it with
|
|
||||||
# the existing objects.
|
|
||||||
|
@ -51,9 +51,7 @@ void add_overlay_rect (
|
|||||||
const rgb_pixel& color
|
const rgb_pixel& color
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
std::vector<rectangle> rects;
|
win.add_overlay(rect, color);
|
||||||
rects.push_back(rect);
|
|
||||||
win.add_overlay(rects, color);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void add_overlay_parts (
|
void add_overlay_parts (
|
||||||
@ -62,9 +60,7 @@ void add_overlay_parts (
|
|||||||
const rgb_pixel& color
|
const rgb_pixel& color
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
std::vector<full_object_detection> detections;
|
win.add_overlay(render_face_detections(detection, color));
|
||||||
detections.push_back(detection);
|
|
||||||
win.add_overlay(render_face_detections(detections, color));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
boost::shared_ptr<image_window> make_image_window_from_image(object img)
|
boost::shared_ptr<image_window> make_image_window_from_image(object img)
|
||||||
|
@ -257,8 +257,9 @@ ensures \n\
|
|||||||
class_<type>("fhog_object_detector",
|
class_<type>("fhog_object_detector",
|
||||||
"This object represents a sliding window histogram-of-oriented-gradients based object detector.")
|
"This object represents a sliding window histogram-of-oriented-gradients based object detector.")
|
||||||
.def("__init__", make_constructor(&load_object_from_file<type>),
|
.def("__init__", make_constructor(&load_object_from_file<type>),
|
||||||
"Loads a simple_object_detector from a file that contains the output of the \n\
|
"Loads an object detector from a file that contains the output of the \n\
|
||||||
train_simple_object_detector() routine.")
|
train_simple_object_detector() routine or a serialized C++ object of type\n\
|
||||||
|
object_detector<scan_fhog_pyramid<pyramid_down<6>>>.")
|
||||||
.def("__call__", run_detector_with_upscale, (arg("image"), arg("upsample_num_times")=0),
|
.def("__call__", run_detector_with_upscale, (arg("image"), arg("upsample_num_times")=0),
|
||||||
"requires \n\
|
"requires \n\
|
||||||
- image is a numpy ndarray containing either an 8bit grayscale or RGB \n\
|
- image is a numpy ndarray containing either an 8bit grayscale or RGB \n\
|
||||||
|
@ -39,9 +39,10 @@ namespace dlib
|
|||||||
inline void save_simple_object_detector(const simple_object_detector& detector, const std::string& detector_output_filename)
|
inline void save_simple_object_detector(const simple_object_detector& detector, const std::string& detector_output_filename)
|
||||||
{
|
{
|
||||||
std::ofstream fout(detector_output_filename.c_str(), std::ios::binary);
|
std::ofstream fout(detector_output_filename.c_str(), std::ios::binary);
|
||||||
int version = 1;
|
|
||||||
serialize(detector, fout);
|
serialize(detector, fout);
|
||||||
serialize(version, fout);
|
// We don't need to save the version or upsampling amount because we want to write out the
|
||||||
|
// object detector just like the C++ code that serializes an object_detector would.
|
||||||
|
// We also don't know the upsampling amount in this case anyway.
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -38,9 +38,7 @@ full_object_detection run_predictor (
|
|||||||
void save_shape_predictor(const shape_predictor& predictor, const std::string& predictor_output_filename)
|
void save_shape_predictor(const shape_predictor& predictor, const std::string& predictor_output_filename)
|
||||||
{
|
{
|
||||||
std::ofstream fout(predictor_output_filename.c_str(), std::ios::binary);
|
std::ofstream fout(predictor_output_filename.c_str(), std::ios::binary);
|
||||||
int version = 1;
|
|
||||||
serialize(predictor, fout);
|
serialize(predictor, fout);
|
||||||
serialize(version, fout);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// ----------------------------------------------------------------------------------------
|
// ----------------------------------------------------------------------------------------
|
||||||
@ -95,7 +93,7 @@ inline shape_predictor train_shape_predictor_on_images_py (
|
|||||||
throw dlib::error("The length of the detections list must match the length of the images list.");
|
throw dlib::error("The length of the detections list must match the length of the images list.");
|
||||||
|
|
||||||
std::vector<std::vector<full_object_detection> > detections(num_images);
|
std::vector<std::vector<full_object_detection> > detections(num_images);
|
||||||
dlib::array<array2d<rgb_pixel> > images(num_images);
|
dlib::array<array2d<unsigned char> > images(num_images);
|
||||||
images_and_nested_params_to_dlib(pyimages, pydetections, images, detections);
|
images_and_nested_params_to_dlib(pyimages, pydetections, images, detections);
|
||||||
|
|
||||||
return train_shape_predictor_on_images(images, detections, options);
|
return train_shape_predictor_on_images(images, detections, options);
|
||||||
@ -121,9 +119,9 @@ inline double test_shape_predictor_with_images_py (
|
|||||||
std::vector<std::vector<double> > scales;
|
std::vector<std::vector<double> > scales;
|
||||||
if (num_scales > 0)
|
if (num_scales > 0)
|
||||||
scales.resize(num_scales);
|
scales.resize(num_scales);
|
||||||
dlib::array<array2d<rgb_pixel> > images(num_images);
|
dlib::array<array2d<unsigned char> > images(num_images);
|
||||||
|
|
||||||
// Now copy the data into dlib based objects so we can call the trainer.
|
// Now copy the data into dlib based objects so we can call the testing routine.
|
||||||
for (unsigned long i = 0; i < num_images; ++i)
|
for (unsigned long i = 0; i < num_images; ++i)
|
||||||
{
|
{
|
||||||
const unsigned long num_boxes = len(pydetections[i]);
|
const unsigned long num_boxes = len(pydetections[i]);
|
||||||
@ -193,7 +191,7 @@ void bind_shape_predictors()
|
|||||||
&type::nu,
|
&type::nu,
|
||||||
"The regularization parameter. Larger values of this parameter \
|
"The regularization parameter. Larger values of this parameter \
|
||||||
will cause the algorithm to fit the training data better but may also \
|
will cause the algorithm to fit the training data better but may also \
|
||||||
cause overfitting.")
|
cause overfitting. The value must be in the range (0, 1].")
|
||||||
.add_property("oversampling_amount", &type::oversampling_amount,
|
.add_property("oversampling_amount", &type::oversampling_amount,
|
||||||
&type::oversampling_amount,
|
&type::oversampling_amount,
|
||||||
"The number of randomly selected initial starting points sampled for each training example")
|
"The number of randomly selected initial starting points sampled for each training example")
|
||||||
@ -232,7 +230,7 @@ train_shape_predictor() routine.")
|
|||||||
- box is the bounding box to begin the shape prediction inside. \n\
|
- box is the bounding box to begin the shape prediction inside. \n\
|
||||||
ensures \n\
|
ensures \n\
|
||||||
- This function runs the shape predictor on the input image and returns \n\
|
- This function runs the shape predictor on the input image and returns \n\
|
||||||
a single full object detection.")
|
a single full_object_detection.")
|
||||||
.def("save", save_shape_predictor, (arg("predictor_output_filename")), "Save a shape_predictor to the provided path.")
|
.def("save", save_shape_predictor, (arg("predictor_output_filename")), "Save a shape_predictor to the provided path.")
|
||||||
.def_pickle(serialize_pickle<type>());
|
.def_pickle(serialize_pickle<type>());
|
||||||
}
|
}
|
||||||
@ -241,36 +239,28 @@ ensures \n\
|
|||||||
(arg("images"), arg("object_detections"), arg("options")),
|
(arg("images"), arg("object_detections"), arg("options")),
|
||||||
"requires \n\
|
"requires \n\
|
||||||
- options.lambda > 0 \n\
|
- options.lambda > 0 \n\
|
||||||
- options.nu > 0 \n\
|
- 0 < options.nu <= 1 \n\
|
||||||
- options.feature_pool_region_padding >= 0 \n\
|
- options.feature_pool_region_padding >= 0 \n\
|
||||||
- len(images) == len(object_detections) \n\
|
- len(images) == len(object_detections) \n\
|
||||||
- images should be a list of numpy matrices that represent images, either RGB or grayscale. \n\
|
- images should be a list of numpy matrices that represent images, either RGB or grayscale. \n\
|
||||||
- object_detections should be a list of lists of dlib.full_object_detection objects. \
|
- object_detections should be a list of lists of dlib.full_object_detection objects. \
|
||||||
Each dlib.full_object_detection contains the bounding box and the lists of points that make up the object parts.\n\
|
Each dlib.full_object_detection contains the bounding box and the lists of points that make up the object parts.\n\
|
||||||
ensures \n\
|
ensures \n\
|
||||||
- Uses the shape_predictor_trainer to train a \n\
|
- Uses dlib's shape_predictor_trainer object to train a \n\
|
||||||
shape_predictor based on the provided labeled images and full object detections.\n\
|
shape_predictor based on the provided labeled images, full_object_detections, and options.\n\
|
||||||
- This function will apply a reasonable set of default parameters and \n\
|
|
||||||
preprocessing techniques to the training procedure for shape_predictors \n\
|
|
||||||
objects. So the point of this function is to provide you with a very easy \n\
|
|
||||||
way to train a basic shape predictor. \n\
|
|
||||||
- The trained shape_predictor is returned");
|
- The trained shape_predictor is returned");
|
||||||
|
|
||||||
def("train_shape_predictor", train_shape_predictor,
|
def("train_shape_predictor", train_shape_predictor,
|
||||||
(arg("dataset_filename"), arg("predictor_output_filename"), arg("options")),
|
(arg("dataset_filename"), arg("predictor_output_filename"), arg("options")),
|
||||||
"requires \n\
|
"requires \n\
|
||||||
- options.lambda > 0 \n\
|
- options.lambda > 0 \n\
|
||||||
- options.nu > 0 \n\
|
- 0 < options.nu <= 1 \n\
|
||||||
- options.feature_pool_region_padding >= 0 \n\
|
- options.feature_pool_region_padding >= 0 \n\
|
||||||
ensures \n\
|
ensures \n\
|
||||||
- Uses the shape_predictor_trainer to train a \n\
|
- Uses dlib's shape_predictor_trainer to train a \n\
|
||||||
shape_predictor based on the labeled images in the XML file \n\
|
shape_predictor based on the labeled images in the XML file \n\
|
||||||
dataset_filename. This function assumes the file dataset_filename is in the \n\
|
dataset_filename and the provided options. This function assumes the file dataset_filename is in the \n\
|
||||||
XML format produced by dlib's save_image_dataset_metadata() routine. \n\
|
XML format produced by dlib's save_image_dataset_metadata() routine. \n\
|
||||||
- This function will apply a reasonable set of default parameters and \n\
|
|
||||||
preprocessing techniques to the training procedure for shape_predictors \n\
|
|
||||||
objects. So the point of this function is to provide you with a very easy \n\
|
|
||||||
way to train a basic shape predictor. \n\
|
|
||||||
- The trained shape predictor is serialized to the file predictor_output_filename.");
|
- The trained shape predictor is serialized to the file predictor_output_filename.");
|
||||||
|
|
||||||
def("test_shape_predictor", test_shape_predictor_py,
|
def("test_shape_predictor", test_shape_predictor_py,
|
||||||
|
@ -73,8 +73,8 @@ namespace dlib
|
|||||||
{
|
{
|
||||||
if (options.lambda <= 0)
|
if (options.lambda <= 0)
|
||||||
throw error("Invalid lambda value given to train_shape_predictor(), lambda must be > 0.");
|
throw error("Invalid lambda value given to train_shape_predictor(), lambda must be > 0.");
|
||||||
if (options.nu <= 0)
|
if (!(0 < options.nu && options.nu <= 1))
|
||||||
throw error("Invalid nu value given to train_shape_predictor(), nu must be > 0.");
|
throw error("Invalid nu value given to train_shape_predictor(). It is required that 0 < nu <= 1.");
|
||||||
if (options.feature_pool_region_padding < 0)
|
if (options.feature_pool_region_padding < 0)
|
||||||
throw error("Invalid feature_pool_region_padding value given to train_shape_predictor(), feature_pool_region_padding must be >= 0.");
|
throw error("Invalid feature_pool_region_padding value given to train_shape_predictor(), feature_pool_region_padding must be >= 0.");
|
||||||
|
|
||||||
@ -123,16 +123,13 @@ namespace dlib
|
|||||||
const shape_predictor_training_options& options
|
const shape_predictor_training_options& options
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
dlib::array<array2d<rgb_pixel> > images;
|
dlib::array<array2d<unsigned char> > images;
|
||||||
std::vector<std::vector<full_object_detection> > objects;
|
std::vector<std::vector<full_object_detection> > objects;
|
||||||
load_image_dataset(images, objects, dataset_filename);
|
load_image_dataset(images, objects, dataset_filename);
|
||||||
|
|
||||||
shape_predictor predictor = train_shape_predictor_on_images(images, objects, options);
|
shape_predictor predictor = train_shape_predictor_on_images(images, objects, options);
|
||||||
|
|
||||||
std::ofstream fout(predictor_output_filename.c_str(), std::ios::binary);
|
serialize(predictor_output_filename) << predictor;
|
||||||
int version = 1;
|
|
||||||
serialize(predictor, fout);
|
|
||||||
serialize(version, fout);
|
|
||||||
|
|
||||||
if (options.be_verbose)
|
if (options.be_verbose)
|
||||||
std::cout << "Training complete, saved predictor to file " << predictor_output_filename << std::endl;
|
std::cout << "Training complete, saved predictor to file " << predictor_output_filename << std::endl;
|
||||||
@ -165,7 +162,7 @@ namespace dlib
|
|||||||
)
|
)
|
||||||
{
|
{
|
||||||
// Load the images, no scales can be provided
|
// Load the images, no scales can be provided
|
||||||
dlib::array<array2d<rgb_pixel> > images;
|
dlib::array<array2d<unsigned char> > images;
|
||||||
// This interface cannot take the scales parameter.
|
// This interface cannot take the scales parameter.
|
||||||
std::vector<std::vector<double> > scales;
|
std::vector<std::vector<double> > scales;
|
||||||
std::vector<std::vector<full_object_detection> > objects;
|
std::vector<std::vector<full_object_detection> > objects;
|
||||||
@ -173,14 +170,7 @@ namespace dlib
|
|||||||
|
|
||||||
// Load the shape predictor
|
// Load the shape predictor
|
||||||
shape_predictor predictor;
|
shape_predictor predictor;
|
||||||
int version = 0;
|
deserialize(predictor_filename) >> predictor;
|
||||||
std::ifstream fin(predictor_filename.c_str(), std::ios::binary);
|
|
||||||
if (!fin)
|
|
||||||
throw error("Unable to open file " + predictor_filename);
|
|
||||||
deserialize(predictor, fin);
|
|
||||||
deserialize(version, fin);
|
|
||||||
if (version != 1)
|
|
||||||
throw error("Unknown shape_predictor format.");
|
|
||||||
|
|
||||||
return test_shape_predictor_with_images(images, objects, scales, predictor);
|
return test_shape_predictor_with_images(images, objects, scales, predictor);
|
||||||
}
|
}
|
||||||
|
@ -276,32 +276,33 @@ namespace dlib
|
|||||||
// Load the detector off disk (We have to use the explicit serialization here
|
// Load the detector off disk (We have to use the explicit serialization here
|
||||||
// so that we have an open file stream)
|
// so that we have an open file stream)
|
||||||
simple_object_detector detector;
|
simple_object_detector detector;
|
||||||
int version = 0;
|
|
||||||
std::ifstream fin(detector_filename.c_str(), std::ios::binary);
|
std::ifstream fin(detector_filename.c_str(), std::ios::binary);
|
||||||
if (!fin)
|
if (!fin)
|
||||||
throw error("Unable to open file " + detector_filename);
|
throw error("Unable to open file " + detector_filename);
|
||||||
deserialize(detector, fin);
|
deserialize(detector, fin);
|
||||||
deserialize(version, fin);
|
|
||||||
if (version != 1)
|
|
||||||
throw error("Unknown simple_object_detector format.");
|
|
||||||
|
|
||||||
/* Here we need a little hack to deal with whether we are going to be loading a
|
/* Here we need a little hack to deal with whether we are going to be loading a
|
||||||
* simple_object_detector (possibly trained outside of Python) or a
|
* simple_object_detector (possibly trained outside of Python) or a
|
||||||
* simple_object_detector_py (definitely trained from Python). In order to do
|
* simple_object_detector_py (definitely trained from Python). In order to do this
|
||||||
* this we peek into the filestream to see if there is more data after the
|
* we peek into the filestream to see if there is more data after the object
|
||||||
* version number. If there is, it will be the upsampling amount. Therefore,
|
* detector. If there is, it will be the version and upsampling amount. Therefore,
|
||||||
* by default we set the upsampling amount to -1 so that we can catch when
|
* by default we set the upsampling amount to -1 so that we can catch when no
|
||||||
* no upsampling amount has been passed (numbers less than 0). If -1 is
|
* upsampling amount has been passed (numbers less than 0). If -1 is passed, we
|
||||||
* passed, we assume no upsampling and use 0. If a number > 0 is passed,
|
* assume no upsampling and use 0. If a number > 0 is passed, we use that, else we
|
||||||
* we use that, else we use the upsampling amount cached with the detector
|
* use the upsampling amount saved in the detector file (if it exists).
|
||||||
* (if it exists).
|
|
||||||
*/
|
*/
|
||||||
unsigned int final_upsampling_amount = 0;
|
unsigned int final_upsampling_amount = 0;
|
||||||
const unsigned int cached_upsample_amount = fin.peek();
|
if (fin.peek() != EOF)
|
||||||
|
{
|
||||||
|
int version = 0;
|
||||||
|
deserialize(version, fin);
|
||||||
|
if (version != 1)
|
||||||
|
throw error("Unknown simple_object_detector format.");
|
||||||
|
deserialize(final_upsampling_amount, fin);
|
||||||
|
}
|
||||||
if (upsample_amount >= 0)
|
if (upsample_amount >= 0)
|
||||||
final_upsampling_amount = upsample_amount;
|
final_upsampling_amount = upsample_amount;
|
||||||
else if (cached_upsample_amount != std::char_traits<wchar_t>::eof()) // peek() returns EOF if no more data
|
|
||||||
deserialize(final_upsampling_amount, fin);
|
|
||||||
|
|
||||||
return test_simple_object_detector_with_images(images, final_upsampling_amount, boxes, ignore, detector);
|
return test_simple_object_detector_with_images(images, final_upsampling_amount, boxes, ignore, detector);
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user