Gave the shape_predictor_trainer the ability to learn from datasets where some landmarks are missing.
This commit is contained in:
Davis King 2015-06-12 09:00:21 -04:00
parent 3286a0a342
commit 2d57a7546e
2 changed files with 107 additions and 21 deletions

View File

@ -563,6 +563,7 @@ namespace dlib
// make sure the objects agree on the number of parts and that there is at
// least one full_object_detection.
unsigned long num_parts = 0;
std::vector<int> part_present;
for (unsigned long i = 0; i < objects.size(); ++i)
{
for (unsigned long j = 0; j < objects[i].size(); ++j)
@ -574,6 +575,7 @@ namespace dlib
"\t shape_predictor shape_predictor_trainer::train()"
<< "\n\t You can't give objects that don't have any parts to the trainer."
);
part_present.resize(num_parts);
}
else
{
@ -584,12 +586,22 @@ namespace dlib
<< "\n\t num_parts: " << num_parts
);
}
for (unsigned long p = 0; p < objects[i][j].num_parts(); ++p)
{
if (objects[i][j].part(p) != OBJECT_PART_NOT_PRESENT)
part_present[p] = 1;
}
}
}
DLIB_CASSERT(num_parts != 0,
"\t shape_predictor shape_predictor_trainer::train()"
<< "\n\t You must give at least one full_object_detection if you want to train a shape model and it must have parts."
);
DLIB_CASSERT(sum(mat(part_present)) == (long)num_parts,
"\t shape_predictor shape_predictor_trainer::train()"
<< "\n\t Each part must appear at least once in this training data. That is, "
<< "\n\t you can't have a part that is always set to OBJECT_PART_NOT_PRESENT."
);
@ -646,19 +658,33 @@ namespace dlib
private:
static matrix<float,0,1> object_to_shape (
const full_object_detection& obj
static void object_to_shape (
const full_object_detection& obj,
matrix<float,0,1>& shape,
matrix<float,0,1>& present // a mask telling which elements of #shape are present.
)
{
matrix<float,0,1> shape(obj.num_parts()*2);
shape.set_size(obj.num_parts()*2);
present.set_size(obj.num_parts()*2);
const point_transform_affine tform_from_img = impl::normalizing_tform(obj.get_rect());
for (unsigned long i = 0; i < obj.num_parts(); ++i)
{
vector<float,2> p = tform_from_img(obj.part(i));
shape(2*i) = p.x();
shape(2*i+1) = p.y();
if (obj.part(i) != OBJECT_PART_NOT_PRESENT)
{
vector<float,2> p = tform_from_img(obj.part(i));
shape(2*i) = p.x();
shape(2*i+1) = p.y();
present(2*i) = 1;
present(2*i+1) = 1;
}
else
{
shape(2*i) = 0;
shape(2*i+1) = 0;
present(2*i) = 0;
present(2*i+1) = 0;
}
}
return shape;
}
struct training_sample
@ -671,7 +697,9 @@ namespace dlib
pixel when you look it up relative to the shape in current_shape.
- target_shape == The truth shape. Stays constant during the whole
training process.
training process (except for the parts that are not present; those are
always kept equal to the current_shape values).
- present == 0/1 mask saying which parts of target_shape are present.
- rect == the position of the object in the image_idx-th image. All shape
coordinates are coded relative to this rectangle.
!*/
@ -679,6 +707,7 @@ namespace dlib
unsigned long image_idx;
rectangle rect;
matrix<float,0,1> target_shape;
matrix<float,0,1> present;
matrix<float,0,1> current_shape;
std::vector<float> feature_pixel_values;
@ -688,6 +717,7 @@ namespace dlib
std::swap(image_idx, item.image_idx);
std::swap(rect, item.rect);
target_shape.swap(item.target_shape);
present.swap(item.present);
current_shape.swap(item.current_shape);
feature_pixel_values.swap(item.feature_pixel_values);
}
@ -727,17 +757,38 @@ namespace dlib
// Now all the parts contain the ranges for the leaves so we can use them to
// compute the average leaf values.
matrix<float,0,1> present_counts(samples[0].target_shape.size());
tree.leaf_values.resize(parts.size());
for (unsigned long i = 0; i < parts.size(); ++i)
{
// Get the present counts for each dimension so we can divide each
// dimension by the number of observations we have on it to find the mean
// displacement in each leaf.
present_counts = 0;
for (unsigned long j = parts[i].first; j < parts[i].second; ++j)
present_counts += samples[j].present;
present_counts = dlib::reciprocal(present_counts);
if (parts[i].second != parts[i].first)
tree.leaf_values[i] = sums[num_split_nodes+i]*get_nu()/(parts[i].second - parts[i].first);
tree.leaf_values[i] = pointwise_multiply(present_counts,sums[num_split_nodes+i]*get_nu());
else
tree.leaf_values[i] = zeros_matrix(samples[0].target_shape);
// now adjust the current shape based on these predictions
for (unsigned long j = parts[i].first; j < parts[i].second; ++j)
{
samples[j].current_shape += tree.leaf_values[i];
// For parts that aren't present in the training data, we just make
// sure that the target shape always matches and therefore gives zero
// error. So this makes the algorithm simply ignore non-present
// landmarks.
for (long k = 0; k < samples[j].present.size(); ++k)
{
// if this part is not present
if (samples[j].present(k) == 0)
samples[j].target_shape(k) = samples[j].current_shape(k);
}
}
}
return tree;
@ -867,7 +918,7 @@ namespace dlib
{
samples.clear();
matrix<float,0,1> mean_shape;
long count = 0;
matrix<float,0,1> count;
// first fill out the target shapes
for (unsigned long i = 0; i < objects.size(); ++i)
{
@ -876,15 +927,15 @@ namespace dlib
training_sample sample;
sample.image_idx = i;
sample.rect = objects[i][j].get_rect();
sample.target_shape = object_to_shape(objects[i][j]);
object_to_shape(objects[i][j], sample.target_shape, sample.present);
for (unsigned long itr = 0; itr < get_oversampling_amount(); ++itr)
samples.push_back(sample);
mean_shape += sample.target_shape;
++count;
count += sample.present;
}
}
mean_shape /= count;
mean_shape = pointwise_multiply(mean_shape,reciprocal(count));
// now go pick random initial shapes
for (unsigned long i = 0; i < samples.size(); ++i)
@ -897,12 +948,35 @@ namespace dlib
}
else
{
// Pick a random convex combination of two of the target shapes and use
// that as the initial shape for this sample.
const unsigned long rand_idx = rnd.get_random_32bit_number()%samples.size();
const unsigned long rand_idx2 = rnd.get_random_32bit_number()%samples.size();
const double alpha = rnd.get_random_double();
samples[i].current_shape = alpha*samples[rand_idx].target_shape + (1-alpha)*samples[rand_idx2].target_shape;
samples[i].current_shape.set_size(0);
matrix<float,0,1> hits(mean_shape.size());
hits = 0;
int iter = 0;
// Pick a few samples at random and randomly average them together to
// make the initial shape. Note that we make sure we get at least one
// observation (i.e. non-OBJECT_PART_NOT_PRESENT) on each part
// location.
while(min(hits) == 0 || iter < 2)
{
++iter;
const unsigned long rand_idx = rnd.get_random_32bit_number()%samples.size();
const double alpha = rnd.get_random_double()+0.1;
samples[i].current_shape += alpha*samples[rand_idx].target_shape;
hits += alpha*samples[rand_idx].present;
}
samples[i].current_shape = pointwise_multiply(samples[i].current_shape, reciprocal(hits));
}
}
for (unsigned long i = 0; i < samples.size(); ++i)
{
for (long k = 0; k < samples[i].present.size(); ++k)
{
// if this part is not present
if (samples[i].present(k) == 0)
samples[i].target_shape(k) = samples[i].current_shape(k);
}
}
@ -1029,8 +1103,11 @@ namespace dlib
for (unsigned long k = 0; k < det.num_parts(); ++k)
{
double score = length(det.part(k) - objects[i][j].part(k))/scale;
rs.add(score);
if (objects[i][j].part(k) != OBJECT_PART_NOT_PRESENT)
{
double score = length(det.part(k) - objects[i][j].part(k))/scale;
rs.add(score);
}
}
}
}

View File

@ -359,6 +359,9 @@ namespace dlib
- images.size() > 0
- for some i: objects[i].size() != 0
(i.e. there has to be at least one full_object_detection in the training set)
- for all valid p, there must exist i and j such that:
objects[i][j].part(p) != OBJECT_PART_NOT_PRESENT.
(i.e. You can't define a part that is always set to OBJECT_PART_NOT_PRESENT.)
- for all valid i,j,k,l:
- objects[i][j].num_parts() == objects[k][l].num_parts()
(i.e. all objects must agree on the number of parts)
@ -370,6 +373,10 @@ namespace dlib
shape_predictor, SP, such that:
SP(images[i], objects[i][j].get_rect()) == objects[i][j]
This learned SP object is then returned.
- Not all parts are required to be observed for all objects. So if you
have training instances with missing parts then set the part positions
equal to OBJECT_PART_NOT_PRESENT and this algorithm will basically ignore
those missing parts.
!*/
};
@ -408,6 +415,8 @@ namespace dlib
and compare the result with the truth part positions in objects[i][j]. We
then return the average distance (measured in pixels) between a predicted
part location and its true position.
- Note that any parts in objects that are set to OBJECT_PART_NOT_PRESENT are
simply ignored.
- if (scales.size() != 0) then
- Each time we compute the distance between a predicted part location and
its true location in objects[i][j] we divide the distance by