mirror of
https://github.com/davisking/dlib.git
synced 2024-11-01 10:14:53 +08:00
Added scan_image_movable_parts()
This commit is contained in:
parent
ab57847911
commit
3a2b8ee4a1
@ -22,20 +22,20 @@ namespace dlib
|
||||
|
||||
inline rectangle bounding_box_of_rects (
|
||||
const std::vector<std::pair<unsigned int, rectangle> >& rects,
|
||||
const point& origin
|
||||
const point& position
|
||||
)
|
||||
/*!
|
||||
ensures
|
||||
- returns the smallest rectangle that contains all the
|
||||
rectangles in rects. That is, returns the rectangle that
|
||||
contains translate_rect(rects[i].second,origin) for all valid i.
|
||||
contains translate_rect(rects[i].second,position) for all valid i.
|
||||
!*/
|
||||
{
|
||||
rectangle rect;
|
||||
|
||||
for (unsigned long i = 0; i < rects.size(); ++i)
|
||||
{
|
||||
rect += translate_rect(rects[i].second,origin);
|
||||
rect += translate_rect(rects[i].second,position);
|
||||
}
|
||||
|
||||
return rect;
|
||||
@ -72,7 +72,7 @@ namespace dlib
|
||||
double sum_of_rects_in_images (
|
||||
const image_array_type& images,
|
||||
const std::vector<std::pair<unsigned int, rectangle> >& rects,
|
||||
const point& origin
|
||||
const point& position
|
||||
)
|
||||
{
|
||||
DLIB_ASSERT(all_images_same_size(images),
|
||||
@ -101,13 +101,89 @@ namespace dlib
|
||||
for (unsigned long i = 0; i < rects.size(); ++i)
|
||||
{
|
||||
const typename image_array_type::type& img = images[rects[i].first];
|
||||
const rectangle rect = get_rect(img).intersect(translate_rect(rects[i].second,origin));
|
||||
const rectangle rect = get_rect(img).intersect(translate_rect(rects[i].second,position));
|
||||
temp += sum(matrix_cast<ptype>(subm(array_to_matrix(img), rect)));
|
||||
}
|
||||
|
||||
return static_cast<double>(temp);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename image_array_type
|
||||
>
|
||||
double sum_of_rects_in_images_movable_parts (
|
||||
const image_array_type& images,
|
||||
const rectangle& window,
|
||||
const std::vector<std::pair<unsigned int, rectangle> >& fixed_rects,
|
||||
const std::vector<std::pair<unsigned int, rectangle> >& movable_rects,
|
||||
const point& position
|
||||
)
|
||||
{
|
||||
DLIB_ASSERT(all_images_same_size(images) && center(window) == point(0,0),
|
||||
"\t double sum_of_rects_in_images_movable_parts()"
|
||||
<< "\n\t Invalid arguments given to this function."
|
||||
<< "\n\t all_images_same_size(images): " << all_images_same_size(images)
|
||||
<< "\n\t center(window): " << center(window)
|
||||
);
|
||||
#ifdef ENABLE_ASSERTS
|
||||
for (unsigned long i = 0; i < fixed_rects.size(); ++i)
|
||||
{
|
||||
DLIB_ASSERT(fixed_rects[i].first < images.size(),
|
||||
"\t double sum_of_rects_in_images_movable_parts()"
|
||||
<< "\n\t fixed_rects["<<i<<"].first must refer to a valid image."
|
||||
<< "\n\t fixed_rects["<<i<<"].first: " << fixed_rects[i].first
|
||||
<< "\n\t images.size(): " << images.size()
|
||||
);
|
||||
}
|
||||
for (unsigned long i = 0; i < movable_rects.size(); ++i)
|
||||
{
|
||||
DLIB_ASSERT(movable_rects[i].first < images.size(),
|
||||
"\t double sum_of_rects_in_images_movable_parts()"
|
||||
<< "\n\t movable_rects["<<i<<"].first must refer to a valid image."
|
||||
<< "\n\t movable_rects["<<i<<"].first: " << movable_rects[i].first
|
||||
<< "\n\t images.size(): " << images.size()
|
||||
);
|
||||
DLIB_ASSERT(center(movable_rects[i].second) == point(0,0),
|
||||
"\t double sum_of_rects_in_images_movable_parts()"
|
||||
<< "\n\t movable_rects["<<i<<"].second: " << movable_rects[i].second
|
||||
);
|
||||
}
|
||||
#endif
|
||||
typedef typename image_array_type::type::type pixel_type;
|
||||
typedef typename promote<pixel_type>::type ptype;
|
||||
|
||||
ptype temp = 0;
|
||||
|
||||
// compute TOTAL_FIXED part
|
||||
for (unsigned long i = 0; i < fixed_rects.size(); ++i)
|
||||
{
|
||||
const typename image_array_type::type& img = images[fixed_rects[i].first];
|
||||
const rectangle rect = get_rect(img).intersect(translate_rect(fixed_rects[i].second,position));
|
||||
temp += sum(matrix_cast<ptype>(subm(array_to_matrix(img), rect)));
|
||||
}
|
||||
|
||||
if (images.size() > 0)
|
||||
{
|
||||
// compute TOTAL_MOVABLE part
|
||||
array2d<ptype> tempimg(images[0].nr(), images[0].nc());
|
||||
for (unsigned long i = 0; i < movable_rects.size(); ++i)
|
||||
{
|
||||
const typename image_array_type::type& img = images[movable_rects[i].first];
|
||||
|
||||
assign_all_pixels(tempimg, 0);
|
||||
sum_filter(img, tempimg, movable_rects[i].second);
|
||||
|
||||
const rectangle rect = get_rect(tempimg).intersect(translate_rect(window,position));
|
||||
if (rect.is_empty() == false)
|
||||
temp += std::max(0,max(matrix_cast<ptype>(subm(array_to_matrix(tempimg), rect))));
|
||||
}
|
||||
}
|
||||
|
||||
return static_cast<double>(temp);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
@ -188,6 +264,120 @@ namespace dlib
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename image_array_type
|
||||
>
|
||||
void scan_image_movable_parts (
|
||||
std::vector<std::pair<double, point> >& dets,
|
||||
const image_array_type& images,
|
||||
const rectangle& window,
|
||||
const std::vector<std::pair<unsigned int, rectangle> >& fixed_rects,
|
||||
const std::vector<std::pair<unsigned int, rectangle> >& movable_rects,
|
||||
const double thresh,
|
||||
const unsigned long max_dets
|
||||
)
|
||||
{
|
||||
DLIB_ASSERT(images.size() > 0 && all_images_same_size(images) &&
|
||||
center(window) == point(0,0) && window.area() > 0,
|
||||
"\t void scan_image_movable_parts()"
|
||||
<< "\n\t Invalid arguments given to this function."
|
||||
<< "\n\t all_images_same_size(images): " << all_images_same_size(images)
|
||||
<< "\n\t center(window): " << center(window)
|
||||
<< "\n\t window.area(): " << window.area()
|
||||
<< "\n\t images.size(): " << images.size()
|
||||
);
|
||||
#ifdef ENABLE_ASSERTS
|
||||
for (unsigned long i = 0; i < fixed_rects.size(); ++i)
|
||||
{
|
||||
DLIB_ASSERT(fixed_rects[i].first < images.size(),
|
||||
"\t void scan_image_movable_parts()"
|
||||
<< "\n\t Invalid arguments given to this function."
|
||||
<< "\n\t fixed_rects["<<i<<"].first must refer to a valid image."
|
||||
<< "\n\t fixed_rects["<<i<<"].first: " << fixed_rects[i].first
|
||||
<< "\n\t images.size(): " << images.size()
|
||||
);
|
||||
}
|
||||
for (unsigned long i = 0; i < movable_rects.size(); ++i)
|
||||
{
|
||||
DLIB_ASSERT(movable_rects[i].first < images.size(),
|
||||
"\t void scan_image_movable_parts()"
|
||||
<< "\n\t Invalid arguments given to this function."
|
||||
<< "\n\t movable_rects["<<i<<"].first must refer to a valid image."
|
||||
<< "\n\t movable_rects["<<i<<"].first: " << movable_rects[i].first
|
||||
<< "\n\t images.size(): " << images.size()
|
||||
);
|
||||
DLIB_ASSERT(center(movable_rects[i].second) == point(0,0) &&
|
||||
movable_rects[i].second.area() > 0,
|
||||
"\t void scan_image_movable_parts()"
|
||||
<< "\n\t Invalid arguments given to this function."
|
||||
<< "\n\t movable_rects["<<i<<"].second: " << movable_rects[i].second
|
||||
<< "\n\t movable_rects["<<i<<"].second.area(): " << movable_rects[i].second.area()
|
||||
);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
dets.clear();
|
||||
if (max_dets == 0)
|
||||
return;
|
||||
if (movable_rects.size() == 0 && fixed_rects.size() == 0)
|
||||
return;
|
||||
|
||||
|
||||
typedef typename image_array_type::type::type pixel_type;
|
||||
typedef typename promote<pixel_type>::type ptype;
|
||||
|
||||
array2d<ptype> accum(images[0].nr(), images[0].nc());
|
||||
assign_all_pixels(accum, 0);
|
||||
|
||||
for (unsigned long i = 0; i < fixed_rects.size(); ++i)
|
||||
sum_filter(images[fixed_rects[i].first], accum, fixed_rects[i].second);
|
||||
|
||||
array2d<ptype> temp(accum.nr(), accum.nc());
|
||||
for (unsigned long i = 0; i < movable_rects.size(); ++i)
|
||||
{
|
||||
const rectangle rect = movable_rects[i].second;
|
||||
assign_all_pixels(temp, 0);
|
||||
sum_filter(images[movable_rects[i].first], temp, rect);
|
||||
max_filter(temp, accum, window.width(), window.height(), 0);
|
||||
}
|
||||
|
||||
// TODO, make this block its own function and reuse it in scan_image().
|
||||
unsigned long count = 0;
|
||||
dlib::rand rnd;
|
||||
for (long r = 0; r < accum.nr(); ++r)
|
||||
{
|
||||
for (long c = 0; c < accum.nc(); ++c)
|
||||
{
|
||||
const ptype cur_sum = accum[r][c];
|
||||
if (cur_sum >= thresh)
|
||||
{
|
||||
++count;
|
||||
|
||||
if (dets.size() < max_dets)
|
||||
{
|
||||
dets.push_back(std::make_pair(cur_sum, point(c,r)));
|
||||
}
|
||||
else
|
||||
{
|
||||
// The idea here is to cause us to randomly sample possible detection
|
||||
// locations throughout the image rather than just stopping the detection
|
||||
// procedure once we hit the max_dets limit. So this method will result
|
||||
// in a random subsample of all the detections >= thresh being in dets
|
||||
// at the end of scan_image_movable_parts().
|
||||
const unsigned long random_index = rnd.get_random_32bit_number()%count;
|
||||
if (random_index < dets.size())
|
||||
{
|
||||
dets[random_index] = std::make_pair(cur_sum, point(c,r));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
}
|
||||
|
@ -38,7 +38,7 @@ namespace dlib
|
||||
double sum_of_rects_in_images (
|
||||
const image_array_type& images,
|
||||
const std::vector<std::pair<unsigned int, rectangle> >& rects,
|
||||
const point& origin
|
||||
const point& position
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
@ -50,11 +50,52 @@ namespace dlib
|
||||
(i.e. all the rectangles must reference valid elements of images)
|
||||
ensures
|
||||
- returns the sum of the pixels inside the given rectangles. To be precise,
|
||||
let RECT_SUM[i] = sum of pixels inside the rectangle translate_rect(rects[i].second, origin)
|
||||
let RECT_SUM[i] = sum of pixels inside the rectangle translate_rect(rects[i].second, position)
|
||||
from the image images[rects[i].first]. Then this function returns the
|
||||
sum of RECT_SUM[i] for all the valid values of i.
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename image_array_type
|
||||
>
|
||||
double sum_of_rects_in_images_movable_parts (
|
||||
const image_array_type& images,
|
||||
const rectangle& window,
|
||||
const std::vector<std::pair<unsigned int, rectangle> >& fixed_rects,
|
||||
const std::vector<std::pair<unsigned int, rectangle> >& movable_rects,
|
||||
const point& position
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- image_array_type == an implementation of array/array_kernel_abstract.h
|
||||
- image_array_type::type == an implementation of array2d/array2d_kernel_abstract.h
|
||||
- image_array_type::type::type == a scalar pixel type (e.g. int rather than rgb_pixel)
|
||||
- all_images_same_size(images) == true
|
||||
- center(window) == point(0,0)
|
||||
- for all valid i:
|
||||
- fixed_rects[i].first < images.size()
|
||||
(i.e. all the rectangles must reference valid elements of images)
|
||||
- for all valid i:
|
||||
- movable_rects[i].first < images.size()
|
||||
(i.e. all the rectangles must reference valid elements of images)
|
||||
- center(movable_rects[i].second) == point(0,0)
|
||||
ensures
|
||||
- returns the sum of the pixels inside fixed_rects as well as the sum of the pixels
|
||||
inside movable_rects when these latter rectangles are placed at their highest
|
||||
scoring locations inside the given window. To be precise:
|
||||
- let RECT_SUM(r,x) = sum of pixels inside the rectangle translate_rect(r.second, x)
|
||||
from the image images[r.first].
|
||||
- let WIN_MAX(i) = The maximum value of RECT_SUM(movable_rects[i],X) when maximizing
|
||||
over all the X such that translate_rect(window,position).contains(X) == true.
|
||||
|
||||
- let TOTAL_FIXED == sum over all elements R in fixed_rects of: RECT_SUM(R,position)
|
||||
- let TOTAL_MOVABLE == sum over all valid i of: max(WIN_MAX(i), 0)
|
||||
|
||||
Then this function returns TOTAL_FIXED + TOTAL_MOVABLE.
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
@ -90,6 +131,54 @@ namespace dlib
|
||||
test.
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename image_array_type
|
||||
>
|
||||
void scan_image_movable_parts (
|
||||
std::vector<std::pair<double, point> >& dets,
|
||||
const image_array_type& images,
|
||||
const rectangle& window,
|
||||
const std::vector<std::pair<unsigned int, rectangle> >& fixed_rects,
|
||||
const std::vector<std::pair<unsigned int, rectangle> >& movable_rects,
|
||||
const double thresh,
|
||||
const unsigned long max_dets
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- image_array_type == an implementation of array/array_kernel_abstract.h
|
||||
- image_array_type::type == an implementation of array2d/array2d_kernel_abstract.h
|
||||
- image_array_type::type::type == a scalar pixel type (e.g. int rather than rgb_pixel)
|
||||
- images.size() > 0
|
||||
- all_images_same_size(images) == true
|
||||
- center(window) == point(0,0)
|
||||
- window.area() > 0
|
||||
- for all valid i:
|
||||
- fixed_rects[i].first < images.size()
|
||||
(i.e. all the rectangles must reference valid elements of images)
|
||||
- for all valid i:
|
||||
- movable_rects[i].first < images.size()
|
||||
(i.e. all the rectangles must reference valid elements of images)
|
||||
- center(movable_rects[i].second) == point(0,0)
|
||||
- movable_rects[i].second.area() > 0
|
||||
ensures
|
||||
- Scans the given window over the images and reports the locations with a score greater
|
||||
than or equal to thresh.
|
||||
- Specifically, we have:
|
||||
- #dets.size() <= max_dets
|
||||
(note that dets is cleared before new detections are added by scan_image_movable_parts())
|
||||
- for all valid i:
|
||||
- #dets[i].first == sum_of_rects_in_images_movable_parts(images,
|
||||
window,
|
||||
fixed_rects,
|
||||
movable_rects,
|
||||
#dets[i].second) >= thresh
|
||||
- if (there are more than max_dets locations that pass the above threshold test) then
|
||||
- #dets == a random subsample of all the locations which passed the threshold
|
||||
test.
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user