Add input_grayscale_image_pyramid, issue #354 (#1761)

Add input_grayscale_image_pyramid
This commit is contained in:
Facundo Galán 2019-05-25 18:18:01 -03:00 committed by Davis E. King
parent 0ecb49b94e
commit 8001b924e6
2 changed files with 370 additions and 102 deletions

View File

@ -590,8 +590,204 @@ namespace dlib
// ----------------------------------------------------------------------------------------
namespace detail {
template <typename PYRAMID_TYPE>
class input_rgb_image_pyramid
class input_image_pyramid
{
public:
// Pure virtual destructor: keeps this helper base abstract so it is only used
// through a derived input layer.  Its definition is supplied after the class.
virtual ~input_image_pyramid() = 0;
typedef PYRAMID_TYPE pyramid_type;
// Accessors for the two padding values that to_tensor_init() hands to
// impl::compute_tiled_image_pyramid_details() when it lays out the pyramid.
unsigned long get_pyramid_padding() const { return pyramid_padding; }
void set_pyramid_padding(unsigned long value) { pyramid_padding = value; }
unsigned long get_pyramid_outer_padding() const { return pyramid_outer_padding; }
void set_pyramid_outer_padding(unsigned long value) { pyramid_outer_padding = value; }
bool image_contained_point(
const tensor& data,
const point& p
) const
{
auto&& rects = any_cast<std::vector<rectangle>>(data.annotation());
DLIB_CASSERT(rects.size() > 0);
return rects[0].contains(p + rects[0].tl_corner());
}
drectangle tensor_space_to_image_space(
    const tensor& data,
    drectangle r
) const
{
    // The tile layout of the pyramid was saved in data's annotation by
    // to_tensor_init(); tiled_pyramid_to_image() uses it to map r back.
    const auto& tile_rects = any_cast<std::vector<rectangle>>(data.annotation());
    return tiled_pyramid_to_image<pyramid_type>(tile_rects, r);
}
drectangle image_space_to_tensor_space (
    const tensor& data,
    double scale,
    drectangle r
) const
{
    // Only scales in (0,1] are accepted; check before touching the annotation.
    DLIB_CASSERT(0 < scale && scale <= 1, "scale: " << scale);

    // The tile layout of the pyramid was saved in data's annotation by
    // to_tensor_init(); image_to_tiled_pyramid() uses it to place r.
    const auto& tile_rects = any_cast<std::vector<rectangle>>(data.annotation());
    return image_to_tiled_pyramid<pyramid_type>(tile_rects, scale, r);
}
protected:
template <typename forward_iterator>
void to_tensor_init (
    forward_iterator ibegin,
    forward_iterator iend,
    resizable_tensor &data,
    unsigned int k
) const
{
    // Shared first half of the derived classes' to_tensor():
    //   - checks that [ibegin, iend) is non-empty and that every image has the
    //     same dimensions as the first one,
    //   - asks impl::compute_tiled_image_pyramid_details() for the tile layout,
    //     which is stored in data.annotation(), and for the tensor size NR x NC,
    //   - sizes data to (number of images, k, NR, NC) and fills it with zeros.
    // k is the number of channels the caller will write per image.
    DLIB_CASSERT(std::distance(ibegin, iend) > 0);
    auto nr = ibegin->nr();
    auto nc = ibegin->nc();
    // make sure all the input matrices have the same dimensions
    for (auto i = ibegin; i != iend; ++i)
    {
        // This check is shared by every pyramid input layer, so the message
        // names the common base rather than one particular derived class.
        DLIB_CASSERT(i->nr() == nr && i->nc() == nc,
            "\t input_image_pyramid::to_tensor()"
            << "\n\t All matrices given to to_tensor() must have the same dimensions."
            << "\n\t nr: " << nr
            << "\n\t nc: " << nc
            << "\n\t i->nr(): " << i->nr()
            << "\n\t i->nc(): " << i->nc()
        );
    }

    long NR, NC;
    pyramid_type pyr;
    // rects refers to the vector held inside data's annotation, so the layout
    // computed below stays attached to the tensor for later coordinate mapping.
    auto& rects = data.annotation().get<std::vector<rectangle>>();
    impl::compute_tiled_image_pyramid_details(pyr, nr, nc, pyramid_padding, pyramid_outer_padding, rects,
                                              NR, NC);

    // initialize data to the right size to contain the stuff in the iterator range.
    data.set_size(std::distance(ibegin, iend), k, NR, NC);

    // We need to zero the image before doing the pyramid, since the pyramid
    // creation code doesn't write to all parts of the image.  We also take
    // care to avoid triggering any device to hosts copies.
    auto ptr = data.host_write_only();
    for (size_t i = 0; i < data.size(); ++i)
        ptr[i] = 0;
}
// now build the image pyramid into data. This does the same thing as
// standard create_tiled_pyramid(), except we use the GPU if one is available.
void create_tiled_pyramid (
const std::vector<rectangle>& rects,
resizable_tensor& data
) const
{
for (size_t i = 1; i < rects.size(); ++i) {
alias_tensor src(data.num_samples(), data.k(), rects[i - 1].height(), rects[i - 1].width());
alias_tensor dest(data.num_samples(), data.k(), rects[i].height(), rects[i].width());
auto asrc = src(data, data.nc() * rects[i - 1].top() + rects[i - 1].left());
auto adest = dest(data, data.nc() * rects[i].top() + rects[i].left());
tt::resize_bilinear(adest, data.nc(), data.nr() * data.nc(),
asrc, data.nc(), data.nr() * data.nc());
}
}
// Default padding values passed to impl::compute_tiled_image_pyramid_details()
// by to_tensor_init(); changeable through the set_pyramid_*padding() setters.
unsigned long pyramid_padding = 10;
unsigned long pyramid_outer_padding = 11;
};
// The destructor is declared pure virtual in the class, but it still needs a
// definition because the destructors of derived classes invoke it.
template <typename PYRAMID_TYPE>
input_image_pyramid<PYRAMID_TYPE>::~input_image_pyramid() {}
}
// ----------------------------------------------------------------------------------------
template <typename PYRAMID_TYPE>
class input_grayscale_image_pyramid : public detail::input_image_pyramid<PYRAMID_TYPE>
{
    // Input layer for grayscale images of type matrix<unsigned char>.
    // to_tensor() produces a single channel tensor in which every sample holds
    // a tiled image pyramid of the corresponding input image.  The padding
    // settings and the image/tensor coordinate mapping functions come from
    // detail::input_image_pyramid.
public:
    typedef matrix<unsigned char> input_type;
    typedef PYRAMID_TYPE pyramid_type;

    // Converts the images in [ibegin, iend) into data.  The range must be
    // non-empty and all images must have the same dimensions (both are
    // asserted by to_tensor_init()).
    template <typename forward_iterator>
    void to_tensor (
        forward_iterator ibegin,
        forward_iterator iend,
        resizable_tensor& data
    ) const
    {
        // Sizes data to one channel per image, zero fills it and stores the
        // pyramid tile rectangles in data.annotation().
        this->to_tensor_init(ibegin, iend, data, 1);

        // Bind by reference: the rectangles stay alive inside data's annotation
        // for the whole call, so copying the vector would be wasted work.
        const auto& rects = data.annotation().get<std::vector<rectangle>>();
        if (rects.size() == 0)
            return;

        // copy the first raw image into the top part of the tiled pyramid.  We need to
        // do this for each of the input images/samples in the tensor.
        auto ptr = data.host_write_only();
        for (auto i = ibegin; i != iend; ++i)
        {
            auto& img = *i;
            // skip the rows above the tile that holds the full resolution image
            ptr += rects[0].top()*data.nc();
            for (long r = 0; r < img.nr(); ++r)
            {
                auto p = ptr+rects[0].left();
                // pixel values are scaled by 1/256 while they are copied
                for (long c = 0; c < img.nc(); ++c)
                    p[c] = (img(r,c))/256.0;
                ptr += data.nc();
            }
            // skip the rows below the tile so ptr lands on the next sample
            // (assumes rects[0] has exactly as many rows as img -- TODO confirm)
            ptr += data.nc()*(data.nr()-rects[0].bottom()-1);
        }

        // fill in the remaining pyramid levels from the image just copied
        this->create_tiled_pyramid(rects, data);
    }

    // Writes a version tag followed by the two padding values.
    friend void serialize(const input_grayscale_image_pyramid& item, std::ostream& out)
    {
        serialize("input_grayscale_image_pyramid", out);
        serialize(item.pyramid_padding, out);
        serialize(item.pyramid_outer_padding, out);
    }

    // Reads what serialize() wrote.  Throws serialization_error if the version
    // tag does not match.
    friend void deserialize(input_grayscale_image_pyramid& item, std::istream& in)
    {
        std::string version;
        deserialize(version, in);
        if (version != "input_grayscale_image_pyramid")
            throw serialization_error("Unexpected version found while deserializing dlib::input_grayscale_image_pyramid.");
        deserialize(item.pyramid_padding, in);
        deserialize(item.pyramid_outer_padding, in);
    }

    // Prints a one line, human readable summary of the layer settings.
    friend std::ostream& operator<<(std::ostream& out, const input_grayscale_image_pyramid& item)
    {
        out << "input_grayscale_image_pyramid()";
        out << " pyramid_padding="<<item.pyramid_padding;
        out << " pyramid_outer_padding="<<item.pyramid_outer_padding;
        return out;
    }

    // Writes the layer as a single self closing XML element.
    friend void to_xml(const input_grayscale_image_pyramid& item, std::ostream& out)
    {
        // The tag name must be followed by a space, not a quote: this element
        // has no attribute before pyramid_padding whose value would need
        // closing, and a stray quote makes the XML malformed.
        out << "<input_grayscale_image_pyramid"
            <<" pyramid_padding='"<<item.pyramid_padding
            <<"' pyramid_outer_padding='"<<item.pyramid_outer_padding
            <<"'/>";
    }
};
// ----------------------------------------------------------------------------------------
template <typename PYRAMID_TYPE>
class input_rgb_image_pyramid : public detail::input_image_pyramid<PYRAMID_TYPE>
{
public:
typedef matrix<rgb_pixel> input_type;
@ -616,42 +812,6 @@ namespace dlib
// Read access to the stored green and blue channel averages.
float get_avg_green() const { return avg_green; }
float get_avg_blue() const { return avg_blue; }
// Accessors for the padding values that to_tensor() passes to
// impl::compute_tiled_image_pyramid_details() when laying out the pyramid.
unsigned long get_pyramid_padding () const { return pyramid_padding; }
void set_pyramid_padding (unsigned long value) { pyramid_padding = value; }
unsigned long get_pyramid_outer_padding () const { return pyramid_outer_padding; }
void set_pyramid_outer_padding (unsigned long value) { pyramid_outer_padding = value; }
bool image_contained_point (
const tensor& data,
const point& p
) const
{
auto&& rects = any_cast<std::vector<rectangle>>(data.annotation());
DLIB_CASSERT(rects.size() > 0);
return rects[0].contains(p+rects[0].tl_corner());
}
drectangle tensor_space_to_image_space (
    const tensor& data,
    drectangle r
) const
{
    // Look up the pyramid's tile layout from the tensor's annotation and let
    // tiled_pyramid_to_image() map r with it.
    const auto& layout = any_cast<std::vector<rectangle>>(data.annotation());
    return tiled_pyramid_to_image<pyramid_type>(layout, r);
}
drectangle image_space_to_tensor_space (
    const tensor& data,
    double scale,
    drectangle r
) const
{
    // scale must lie in (0,1]; this is verified before anything else happens.
    DLIB_CASSERT(0 < scale && scale <= 1 , "scale: "<< scale);

    // Look up the pyramid's tile layout from the tensor's annotation and let
    // image_to_tiled_pyramid() place r with it.
    const auto& layout = any_cast<std::vector<rectangle>>(data.annotation());
    return image_to_tiled_pyramid<pyramid_type>(layout, scale, r);
}
template <typename forward_iterator>
void to_tensor (
forward_iterator ibegin,
@ -659,42 +819,15 @@ namespace dlib
resizable_tensor& data
) const
{
DLIB_CASSERT(std::distance(ibegin,iend) > 0);
auto nr = ibegin->nr();
auto nc = ibegin->nc();
// make sure all the input matrices have the same dimensions
for (auto i = ibegin; i != iend; ++i)
{
DLIB_CASSERT(i->nr()==nr && i->nc()==nc,
"\t input_rgb_image_pyramid::to_tensor()"
<< "\n\t All matrices given to to_tensor() must have the same dimensions."
<< "\n\t nr: " << nr
<< "\n\t nc: " << nc
<< "\n\t i->nr(): " << i->nr()
<< "\n\t i->nc(): " << i->nc()
);
}
long NR, NC;
pyramid_type pyr;
auto& rects = data.annotation().get<std::vector<rectangle>>();
impl::compute_tiled_image_pyramid_details(pyr, nr, nc, pyramid_padding, pyramid_outer_padding, rects, NR, NC);
// initialize data to the right size to contain the stuff in the iterator range.
data.set_size(std::distance(ibegin,iend), 3, NR, NC);
// We need to zero the image before doing the pyramid, since the pyramid
// creation code doesn't write to all parts of the image. We also take
// care to avoid triggering any device to hosts copies.
auto ptr = data.host_write_only();
for (size_t i = 0; i < data.size(); ++i)
ptr[i] = 0;
this->to_tensor_init(ibegin, iend, data, 3);
const auto rects = data.annotation().get<std::vector<rectangle>>();
if (rects.size() == 0)
return;
// copy the first raw image into the top part of the tiled pyramid. We need to
// do this for each of the input images/samples in the tensor.
auto ptr = data.host_write_only();
for (auto i = ibegin; i != iend; ++i)
{
auto& img = *i;
@ -729,19 +862,7 @@ namespace dlib
ptr += data.nc()*(data.nr()-rects[0].bottom()-1);
}
// now build the image pyramid into data. This does the same thing as
// create_tiled_pyramid(), except we use the GPU if one is available.
for (size_t i = 1; i < rects.size(); ++i)
{
alias_tensor src(data.num_samples(),data.k(),rects[i-1].height(),rects[i-1].width());
alias_tensor dest(data.num_samples(),data.k(),rects[i].height(),rects[i].width());
auto asrc = src(data, data.nc()*rects[i-1].top() + rects[i-1].left());
auto adest = dest(data, data.nc()*rects[i].top() + rects[i].left());
tt::resize_bilinear(adest, data.nc(), data.nr()*data.nc(),
asrc, data.nc(), data.nr()*data.nc());
}
this->create_tiled_pyramid(rects, data);
}
friend void serialize(const input_rgb_image_pyramid& item, std::ostream& out)
@ -796,8 +917,6 @@ namespace dlib
float avg_red;
float avg_green;
float avg_blue;
unsigned long pyramid_padding = 10;
unsigned long pyramid_outer_padding = 11;
};
// ----------------------------------------------------------------------------------------

View File

@ -271,6 +271,155 @@ namespace dlib
};
// ----------------------------------------------------------------------------------------
template <
typename PYRAMID_TYPE
>
class input_grayscale_image_pyramid
{
/*!
REQUIREMENTS ON PYRAMID_TYPE
PYRAMID_TYPE must be an instance of the dlib::pyramid_down template.
WHAT THIS OBJECT REPRESENTS
This input layer works with grayscale images of type matrix<unsigned char>.
It is identical to the input layer except that it outputs a tensor containing
a tiled image pyramid of each input image rather than a simple copy of each image.
The tiled image pyramid is created using create_tiled_pyramid().
!*/
public:
typedef matrix<unsigned char> input_type;
typedef PYRAMID_TYPE pyramid_type;
input_grayscale_image_pyramid (
);
/*!
ensures
- #get_pyramid_padding() == 10
- #get_pyramid_outer_padding() == 11
!*/
unsigned long get_pyramid_padding (
) const;
/*!
ensures
- When this object creates a pyramid it will call create_tiled_pyramid() and
set create_tiled_pyramid's pyramid_padding parameter to get_pyramid_padding().
!*/
void set_pyramid_padding (
unsigned long value
);
/*!
ensures
- #get_pyramid_padding() == value
!*/
unsigned long get_pyramid_outer_padding (
) const;
/*!
ensures
- When this object creates a pyramid it will call create_tiled_pyramid()
and set create_tiled_pyramid's pyramid_outer_padding parameter to
get_pyramid_outer_padding().
!*/
void set_pyramid_outer_padding (
unsigned long value
);
/*!
ensures
- #get_pyramid_outer_padding() == value
!*/
template <typename forward_iterator>
void to_tensor (
forward_iterator ibegin,
forward_iterator iend,
resizable_tensor& data
) const;
/*!
requires
- [ibegin, iend) is an iterator range over input_type objects.
- std::distance(ibegin,iend) > 0
- The input range should contain images that all have the same
dimensions.
ensures
- Converts the iterator range into a tensor and stores it into #data.  In
particular, we will have:
- #data.num_samples() == std::distance(ibegin,iend)
- #data.k() == 1
- Each sample in #data contains a tiled image pyramid of the
corresponding input image.  The tiled pyramid is created by
create_tiled_pyramid().
Moreover, each pixel value is normalized by dividing it by 256.0.
!*/
bool image_contained_point (
const tensor& data,
const point& p
) const;
/*!
requires
- data is a tensor that was produced by this->to_tensor()
ensures
- Since data is a tensor that is built from a bunch of identically sized
images, we can ask if those images were big enough to contain the point
p.  This function returns the answer to that question.
!*/
drectangle image_space_to_tensor_space (
const tensor& data,
double scale,
drectangle r
) const;
/*!
requires
- data is a tensor that was produced by this->to_tensor()
- 0 < scale <= 1
ensures
- This function maps from to_tensor()'s input image space to its output
tensor space.  Therefore, given that data is a tensor produced by
to_tensor(), image_space_to_tensor_space() allows you to ask for the
rectangle in data that corresponds to a rectangle in the original image
space.
Note that since the output tensor contains an image pyramid, there are
multiple points in the output tensor that correspond to any input
location.  So you must also specify a scale so we know what level of the
pyramid is needed.  So given a rectangle r in an input image, you can
ask, what rectangle in data corresponds to r when things are scale times
smaller?  That rectangle is returned by this function.
- A scale of 1 means we don't move anywhere in the pyramid scale space relative
to the input image while smaller values of scale mean we move down the
pyramid.
!*/
drectangle tensor_space_to_image_space (
const tensor& data,
drectangle r
) const;
/*!
requires
- data is a tensor that was produced by this->to_tensor()
ensures
- This function maps from to_tensor()'s output tensor space to its input
image space.  Therefore, given that data is a tensor produced by
to_tensor(), tensor_space_to_image_space() allows you to ask for the
rectangle in the input image that corresponds to a rectangle in data.
- It should be noted that this function isn't always an inverse of
image_space_to_tensor_space().  This is because you can ask
image_space_to_tensor_space() for the coordinates of points outside the input
image and they will be mapped to somewhere that doesn't have an inverse.
But for points actually inside the input image this function performs an
approximate inverse mapping.  I.e. when image_contained_point(data,center(r))==true
there is an approximate inverse.
!*/
};
// ----------------------------------------------------------------------------------------
template <