mirror of
https://github.com/davisking/dlib.git
synced 2024-11-01 10:14:53 +08:00
Add input_grayscale_image_pyramid
This commit is contained in:
parent
0ecb49b94e
commit
8001b924e6
283
dlib/dnn/input.h
283
dlib/dnn/input.h
@ -590,8 +590,204 @@ namespace dlib
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
namespace detail {
|
||||
template <typename PYRAMID_TYPE>
|
||||
class input_rgb_image_pyramid
|
||||
class input_image_pyramid
|
||||
{
|
||||
public:
|
||||
|
||||
virtual ~input_image_pyramid() = 0;
|
||||
|
||||
typedef PYRAMID_TYPE pyramid_type;
|
||||
|
||||
// Returns the current value of pyramid_padding (the padding value that to_tensor_init() forwards to impl::compute_tiled_image_pyramid_details()).
unsigned long get_pyramid_padding() const { return pyramid_padding; }
|
||||
// Stores value in pyramid_padding; a later get_pyramid_padding() call returns it.
void set_pyramid_padding(unsigned long value) { pyramid_padding = value; }
|
||||
|
||||
// Returns the current value of pyramid_outer_padding (the outer padding value that to_tensor_init() forwards to impl::compute_tiled_image_pyramid_details()).
unsigned long get_pyramid_outer_padding() const { return pyramid_outer_padding; }
|
||||
// Stores value in pyramid_outer_padding; a later get_pyramid_outer_padding() call returns it.
void set_pyramid_outer_padding(unsigned long value) { pyramid_outer_padding = value; }
|
||||
|
||||
bool image_contained_point(
|
||||
const tensor& data,
|
||||
const point& p
|
||||
) const
|
||||
{
|
||||
auto&& rects = any_cast<std::vector<rectangle>>(data.annotation());
|
||||
DLIB_CASSERT(rects.size() > 0);
|
||||
return rects[0].contains(p + rects[0].tl_corner());
|
||||
}
|
||||
|
||||
// Maps the rectangle r through tiled_pyramid_to_image<pyramid_type>() using the
// rectangle list stored in data's annotation, and returns the result.
drectangle tensor_space_to_image_space(
    const tensor& data,
    drectangle r
) const
{
    const auto& tiles = any_cast<std::vector<rectangle>>(data.annotation());
    return tiled_pyramid_to_image<pyramid_type>(tiles, r);
}
|
||||
|
||||
// Maps the rectangle r through image_to_tiled_pyramid<pyramid_type>() at the
// requested scale, using the rectangle list stored in data's annotation.
// Asserts that scale lies in (0, 1].
drectangle image_space_to_tensor_space (
    const tensor& data,
    double scale,
    drectangle r
) const
{
    DLIB_CASSERT(0 < scale && scale <= 1, "scale: " << scale);

    const auto& tiles = any_cast<std::vector<rectangle>>(data.annotation());
    return image_to_tiled_pyramid<pyramid_type>(tiles, scale, r);
}
|
||||
|
||||
protected:
|
||||
|
||||
template <typename forward_iterator>
|
||||
void to_tensor_init (
|
||||
forward_iterator ibegin,
|
||||
forward_iterator iend,
|
||||
resizable_tensor &data,
|
||||
unsigned int k
|
||||
) const
|
||||
{
|
||||
|
||||
DLIB_CASSERT(std::distance(ibegin, iend) > 0);
|
||||
auto nr = ibegin->nr();
|
||||
auto nc = ibegin->nc();
|
||||
// make sure all the input matrices have the same dimensions
|
||||
for (auto i = ibegin; i != iend; ++i)
|
||||
{
|
||||
DLIB_CASSERT(i->nr() == nr && i->nc() == nc,
|
||||
"\t input_grayscale_image_pyramid::to_tensor()"
|
||||
<< "\n\t All matrices given to to_tensor() must have the same dimensions."
|
||||
<< "\n\t nr: " << nr
|
||||
<< "\n\t nc: " << nc
|
||||
<< "\n\t i->nr(): " << i->nr()
|
||||
<< "\n\t i->nc(): " << i->nc()
|
||||
);
|
||||
}
|
||||
|
||||
long NR, NC;
|
||||
pyramid_type pyr;
|
||||
auto& rects = data.annotation().get<std::vector<rectangle>>();
|
||||
impl::compute_tiled_image_pyramid_details(pyr, nr, nc, pyramid_padding, pyramid_outer_padding, rects,
|
||||
NR, NC);
|
||||
|
||||
// initialize data to the right size to contain the stuff in the iterator range.
|
||||
data.set_size(std::distance(ibegin, iend), k, NR, NC);
|
||||
|
||||
// We need to zero the image before doing the pyramid, since the pyramid
|
||||
// creation code doesn't write to all parts of the image. We also take
|
||||
// care to avoid triggering any device to hosts copies.
|
||||
auto ptr = data.host_write_only();
|
||||
for (size_t i = 0; i < data.size(); ++i)
|
||||
ptr[i] = 0;
|
||||
|
||||
}
|
||||
|
||||
// now build the image pyramid into data. This does the same thing as
|
||||
// standard create_tiled_pyramid(), except we use the GPU if one is available.
|
||||
void create_tiled_pyramid (
|
||||
const std::vector<rectangle>& rects,
|
||||
resizable_tensor& data
|
||||
) const
|
||||
{
|
||||
for (size_t i = 1; i < rects.size(); ++i) {
|
||||
alias_tensor src(data.num_samples(), data.k(), rects[i - 1].height(), rects[i - 1].width());
|
||||
alias_tensor dest(data.num_samples(), data.k(), rects[i].height(), rects[i].width());
|
||||
|
||||
auto asrc = src(data, data.nc() * rects[i - 1].top() + rects[i - 1].left());
|
||||
auto adest = dest(data, data.nc() * rects[i].top() + rects[i].left());
|
||||
|
||||
tt::resize_bilinear(adest, data.nc(), data.nr() * data.nc(),
|
||||
asrc, data.nc(), data.nr() * data.nc());
|
||||
}
|
||||
}
|
||||
|
||||
unsigned long pyramid_padding = 10;
|
||||
unsigned long pyramid_outer_padding = 11;
|
||||
};
|
||||
|
||||
template <typename PYRAMID_TYPE>
|
||||
input_image_pyramid<PYRAMID_TYPE>::~input_image_pyramid() {}
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <typename PYRAMID_TYPE>
|
||||
class input_grayscale_image_pyramid : public detail::input_image_pyramid<PYRAMID_TYPE>
|
||||
{
|
||||
public:
|
||||
typedef matrix<unsigned char> input_type;
|
||||
typedef PYRAMID_TYPE pyramid_type;
|
||||
|
||||
template <typename forward_iterator>
|
||||
void to_tensor (
|
||||
forward_iterator ibegin,
|
||||
forward_iterator iend,
|
||||
resizable_tensor& data
|
||||
) const
|
||||
{
|
||||
this->to_tensor_init(ibegin, iend, data, 1);
|
||||
|
||||
const auto rects = data.annotation().get<std::vector<rectangle>>();
|
||||
if (rects.size() == 0)
|
||||
return;
|
||||
|
||||
// copy the first raw image into the top part of the tiled pyramid. We need to
|
||||
// do this for each of the input images/samples in the tensor.
|
||||
auto ptr = data.host_write_only();
|
||||
for (auto i = ibegin; i != iend; ++i)
|
||||
{
|
||||
auto& img = *i;
|
||||
ptr += rects[0].top()*data.nc();
|
||||
for (long r = 0; r < img.nr(); ++r)
|
||||
{
|
||||
auto p = ptr+rects[0].left();
|
||||
for (long c = 0; c < img.nc(); ++c)
|
||||
p[c] = (img(r,c))/256.0;
|
||||
ptr += data.nc();
|
||||
}
|
||||
ptr += data.nc()*(data.nr()-rects[0].bottom()-1);
|
||||
}
|
||||
|
||||
this->create_tiled_pyramid(rects, data);
|
||||
}
|
||||
|
||||
friend void serialize(const input_grayscale_image_pyramid& item, std::ostream& out)
|
||||
{
|
||||
serialize("input_grayscale_image_pyramid", out);
|
||||
serialize(item.pyramid_padding, out);
|
||||
serialize(item.pyramid_outer_padding, out);
|
||||
}
|
||||
|
||||
friend void deserialize(input_grayscale_image_pyramid& item, std::istream& in)
|
||||
{
|
||||
std::string version;
|
||||
deserialize(version, in);
|
||||
if (version != "input_grayscale_image_pyramid")
|
||||
throw serialization_error("Unexpected version found while deserializing dlib::input_grayscale_image_pyramid.");
|
||||
deserialize(item.pyramid_padding, in);
|
||||
deserialize(item.pyramid_outer_padding, in);
|
||||
}
|
||||
|
||||
friend std::ostream& operator<<(std::ostream& out, const input_grayscale_image_pyramid& item)
|
||||
{
|
||||
out << "input_grayscale_image_pyramid()";
|
||||
out << " pyramid_padding="<<item.pyramid_padding;
|
||||
out << " pyramid_outer_padding="<<item.pyramid_outer_padding;
|
||||
return out;
|
||||
}
|
||||
|
||||
friend void to_xml(const input_grayscale_image_pyramid& item, std::ostream& out)
|
||||
{
|
||||
out << "<input_grayscale_image_pyramid"
|
||||
<<"' pyramid_padding='"<<item.pyramid_padding
|
||||
<<"' pyramid_outer_padding='"<<item.pyramid_outer_padding
|
||||
<<"'/>";
|
||||
}
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <typename PYRAMID_TYPE>
|
||||
class input_rgb_image_pyramid : public detail::input_image_pyramid<PYRAMID_TYPE>
|
||||
{
|
||||
public:
|
||||
typedef matrix<rgb_pixel> input_type;
|
||||
@ -616,42 +812,6 @@ namespace dlib
|
||||
float get_avg_green() const { return avg_green; }
|
||||
float get_avg_blue() const { return avg_blue; }
|
||||
|
||||
unsigned long get_pyramid_padding () const { return pyramid_padding; }
|
||||
void set_pyramid_padding (unsigned long value) { pyramid_padding = value; }
|
||||
|
||||
unsigned long get_pyramid_outer_padding () const { return pyramid_outer_padding; }
|
||||
void set_pyramid_outer_padding (unsigned long value) { pyramid_outer_padding = value; }
|
||||
|
||||
bool image_contained_point (
|
||||
const tensor& data,
|
||||
const point& p
|
||||
) const
|
||||
{
|
||||
auto&& rects = any_cast<std::vector<rectangle>>(data.annotation());
|
||||
DLIB_CASSERT(rects.size() > 0);
|
||||
return rects[0].contains(p+rects[0].tl_corner());
|
||||
}
|
||||
|
||||
drectangle tensor_space_to_image_space (
|
||||
const tensor& data,
|
||||
drectangle r
|
||||
) const
|
||||
{
|
||||
auto&& rects = any_cast<std::vector<rectangle>>(data.annotation());
|
||||
return tiled_pyramid_to_image<pyramid_type>(rects, r);
|
||||
}
|
||||
|
||||
drectangle image_space_to_tensor_space (
|
||||
const tensor& data,
|
||||
double scale,
|
||||
drectangle r
|
||||
) const
|
||||
{
|
||||
DLIB_CASSERT(0 < scale && scale <= 1 , "scale: "<< scale);
|
||||
auto&& rects = any_cast<std::vector<rectangle>>(data.annotation());
|
||||
return image_to_tiled_pyramid<pyramid_type>(rects, scale, r);
|
||||
}
|
||||
|
||||
template <typename forward_iterator>
|
||||
void to_tensor (
|
||||
forward_iterator ibegin,
|
||||
@ -659,42 +819,15 @@ namespace dlib
|
||||
resizable_tensor& data
|
||||
) const
|
||||
{
|
||||
DLIB_CASSERT(std::distance(ibegin,iend) > 0);
|
||||
auto nr = ibegin->nr();
|
||||
auto nc = ibegin->nc();
|
||||
// make sure all the input matrices have the same dimensions
|
||||
for (auto i = ibegin; i != iend; ++i)
|
||||
{
|
||||
DLIB_CASSERT(i->nr()==nr && i->nc()==nc,
|
||||
"\t input_rgb_image_pyramid::to_tensor()"
|
||||
<< "\n\t All matrices given to to_tensor() must have the same dimensions."
|
||||
<< "\n\t nr: " << nr
|
||||
<< "\n\t nc: " << nc
|
||||
<< "\n\t i->nr(): " << i->nr()
|
||||
<< "\n\t i->nc(): " << i->nc()
|
||||
);
|
||||
}
|
||||
|
||||
long NR, NC;
|
||||
pyramid_type pyr;
|
||||
auto& rects = data.annotation().get<std::vector<rectangle>>();
|
||||
impl::compute_tiled_image_pyramid_details(pyr, nr, nc, pyramid_padding, pyramid_outer_padding, rects, NR, NC);
|
||||
|
||||
// initialize data to the right size to contain the stuff in the iterator range.
|
||||
data.set_size(std::distance(ibegin,iend), 3, NR, NC);
|
||||
|
||||
// We need to zero the image before doing the pyramid, since the pyramid
|
||||
// creation code doesn't write to all parts of the image. We also take
|
||||
// care to avoid triggering any device to hosts copies.
|
||||
auto ptr = data.host_write_only();
|
||||
for (size_t i = 0; i < data.size(); ++i)
|
||||
ptr[i] = 0;
|
||||
this->to_tensor_init(ibegin, iend, data, 3);
|
||||
|
||||
const auto rects = data.annotation().get<std::vector<rectangle>>();
|
||||
if (rects.size() == 0)
|
||||
return;
|
||||
|
||||
// copy the first raw image into the top part of the tiled pyramid. We need to
|
||||
// do this for each of the input images/samples in the tensor.
|
||||
auto ptr = data.host_write_only();
|
||||
for (auto i = ibegin; i != iend; ++i)
|
||||
{
|
||||
auto& img = *i;
|
||||
@ -729,19 +862,7 @@ namespace dlib
|
||||
ptr += data.nc()*(data.nr()-rects[0].bottom()-1);
|
||||
}
|
||||
|
||||
// now build the image pyramid into data. This does the same thing as
|
||||
// create_tiled_pyramid(), except we use the GPU if one is available.
|
||||
for (size_t i = 1; i < rects.size(); ++i)
|
||||
{
|
||||
alias_tensor src(data.num_samples(),data.k(),rects[i-1].height(),rects[i-1].width());
|
||||
alias_tensor dest(data.num_samples(),data.k(),rects[i].height(),rects[i].width());
|
||||
|
||||
auto asrc = src(data, data.nc()*rects[i-1].top() + rects[i-1].left());
|
||||
auto adest = dest(data, data.nc()*rects[i].top() + rects[i].left());
|
||||
|
||||
tt::resize_bilinear(adest, data.nc(), data.nr()*data.nc(),
|
||||
asrc, data.nc(), data.nr()*data.nc());
|
||||
}
|
||||
this->create_tiled_pyramid(rects, data);
|
||||
}
|
||||
|
||||
friend void serialize(const input_rgb_image_pyramid& item, std::ostream& out)
|
||||
@ -796,8 +917,6 @@ namespace dlib
|
||||
float avg_red;
|
||||
float avg_green;
|
||||
float avg_blue;
|
||||
unsigned long pyramid_padding = 10;
|
||||
unsigned long pyramid_outer_padding = 11;
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
@ -271,6 +271,155 @@ namespace dlib
|
||||
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
typename PYRAMID_TYPE
|
||||
>
|
||||
class input_grayscale_image_pyramid
|
||||
{
|
||||
/*!
|
||||
REQUIREMENTS ON PYRAMID_TYPE
|
||||
PYRAMID_TYPE must be an instance of the dlib::pyramid_down template.
|
||||
|
||||
WHAT THIS OBJECT REPRESENTS
|
||||
This input layer works with gray scale images of type matrix<unsigned char>.
|
||||
It is identical to the input layer except that it outputs a tensor containing a tiled
|
||||
image pyramid of each input image rather than a simple copy of each image.
|
||||
The tiled image pyramid is created using create_tiled_pyramid().
|
||||
!*/
|
||||
|
||||
public:
|
||||
|
||||
typedef matrix<unsigned char> input_type;
|
||||
typedef PYRAMID_TYPE pyramid_type;
|
||||
input_grayscale_image_pyramid (
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- #get_pyramid_padding() == 10
|
||||
- #get_pyramid_outer_padding() == 11
|
||||
!*/
|
||||
|
||||
unsigned long get_pyramid_padding (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- When this object creates a pyramid it will call create_tiled_pyramid() and
|
||||
set create_tiled_pyramid's pyramid_padding parameter to get_pyramid_padding().
|
||||
!*/
|
||||
|
||||
void set_pyramid_padding (
|
||||
unsigned long value
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- #get_pyramid_padding() == value
|
||||
!*/
|
||||
|
||||
unsigned long get_pyramid_outer_padding (
|
||||
) const;
|
||||
/*!
|
||||
ensures
|
||||
- When this object creates a pyramid it will call create_tiled_pyramid()
|
||||
and set create_tiled_pyramid's pyramid_outer_padding parameter to
|
||||
get_pyramid_outer_padding().
|
||||
!*/
|
||||
|
||||
void set_pyramid_outer_padding (
|
||||
unsigned long value
|
||||
);
|
||||
/*!
|
||||
ensures
|
||||
- #get_pyramid_outer_padding() == value
|
||||
!*/
|
||||
|
||||
template <typename forward_iterator>
|
||||
void to_tensor (
|
||||
forward_iterator ibegin,
|
||||
forward_iterator iend,
|
||||
resizable_tensor& data
|
||||
) const;
|
||||
/*!
|
||||
requires
|
||||
- [ibegin, iend) is an iterator range over input_type objects.
|
||||
- std::distance(ibegin,iend) > 0
|
||||
- The input range should contain images that all have the same
|
||||
dimensions.
|
||||
ensures
|
||||
- Converts the iterator range into a tensor and stores it into #data. In
|
||||
particular, we will have:
|
||||
- #data.num_samples() == std::distance(ibegin,iend)
|
||||
- #data.k() == 1
|
||||
- Each sample in #data contains a tiled image pyramid of the
|
||||
corresponding input image. The tiled pyramid is created by
|
||||
create_tiled_pyramid().
|
||||
Moreover, each pixel value is normalized by dividing it by 256.0.
|
||||
!*/
|
||||
|
||||
bool image_contained_point (
|
||||
const tensor& data,
|
||||
const point& p
|
||||
) const;
|
||||
/*!
|
||||
requires
|
||||
- data is a tensor that was produced by this->to_tensor()
|
||||
ensures
|
||||
- Since data is a tensor that is built from a bunch of identically sized
|
||||
images, we can ask if those images were big enough to contain the point
|
||||
p. This function returns the answer to that question.
|
||||
!*/
|
||||
|
||||
drectangle image_space_to_tensor_space (
|
||||
const tensor& data,
|
||||
double scale,
|
||||
drectangle r
|
||||
) const;
|
||||
/*!
|
||||
requires
|
||||
- data is a tensor that was produced by this->to_tensor()
|
||||
- 0 < scale <= 1
|
||||
ensures
|
||||
- This function maps from to_tensor()'s input image space to its output
|
||||
tensor space. Therefore, given that data is a tensor produced by
|
||||
to_tensor(), image_space_to_tensor_space() allows you to ask for the
|
||||
rectangle in data that corresponds to a rectangle in the original image
|
||||
space.
|
||||
|
||||
Note that since the output tensor contains an image pyramid, there are
|
||||
multiple points in the output tensor that correspond to any input
|
||||
location. So you must also specify a scale so we know what level of the
|
||||
pyramid is needed. So given a rectangle r in an input image, you can
|
||||
ask, what rectangle in data corresponds to r when things are scale times
|
||||
smaller? That rectangle is returned by this function.
|
||||
- A scale of 1 means we don't move anywhere in the pyramid scale space relative
|
||||
to the input image while smaller values of scale mean we move down the
|
||||
pyramid.
|
||||
!*/
|
||||
|
||||
drectangle tensor_space_to_image_space (
|
||||
const tensor& data,
|
||||
drectangle r
|
||||
) const;
|
||||
/*!
|
||||
requires
|
||||
- data is a tensor that was produced by this->to_tensor()
|
||||
ensures
|
||||
- This function maps from to_tensor()'s output tensor space to its input
|
||||
image space. Therefore, given that data is a tensor produced by
|
||||
to_tensor(), tensor_space_to_image_space() allows you to ask for the
|
||||
rectangle in the input image that corresponds to a rectangle in data.
|
||||
- It should be noted that this function isn't always an inverse of
|
||||
image_space_to_tensor_space(). This is because you can ask
|
||||
image_space_to_tensor_space() for the coordinates of points outside the input
|
||||
image and they will be mapped to somewhere that doesn't have an inverse.
|
||||
But for points actually inside the input image this function performs an
|
||||
approximate inverse mapping. I.e. when image_contained_point(data,center(r))==true
|
||||
there is an approximate inverse.
|
||||
!*/
|
||||
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
template <
|
||||
|
Loading…
Reference in New Issue
Block a user