From 8001b924e63ac920a9047610ece2e4e6266e8a4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Facundo=20Gal=C3=A1n?= Date: Sat, 25 May 2019 18:18:01 -0300 Subject: [PATCH] Add input_grayscale_image_pyramid, issue #354 (#1761) Add input_grayscale_image_pyramid --- dlib/dnn/input.h | 291 +++++++++++++++++++++++++++----------- dlib/dnn/input_abstract.h | 149 +++++++++++++++++++ 2 files changed, 354 insertions(+), 86 deletions(-) diff --git a/dlib/dnn/input.h b/dlib/dnn/input.h index 700dbd3e1..28167ac2e 100644 --- a/dlib/dnn/input.h +++ b/dlib/dnn/input.h @@ -588,20 +588,216 @@ namespace dlib } }; +// ---------------------------------------------------------------------------------------- + + namespace detail { + template + class input_image_pyramid + { + public: + + virtual ~input_image_pyramid() = 0; + + typedef PYRAMID_TYPE pyramid_type; + + unsigned long get_pyramid_padding() const { return pyramid_padding; } + void set_pyramid_padding(unsigned long value) { pyramid_padding = value; } + + unsigned long get_pyramid_outer_padding() const { return pyramid_outer_padding; } + void set_pyramid_outer_padding(unsigned long value) { pyramid_outer_padding = value; } + + bool image_contained_point( + const tensor& data, + const point& p + ) const + { + auto&& rects = any_cast>(data.annotation()); + DLIB_CASSERT(rects.size() > 0); + return rects[0].contains(p + rects[0].tl_corner()); + } + + drectangle tensor_space_to_image_space( + const tensor& data, + drectangle r + ) const + { + auto&& rects = any_cast>(data.annotation()); + return tiled_pyramid_to_image(rects, r); + } + + drectangle image_space_to_tensor_space ( + const tensor& data, + double scale, + drectangle r + ) const + { + DLIB_CASSERT(0 < scale && scale <= 1, "scale: " << scale); + auto&& rects = any_cast>(data.annotation()); + return image_to_tiled_pyramid(rects, scale, r); + } + + protected: + + template + void to_tensor_init ( + forward_iterator ibegin, + forward_iterator iend, + resizable_tensor &data, + unsigned int k + ) const + { + + DLIB_CASSERT(std::distance(ibegin, iend) > 0); + auto nr = ibegin->nr(); + auto nc = ibegin->nc(); + // make sure all the input matrices have the same dimensions + for (auto i = ibegin; i != iend; ++i) + { + DLIB_CASSERT(i->nr() == nr && i->nc() == nc, + "\t input_grayscale_image_pyramid::to_tensor()" + << "\n\t All matrices given to to_tensor() must have the same dimensions." + << "\n\t nr: " << nr + << "\n\t nc: " << nc + << "\n\t i->nr(): " << i->nr() + << "\n\t i->nc(): " << i->nc() + ); + } + + long NR, NC; + pyramid_type pyr; + auto& rects = data.annotation().get>(); + impl::compute_tiled_image_pyramid_details(pyr, nr, nc, pyramid_padding, pyramid_outer_padding, rects, + NR, NC); + + // initialize data to the right size to contain the stuff in the iterator range. + data.set_size(std::distance(ibegin, iend), k, NR, NC); + + // We need to zero the image before doing the pyramid, since the pyramid + // creation code doesn't write to all parts of the image. We also take + // care to avoid triggering any device to hosts copies. + auto ptr = data.host_write_only(); + for (size_t i = 0; i < data.size(); ++i) + ptr[i] = 0; + + } + + // now build the image pyramid into data. This does the same thing as + // standard create_tiled_pyramid(), except we use the GPU if one is available. + void create_tiled_pyramid ( + const std::vector& rects, + resizable_tensor& data + ) const + { + for (size_t i = 1; i < rects.size(); ++i) { + alias_tensor src(data.num_samples(), data.k(), rects[i - 1].height(), rects[i - 1].width()); + alias_tensor dest(data.num_samples(), data.k(), rects[i].height(), rects[i].width()); + + auto asrc = src(data, data.nc() * rects[i - 1].top() + rects[i - 1].left()); + auto adest = dest(data, data.nc() * rects[i].top() + rects[i].left()); + + tt::resize_bilinear(adest, data.nc(), data.nr() * data.nc(), + asrc, data.nc(), data.nr() * data.nc()); + } + } + + unsigned long pyramid_padding = 10; + unsigned long pyramid_outer_padding = 11; + }; + + template + input_image_pyramid::~input_image_pyramid() {} + } + // ---------------------------------------------------------------------------------------- template - class input_rgb_image_pyramid + class input_grayscale_image_pyramid : public detail::input_image_pyramid + { + public: + typedef matrix input_type; + typedef PYRAMID_TYPE pyramid_type; + + template + void to_tensor ( + forward_iterator ibegin, + forward_iterator iend, + resizable_tensor& data + ) const + { + this->to_tensor_init(ibegin, iend, data, 1); + + const auto rects = data.annotation().get>(); + if (rects.size() == 0) + return; + + // copy the first raw image into the top part of the tiled pyramid. We need to + // do this for each of the input images/samples in the tensor. + auto ptr = data.host_write_only(); + for (auto i = ibegin; i != iend; ++i) + { + auto& img = *i; + ptr += rects[0].top()*data.nc(); + for (long r = 0; r < img.nr(); ++r) + { + auto p = ptr+rects[0].left(); + for (long c = 0; c < img.nc(); ++c) + p[c] = (img(r,c))/256.0; + ptr += data.nc(); + } + ptr += data.nc()*(data.nr()-rects[0].bottom()-1); + } + + this->create_tiled_pyramid(rects, data); + } + + friend void serialize(const input_grayscale_image_pyramid& item, std::ostream& out) + { + serialize("input_grayscale_image_pyramid", out); + serialize(item.pyramid_padding, out); + serialize(item.pyramid_outer_padding, out); + } + + friend void deserialize(input_grayscale_image_pyramid& item, std::istream& in) + { + std::string version; + deserialize(version, in); + if (version != "input_grayscale_image_pyramid") + throw serialization_error("Unexpected version found while deserializing dlib::input_grayscale_image_pyramid."); + deserialize(item.pyramid_padding, in); + deserialize(item.pyramid_outer_padding, in); + } + + friend std::ostream& operator<<(std::ostream& out, const input_grayscale_image_pyramid& item) + { + out << "input_grayscale_image_pyramid()"; + out << " pyramid_padding="<"; + } + }; + +// ---------------------------------------------------------------------------------------- + + template + class input_rgb_image_pyramid : public detail::input_image_pyramid { public: typedef matrix input_type; typedef PYRAMID_TYPE pyramid_type; input_rgb_image_pyramid ( - ) : - avg_red(122.782), + ) : + avg_red(122.782), avg_green(117.001), - avg_blue(104.298) + avg_blue(104.298) { } @@ -609,49 +805,13 @@ namespace dlib float avg_red_, float avg_green_, float avg_blue_ - ) : avg_red(avg_red_), avg_green(avg_green_), avg_blue(avg_blue_) + ) : avg_red(avg_red_), avg_green(avg_green_), avg_blue(avg_blue_) {} float get_avg_red() const { return avg_red; } float get_avg_green() const { return avg_green; } float get_avg_blue() const { return avg_blue; } - unsigned long get_pyramid_padding () const { return pyramid_padding; } - void set_pyramid_padding (unsigned long value) { pyramid_padding = value; } - - unsigned long get_pyramid_outer_padding () const { return pyramid_outer_padding; } - void set_pyramid_outer_padding (unsigned long value) { pyramid_outer_padding = value; } - - bool image_contained_point ( - const tensor& data, - const point& p - ) const - { - auto&& rects = any_cast>(data.annotation()); - DLIB_CASSERT(rects.size() > 0); - return rects[0].contains(p+rects[0].tl_corner()); - } - - drectangle tensor_space_to_image_space ( - const tensor& data, - drectangle r - ) const - { - auto&& rects = any_cast>(data.annotation()); - return tiled_pyramid_to_image(rects, r); - } - - drectangle image_space_to_tensor_space ( - const tensor& data, - double scale, - drectangle r - ) const - { - DLIB_CASSERT(0 < scale && scale <= 1 , "scale: "<< scale); - auto&& rects = any_cast>(data.annotation()); - return image_to_tiled_pyramid(rects, scale, r); - } - template void to_tensor ( forward_iterator ibegin, @@ -659,42 +819,15 @@ namespace dlib resizable_tensor& data ) const { - DLIB_CASSERT(std::distance(ibegin,iend) > 0); - auto nr = ibegin->nr(); - auto nc = ibegin->nc(); - // make sure all the input matrices have the same dimensions - for (auto i = ibegin; i != iend; ++i) - { - DLIB_CASSERT(i->nr()==nr && i->nc()==nc, - "\t input_rgb_image_pyramid::to_tensor()" - << "\n\t All matrices given to to_tensor() must have the same dimensions." - << "\n\t nr: " << nr - << "\n\t nc: " << nc - << "\n\t i->nr(): " << i->nr() - << "\n\t i->nc(): " << i->nc() - ); - } - - long NR, NC; - pyramid_type pyr; - auto& rects = data.annotation().get>(); - impl::compute_tiled_image_pyramid_details(pyr, nr, nc, pyramid_padding, pyramid_outer_padding, rects, NR, NC); - - // initialize data to the right size to contain the stuff in the iterator range. - data.set_size(std::distance(ibegin,iend), 3, NR, NC); - - // We need to zero the image before doing the pyramid, since the pyramid - // creation code doesn't write to all parts of the image. We also take - // care to avoid triggering any device to hosts copies. - auto ptr = data.host_write_only(); - for (size_t i = 0; i < data.size(); ++i) - ptr[i] = 0; + this->to_tensor_init(ibegin, iend, data, 3); + const auto rects = data.annotation().get>(); if (rects.size() == 0) return; // copy the first raw image into the top part of the tiled pyramid. We need to // do this for each of the input images/samples in the tensor. + auto ptr = data.host_write_only(); for (auto i = ibegin; i != iend; ++i) { auto& img = *i; @@ -729,19 +862,7 @@ namespace dlib ptr += data.nc()*(data.nr()-rects[0].bottom()-1); } - // now build the image pyramid into data. This does the same thing as - // create_tiled_pyramid(), except we use the GPU if one is available. - for (size_t i = 1; i < rects.size(); ++i) - { - alias_tensor src(data.num_samples(),data.k(),rects[i-1].height(),rects[i-1].width()); - alias_tensor dest(data.num_samples(),data.k(),rects[i].height(),rects[i].width()); - - auto asrc = src(data, data.nc()*rects[i-1].top() + rects[i-1].left()); - auto adest = dest(data, data.nc()*rects[i].top() + rects[i].left()); - - tt::resize_bilinear(adest, data.nc(), data.nr()*data.nc(), - asrc, data.nc(), data.nr()*data.nc()); - } + this->create_tiled_pyramid(rects, data); } friend void serialize(const input_rgb_image_pyramid& item, std::ostream& out) @@ -796,8 +917,6 @@ namespace dlib float avg_red; float avg_green; float avg_blue; - unsigned long pyramid_padding = 10; - unsigned long pyramid_outer_padding = 11; }; // ---------------------------------------------------------------------------------------- diff --git a/dlib/dnn/input_abstract.h b/dlib/dnn/input_abstract.h index 7130efb17..24fe6de6e 100644 --- a/dlib/dnn/input_abstract.h +++ b/dlib/dnn/input_abstract.h @@ -271,6 +271,155 @@ namespace dlib }; +// ---------------------------------------------------------------------------------------- + + template < + typename PYRAMID_TYPE + > + class input_grayscale_image_pyramid + { + /*! + REQUIREMENTS ON PYRAMID_TYPE + PYRAMID_TYPE must be an instance of the dlib::pyramid_down template. + + WHAT THIS OBJECT REPRESENTS + This input layer works with gray scale images of type matrix. + It is identical to input layer except that it outputs a tensor containing a tiled + image pyramid of each input image rather than a simple copy of each image. + The tiled image pyramid is created using create_tiled_pyramid(). + !*/ + + public: + + typedef matrix input_type; + typedef PYRAMID_TYPE pyramid_type; + input_grayscale_image_pyramid ( + ); + /*! + ensures + - #get_pyramid_padding() == 10 + - #get_pyramid_outer_padding() == 11 + !*/ + + unsigned long get_pyramid_padding ( + ) const; + /*! + ensures + - When this object creates a pyramid it will call create_tiled_pyramid() and + set create_tiled_pyramid's pyramid_padding parameter to get_pyramid_padding(). + !*/ + + void set_pyramid_padding ( + unsigned long value + ); + /*! + ensures + - #get_pyramid_padding() == value + !*/ + + unsigned long get_pyramid_outer_padding ( + ) const; + /*! + ensures + - When this object creates a pyramid it will call create_tiled_pyramid() + and set create_tiled_pyramid's pyramid_outer_padding parameter to + get_pyramid_outer_padding(). + !*/ + + void set_pyramid_outer_padding ( + unsigned long value + ); + /*! + ensures + - #get_pyramid_outer_padding() == value + !*/ + + template + void to_tensor ( + forward_iterator ibegin, + forward_iterator iend, + resizable_tensor& data + ) const; + /*! + requires + - [ibegin, iend) is an iterator range over input_type objects. + - std::distance(ibegin,iend) > 0 + - The input range should contain images that all have the same + dimensions. + ensures + - Converts the iterator range into a tensor and stores it into #data. In + particular, we will have: + - #data.num_samples() == std::distance(ibegin,iend) + - #data.k() == 1 + - Each sample in #data contains a tiled image pyramid of the + corresponding input image. The tiled pyramid is created by + create_tiled_pyramid(). + Moreover, each pixel is normalized, dividing them by 256.0. + !*/ + + bool image_contained_point ( + const tensor& data, + const point& p + ) const; + /*! + requires + - data is a tensor that was produced by this->to_tensor() + ensures + - Since data is a tensor that is built from a bunch of identically sized + images, we can ask if those images were big enough to contain the point + p. This function returns the answer to that question. + !*/ + + drectangle image_space_to_tensor_space ( + const tensor& data, + double scale, + drectangle r + ) const; + /*! + requires + - data is a tensor that was produced by this->to_tensor() + - 0 < scale <= 1 + ensures + - This function maps from to_tensor()'s input image space to its output + tensor space. Therefore, given that data is a tensor produced by + to_tensor(), image_space_to_tensor_space() allows you to ask for the + rectangle in data that corresponds to a rectangle in the original image + space. + + Note that since the output tensor contains an image pyramid, there are + multiple points in the output tensor that correspond to any input + location. So you must also specify a scale so we know what level of the + pyramid is needed. So given a rectangle r in an input image, you can + ask, what rectangle in data corresponds to r when things are scale times + smaller? That rectangle is returned by this function. + - A scale of 1 means we don't move anywhere in the pyramid scale space relative + to the input image while smaller values of scale mean we move down the + pyramid. + !*/ + + drectangle tensor_space_to_image_space ( + const tensor& data, + drectangle r + ) const; + /*! + requires + - data is a tensor that was produced by this->to_tensor() + ensures + - This function maps from to_tensor()'s output tensor space to its input + image space. Therefore, given that data is a tensor produced by + to_tensor(), tensor_space_to_image_space() allows you to ask for the + rectangle in the input image that corresponds to a rectangle in data. + - It should be noted that this function isn't always an inverse of + image_space_to_tensor_space(). This is because you can ask + image_space_to_tensor_space() for the coordinates of points outside the input + image and they will be mapped to somewhere that doesn't have an inverse. + But for points actually inside the input image this function performs an + approximate inverse mapping. I.e. when image_contained_point(data,center(r))==true + there is an approximate inverse. + !*/ + + }; + // ---------------------------------------------------------------------------------------- template <