mirror of
https://github.com/davisking/dlib.git
synced 2024-11-01 10:14:53 +08:00
extend letterbox behavior (#2899)
* extend letterbox behavior
* simplify scale logic and update docs
* oops, forgot one line in the yolo example
* make dpoint const
This commit is contained in:
parent
b20e97446b
commit
19a952c3a4
@ -977,28 +977,29 @@ namespace dlib
|
|||||||
point_transform_affine letterbox_image (
|
point_transform_affine letterbox_image (
|
||||||
const image_type1& img_in,
|
const image_type1& img_in,
|
||||||
image_type2& img_out,
|
image_type2& img_out,
|
||||||
long size,
|
|
||||||
const interpolation_type& interp
|
const interpolation_type& interp
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
DLIB_CASSERT(size > 0, "size must be bigger than zero, but was " << size);
|
|
||||||
const_image_view<image_type1> vimg_in(img_in);
|
const_image_view<image_type1> vimg_in(img_in);
|
||||||
image_view<image_type2> vimg_out(img_out);
|
image_view<image_type2> vimg_out(img_out);
|
||||||
|
const long rows = vimg_out.nr();
|
||||||
const auto scale = size / std::max<double>(vimg_in.nr(), vimg_in.nc());
|
const long cols = vimg_out.nc();
|
||||||
|
DLIB_CASSERT(vimg_out.size() > 0, "img_out size must be bigger than zero, but was " << rows << "x" << cols);
|
||||||
|
|
||||||
// early return if the image has already the requested size and no padding is needed
|
// early return if the image has already the requested size and no padding is needed
|
||||||
if (scale == 1 && vimg_in.nr() == vimg_in.nc())
|
if (have_same_dimensions(vimg_in, vimg_out))
|
||||||
{
|
{
|
||||||
assign_image(vimg_out, vimg_in);
|
assign_image(vimg_out, vimg_in);
|
||||||
return point_transform_affine();
|
return point_transform_affine();
|
||||||
}
|
}
|
||||||
|
|
||||||
vimg_out.set_size(size, size);
|
const double rows_scale = rows / static_cast<double>(vimg_in.nr());
|
||||||
|
const double cols_scale = cols / static_cast<double>(vimg_in.nc());
|
||||||
|
const double scale = rows_scale * vimg_in.nc() > rows ? cols_scale : rows_scale;
|
||||||
|
|
||||||
const long nr = std::round(scale * vimg_in.nr());
|
const long nr = std::lround(scale * vimg_in.nr());
|
||||||
const long nc = std::round(scale * vimg_in.nc());
|
const long nc = std::lround(scale * vimg_in.nc());
|
||||||
dpoint offset((size - nc) / 2.0, (size - nr) / 2.0);
|
const dpoint offset((cols - nc) / 2.0, (rows - nr) / 2.0);
|
||||||
const auto r = rectangle(offset.x(), offset.y(), offset.x() + nc - 1, offset.y() + nr - 1);
|
const auto r = rectangle(offset.x(), offset.y(), offset.x() + nc - 1, offset.y() + nr - 1);
|
||||||
zero_border_pixels(vimg_out, r);
|
zero_border_pixels(vimg_out, r);
|
||||||
auto si = sub_image(img_out, r);
|
auto si = sub_image(img_out, r);
|
||||||
@ -1006,19 +1007,6 @@ namespace dlib
|
|||||||
return point_transform_affine(identity_matrix<double>(2) * scale, offset);
|
return point_transform_affine(identity_matrix<double>(2) * scale, offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <
|
|
||||||
typename image_type1,
|
|
||||||
typename image_type2
|
|
||||||
>
|
|
||||||
point_transform_affine letterbox_image (
|
|
||||||
const image_type1& img_in,
|
|
||||||
image_type2& img_out,
|
|
||||||
long size
|
|
||||||
)
|
|
||||||
{
|
|
||||||
return letterbox_image(img_in, img_out, size, interpolate_bilinear());
|
|
||||||
}
|
|
||||||
|
|
||||||
template <
|
template <
|
||||||
typename image_type1,
|
typename image_type1,
|
||||||
typename image_type2
|
typename image_type2
|
||||||
@ -1028,7 +1016,7 @@ namespace dlib
|
|||||||
image_type2& img_out
|
image_type2& img_out
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
return letterbox_image(img_in, img_out, std::max(num_rows(img_in), num_columns(img_in)), interpolate_bilinear());
|
return letterbox_image(img_in, img_out, interpolate_bilinear());
|
||||||
}
|
}
|
||||||
|
|
||||||
// ----------------------------------------------------------------------------------------
|
// ----------------------------------------------------------------------------------------
|
||||||
|
@ -446,7 +446,6 @@ namespace dlib
|
|||||||
point_transform_affine letterbox_image (
|
point_transform_affine letterbox_image (
|
||||||
const image_type1& img_in,
|
const image_type1& img_in,
|
||||||
image_type2& img_out,
|
image_type2& img_out,
|
||||||
long size
|
|
||||||
const interpolation_type interp
|
const interpolation_type interp
|
||||||
);
|
);
|
||||||
/*!
|
/*!
|
||||||
@ -455,15 +454,12 @@ namespace dlib
|
|||||||
dlib/image_processing/generic_image.h
|
dlib/image_processing/generic_image.h
|
||||||
- image_type2 == an image object that implements the interface defined in
|
- image_type2 == an image object that implements the interface defined in
|
||||||
dlib/image_processing/generic_image.h
|
dlib/image_processing/generic_image.h
|
||||||
|
- img_out.size() > 0
|
||||||
- interpolation_type == interpolate_nearest_neighbor, interpolate_bilinear,
|
- interpolation_type == interpolate_nearest_neighbor, interpolate_bilinear,
|
||||||
interpolate_quadratic, or a type with a compatible interface.
|
interpolate_quadratic, or a type with a compatible interface.
|
||||||
- size > 0
|
|
||||||
- is_same_object(in_img, out_img) == false
|
- is_same_object(in_img, out_img) == false
|
||||||
ensures
|
ensures
|
||||||
- Scales in_img so that it fits into a size * size square.
|
- Scales in_img so that it fits into img_out.
|
||||||
In particular, we will have:
|
|
||||||
- #img_out.nr() == size
|
|
||||||
- #img_out.nc() == size
|
|
||||||
- Preserves the aspect ratio of in_img by 0-padding the shortest side.
|
- Preserves the aspect ratio of in_img by 0-padding the shortest side.
|
||||||
- Uses the supplied interpolation routine interp to perform the necessary
|
- Uses the supplied interpolation routine interp to perform the necessary
|
||||||
pixel interpolation.
|
pixel interpolation.
|
||||||
@ -471,35 +467,6 @@ namespace dlib
|
|||||||
corresponding location in #out_img.
|
corresponding location in #out_img.
|
||||||
!*/
|
!*/
|
||||||
|
|
||||||
template <
|
|
||||||
typename image_type1,
|
|
||||||
typename image_type2
|
|
||||||
>
|
|
||||||
point_transform_affine letterbox_image (
|
|
||||||
const image_type1& img_in,
|
|
||||||
image_type2& img_out,
|
|
||||||
long size
|
|
||||||
);
|
|
||||||
/*!
|
|
||||||
requires
|
|
||||||
- image_type1 == an image object that implements the interface defined in
|
|
||||||
dlib/image_processing/generic_image.h
|
|
||||||
- image_type2 == an image object that implements the interface defined in
|
|
||||||
dlib/image_processing/generic_image.h
|
|
||||||
- size > 0
|
|
||||||
- is_same_object(in_img, out_img) == false
|
|
||||||
ensures
|
|
||||||
- Scales in_img so that it fits into a size * size square.
|
|
||||||
In particular, we will have:
|
|
||||||
- #img_out.nr() == size
|
|
||||||
- #img_out.nc() == size
|
|
||||||
- Preserves the aspect ratio of in_img by 0-padding the shortest side.
|
|
||||||
- Uses the bilinear interpolation to perform the necessary pixel
|
|
||||||
interpolation.
|
|
||||||
- Returns a transformation object that maps points in in_img into their
|
|
||||||
corresponding location in #out_img.
|
|
||||||
!*/
|
|
||||||
|
|
||||||
template <
|
template <
|
||||||
typename image_type1,
|
typename image_type1,
|
||||||
typename image_type2
|
typename image_type2
|
||||||
@ -514,13 +481,11 @@ namespace dlib
|
|||||||
dlib/image_processing/generic_image.h
|
dlib/image_processing/generic_image.h
|
||||||
- image_type2 == an image object that implements the interface defined in
|
- image_type2 == an image object that implements the interface defined in
|
||||||
dlib/image_processing/generic_image.h
|
dlib/image_processing/generic_image.h
|
||||||
|
- img_out.size() > 0
|
||||||
- is_same_object(in_img, out_img) == false
|
- is_same_object(in_img, out_img) == false
|
||||||
ensures
|
ensures
|
||||||
- 0-pads in_img so that it fits into a square whose side is computed as
|
- Scales in_img so that it fits into img_out using bilinear interpolation.
|
||||||
max(num_rows(in_img), num_columns(in_img)) and stores into #out_img.
|
- Preserves the aspect ratio of in_img by 0-padding the shortest side.
|
||||||
In particular, we will have:
|
|
||||||
- #img_out.nr() == max(num_rows(in_img), num_columns(in_img))
|
|
||||||
- #img_out.nc() == max(num_rows(in_img), num_columns(in_img))
|
|
||||||
- Returns a transformation object that maps points in in_img into their
|
- Returns a transformation object that maps points in in_img into their
|
||||||
corresponding location in #out_img.
|
corresponding location in #out_img.
|
||||||
!*/
|
!*/
|
||||||
|
@ -2438,9 +2438,9 @@ namespace
|
|||||||
rgb_pixel black(0, 0, 0);
|
rgb_pixel black(0, 0, 0);
|
||||||
rgb_pixel white(255, 255, 255);
|
rgb_pixel white(255, 255, 255);
|
||||||
matrix<rgb_pixel> img_s(40, 60);
|
matrix<rgb_pixel> img_s(40, 60);
|
||||||
matrix<rgb_pixel> img_d;
|
matrix<rgb_pixel> img_d(30, 30);
|
||||||
assign_all_pixels(img_s, white);
|
assign_all_pixels(img_s, white);
|
||||||
const auto tform = letterbox_image(img_s, img_d, 30, interpolate_nearest_neighbor());
|
const auto tform = letterbox_image(img_s, img_d, interpolate_nearest_neighbor());
|
||||||
DLIB_TEST(tform.get_m() == identity_matrix<double>(2) * 0.5);
|
DLIB_TEST(tform.get_m() == identity_matrix<double>(2) * 0.5);
|
||||||
DLIB_TEST(tform.get_b() == dpoint(0, 5));
|
DLIB_TEST(tform.get_b() == dpoint(0, 5));
|
||||||
|
|
||||||
|
@ -2008,7 +2008,7 @@
|
|||||||
<file>dlib/image_transforms.h</file>
|
<file>dlib/image_transforms.h</file>
|
||||||
<spec_file link="true">dlib/image_transforms/interpolation_abstract.h</spec_file>
|
<spec_file link="true">dlib/image_transforms/interpolation_abstract.h</spec_file>
|
||||||
<description>
|
<description>
|
||||||
Scales an image so that it fits into a size * size square, while preserving the aspect
|
Scales an image so that it fits into the dimensions of the output image, while preserving the aspect
|
||||||
ratio of the actual contents by appropriate 0 padding.
|
ratio of the actual contents by appropriate 0 padding.
|
||||||
|
|
||||||
<examples>
|
<examples>
|
||||||
|
@ -131,9 +131,9 @@ namespace darknet
|
|||||||
}
|
}
|
||||||
|
|
||||||
// In this example, YOLO expects square images, and we choose to transform them by letterboxing them.
|
// In this example, YOLO expects square images, and we choose to transform them by letterboxing them.
|
||||||
rectangle_transform preprocess_image(const matrix<rgb_pixel>& image, matrix<rgb_pixel>& output, const long image_size)
|
rectangle_transform preprocess_image(const matrix<rgb_pixel>& image, matrix<rgb_pixel>& output)
|
||||||
{
|
{
|
||||||
return rectangle_transform(inv(letterbox_image(image, output, image_size)));
|
return rectangle_transform(inv(letterbox_image(image, output)));
|
||||||
}
|
}
|
||||||
|
|
||||||
// YOLO outputs the bounding boxes in the coordinate system of the input (letterboxed) image, so we need to convert them
|
// YOLO outputs the bounding boxes in the coordinate system of the input (letterboxed) image, so we need to convert them
|
||||||
@ -296,14 +296,14 @@ try
|
|||||||
}
|
}
|
||||||
const double threshold = get_option(parser, "test", 0.01);
|
const double threshold = get_option(parser, "test", 0.01);
|
||||||
image_window win;
|
image_window win;
|
||||||
matrix<rgb_pixel> image, resized;
|
matrix<rgb_pixel> image, resized(image_size, image_size);
|
||||||
for (const auto& im : dataset.images)
|
for (const auto& im : dataset.images)
|
||||||
{
|
{
|
||||||
win.clear_overlay();
|
win.clear_overlay();
|
||||||
load_image(image, data_directory + "/" + im.filename);
|
load_image(image, data_directory + "/" + im.filename);
|
||||||
win.set_title(im.filename);
|
win.set_title(im.filename);
|
||||||
win.set_image(image);
|
win.set_image(image);
|
||||||
const auto tform = preprocess_image(image, resized, image_size);
|
const auto tform = preprocess_image(image, resized);
|
||||||
auto detections = net.process(resized, threshold);
|
auto detections = net.process(resized, threshold);
|
||||||
postprocess_detections(tform, detections);
|
postprocess_detections(tform, detections);
|
||||||
cout << "# detections: " << detections.size() << endl;
|
cout << "# detections: " << detections.size() << endl;
|
||||||
@ -329,7 +329,7 @@ try
|
|||||||
cout << "Could not find file " << sync_file_name << endl;
|
cout << "Could not find file " << sync_file_name << endl;
|
||||||
return EXIT_FAILURE;
|
return EXIT_FAILURE;
|
||||||
}
|
}
|
||||||
matrix<rgb_pixel> image, resized;
|
matrix<rgb_pixel> image, resized(image_size, image_size);
|
||||||
std::map<std::string, std::vector<std::pair<double, bool>>> hits;
|
std::map<std::string, std::vector<std::pair<double, bool>>> hits;
|
||||||
std::map<std::string, unsigned long> missing;
|
std::map<std::string, unsigned long> missing;
|
||||||
for (const auto& label : options.labels)
|
for (const auto& label : options.labels)
|
||||||
@ -342,7 +342,7 @@ try
|
|||||||
{
|
{
|
||||||
const auto& im = dataset.images[i];
|
const auto& im = dataset.images[i];
|
||||||
load_image(image, data_directory + "/" + im.filename);
|
load_image(image, data_directory + "/" + im.filename);
|
||||||
const auto tform = preprocess_image(image, resized, image_size);
|
const auto tform = preprocess_image(image, resized);
|
||||||
auto dets = net.process(resized, 0.005);
|
auto dets = net.process(resized, 0.005);
|
||||||
postprocess_detections(tform, dets);
|
postprocess_detections(tform, dets);
|
||||||
std::vector<bool> used(dets.size(), false);
|
std::vector<bool> used(dets.size(), false);
|
||||||
@ -395,6 +395,7 @@ try
|
|||||||
dlib::rand rnd(time(nullptr) + seed);
|
dlib::rand rnd(time(nullptr) + seed);
|
||||||
matrix<rgb_pixel> image, rotated;
|
matrix<rgb_pixel> image, rotated;
|
||||||
std::pair<matrix<rgb_pixel>, std::vector<yolo_rect>> temp;
|
std::pair<matrix<rgb_pixel>, std::vector<yolo_rect>> temp;
|
||||||
|
temp.first.set_size(image_size, image_size);
|
||||||
random_cropper cropper;
|
random_cropper cropper;
|
||||||
cropper.set_seed(time(nullptr) + seed);
|
cropper.set_seed(time(nullptr) + seed);
|
||||||
cropper.set_chip_dims(image_size, image_size);
|
cropper.set_chip_dims(image_size, image_size);
|
||||||
@ -423,7 +424,7 @@ try
|
|||||||
for (auto& box : temp.second)
|
for (auto& box : temp.second)
|
||||||
box.rect = tform(box.rect);
|
box.rect = tform(box.rect);
|
||||||
|
|
||||||
tform = letterbox_image(rotated, temp.first, image_size);
|
tform = letterbox_image(rotated, temp.first);
|
||||||
for (auto& box : temp.second)
|
for (auto& box : temp.second)
|
||||||
box.rect = tform(box.rect);
|
box.rect = tform(box.rect);
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user