// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt /* Instance segmentation using the PASCAL VOC2012 dataset. Instance segmentation sort-of combines object detection with semantic segmentation. While each dog, for example, is detected separately, the output is not only a bounding-box but a more accurate, per-pixel mask. For introductions to object detection and semantic segmentation, you can have a look at dnn_mmod_ex.cpp and dnn_semantic_segmentation.h, respectively. Instructions how to run the example: 1. Download the PASCAL VOC2012 data, and untar it somewhere. http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar 2. Build the dnn_instance_segmentation_train_ex example program. 3. Run: ./dnn_instance_segmentation_train_ex /path/to/VOC2012 4. Wait while the network is being trained. 5. Build the dnn_instance_segmentation_ex example program. 6. Run: ./dnn_instance_segmentation_ex /path/to/VOC2012-or-other-images An alternative to steps 2-4 above is to download a pre-trained network from here: http://dlib.net/files/instance_segmentation_voc2012net.dnn It would be a good idea to become familiar with dlib's DNN tooling before reading this example. So you should read dnn_introduction_ex.cpp and dnn_introduction2_ex.cpp before reading this example program. */ #ifndef DLIB_DNn_INSTANCE_SEGMENTATION_EX_H_ #define DLIB_DNn_INSTANCE_SEGMENTATION_EX_H_ #include // ---------------------------------------------------------------------------------------- namespace { // Segmentation will be performed using patches having this size. 
constexpr int seg_dim = 227;
}

// Computes the crop region for a given rectangle.
//
// The result is centered on the center point of `rectangle` and extends the
// same distance `d` from that center in every direction, where `d` is half of
// the larger of the input's width and height, scaled by 1.5 and rounded.
//
// Precondition: `rectangle` must not be empty (checked via DLIB_ASSERT).
dlib::rectangle get_cropping_rect(const dlib::rectangle& rectangle)
{
    DLIB_ASSERT(!rectangle.is_empty());

    const auto center_point = dlib::center(rectangle);
    // Use the larger dimension so that the crop covers the input along both axes.
    const auto max_dim = std::max(rectangle.width(), rectangle.height());
    // NOTE(review): the static_cast below has no target type in this copy of the
    // file; the <...> part looks like it was lost in extraction. Restore it from
    // the upstream file before compiling.
    const auto d = static_cast(std::round(max_dim / 2.0 * 1.5)); // half of the larger side, plus 50% margin

    return dlib::rectangle(
        center_point.x() - d,
        center_point.y() - d,
        center_point.x() + d,
        center_point.y() + d
    );
}

// ----------------------------------------------------------------------------------------

// The object detection network.
// Adapted from dnn_mmod_train_find_cars_ex.cpp and friends.
//
// NOTE(review): every alias below is missing its template parameter list and
// the first template argument list (e.g. "template using con5d = dlib::con;").
// These <...> spans appear to have been stripped from this copy, so the filter
// counts, kernel sizes, strides and layer nesting cannot be read from here.
// Restore them from the upstream file; do not re-type them from memory.

// Convolution building blocks used by the detector.
template using con5d = dlib::con;
template using con5 = dlib::con;

// Downsampling front-ends. Presumably the "b" and "a" prefixes distinguish a
// training variant from an inference variant, as in other dlib examples -- TODO
// confirm once the template arguments are restored.
template using bdownsampler = dlib::relu>>>>>>>>;
template using adownsampler = dlib::relu>>>>>>>>;

template using brcon5 = dlib::relu>>;
template using arcon5 = dlib::relu>>;

// Complete detector network types, built on the dlib::loss_mmod loss layer.
using det_bnet_type = dlib::loss_mmod>>>>>>>;
using det_anet_type = dlib::loss_mmod>>>>>>>;

// The segmentation network.
// For the time being, this is very much copy-paste from dnn_semantic_segmentation.h,
// although the network is made narrower (smaller feature maps).
//
// NOTE(review): the leading "template <...>" parameters and the layer
// arguments of `block` and `blockt` are also truncated in this copy.

template class BN, int stride, typename SUBNET> using block = BN>>>>;

template class BN, int stride, typename SUBNET> using blockt = BN>>>>;

template