Added an overload of load_image_dataset() that can load the part

information into full_object_detections.
2024-11-01 10:14:53 +08:00 · 2012-09-01 22:11:32 -04:00 · 2012-09-01 22:11:32 -04:00 · 8460f4214b
commit 8460f4214b
parent 25dcd0c716
2 changed files with 171 additions and 0 deletions
--- a/dlib/data_io/load_image_dataset.h
+++ b/dlib/data_io/load_image_dataset.h
@ -12,6 +12,7 @@
 #include "../geometry.h"
 #include "image_dataset_metadata.h"
 #include <string>
+#include "../image_processing/full_object_detection.h"


 namespace dlib
@ -81,6 +82,110 @@ namespace dlib
    }

 // ----------------------------------------------------------------------------------------
+
+    // Loads the images listed in the metadata file `filename` into `images` and, for
+    // each image, stores the boxes whose label equals `label` (or every box when
+    // `label` is empty) in `object_locations` as full_object_detections that carry
+    // part locations.  Returns the names of all parts used by the selected boxes;
+    // element k of the returned vector names part k of every loaded detection.
+    // Parts that a box does not define are set to OBJECT_PART_NOT_PRESENT.
+    template <
+        typename image_type, 
+        typename MM
+        >
+    std::vector<std::string> load_image_dataset (
+        array<image_type,MM>& images,
+        std::vector<std::vector<full_object_detection> >& object_locations,
+        const std::string& filename,
+        const std::string& label 
+    )
+    {
+        images.clear();
+        object_locations.clear();
+        const std::string old_working_dir = get_current_dir();
+
+        // Resolve the folder that contains the metadata file while we are still in
+        // the caller's working directory, so a relative filename is interpreted the
+        // way the caller meant it.
+        const std::string parent_dir = get_parent_directory(file(filename)).full_name();
+
+        using namespace dlib::image_dataset_metadata;
+
+        // Parse the metadata before changing directories.  Doing it afterwards would
+        // make a relative filename (e.g. "data/training.xml") resolve against the
+        // dataset folder instead of the caller's folder and fail to open.
+        dataset data;
+        load_image_dataset_metadata(data, filename);
+        std::set<std::string> all_parts;
+
+        // find out what parts are being used in the dataset.  Store results in all_parts.
+        for (unsigned long i = 0; i < data.images.size(); ++i)
+        {
+            for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j)
+            {
+                if (label.size() == 0 || data.images[i].boxes[j].label == label)
+                {
+                    const std::map<std::string,point>& parts = data.images[i].boxes[j].parts;
+                    std::map<std::string,point>::const_iterator itr;
+
+                    for (itr = parts.begin(); itr != parts.end(); ++itr)
+                    {
+                        all_parts.insert(itr->first);
+                    }
+                }
+            }
+        }
+
+        // make a mapping between part names and the integers [0, all_parts.size())
+        std::map<std::string,int> parts_idx;
+        std::vector<std::string> ret_parts_list;
+        for (std::set<std::string>::iterator i = all_parts.begin(); i != all_parts.end(); ++i)
+        {
+            parts_idx[*i] = ret_parts_list.size();
+            ret_parts_list.push_back(*i);
+        }
+
+        images.resize(data.images.size());
+
+        // Set the current directory to be the one that contains the
+        // metadata file. We do this because the file might contain
+        // file paths which are relative to this folder.
+        set_current_dir(parent_dir);
+
+        try
+        {
+            std::vector<full_object_detection> object_dets;
+            for (unsigned long i = 0; i < data.images.size(); ++i)
+            {
+                load_image(images[i], data.images[i].filename);
+                object_dets.clear();
+                for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j)
+                {
+                    if (label.size() == 0 || data.images[i].boxes[j].label == label)
+                    {
+                        // Every detection gets one slot per known part; slots for parts
+                        // this box does not define keep the "not present" marker.
+                        std::vector<point> partlist(parts_idx.size(), OBJECT_PART_NOT_PRESENT);
+
+                        // populate partlist with all the parts present in this box.
+                        const std::map<std::string,point>& parts = data.images[i].boxes[j].parts;
+                        std::map<std::string,point>::const_iterator itr;
+                        for (itr = parts.begin(); itr != parts.end(); ++itr)
+                        {
+                            partlist[parts_idx[itr->first]] = itr->second;
+                        }
+
+                        object_dets.push_back(full_object_detection(data.images[i].boxes[j].rect, partlist));
+                    }
+                }
+                object_locations.push_back(object_dets);
+            }
+        }
+        catch (...)
+        {
+            // Do not leave the process stranded in the dataset folder when an image
+            // fails to load; put the working directory back and let the error propagate.
+            set_current_dir(old_working_dir);
+            throw;
+        }
+
+        set_current_dir(old_working_dir);
+
+        return ret_parts_list;
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    // Convenience overload that applies no label filtering: it forwards an empty
+    // label to the four-argument load_image_dataset(), which treats an empty label
+    // as "load every box", and hands back that call's list of part names.
+    template <
+        typename image_type, 
+        typename MM
+        >
+    std::vector<std::string> load_image_dataset (
+        array<image_type,MM>& images,
+        std::vector<std::vector<full_object_detection> >& object_locations,
+        const std::string& filename
+    )
+    {
+        const std::string match_any_label;
+        return load_image_dataset(images, object_locations, filename, match_any_label);
+    }
+
+// ----------------------------------------------------------------------------------------
+
 }

 #endif // DLIB_LOAD_IMAGE_DaTASET_H__
--- a/dlib/data_io/load_image_dataset_abstract.h
+++ b/dlib/data_io/load_image_dataset_abstract.h
@ -7,6 +7,7 @@
 #include "../array/array_kernel_abstract.h"
 #include <string>
 #include <vector>
+#include "../image_processing/full_object_detection_abstract.h"


 namespace dlib
@ -67,6 +68,71 @@ namespace dlib
              (i.e. it ignores box labels and therefore loads all the boxes in the dataset)
    !*/

+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename image_type, 
+        typename MM
+        >
+    std::vector<std::string> load_image_dataset (
+        array<image_type,MM>& images,
+        std::vector<std::vector<full_object_detection> >& object_locations,
+        const std::string& filename,
+        const std::string& label 
+    );
+    /*!
+        requires
+            - image_type == is an implementation of array2d/array2d_kernel_abstract.h
+            - pixel_traits<typename image_type::type> is defined  
+        ensures
+            - This routine loads the images and their associated object locations from the
+              image metadata file indicated by filename.  This metadata file should be in
+              the XML format used by the save_image_dataset_metadata() routine.
+            - The difference between this function and the version of load_image_dataset()
+              defined above is that this version will also load object part information and
+              thus fully populates the full_object_detection objects.
+            - #images.size() == the number of images in the metadata file
+            - #images.size() == #object_locations.size()
+            - This routine is capable of loading any image format which can be read
+              by the load_image() routine.
+            - While the images are being loaded the current working directory is set to
+              the folder that contains filename, so that relative image paths inside the
+              metadata file are resolved against that folder.  The previous working
+              directory is restored before this function returns.
+            - returns a vector, call it RETURNED_PARTS, that contains the list of object
+              parts found in the input file and loaded into object_locations.  
+            - RETURNED_PARTS contains each part name exactly once and is sorted in
+              ascending std::string order.
+            - for all valid i:  
+                - #images[i] == a copy of the ith image from the dataset.
+                - #object_locations[i] == a vector of all the object detections associated
+                  with #images[i]. 
+                - for all valid j:
+                    - #object_locations[i][j].num_parts() == RETURNED_PARTS.size()
+                    - for all valid k:
+                        - #object_locations[i][j].part(k) == the location of the part
+                          with name RETURNED_PARTS[k] or OBJECT_PART_NOT_PRESENT if the
+                          part was not indicated for object #object_locations[i][j].
+                - if (label != "") then
+                    - only boxes with the given label will be loaded into object_locations.
+                - else
+                    - all boxes in the dataset will be loaded into object_locations.
+    !*/
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename image_type, 
+        typename MM
+        >
+    std::vector<std::string> load_image_dataset (
+        array<image_type,MM>& images,
+        std::vector<std::vector<full_object_detection> >& object_locations,
+        const std::string& filename
+    );
+    /*!
+        requires
+            - image_type == is an implementation of array2d/array2d_kernel_abstract.h
+            - pixel_traits<typename image_type::type> is defined  
+        ensures
+            - performs: return load_image_dataset(images, object_locations, filename, "");
+              (i.e. it ignores box labels and therefore loads all the boxes in the dataset)
+            - returns the list of part names produced by that call.
+    !*/
+
 // ----------------------------------------------------------------------------------------

 }