Added a --split-train-test option to imglab.

This commit is contained in:
Davis King 2018-06-18 21:36:36 -04:00
parent 5cea44a284
commit 7d3fac5502

View File

@ -21,7 +21,7 @@
#include <dlib/dir_nav.h>
const char* VERSION = "1.14";
const char* VERSION = "1.15";
@ -127,6 +127,44 @@ int split_dataset (
// ----------------------------------------------------------------------------------------
int make_train_test_splits (
    const command_line_parser& parser
)
{
    // Implements the --split-train-test option.  Loads the single XML dataset named on
    // the command line and writes two new datasets next to it:
    //   <input without extension>_train.xml  - the first round(frac*N) images
    //   <input without extension>_test.xml   - the remaining images
    // The split is deterministic: images keep their original order and are never shuffled.
    // Returns EXIT_FAILURE if exactly one XML file was not given, EXIT_SUCCESS otherwise.

    if (parser.number_of_arguments() != 1)
    {
        cerr << "The --split-train-test option requires you to give one XML file on the command line." << endl;
        return EXIT_FAILURE;
    }

    // Fraction of the images that goes into the training split (get_option() is given
    // 0.5 as its fallback value).
    const double train_frac = get_option(parser, "split-train-test", 0.5);

    dlib::image_dataset_metadata::dataset data, data_train, data_test;
    load_image_dataset_metadata(data, parser[0]);

    // Both output datasets carry over the name and comment of the input dataset.
    data_train.name = data.name;
    data_train.comment = data.comment;
    data_test.name = data.name;
    data_test.comment = data.comment;

    const unsigned long num_train_images = static_cast<unsigned long>(std::round(train_frac*data.images.size()));

    for (unsigned long i = 0; i < data.images.size(); ++i)
    {
        if (i < num_train_images)
            data_train.images.push_back(data.images[i]);
        else
            data_test.images.push_back(data.images[i]);
    }

    // Build the output prefix by dropping the extension from the file name only.  The
    // search for '.' must start after the last path separator; otherwise inputs such as
    // "./faces.xml" or "../sets/faces.xml" would be cut at the '.' in the directory part
    // and the results would be written to "_train.xml"/"_test.xml" in the wrong place.
    const std::string input_path = parser[0];
    const std::string::size_type last_sep = input_path.find_last_of("/\\");
    const std::string::size_type name_start = (last_sep == std::string::npos) ? 0 : last_sep + 1;
    // find() returns npos when the file name has no '.', in which case substr() keeps
    // the whole path.
    const std::string base_path = input_path.substr(0, input_path.find('.', name_start));

    save_image_dataset_metadata(data_train, base_path + "_train.xml");
    save_image_dataset_metadata(data_test, base_path + "_test.xml");

    return EXIT_SUCCESS;
}
// ----------------------------------------------------------------------------------------
void print_all_labels (
const dlib::image_dataset_metadata::dataset& data
)
@ -545,6 +583,10 @@ int main(int argc, char** argv)
parser.add_option("seed", "When using --shuffle, set the random seed to the string <arg>.",1);
parser.add_option("split", "Split the contents of an XML file into two separate files. One containing the "
"images with objects labeled <arg> and another file with all the other images. ",1);
parser.add_option("split-train-test", "Split the contents of an XML file into two separate files. A training "
"file containing <arg> fraction of the images and a testing file containing the remaining (1-<arg>) images. "
"The partitioning is done deterministically by putting the first images in the input xml file into the training split "
"and the later images into the test split.",1);
parser.add_option("add", "Add the image metadata from <arg1> into <arg2>. If any of the image "
"tags are in both files then the ones in <arg2> are deleted and replaced with the "
"image tags from <arg1>. The results are saved into merged.xml and neither <arg1> or "
@ -581,7 +623,7 @@ int main(int argc, char** argv)
const char* singles[] = {"h","c","r","l","files","convert","parts","rmdiff", "rmtrunc", "rmdupes", "seed", "shuffle", "split", "add",
"flip-basic", "flip", "rotate", "tile", "size", "cluster", "resample", "min-object-size", "rmempty",
"crop-size", "cropped-object-size", "rmlabel", "rm-other-labels", "rm-if-overlaps", "sort-num-objects",
"one-object-per-image", "jpg", "rmignore", "sort"};
"one-object-per-image", "jpg", "rmignore", "sort", "split-train-test"};
parser.check_one_time_options(singles);
const char* c_sub_ops[] = {"r", "convert"};
parser.check_sub_options("c", c_sub_ops);
@ -676,6 +718,7 @@ int main(int argc, char** argv)
parser.check_option_arg_range("min-object-size", 1, 10000*10000);
parser.check_option_arg_range("cropped-object-size", 4, 10000*10000);
parser.check_option_arg_range("crop-size", 1.0, 100.0);
parser.check_option_arg_range("split-train-test", 0.0, 1.0);
if (parser.option("h"))
{
@ -1016,6 +1059,11 @@ int main(int argc, char** argv)
return split_dataset(parser);
}
if (parser.option("split-train-test"))
{
return make_train_test_splits(parser);
}
if (parser.option("shuffle"))
{
if (parser.number_of_arguments() != 1)