debug test path

This commit is contained in:
Adrià Arrufat 2021-01-19 23:51:20 +09:00
parent e532993364
commit 6d5b4e7a1f
6 changed files with 332 additions and 316 deletions

View File

@ -124,6 +124,7 @@ jobs:
- name: Build tests - name: Build tests
working-directory: ${{ github.workspace }}/test working-directory: ${{ github.workspace }}/test
run: | run: |
echo `pwd`
cmake . -B ${{ env.build_dir }} -DCMAKE_BUILD_TYPE=${{ env.config }} -G Ninja cmake . -B ${{ env.build_dir }} -DCMAKE_BUILD_TYPE=${{ env.config }} -G Ninja
cmake --build . cmake --build .

View File

@ -17,147 +17,147 @@ add_subdirectory(.. dlib_build)
# This variable contains a list of all the tests we are building # This variable contains a list of all the tests we are building
# into the regression test suite. # into the regression test suite.
set (tests set (tests
example.cpp # example.cpp
active_learning.cpp # active_learning.cpp
any.cpp # any.cpp
any_function.cpp # any_function.cpp
array2d.cpp # array2d.cpp
array.cpp # array.cpp
assignment_learning.cpp # assignment_learning.cpp
base64.cpp # base64.cpp
bayes_nets.cpp # bayes_nets.cpp
bigint.cpp # bigint.cpp
binary_search_tree_kernel_1a.cpp # binary_search_tree_kernel_1a.cpp
binary_search_tree_kernel_2a.cpp # binary_search_tree_kernel_2a.cpp
binary_search_tree_mm1.cpp # binary_search_tree_mm1.cpp
binary_search_tree_mm2.cpp # binary_search_tree_mm2.cpp
bridge.cpp # bridge.cpp
bsp.cpp # bsp.cpp
byte_orderer.cpp # byte_orderer.cpp
cca.cpp # cca.cpp
clustering.cpp # clustering.cpp
cmd_line_parser.cpp # cmd_line_parser.cpp
cmd_line_parser_wchar_t.cpp # cmd_line_parser_wchar_t.cpp
compress_stream.cpp # compress_stream.cpp
conditioning_class_c.cpp # conditioning_class_c.cpp
conditioning_class.cpp # conditioning_class.cpp
config_reader.cpp # config_reader.cpp
correlation_tracker.cpp # correlation_tracker.cpp
crc32.cpp # crc32.cpp
create_iris_datafile.cpp # create_iris_datafile.cpp
data_io.cpp # data_io.cpp
directed_graph.cpp # directed_graph.cpp
discriminant_pca.cpp # discriminant_pca.cpp
disjoint_subsets.cpp # disjoint_subsets.cpp
disjoint_subsets_sized.cpp # disjoint_subsets_sized.cpp
ekm_and_lisf.cpp # ekm_and_lisf.cpp
empirical_kernel_map.cpp # empirical_kernel_map.cpp
entropy_coder.cpp # entropy_coder.cpp
entropy_encoder_model.cpp # entropy_encoder_model.cpp
example_args.cpp # example_args.cpp
face.cpp # face.cpp
fft.cpp # fft.cpp
fhog.cpp # fhog.cpp
filtering.cpp # filtering.cpp
find_max_factor_graph_nmplp.cpp # find_max_factor_graph_nmplp.cpp
find_max_factor_graph_viterbi.cpp # find_max_factor_graph_viterbi.cpp
geometry.cpp # geometry.cpp
graph.cpp # graph.cpp
graph_cuts.cpp # graph_cuts.cpp
graph_labeler.cpp # graph_labeler.cpp
hash.cpp # hash.cpp
hash_map.cpp # hash_map.cpp
hash_set.cpp # hash_set.cpp
hash_table.cpp # hash_table.cpp
hog_image.cpp # hog_image.cpp
image.cpp # image.cpp
iosockstream.cpp # iosockstream.cpp
is_same_object.cpp # is_same_object.cpp
isotonic_regression.cpp # isotonic_regression.cpp
kcentroid.cpp # kcentroid.cpp
kernel_matrix.cpp # kernel_matrix.cpp
kmeans.cpp # kmeans.cpp
learning_to_track.cpp # learning_to_track.cpp
least_squares.cpp # least_squares.cpp
linear_manifold_regularizer.cpp # linear_manifold_regularizer.cpp
lspi.cpp # lspi.cpp
lz77_buffer.cpp # lz77_buffer.cpp
map.cpp # map.cpp
matrix2.cpp # matrix2.cpp
matrix3.cpp # matrix3.cpp
matrix4.cpp # matrix4.cpp
matrix_chol.cpp # matrix_chol.cpp
matrix.cpp # matrix.cpp
matrix_eig.cpp # matrix_eig.cpp
matrix_lu.cpp # matrix_lu.cpp
matrix_qr.cpp # matrix_qr.cpp
max_cost_assignment.cpp # max_cost_assignment.cpp
max_sum_submatrix.cpp # max_sum_submatrix.cpp
md5.cpp # md5.cpp
member_function_pointer.cpp # member_function_pointer.cpp
metaprogramming.cpp # metaprogramming.cpp
mpc.cpp # mpc.cpp
multithreaded_object.cpp # multithreaded_object.cpp
numerical_integration.cpp # numerical_integration.cpp
object_detector.cpp # object_detector.cpp
oca.cpp # oca.cpp
one_vs_all_trainer.cpp # one_vs_all_trainer.cpp
one_vs_one_trainer.cpp # one_vs_one_trainer.cpp
optimization.cpp # optimization.cpp
optimization_test_functions.cpp # optimization_test_functions.cpp
global_optimization.cpp # global_optimization.cpp
opt_qp_solver.cpp # opt_qp_solver.cpp
parallel_for.cpp # parallel_for.cpp
parse.cpp # parse.cpp
pipe.cpp # pipe.cpp
pixel.cpp # pixel.cpp
probabilistic.cpp # probabilistic.cpp
pyramid_down.cpp # pyramid_down.cpp
queue.cpp # queue.cpp
rand.cpp # rand.cpp
ranking.cpp # ranking.cpp
read_write_mutex.cpp # read_write_mutex.cpp
reference_counter.cpp # reference_counter.cpp
rls.cpp # rls.cpp
random_forest.cpp # random_forest.cpp
sammon.cpp # sammon.cpp
scan_image.cpp # scan_image.cpp
sequence.cpp # sequence.cpp
sequence_labeler.cpp # sequence_labeler.cpp
sequence_segmenter.cpp # sequence_segmenter.cpp
serialize.cpp # serialize.cpp
set.cpp # set.cpp
sldf.cpp # sldf.cpp
sliding_buffer.cpp # sliding_buffer.cpp
sockets2.cpp # sockets2.cpp
sockets.cpp # sockets.cpp
sockstreambuf.cpp # sockstreambuf.cpp
sparse_vector.cpp # sparse_vector.cpp
stack.cpp # stack.cpp
static_map.cpp # static_map.cpp
static_set.cpp # static_set.cpp
statistics.cpp # statistics.cpp
std_vector_c.cpp # std_vector_c.cpp
string.cpp # string.cpp
svm_c_linear.cpp # svm_c_linear.cpp
svm_c_linear_dcd.cpp # svm_c_linear_dcd.cpp
svm.cpp # svm.cpp
svm_multiclass_linear.cpp # svm_multiclass_linear.cpp
svm_struct.cpp # svm_struct.cpp
svr_linear_trainer.cpp # svr_linear_trainer.cpp
symmetric_matrix_cache.cpp # symmetric_matrix_cache.cpp
thread_pool.cpp # thread_pool.cpp
threads.cpp # threads.cpp
timer.cpp # timer.cpp
tokenizer.cpp # tokenizer.cpp
trust_region.cpp # trust_region.cpp
tuple.cpp # tuple.cpp
type_safe_union.cpp # type_safe_union.cpp
vectorstream.cpp # vectorstream.cpp
dnn.cpp dnn.cpp
cublas.cpp # cublas.cpp
find_optimal_parameters.cpp # find_optimal_parameters.cpp
elastic_net.cpp # elastic_net.cpp
) )
@ -170,7 +170,7 @@ if (CMAKE_COMPILER_IS_GNUCXX)
add_definitions("-W -Wall") add_definitions("-W -Wall")
# I don't care about unused testing functions though. I like to keep them # I don't care about unused testing functions though. I like to keep them
# around. Don't warn about it. # around. Don't warn about it.
add_definitions("-Wno-unused-function") add_definitions("-Wno-unused-function -Wno-deprecated-copy -fdiagnostics-color=always")
endif() endif()

View File

@ -36,6 +36,7 @@ cmake_minimum_required(VERSION 2.8.12)
# Every project needs a name. We call this the "examples" project. # Every project needs a name. We call this the "examples" project.
project(examples) project(examples)
add_compile_options (-fdiagnostics-color=always)
# Tell cmake we will need dlib. This command will pull in dlib and compile it # Tell cmake we will need dlib. This command will pull in dlib and compile it
# into your project. Note that you don't need to compile or install dlib. All # into your project. Note that you don't need to compile or install dlib. All
@ -60,10 +61,10 @@ add_subdirectory(../dlib dlib_build)
# are going to compile one of the dlib example programs which has only one .cpp # are going to compile one of the dlib example programs which has only one .cpp
# file, assignment_learning_ex.cpp. If your program consisted of multiple .cpp # file, assignment_learning_ex.cpp. If your program consisted of multiple .cpp
# files you would simply list them here in the add_executable() statement. # files you would simply list them here in the add_executable() statement.
add_executable(assignment_learning_ex assignment_learning_ex.cpp) # add_executable(assignment_learning_ex assignment_learning_ex.cpp)
# Finally, you need to tell CMake that this program, assignment_learning_ex, # Finally, you need to tell CMake that this program, assignment_learning_ex,
# depends on dlib. You do that with this statement: # depends on dlib. You do that with this statement:
target_link_libraries(assignment_learning_ex dlib::dlib) # target_link_libraries(assignment_learning_ex dlib::dlib)
@ -134,132 +135,135 @@ endmacro()
# like this: # like this:
# cmake .. -G "Visual Studio 14 2015 Win64" -T host=x64 # cmake .. -G "Visual Studio 14 2015 Win64" -T host=x64
if (NOT USING_OLD_VISUAL_STUDIO_COMPILER) if (NOT USING_OLD_VISUAL_STUDIO_COMPILER)
add_example(dnn_metric_learning_ex) # add_example(dnn_metric_learning_ex)
add_gui_example(dnn_face_recognition_ex) # add_gui_example(dnn_face_recognition_ex)
add_example(dnn_introduction_ex) add_example(dnn_introduction_ex)
add_example(dnn_introduction2_ex) # add_example(dnn_introduction2_ex)
add_example(dnn_introduction3_ex) # add_example(dnn_introduction3_ex)
add_example(dnn_inception_ex) # add_example(dnn_inception_ex)
add_gui_example(dnn_mmod_ex) # add_gui_example(dnn_mmod_ex)
add_gui_example(dnn_mmod_face_detection_ex) # add_gui_example(dnn_mmod_face_detection_ex)
add_gui_example(random_cropper_ex) # add_gui_example(random_cropper_ex)
add_gui_example(dnn_mmod_dog_hipsterizer) # add_gui_example(dnn_mmod_dog_hipsterizer)
add_gui_example(dnn_imagenet_ex) add_gui_example(dnn_imagenet_ex)
add_gui_example(dnn_mmod_find_cars_ex) # add_gui_example(dnn_mmod_find_cars_ex)
add_gui_example(dnn_mmod_find_cars2_ex) # add_gui_example(dnn_mmod_find_cars2_ex)
add_example(dnn_mmod_train_find_cars_ex) # add_example(dnn_mmod_train_find_cars_ex)
add_gui_example(dnn_semantic_segmentation_ex) # add_gui_example(dnn_semantic_segmentation_ex)
add_gui_example(dnn_instance_segmentation_ex) # add_gui_example(dnn_instance_segmentation_ex)
add_example(dnn_imagenet_train_ex) # add_example(dnn_imagenet_train_ex)
add_example(dnn_semantic_segmentation_train_ex) # add_example(dnn_semantic_segmentation_train_ex)
add_example(dnn_instance_segmentation_train_ex) # add_example(dnn_instance_segmentation_train_ex)
add_example(dnn_metric_learning_on_images_ex) # add_example(dnn_metric_learning_on_images_ex)
add_gui_example(dnn_dcgan_train_ex) # add_gui_example(dnn_dcgan_train_ex)
# add_gui_example(dnn_neural_style_transfer_ex)
endif() endif()
if (DLIB_NO_GUI_SUPPORT) # if (DLIB_NO_GUI_SUPPORT)
message("No GUI support, so we won't build the webcam_face_pose_ex example.") # message("No GUI support, so we won't build the webcam_face_pose_ex example.")
else() # else()
find_package(OpenCV QUIET) # find_package(OpenCV QUIET)
if (OpenCV_FOUND) # if (OpenCV_FOUND)
include_directories(${OpenCV_INCLUDE_DIRS}) # include_directories(${OpenCV_INCLUDE_DIRS})
add_executable(webcam_face_pose_ex webcam_face_pose_ex.cpp) # add_executable(webcam_face_pose_ex webcam_face_pose_ex.cpp)
target_link_libraries(webcam_face_pose_ex dlib::dlib ${OpenCV_LIBS} ) # target_link_libraries(webcam_face_pose_ex dlib::dlib ${OpenCV_LIBS} )
else() # else()
message("OpenCV not found, so we won't build the webcam_face_pose_ex example.") # message("OpenCV not found, so we won't build the webcam_face_pose_ex example.")
endif() # endif()
endif() # endif()
#here we apply our macros # #here we apply our macros
add_gui_example(3d_point_cloud_ex) # add_gui_example(3d_point_cloud_ex)
add_example(bayes_net_ex) # add_example(bayes_net_ex)
add_example(bayes_net_from_disk_ex) # add_example(bayes_net_from_disk_ex)
add_gui_example(bayes_net_gui_ex) # add_gui_example(bayes_net_gui_ex)
add_example(bridge_ex) # add_example(bridge_ex)
add_example(bsp_ex) # add_example(bsp_ex)
add_example(compress_stream_ex) # add_example(compress_stream_ex)
add_example(config_reader_ex) # add_example(config_reader_ex)
add_example(custom_trainer_ex) # add_example(custom_trainer_ex)
add_example(dir_nav_ex) # add_example(dir_nav_ex)
add_example(empirical_kernel_map_ex) # add_example(empirical_kernel_map_ex)
add_gui_example(face_detection_ex) # add_gui_example(face_detection_ex)
add_gui_example(face_landmark_detection_ex) # add_gui_example(face_landmark_detection_ex)
add_gui_example(fhog_ex) # add_gui_example(fhog_ex)
add_gui_example(fhog_object_detector_ex) # add_gui_example(fhog_object_detector_ex)
add_example(file_to_code_ex) # add_example(file_to_code_ex)
add_example(graph_labeling_ex) # add_example(graph_labeling_ex)
add_gui_example(gui_api_ex) add_gui_example(gui_api_ex)
add_gui_example(hough_transform_ex) # add_gui_example(hough_transform_ex)
add_gui_example(image_ex) # add_gui_example(image_ex)
add_example(integrate_function_adapt_simp_ex) # add_example(integrate_function_adapt_simp_ex)
add_example(iosockstream_ex) # add_example(iosockstream_ex)
add_example(kcentroid_ex) # add_example(kcentroid_ex)
add_example(kkmeans_ex) # add_example(kkmeans_ex)
add_example(krls_ex) # add_example(krls_ex)
add_example(krls_filter_ex) # add_example(krls_filter_ex)
add_example(krr_classification_ex) # add_example(krr_classification_ex)
add_example(krr_regression_ex) # add_example(krr_regression_ex)
add_example(learning_to_track_ex) # add_example(learning_to_track_ex)
add_example(least_squares_ex) # add_example(least_squares_ex)
add_example(linear_manifold_regularizer_ex) # add_example(linear_manifold_regularizer_ex)
add_example(logger_custom_output_ex) # add_example(logger_custom_output_ex)
add_example(logger_ex) # add_example(logger_ex)
add_example(logger_ex_2) # add_example(logger_ex_2)
add_example(matrix_ex) # add_example(matrix_ex)
add_example(matrix_expressions_ex) # add_example(matrix_expressions_ex)
add_example(max_cost_assignment_ex) # add_example(max_cost_assignment_ex)
add_example(member_function_pointer_ex) # add_example(member_function_pointer_ex)
add_example(mlp_ex) # add_example(mlp_ex)
add_example(model_selection_ex) # add_example(model_selection_ex)
add_gui_example(mpc_ex) # add_gui_example(mpc_ex)
add_example(multiclass_classification_ex) # add_example(multiclass_classification_ex)
add_example(multithreaded_object_ex) # add_example(multithreaded_object_ex)
add_gui_example(object_detector_advanced_ex) # add_gui_example(object_detector_advanced_ex)
add_gui_example(object_detector_ex) # add_gui_example(object_detector_ex)
add_gui_example(one_class_classifiers_ex) # add_gui_example(one_class_classifiers_ex)
add_example(optimization_ex) # add_example(optimization_ex)
add_example(parallel_for_ex) # add_example(parallel_for_ex)
add_example(pipe_ex) # add_example(pipe_ex)
add_example(pipe_ex_2) # add_example(pipe_ex_2)
add_example(quantum_computing_ex) # add_example(quantum_computing_ex)
add_example(queue_ex) # add_example(queue_ex)
add_example(rank_features_ex) # add_example(rank_features_ex)
add_example(running_stats_ex) # add_example(running_stats_ex)
add_example(rvm_ex) # add_example(rvm_ex)
add_example(rvm_regression_ex) # add_example(rvm_regression_ex)
add_example(sequence_labeler_ex) # add_example(sequence_labeler_ex)
add_example(sequence_segmenter_ex) # add_example(sequence_segmenter_ex)
add_example(server_http_ex) # add_example(server_http_ex)
add_example(server_iostream_ex) # add_example(server_iostream_ex)
add_example(sockets_ex) # add_example(sockets_ex)
add_example(sockstreambuf_ex) # add_example(sockstreambuf_ex)
add_example(std_allocator_ex) # add_example(std_allocator_ex)
add_gui_example(surf_ex) # add_gui_example(surf_ex)
add_example(svm_c_ex) # add_example(svm_c_ex)
add_example(svm_ex) # add_example(svm_ex)
add_example(svm_pegasos_ex) # add_example(svm_pegasos_ex)
add_example(svm_rank_ex) # add_example(svm_rank_ex)
add_example(svm_sparse_ex) # add_example(svm_sparse_ex)
add_example(svm_struct_ex) # add_example(svm_struct_ex)
add_example(svr_ex) # add_example(svr_ex)
add_example(thread_function_ex) # add_example(thread_function_ex)
add_example(thread_pool_ex) # add_example(thread_pool_ex)
add_example(threaded_object_ex) # add_example(threaded_object_ex)
add_example(threads_ex) # add_example(threads_ex)
add_example(timer_ex) # add_example(timer_ex)
add_gui_example(train_object_detector) # add_gui_example(train_object_detector)
add_example(train_shape_predictor_ex) # add_example(train_shape_predictor_ex)
add_example(using_custom_kernels_ex) # add_example(using_custom_kernels_ex)
add_gui_example(video_tracking_ex) # add_gui_example(video_tracking_ex)
add_example(xml_parser_ex) # add_example(xml_parser_ex)
# add_example(dnn_graph_visitor_ex)
add_example(playground)
if (DLIB_LINK_WITH_SQLITE3) # if (DLIB_LINK_WITH_SQLITE3)
add_example(sqlite_ex) # add_example(sqlite_ex)
endif() # endif()

View File

@ -13,7 +13,7 @@
For an introduction to dlib's DNN module read the dnn_introduction_ex.cpp and For an introduction to dlib's DNN module read the dnn_introduction_ex.cpp and
dnn_introduction2_ex.cpp example programs. dnn_introduction2_ex.cpp example programs.
Finally, these tools will use CUDA and cuDNN to drastically accelerate Finally, these tools will use CUDA and cuDNN to drastically accelerate
network training and testing. CMake should automatically find them if they network training and testing. CMake should automatically find them if they
are installed and configure things appropriately. If not, the program will are installed and configure things appropriately. If not, the program will
@ -30,7 +30,7 @@
using namespace std; using namespace std;
using namespace dlib; using namespace dlib;
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
// This block of statements defines the resnet-34 network // This block of statements defines the resnet-34 network
@ -41,7 +41,7 @@ using residual = add_prev1<block<N,BN,1,tag1<SUBNET>>>;
template <template <int,template<typename>class,int,typename> class block, int N, template<typename>class BN, typename SUBNET> template <template <int,template<typename>class,int,typename> class block, int N, template<typename>class BN, typename SUBNET>
using residual_down = add_prev2<avg_pool<2,2,2,2,skip1<tag2<block<N,BN,2,tag1<SUBNET>>>>>>; using residual_down = add_prev2<avg_pool<2,2,2,2,skip1<tag2<block<N,BN,2,tag1<SUBNET>>>>>>;
template <int N, template <typename> class BN, int stride, typename SUBNET> template <int N, template <typename> class BN, int stride, typename SUBNET>
using block = BN<con<N,3,3,1,1,relu<BN<con<N,3,3,stride,stride,SUBNET>>>>>; using block = BN<con<N,3,3,1,1,relu<BN<con<N,3,3,stride,stride,SUBNET>>>>>;
template <int N, typename SUBNET> using ares = relu<residual<block,N,affine,SUBNET>>; template <int N, typename SUBNET> using ares = relu<residual<block,N,affine,SUBNET>>;
@ -130,7 +130,7 @@ int main(int argc, char** argv) try
// already does this. But if we instead want to get the probability of each // already does this. But if we instead want to get the probability of each
// class as output we need to replace the last layer of the network with a // class as output we need to replace the last layer of the network with a
// softmax layer, which we do as follows: // softmax layer, which we do as follows:
softmax<anet_type::subnet_type> snet; softmax<anet_type::subnet_type> snet;
snet.subnet() = net.subnet(); snet.subnet() = net.subnet();
dlib::array<matrix<rgb_pixel>> images; dlib::array<matrix<rgb_pixel>> images;
@ -150,17 +150,24 @@ int main(int argc, char** argv) try
// p(i) == the probability the image contains object of class i. // p(i) == the probability the image contains object of class i.
matrix<float,1,1000> p = sum_rows(mat(snet(images.begin(), images.end())))/num_crops; matrix<float,1,1000> p = sum_rows(mat(snet(images.begin(), images.end())))/num_crops;
win.set_image(img); // win.set_image(img);
bool keep = false;
// Print the 5 most probable labels // Print the 5 most probable labels
for (int k = 0; k < 5; ++k) for (int k = 0; k < 5; ++k)
{ {
unsigned long predicted_label = index_of_max(p); unsigned long predicted_label = index_of_max(p);
cout << p(predicted_label) << ": " << labels[predicted_label] << endl; // cout << p(predicted_label) << ": " << labels[predicted_label] << endl;
p(predicted_label) = 0; p(predicted_label) = 0;
if (labels[predicted_label] == "racket" or labels[predicted_label] == "tennis_ball")
keep = true;
} }
if (not keep)
cout << "Hit enter to process the next image"; {
cin.get(); std::remove(argv[i]);
cout << "removing " << argv[i] << '\n';
}
// cout << "Hit enter to process the next image";
// cin.get();
} }
} }

View File

@ -9,7 +9,7 @@
The specific network we will run is from the paper The specific network we will run is from the paper
LeCun, Yann, et al. "Gradient-based learning applied to document recognition." LeCun, Yann, et al. "Gradient-based learning applied to document recognition."
Proceedings of the IEEE 86.11 (1998): 2278-2324. Proceedings of the IEEE 86.11 (1998): 2278-2324.
except that we replace the sigmoid non-linearities with rectified linear units. except that we replace the sigmoid non-linearities with rectified linear units.
These tools will use CUDA and cuDNN to drastically accelerate network These tools will use CUDA and cuDNN to drastically accelerate network
training and testing. CMake should automatically find them if they are training and testing. CMake should automatically find them if they are
@ -24,10 +24,10 @@
using namespace std; using namespace std;
using namespace dlib; using namespace dlib;
int main(int argc, char** argv) try int main(int argc, char** argv) try
{ {
// This example is going to run on the MNIST dataset. // This example is going to run on the MNIST dataset.
if (argc != 2) if (argc != 2)
{ {
cout << "This example needs the MNIST dataset to run!" << endl; cout << "This example needs the MNIST dataset to run!" << endl;
@ -50,8 +50,8 @@ int main(int argc, char** argv) try
// Now let's define the LeNet. Broadly speaking, there are 3 parts to a network // Now let's define the LeNet. Broadly speaking, there are 3 parts to a network
// definition. The loss layer, a bunch of computational layers, and then an input // definition. The loss layer, a bunch of computational layers, and then an input
// layer. You can see these components in the network definition below. // layer. You can see these components in the network definition below.
// //
// The input layer here says the network expects to be given matrix<unsigned char> // The input layer here says the network expects to be given matrix<unsigned char>
// objects as input. In general, you can use any dlib image or matrix type here, or // objects as input. In general, you can use any dlib image or matrix type here, or
// even define your own types by creating custom input layers. // even define your own types by creating custom input layers.
@ -59,29 +59,29 @@ int main(int argc, char** argv) try
// Then the middle layers define the computation the network will do to transform the // Then the middle layers define the computation the network will do to transform the
// input into whatever we want. Here we run the image through multiple convolutions, // input into whatever we want. Here we run the image through multiple convolutions,
// ReLU units, max pooling operations, and then finally a fully connected layer that // ReLU units, max pooling operations, and then finally a fully connected layer that
// converts the whole thing into just 10 numbers. // converts the whole thing into just 10 numbers.
// //
// Finally, the loss layer defines the relationship between the network outputs, our 10 // Finally, the loss layer defines the relationship between the network outputs, our 10
// numbers, and the labels in our dataset. Since we selected loss_multiclass_log it // numbers, and the labels in our dataset. Since we selected loss_multiclass_log it
// means we want to do multiclass classification with our network. Moreover, the // means we want to do multiclass classification with our network. Moreover, the
// number of network outputs (i.e. 10) is the number of possible labels. Whichever // number of network outputs (i.e. 10) is the number of possible labels. Whichever
// network output is largest is the predicted label. So for example, if the first // network output is largest is the predicted label. So for example, if the first
// network output is largest then the predicted digit is 0, if the last network output // network output is largest then the predicted digit is 0, if the last network output
// is largest then the predicted digit is 9. // is largest then the predicted digit is 9.
using net_type = loss_multiclass_log< using net_type = loss_multiclass_log<
fc<10, fc<10,
relu<fc<84, elu<fc<84,
relu<fc<120, elu<fc<120,
max_pool<2,2,2,2,relu<con<16,5,5,1,1, max_pool<2,2,2,2,elu<con<16,5,5,1,1,
max_pool<2,2,2,2,relu<con<6,5,5,1,1, max_pool<2,2,2,2,elu<con<6,5,5,1,1,
input<matrix<unsigned char>> input<matrix<unsigned char>>
>>>>>>>>>>>>; >>>>>>>>>>>>;
// This net_type defines the entire network architecture. For example, the block // This net_type defines the entire network architecture. For example, the block
// relu<fc<84,SUBNET>> means we take the output from the subnetwork, pass it through a // relu<fc<84,SUBNET>> means we take the output from the subnetwork, pass it through a
// fully connected layer with 84 outputs, then apply ReLU. Similarly, a block of // fully connected layer with 84 outputs, then apply ReLU. Similarly, a block of
// max_pool<2,2,2,2,relu<con<16,5,5,1,1,SUBNET>>> means we apply 16 convolutions with a // max_pool<2,2,2,2,relu<con<16,5,5,1,1,SUBNET>>> means we apply 16 convolutions with a
// 5x5 filter size and 1x1 stride to the output of a subnetwork, then apply ReLU, then // 5x5 filter size and 1x1 stride to the output of a subnetwork, then apply ReLU, then
// perform max pooling with a 2x2 window and 2x2 stride. // perform max pooling with a 2x2 window and 2x2 stride.
@ -105,7 +105,7 @@ int main(int argc, char** argv) try
// learning rate until the loss stops decreasing. Then it reduces the learning rate by // learning rate until the loss stops decreasing. Then it reduces the learning rate by
// a factor of 10 and continues running until the loss stops decreasing again. It will // a factor of 10 and continues running until the loss stops decreasing again. It will
// keep doing this until the learning rate has dropped below the min learning rate // keep doing this until the learning rate has dropped below the min learning rate
// defined above or the maximum number of epochs has been executed (defaulted to 10000). // defined above or the maximum number of epochs has been executed (defaulted to 10000).
trainer.train(training_images, training_labels); trainer.train(training_images, training_labels);
// At this point our net object should have learned how to classify MNIST images. But // At this point our net object should have learned how to classify MNIST images. But
@ -134,7 +134,7 @@ int main(int argc, char** argv) try
++num_right; ++num_right;
else else
++num_wrong; ++num_wrong;
} }
cout << "training num_right: " << num_right << endl; cout << "training num_right: " << num_right << endl;
cout << "training num_wrong: " << num_wrong << endl; cout << "training num_wrong: " << num_wrong << endl;
@ -151,7 +151,7 @@ int main(int argc, char** argv) try
++num_right; ++num_right;
else else
++num_wrong; ++num_wrong;
} }
cout << "testing num_right: " << num_right << endl; cout << "testing num_right: " << num_right << endl;
cout << "testing num_wrong: " << num_wrong << endl; cout << "testing num_wrong: " << num_wrong << endl;

View File

@ -1,6 +1,6 @@
// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt // The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
/* /*
This example shows how to train a CNN based object detector using dlib's This example shows how to train a CNN based object detector using dlib's
loss_mmod loss layer. This loss layer implements the Max-Margin Object loss_mmod loss layer. This loss layer implements the Max-Margin Object
Detection loss as described in the paper: Detection loss as described in the paper:
Max-Margin Object Detection by Davis E. King (http://arxiv.org/abs/1502.00046). Max-Margin Object Detection by Davis E. King (http://arxiv.org/abs/1502.00046).
@ -13,12 +13,12 @@
example. So you should read dnn_introduction_ex.cpp and dnn_introduction2_ex.cpp example. So you should read dnn_introduction_ex.cpp and dnn_introduction2_ex.cpp
before reading this example program. You should also read the introductory DNN+MMOD before reading this example program. You should also read the introductory DNN+MMOD
example dnn_mmod_ex.cpp as well before proceeding. example dnn_mmod_ex.cpp as well before proceeding.
This example is essentially a more complex version of dnn_mmod_ex.cpp. In it we train This example is essentially a more complex version of dnn_mmod_ex.cpp. In it we train
a detector that finds the rear ends of motor vehicles. I will also discuss some a detector that finds the rear ends of motor vehicles. I will also discuss some
aspects of data preparation useful when training this kind of detector. aspects of data preparation useful when training this kind of detector.
*/ */
@ -35,7 +35,11 @@ template <long num_filters, typename SUBNET> using con5d = con<num_filters,5,5,2
template <long num_filters, typename SUBNET> using con5 = con<num_filters,5,5,1,1,SUBNET>; template <long num_filters, typename SUBNET> using con5 = con<num_filters,5,5,1,1,SUBNET>;
template <typename SUBNET> using downsampler = relu<bn_con<con5d<32, relu<bn_con<con5d<32, relu<bn_con<con5d<16,SUBNET>>>>>>>>>; template <typename SUBNET> using downsampler = relu<bn_con<con5d<32, relu<bn_con<con5d<32, relu<bn_con<con5d<16,SUBNET>>>>>>>>>;
template <typename SUBNET> using rcon5 = relu<bn_con<con5<55,SUBNET>>>; template <typename SUBNET> using rcon5 = relu<bn_con<con5<55,SUBNET>>>;
using net_type = loss_mmod<con<1,9,9,1,1,rcon5<rcon5<rcon5<downsampler<input_rgb_image_pyramid<pyramid_down<6>>>>>>>>; // using net_type = loss_mmod<con<1,9,9,1,1,rcon5<rcon5<rcon5<downsampler<input_rgb_image_pyramid<pyramid_down<6>>>>>>>>;
// scale1<sig<con<55,1,1,1,1,avg_pool_everything<tag1<
using net_type = loss_mmod<con<1,9,9,1,1,
scale_prev2<skip1<tag2<sig<con<55,1,1,1,1,avg_pool_everything<tag1<
rcon5<rcon5<rcon5<downsampler<input_rgb_image_pyramid<pyramid_down<6>>>>>>>>>>>>>>>;
// ---------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------
@ -107,21 +111,21 @@ int main(int argc, char** argv) try
// image is implicitly assumed to be not a car, and the algorithm will use it as // image is implicitly assumed to be not a car, and the algorithm will use it as
// negative training data. So every car must be labeled, either with a normal // negative training data. So every car must be labeled, either with a normal
// rectangle or an "ignore" rectangle that tells MMOD to simply ignore it (i.e. neither // rectangle or an "ignore" rectangle that tells MMOD to simply ignore it (i.e. neither
// treat it as a thing to detect nor as negative training data). // treat it as a thing to detect nor as negative training data).
// //
// In our present case, many images contain very tiny cars in the distance, ones that // In our present case, many images contain very tiny cars in the distance, ones that
// are essentially just dark smudges. It's not reasonable to expect the CNN // are essentially just dark smudges. It's not reasonable to expect the CNN
// architecture we defined to detect such vehicles. However, I erred on the side of // architecture we defined to detect such vehicles. However, I erred on the side of
// having more complete annotations when creating the dataset. So when I labeled these // having more complete annotations when creating the dataset. So when I labeled these
// images I labeled many of these really difficult cases as vehicles to detect. // images I labeled many of these really difficult cases as vehicles to detect.
// //
// So the first thing we are going to do is clean up our dataset a little bit. In // So the first thing we are going to do is clean up our dataset a little bit. In
// particular, we are going to mark boxes smaller than 35*35 pixels as ignore since // particular, we are going to mark boxes smaller than 35*35 pixels as ignore since
// only really small and blurry cars appear at those sizes. We will also mark boxes // only really small and blurry cars appear at those sizes. We will also mark boxes
// that are heavily overlapped by another box as ignore. We do this because we want to // that are heavily overlapped by another box as ignore. We do this because we want to
// allow for stronger non-maximum suppression logic in the learned detector, since that // allow for stronger non-maximum suppression logic in the learned detector, since that
// will help make it easier to learn a good detector. // will help make it easier to learn a good detector.
// //
// To explain this non-max suppression idea further it's important to understand how // To explain this non-max suppression idea further it's important to understand how
// the detector works. Essentially, sliding window detectors scan all image locations // the detector works. Essentially, sliding window detectors scan all image locations
// and ask "is there a car here?". If there really is a car in a specific location in // and ask "is there a car here?". If there really is a car in a specific location in
@ -143,7 +147,7 @@ int main(int argc, char** argv) try
// "close to" measure will be configured to allow detections to really overlap a whole // "close to" measure will be configured to allow detections to really overlap a whole
// lot. On the other hand, if your dataset didn't contain any overlapped boxes at all, // lot. On the other hand, if your dataset didn't contain any overlapped boxes at all,
// then the non-max suppression logic would be configured to filter out any boxes that // then the non-max suppression logic would be configured to filter out any boxes that
// overlapped at all, and thus would be performing a much stronger non-max suppression. // overlapped at all, and thus would be performing a much stronger non-max suppression.
// //
// Why does this matter? Well, remember that we want to avoid duplicate detections. // Why does this matter? Well, remember that we want to avoid duplicate detections.
// If non-max suppression just kills everything in a really wide area around a car then // If non-max suppression just kills everything in a really wide area around a car then
@ -183,8 +187,8 @@ int main(int argc, char** argv) try
// really extreme aspect ratios. However, some datasets do, often because of // really extreme aspect ratios. However, some datasets do, often because of
// bad labeling. So it's a good idea to check for that and either eliminate // bad labeling. So it's a good idea to check for that and either eliminate
// those boxes or set them to ignore. Although, this depends on your // those boxes or set them to ignore. Although, this depends on your
// application. // application.
// //
// For instance, if your dataset has boxes with an aspect ratio // For instance, if your dataset has boxes with an aspect ratio
// of 10 then you should think about what that means for the network // of 10 then you should think about what that means for the network
// architecture. Does the receptive field even cover the entirety of the box // architecture. Does the receptive field even cover the entirety of the box
@ -196,13 +200,13 @@ int main(int argc, char** argv) try
// errors, but are annotated in a sloppy and inconsistent way. Fixing those // errors, but are annotated in a sloppy and inconsistent way. Fixing those
// errors and inconsistencies can often greatly improve models trained from // errors and inconsistencies can often greatly improve models trained from
// such data. It's almost always worth the time to try and improve your // such data. It's almost always worth the time to try and improve your
// training dataset. // training dataset.
// //
// In any case, my point is that there are other types of dataset cleaning you // In any case, my point is that there are other types of dataset cleaning you
// could put here. What exactly you need depends on your application. But you // could put here. What exactly you need depends on your application. But you
// should carefully consider it and not take your dataset as a given. The work // should carefully consider it and not take your dataset as a given. The work
// of creating a good detector is largely about creating a high quality // of creating a good detector is largely about creating a high quality
// training dataset. // training dataset.
} }
} }
@ -226,7 +230,7 @@ int main(int argc, char** argv) try
// each of the sliding windows needs to be so as to be able to detect all the vehicles. // each of the sliding windows needs to be so as to be able to detect all the vehicles.
// Since our dataset has basically these 3 different aspect ratios, it will decide to // Since our dataset has basically these 3 different aspect ratios, it will decide to
// use 3 different sliding windows. This means the final con layer in the network will // use 3 different sliding windows. This means the final con layer in the network will
// have 3 filters, one for each of these aspect ratios. // have 3 filters, one for each of these aspect ratios.
// //
// Another thing to consider when setting the sliding window size is the "stride" of // Another thing to consider when setting the sliding window size is the "stride" of
// your network. The network we defined above downsamples the image by a factor of 8x // your network. The network we defined above downsamples the image by a factor of 8x
@ -237,7 +241,7 @@ int main(int argc, char** argv) try
// pixels at a time when scanning. This is obviously a problem since 75% of the image // pixels at a time when scanning. This is obviously a problem since 75% of the image
// won't even be visited by the sliding window. So you need to set the window size to // won't even be visited by the sliding window. So you need to set the window size to
// be big enough relative to the stride of your network. In our case, the windows are // be big enough relative to the stride of your network. In our case, the windows are
// at least 30 pixels in length, so being moved by 8 pixel steps is fine. // at least 30 pixels in length, so being moved by 8 pixel steps is fine.
mmod_options options(boxes_train, 70, 30); mmod_options options(boxes_train, 70, 30);
@ -247,22 +251,22 @@ int main(int argc, char** argv) try
// also contained a lot of ignore boxes. Some of them are large boxes that encompass // also contained a lot of ignore boxes. Some of them are large boxes that encompass
// large parts of an image and the intention is to have everything inside those boxes // large parts of an image and the intention is to have everything inside those boxes
// be ignored. Therefore, we need to tell the MMOD algorithm to do that, which we do // be ignored. Therefore, we need to tell the MMOD algorithm to do that, which we do
// by setting options.overlaps_ignore appropriately. // by setting options.overlaps_ignore appropriately.
// //
// But first, we need to understand exactly what this option does. The MMOD loss // But first, we need to understand exactly what this option does. The MMOD loss
// is essentially counting the number of false alarms + missed detections produced by // is essentially counting the number of false alarms + missed detections produced by
// the detector for each image. During training, the code is running the detector on // the detector for each image. During training, the code is running the detector on
// each image in a mini-batch and looking at its output and counting the number of // each image in a mini-batch and looking at its output and counting the number of
// mistakes. The optimizer tries to find parameter settings that minimize the number // mistakes. The optimizer tries to find parameter settings that minimize the number
// of detector mistakes. // of detector mistakes.
// //
// This overlaps_ignore option allows you to tell the loss that some outputs from the // This overlaps_ignore option allows you to tell the loss that some outputs from the
// detector should be totally ignored, as if they never happened. In particular, if a // detector should be totally ignored, as if they never happened. In particular, if a
// detection overlaps a box in the training data with ignore==true then that detection // detection overlaps a box in the training data with ignore==true then that detection
// is ignored. This overlap is determined by calling // is ignored. This overlap is determined by calling
// options.overlaps_ignore(the_detection, the_ignored_training_box). If it returns // options.overlaps_ignore(the_detection, the_ignored_training_box). If it returns
// true then that detection is ignored. // true then that detection is ignored.
// //
// You should read the documentation for test_box_overlap, the class type for // You should read the documentation for test_box_overlap, the class type for
// overlaps_ignore, for full details. However, the gist is that the default behavior is // overlaps_ignore, for full details. However, the gist is that the default behavior is
// to only consider boxes as overlapping if their intersection over union is > 0.5. // to only consider boxes as overlapping if their intersection over union is > 0.5.
@ -275,7 +279,7 @@ int main(int argc, char** argv) try
net_type net(options); net_type net(options);
// The final layer of the network must be a con layer that contains // The final layer of the network must be a con layer that contains
// options.detector_windows.size() filters. This is because these final filters are // options.detector_windows.size() filters. This is because these final filters are
// what perform the final "sliding window" detection in the network. For the dlib // what perform the final "sliding window" detection in the network. For the dlib
// vehicle dataset, there will be 3 sliding window detectors, so we will be setting // vehicle dataset, there will be 3 sliding window detectors, so we will be setting
@ -306,13 +310,13 @@ int main(int argc, char** argv) try
std::vector<matrix<rgb_pixel>> mini_batch_samples; std::vector<matrix<rgb_pixel>> mini_batch_samples;
std::vector<std::vector<mmod_rect>> mini_batch_labels; std::vector<std::vector<mmod_rect>> mini_batch_labels;
random_cropper cropper; random_cropper cropper;
cropper.set_seed(time(0)); cropper.set_seed(time(0));
cropper.set_chip_dims(350, 350); cropper.set_chip_dims(350, 350);
// Usually you want to give the cropper whatever min sizes you passed to the // Usually you want to give the cropper whatever min sizes you passed to the
// mmod_options constructor, or very slightly smaller sizes, which is what we do here. // mmod_options constructor, or very slightly smaller sizes, which is what we do here.
cropper.set_min_object_size(69,28); cropper.set_min_object_size(69,28);
cropper.set_max_rotation_degrees(2); cropper.set_max_rotation_degrees(2);
dlib::rand rnd; dlib::rand rnd;
@ -320,10 +324,10 @@ int main(int argc, char** argv) try
cout << trainer << cropper << endl; cout << trainer << cropper << endl;
int cnt = 1; int cnt = 1;
// Run the trainer until the learning rate gets small. // Run the trainer until the learning rate gets small.
while(trainer.get_learning_rate() >= 1e-4) while(trainer.get_learning_rate() >= 1e-4)
{ {
// Every 30 mini-batches we do a testing mini-batch. // Every 30 mini-batches we do a testing mini-batch.
if (cnt%30 != 0 || images_test.size() == 0) if (cnt%30 != 0 || images_test.size() == 0)
{ {
cropper(87, images_train, boxes_train, mini_batch_samples, mini_batch_labels); cropper(87, images_train, boxes_train, mini_batch_samples, mini_batch_labels);
@ -375,7 +379,7 @@ int main(int argc, char** argv) try
cout << "\nsync_filename: " << sync_filename << endl; cout << "\nsync_filename: " << sync_filename << endl;
cout << "num training images: "<< images_train.size() << endl; cout << "num training images: "<< images_train.size() << endl;
cout << "training results: " << test_object_detection_function(net, images_train, boxes_train, test_box_overlap(), 0, options.overlaps_ignore); cout << "training results: " << test_object_detection_function(net, images_train, boxes_train, test_box_overlap(), 0, options.overlaps_ignore);
// Upsampling the data will allow the detector to find smaller cars. Recall that // Upsampling the data will allow the detector to find smaller cars. Recall that
// we configured it to use a sliding window nominally 70 pixels in size. So upsampling // we configured it to use a sliding window nominally 70 pixels in size. So upsampling
// here will let it find things nominally 35 pixels in size. Although we include a // here will let it find things nominally 35 pixels in size. Although we include a
// limit of 1800*1800 here which means "don't upsample an image if it's already larger // limit of 1800*1800 here which means "don't upsample an image if it's already larger
@ -405,11 +409,11 @@ int main(int argc, char** argv) try
Also, the training and testing accuracies were: Also, the training and testing accuracies were:
num training images: 2217 num training images: 2217
training results: 0.990738 0.736431 0.736073 training results: 0.990738 0.736431 0.736073
training upsampled results: 0.986837 0.937694 0.936912 training upsampled results: 0.986837 0.937694 0.936912
num testing images: 135 num testing images: 135
testing results: 0.988827 0.471372 0.470806 testing results: 0.988827 0.471372 0.470806
testing upsampled results: 0.987879 0.651132 0.650399 testing upsampled results: 0.987879 0.651132 0.650399
*/ */
return 0; return 0;