Mirror of https://github.com/davisking/dlib.git, synced 2024-11-01 10:14:53 +08:00

Commit 6d5b4e7a1f (parent e532993364): "debug test path"

Changed files include .github/workflows/ubuntu.yml (vendored), +1
@@ -124,6 +124,7 @@ jobs:
     - name: Build tests
       working-directory: ${{ github.workspace }}/test
       run: |
+        echo `pwd`
         cmake . -B ${{ env.build_dir }} -DCMAKE_BUILD_TYPE=${{ env.config }} -G Ninja
         cmake --build .
 
dlib/test/CMakeLists.txt

@@ -17,147 +17,147 @@ add_subdirectory(.. dlib_build)
 # This variable contains a list of all the tests we are building
 # into the regression test suite.
 set (tests
-   example.cpp
+   # example.cpp
-   active_learning.cpp
+   # active_learning.cpp
-   any.cpp
+   # any.cpp
-   any_function.cpp
+   # any_function.cpp
-   array2d.cpp
+   # array2d.cpp
-   array.cpp
+   # array.cpp
-   assignment_learning.cpp
+   # assignment_learning.cpp
-   base64.cpp
+   # base64.cpp
-   bayes_nets.cpp
+   # bayes_nets.cpp
-   bigint.cpp
+   # bigint.cpp
-   binary_search_tree_kernel_1a.cpp
+   # binary_search_tree_kernel_1a.cpp
-   binary_search_tree_kernel_2a.cpp
+   # binary_search_tree_kernel_2a.cpp
-   binary_search_tree_mm1.cpp
+   # binary_search_tree_mm1.cpp
-   binary_search_tree_mm2.cpp
+   # binary_search_tree_mm2.cpp
-   bridge.cpp
+   # bridge.cpp
-   bsp.cpp
+   # bsp.cpp
-   byte_orderer.cpp
+   # byte_orderer.cpp
-   cca.cpp
+   # cca.cpp
-   clustering.cpp
+   # clustering.cpp
-   cmd_line_parser.cpp
+   # cmd_line_parser.cpp
-   cmd_line_parser_wchar_t.cpp
+   # cmd_line_parser_wchar_t.cpp
-   compress_stream.cpp
+   # compress_stream.cpp
-   conditioning_class_c.cpp
+   # conditioning_class_c.cpp
-   conditioning_class.cpp
+   # conditioning_class.cpp
-   config_reader.cpp
+   # config_reader.cpp
-   correlation_tracker.cpp
+   # correlation_tracker.cpp
-   crc32.cpp
+   # crc32.cpp
-   create_iris_datafile.cpp
+   # create_iris_datafile.cpp
-   data_io.cpp
+   # data_io.cpp
-   directed_graph.cpp
+   # directed_graph.cpp
-   discriminant_pca.cpp
+   # discriminant_pca.cpp
-   disjoint_subsets.cpp
+   # disjoint_subsets.cpp
-   disjoint_subsets_sized.cpp
+   # disjoint_subsets_sized.cpp
-   ekm_and_lisf.cpp
+   # ekm_and_lisf.cpp
-   empirical_kernel_map.cpp
+   # empirical_kernel_map.cpp
-   entropy_coder.cpp
+   # entropy_coder.cpp
-   entropy_encoder_model.cpp
+   # entropy_encoder_model.cpp
-   example_args.cpp
+   # example_args.cpp
-   face.cpp
+   # face.cpp
-   fft.cpp
+   # fft.cpp
-   fhog.cpp
+   # fhog.cpp
-   filtering.cpp
+   # filtering.cpp
-   find_max_factor_graph_nmplp.cpp
+   # find_max_factor_graph_nmplp.cpp
-   find_max_factor_graph_viterbi.cpp
+   # find_max_factor_graph_viterbi.cpp
-   geometry.cpp
+   # geometry.cpp
-   graph.cpp
+   # graph.cpp
-   graph_cuts.cpp
+   # graph_cuts.cpp
-   graph_labeler.cpp
+   # graph_labeler.cpp
-   hash.cpp
+   # hash.cpp
-   hash_map.cpp
+   # hash_map.cpp
-   hash_set.cpp
+   # hash_set.cpp
-   hash_table.cpp
+   # hash_table.cpp
-   hog_image.cpp
+   # hog_image.cpp
-   image.cpp
+   # image.cpp
-   iosockstream.cpp
+   # iosockstream.cpp
-   is_same_object.cpp
+   # is_same_object.cpp
-   isotonic_regression.cpp
+   # isotonic_regression.cpp
-   kcentroid.cpp
+   # kcentroid.cpp
-   kernel_matrix.cpp
+   # kernel_matrix.cpp
-   kmeans.cpp
+   # kmeans.cpp
-   learning_to_track.cpp
+   # learning_to_track.cpp
-   least_squares.cpp
+   # least_squares.cpp
-   linear_manifold_regularizer.cpp
+   # linear_manifold_regularizer.cpp
-   lspi.cpp
+   # lspi.cpp
-   lz77_buffer.cpp
+   # lz77_buffer.cpp
-   map.cpp
+   # map.cpp
-   matrix2.cpp
+   # matrix2.cpp
-   matrix3.cpp
+   # matrix3.cpp
-   matrix4.cpp
+   # matrix4.cpp
-   matrix_chol.cpp
+   # matrix_chol.cpp
-   matrix.cpp
+   # matrix.cpp
-   matrix_eig.cpp
+   # matrix_eig.cpp
-   matrix_lu.cpp
+   # matrix_lu.cpp
-   matrix_qr.cpp
+   # matrix_qr.cpp
-   max_cost_assignment.cpp
+   # max_cost_assignment.cpp
-   max_sum_submatrix.cpp
+   # max_sum_submatrix.cpp
-   md5.cpp
+   # md5.cpp
-   member_function_pointer.cpp
+   # member_function_pointer.cpp
-   metaprogramming.cpp
+   # metaprogramming.cpp
-   mpc.cpp
+   # mpc.cpp
-   multithreaded_object.cpp
+   # multithreaded_object.cpp
-   numerical_integration.cpp
+   # numerical_integration.cpp
-   object_detector.cpp
+   # object_detector.cpp
-   oca.cpp
+   # oca.cpp
-   one_vs_all_trainer.cpp
+   # one_vs_all_trainer.cpp
-   one_vs_one_trainer.cpp
+   # one_vs_one_trainer.cpp
-   optimization.cpp
+   # optimization.cpp
-   optimization_test_functions.cpp
+   # optimization_test_functions.cpp
-   global_optimization.cpp
+   # global_optimization.cpp
-   opt_qp_solver.cpp
+   # opt_qp_solver.cpp
-   parallel_for.cpp
+   # parallel_for.cpp
-   parse.cpp
+   # parse.cpp
-   pipe.cpp
+   # pipe.cpp
-   pixel.cpp
+   # pixel.cpp
-   probabilistic.cpp
+   # probabilistic.cpp
-   pyramid_down.cpp
+   # pyramid_down.cpp
-   queue.cpp
+   # queue.cpp
-   rand.cpp
+   # rand.cpp
-   ranking.cpp
+   # ranking.cpp
-   read_write_mutex.cpp
+   # read_write_mutex.cpp
-   reference_counter.cpp
+   # reference_counter.cpp
-   rls.cpp
+   # rls.cpp
-   random_forest.cpp
+   # random_forest.cpp
-   sammon.cpp
+   # sammon.cpp
-   scan_image.cpp
+   # scan_image.cpp
-   sequence.cpp
+   # sequence.cpp
-   sequence_labeler.cpp
+   # sequence_labeler.cpp
-   sequence_segmenter.cpp
+   # sequence_segmenter.cpp
-   serialize.cpp
+   # serialize.cpp
-   set.cpp
+   # set.cpp
-   sldf.cpp
+   # sldf.cpp
-   sliding_buffer.cpp
+   # sliding_buffer.cpp
-   sockets2.cpp
+   # sockets2.cpp
-   sockets.cpp
+   # sockets.cpp
-   sockstreambuf.cpp
+   # sockstreambuf.cpp
-   sparse_vector.cpp
+   # sparse_vector.cpp
-   stack.cpp
+   # stack.cpp
-   static_map.cpp
+   # static_map.cpp
-   static_set.cpp
+   # static_set.cpp
-   statistics.cpp
+   # statistics.cpp
-   std_vector_c.cpp
+   # std_vector_c.cpp
-   string.cpp
+   # string.cpp
-   svm_c_linear.cpp
+   # svm_c_linear.cpp
-   svm_c_linear_dcd.cpp
+   # svm_c_linear_dcd.cpp
-   svm.cpp
+   # svm.cpp
-   svm_multiclass_linear.cpp
+   # svm_multiclass_linear.cpp
-   svm_struct.cpp
+   # svm_struct.cpp
-   svr_linear_trainer.cpp
+   # svr_linear_trainer.cpp
-   symmetric_matrix_cache.cpp
+   # symmetric_matrix_cache.cpp
-   thread_pool.cpp
+   # thread_pool.cpp
-   threads.cpp
+   # threads.cpp
-   timer.cpp
+   # timer.cpp
-   tokenizer.cpp
+   # tokenizer.cpp
-   trust_region.cpp
+   # trust_region.cpp
-   tuple.cpp
+   # tuple.cpp
-   type_safe_union.cpp
+   # type_safe_union.cpp
-   vectorstream.cpp
+   # vectorstream.cpp
    dnn.cpp
-   cublas.cpp
+   # cublas.cpp
-   find_optimal_parameters.cpp
+   # find_optimal_parameters.cpp
-   elastic_net.cpp
+   # elastic_net.cpp
 )
 
 
@@ -170,7 +170,7 @@ if (CMAKE_COMPILER_IS_GNUCXX)
    add_definitions("-W -Wall")
    # I don't care about unused testing functions though. I like to keep them
    # around. Don't warn about it.
-   add_definitions("-Wno-unused-function")
+   add_definitions("-Wno-unused-function -Wno-deprecated-copy -fdiagnostics-color=always")
 endif()
 
 
examples/CMakeLists.txt

@@ -36,6 +36,7 @@ cmake_minimum_required(VERSION 2.8.12)
 # Every project needs a name. We call this the "examples" project.
 project(examples)
 
+add_compile_options (-fdiagnostics-color=always)
 
 # Tell cmake we will need dlib. This command will pull in dlib and compile it
 # into your project. Note that you don't need to compile or install dlib. All
@@ -60,10 +61,10 @@ add_subdirectory(../dlib dlib_build)
 # are going to compile one of the dlib example programs which has only one .cpp
 # file, assignment_learning_ex.cpp. If your program consisted of multiple .cpp
 # files you would simply list them here in the add_executable() statement.
-add_executable(assignment_learning_ex assignment_learning_ex.cpp)
+# add_executable(assignment_learning_ex assignment_learning_ex.cpp)
 # Finally, you need to tell CMake that this program, assignment_learning_ex,
 # depends on dlib. You do that with this statement:
-target_link_libraries(assignment_learning_ex dlib::dlib)
+# target_link_libraries(assignment_learning_ex dlib::dlib)
 
 
@@ -134,132 +135,135 @@ endmacro()
 # like this:
 #   cmake .. -G "Visual Studio 14 2015 Win64" -T host=x64
 if (NOT USING_OLD_VISUAL_STUDIO_COMPILER)
-   add_example(dnn_metric_learning_ex)
+   # add_example(dnn_metric_learning_ex)
-   add_gui_example(dnn_face_recognition_ex)
+   # add_gui_example(dnn_face_recognition_ex)
    add_example(dnn_introduction_ex)
-   add_example(dnn_introduction2_ex)
+   # add_example(dnn_introduction2_ex)
-   add_example(dnn_introduction3_ex)
+   # add_example(dnn_introduction3_ex)
-   add_example(dnn_inception_ex)
+   # add_example(dnn_inception_ex)
-   add_gui_example(dnn_mmod_ex)
+   # add_gui_example(dnn_mmod_ex)
-   add_gui_example(dnn_mmod_face_detection_ex)
+   # add_gui_example(dnn_mmod_face_detection_ex)
-   add_gui_example(random_cropper_ex)
+   # add_gui_example(random_cropper_ex)
-   add_gui_example(dnn_mmod_dog_hipsterizer)
+   # add_gui_example(dnn_mmod_dog_hipsterizer)
    add_gui_example(dnn_imagenet_ex)
-   add_gui_example(dnn_mmod_find_cars_ex)
+   # add_gui_example(dnn_mmod_find_cars_ex)
-   add_gui_example(dnn_mmod_find_cars2_ex)
+   # add_gui_example(dnn_mmod_find_cars2_ex)
-   add_example(dnn_mmod_train_find_cars_ex)
+   # add_example(dnn_mmod_train_find_cars_ex)
-   add_gui_example(dnn_semantic_segmentation_ex)
+   # add_gui_example(dnn_semantic_segmentation_ex)
-   add_gui_example(dnn_instance_segmentation_ex)
+   # add_gui_example(dnn_instance_segmentation_ex)
-   add_example(dnn_imagenet_train_ex)
+   # add_example(dnn_imagenet_train_ex)
-   add_example(dnn_semantic_segmentation_train_ex)
+   # add_example(dnn_semantic_segmentation_train_ex)
-   add_example(dnn_instance_segmentation_train_ex)
+   # add_example(dnn_instance_segmentation_train_ex)
-   add_example(dnn_metric_learning_on_images_ex)
+   # add_example(dnn_metric_learning_on_images_ex)
-   add_gui_example(dnn_dcgan_train_ex)
+   # add_gui_example(dnn_dcgan_train_ex)
+   # add_gui_example(dnn_neural_style_transfer_ex)
 endif()
 
 
-if (DLIB_NO_GUI_SUPPORT)
+# if (DLIB_NO_GUI_SUPPORT)
-   message("No GUI support, so we won't build the webcam_face_pose_ex example.")
+#    message("No GUI support, so we won't build the webcam_face_pose_ex example.")
-else()
+# else()
-   find_package(OpenCV QUIET)
+#    find_package(OpenCV QUIET)
-   if (OpenCV_FOUND)
+#    if (OpenCV_FOUND)
-      include_directories(${OpenCV_INCLUDE_DIRS})
+#       include_directories(${OpenCV_INCLUDE_DIRS})
 
-      add_executable(webcam_face_pose_ex webcam_face_pose_ex.cpp)
+#       add_executable(webcam_face_pose_ex webcam_face_pose_ex.cpp)
-      target_link_libraries(webcam_face_pose_ex dlib::dlib ${OpenCV_LIBS} )
+#       target_link_libraries(webcam_face_pose_ex dlib::dlib ${OpenCV_LIBS} )
-   else()
+#    else()
-      message("OpenCV not found, so we won't build the webcam_face_pose_ex example.")
+#       message("OpenCV not found, so we won't build the webcam_face_pose_ex example.")
-   endif()
+#    endif()
-endif()
+# endif()
 
 
 
-#here we apply our macros
+# #here we apply our macros
-add_gui_example(3d_point_cloud_ex)
+# add_gui_example(3d_point_cloud_ex)
-add_example(bayes_net_ex)
+# add_example(bayes_net_ex)
-add_example(bayes_net_from_disk_ex)
+# add_example(bayes_net_from_disk_ex)
-add_gui_example(bayes_net_gui_ex)
+# add_gui_example(bayes_net_gui_ex)
-add_example(bridge_ex)
+# add_example(bridge_ex)
-add_example(bsp_ex)
+# add_example(bsp_ex)
-add_example(compress_stream_ex)
+# add_example(compress_stream_ex)
-add_example(config_reader_ex)
+# add_example(config_reader_ex)
-add_example(custom_trainer_ex)
+# add_example(custom_trainer_ex)
-add_example(dir_nav_ex)
+# add_example(dir_nav_ex)
-add_example(empirical_kernel_map_ex)
+# add_example(empirical_kernel_map_ex)
-add_gui_example(face_detection_ex)
+# add_gui_example(face_detection_ex)
-add_gui_example(face_landmark_detection_ex)
+# add_gui_example(face_landmark_detection_ex)
-add_gui_example(fhog_ex)
+# add_gui_example(fhog_ex)
-add_gui_example(fhog_object_detector_ex)
+# add_gui_example(fhog_object_detector_ex)
-add_example(file_to_code_ex)
+# add_example(file_to_code_ex)
-add_example(graph_labeling_ex)
+# add_example(graph_labeling_ex)
 add_gui_example(gui_api_ex)
-add_gui_example(hough_transform_ex)
+# add_gui_example(hough_transform_ex)
-add_gui_example(image_ex)
+# add_gui_example(image_ex)
-add_example(integrate_function_adapt_simp_ex)
+# add_example(integrate_function_adapt_simp_ex)
-add_example(iosockstream_ex)
+# add_example(iosockstream_ex)
-add_example(kcentroid_ex)
+# add_example(kcentroid_ex)
-add_example(kkmeans_ex)
+# add_example(kkmeans_ex)
-add_example(krls_ex)
+# add_example(krls_ex)
-add_example(krls_filter_ex)
+# add_example(krls_filter_ex)
-add_example(krr_classification_ex)
+# add_example(krr_classification_ex)
-add_example(krr_regression_ex)
+# add_example(krr_regression_ex)
-add_example(learning_to_track_ex)
+# add_example(learning_to_track_ex)
-add_example(least_squares_ex)
+# add_example(least_squares_ex)
-add_example(linear_manifold_regularizer_ex)
+# add_example(linear_manifold_regularizer_ex)
-add_example(logger_custom_output_ex)
+# add_example(logger_custom_output_ex)
-add_example(logger_ex)
+# add_example(logger_ex)
-add_example(logger_ex_2)
+# add_example(logger_ex_2)
-add_example(matrix_ex)
+# add_example(matrix_ex)
-add_example(matrix_expressions_ex)
+# add_example(matrix_expressions_ex)
-add_example(max_cost_assignment_ex)
+# add_example(max_cost_assignment_ex)
-add_example(member_function_pointer_ex)
+# add_example(member_function_pointer_ex)
-add_example(mlp_ex)
+# add_example(mlp_ex)
-add_example(model_selection_ex)
+# add_example(model_selection_ex)
-add_gui_example(mpc_ex)
+# add_gui_example(mpc_ex)
-add_example(multiclass_classification_ex)
+# add_example(multiclass_classification_ex)
-add_example(multithreaded_object_ex)
+# add_example(multithreaded_object_ex)
-add_gui_example(object_detector_advanced_ex)
+# add_gui_example(object_detector_advanced_ex)
-add_gui_example(object_detector_ex)
+# add_gui_example(object_detector_ex)
-add_gui_example(one_class_classifiers_ex)
+# add_gui_example(one_class_classifiers_ex)
-add_example(optimization_ex)
+# add_example(optimization_ex)
-add_example(parallel_for_ex)
+# add_example(parallel_for_ex)
-add_example(pipe_ex)
+# add_example(pipe_ex)
-add_example(pipe_ex_2)
+# add_example(pipe_ex_2)
-add_example(quantum_computing_ex)
+# add_example(quantum_computing_ex)
-add_example(queue_ex)
+# add_example(queue_ex)
-add_example(rank_features_ex)
+# add_example(rank_features_ex)
-add_example(running_stats_ex)
+# add_example(running_stats_ex)
-add_example(rvm_ex)
+# add_example(rvm_ex)
-add_example(rvm_regression_ex)
+# add_example(rvm_regression_ex)
-add_example(sequence_labeler_ex)
+# add_example(sequence_labeler_ex)
-add_example(sequence_segmenter_ex)
+# add_example(sequence_segmenter_ex)
-add_example(server_http_ex)
+# add_example(server_http_ex)
-add_example(server_iostream_ex)
+# add_example(server_iostream_ex)
-add_example(sockets_ex)
+# add_example(sockets_ex)
-add_example(sockstreambuf_ex)
+# add_example(sockstreambuf_ex)
-add_example(std_allocator_ex)
+# add_example(std_allocator_ex)
-add_gui_example(surf_ex)
+# add_gui_example(surf_ex)
-add_example(svm_c_ex)
+# add_example(svm_c_ex)
-add_example(svm_ex)
+# add_example(svm_ex)
-add_example(svm_pegasos_ex)
+# add_example(svm_pegasos_ex)
-add_example(svm_rank_ex)
+# add_example(svm_rank_ex)
-add_example(svm_sparse_ex)
+# add_example(svm_sparse_ex)
-add_example(svm_struct_ex)
+# add_example(svm_struct_ex)
-add_example(svr_ex)
+# add_example(svr_ex)
-add_example(thread_function_ex)
+# add_example(thread_function_ex)
-add_example(thread_pool_ex)
+# add_example(thread_pool_ex)
-add_example(threaded_object_ex)
+# add_example(threaded_object_ex)
-add_example(threads_ex)
+# add_example(threads_ex)
-add_example(timer_ex)
+# add_example(timer_ex)
-add_gui_example(train_object_detector)
+# add_gui_example(train_object_detector)
-add_example(train_shape_predictor_ex)
+# add_example(train_shape_predictor_ex)
-add_example(using_custom_kernels_ex)
+# add_example(using_custom_kernels_ex)
-add_gui_example(video_tracking_ex)
+# add_gui_example(video_tracking_ex)
-add_example(xml_parser_ex)
+# add_example(xml_parser_ex)
+# add_example(dnn_graph_visitor_ex)
+add_example(playground)
 
 
-if (DLIB_LINK_WITH_SQLITE3)
+# if (DLIB_LINK_WITH_SQLITE3)
-   add_example(sqlite_ex)
+#    add_example(sqlite_ex)
-endif()
+# endif()
 
 
examples/dnn_imagenet_ex.cpp

@@ -13,7 +13,7 @@
     For an introduction to dlib's DNN module read the dnn_introduction_ex.cpp and
     dnn_introduction2_ex.cpp example programs.
 
 
     Finally, these tools will use CUDA and cuDNN to drastically accelerate
     network training and testing. CMake should automatically find them if they
     are installed and configure things appropriately. If not, the program will
@@ -30,7 +30,7 @@
 
 using namespace std;
 using namespace dlib;
 
 // ----------------------------------------------------------------------------------------
 
 // This block of statements defines the resnet-34 network
@@ -41,7 +41,7 @@ using residual = add_prev1<block<N,BN,1,tag1<SUBNET>>>;
 template <template <int,template<typename>class,int,typename> class block, int N, template<typename>class BN, typename SUBNET>
 using residual_down = add_prev2<avg_pool<2,2,2,2,skip1<tag2<block<N,BN,2,tag1<SUBNET>>>>>>;
 
 template <int N, template <typename> class BN, int stride, typename SUBNET>
 using block = BN<con<N,3,3,1,1,relu<BN<con<N,3,3,stride,stride,SUBNET>>>>>;
 
 template <int N, typename SUBNET> using ares = relu<residual<block,N,affine,SUBNET>>;
@@ -130,7 +130,7 @@ int main(int argc, char** argv) try
     // already does this. But if we instead want to get the probability of each
     // class as output we need to replace the last layer of the network with a
     // softmax layer, which we do as follows:
     softmax<anet_type::subnet_type> snet;
     snet.subnet() = net.subnet();
 
     dlib::array<matrix<rgb_pixel>> images;
@@ -150,17 +150,24 @@ int main(int argc, char** argv) try
         // p(i) == the probability the image contains object of class i.
         matrix<float,1,1000> p = sum_rows(mat(snet(images.begin(), images.end())))/num_crops;
 
-        win.set_image(img);
+        // win.set_image(img);
+        bool keep = false;
         // Print the 5 most probable labels
         for (int k = 0; k < 5; ++k)
         {
             unsigned long predicted_label = index_of_max(p);
-            cout << p(predicted_label) << ": " << labels[predicted_label] << endl;
+            // cout << p(predicted_label) << ": " << labels[predicted_label] << endl;
             p(predicted_label) = 0;
+            if (labels[predicted_label] == "racket" or labels[predicted_label] == "tennis_ball")
+                keep = true;
         }
-        cout << "Hit enter to process the next image";
-        cin.get();
+        if (not keep)
+        {
+            std::remove(argv[i]);
+            cout << "removing " << argv[i] << '\n';
+        }
+        // cout << "Hit enter to process the next image";
+        // cin.get();
     }
 
 }
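Aside: the edit above repurposes the classifier as a crude dataset filter. Instead of displaying each image and waiting for a keypress, it deletes any file whose top-5 predictions include neither "racket" nor "tennis_ball". Note that `std::remove` here is the `<cstdio>` function that deletes a file by path (it is handed `argv[i]`), not the `<algorithm>` overload, so `<cstdio>` must be included. A standalone sketch of the keep/delete decision, with a hypothetical helper name:

    #include <cstdio>   // std::remove(const char*): delete a file by path
    #include <string>
    #include <vector>

    // Hypothetical helper mirroring the added loop body above: keep an image
    // only if one of its top-5 predicted labels is "racket" or "tennis_ball".
    bool keep_image(const std::vector<std::string>& top5_labels)
    {
        for (const auto& label : top5_labels)
            if (label == "racket" || label == "tennis_ball")
                return true;
        return false;
    }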
examples/dnn_introduction_ex.cpp

@@ -9,7 +9,7 @@
     The specific network we will run is from the paper
         LeCun, Yann, et al. "Gradient-based learning applied to document recognition."
         Proceedings of the IEEE 86.11 (1998): 2278-2324.
     except that we replace the sigmoid non-linearities with rectified linear units.
 
     These tools will use CUDA and cuDNN to drastically accelerate network
     training and testing. CMake should automatically find them if they are
@@ -24,10 +24,10 @@
 
 using namespace std;
 using namespace dlib;
 
 int main(int argc, char** argv) try
 {
     // This example is going to run on the MNIST dataset.
     if (argc != 2)
     {
         cout << "This example needs the MNIST dataset to run!" << endl;
@@ -50,8 +50,8 @@ int main(int argc, char** argv) try
 
     // Now let's define the LeNet. Broadly speaking, there are 3 parts to a network
     // definition. The loss layer, a bunch of computational layers, and then an input
     // layer. You can see these components in the network definition below.
     //
     // The input layer here says the network expects to be given matrix<unsigned char>
     // objects as input. In general, you can use any dlib image or matrix type here, or
     // even define your own types by creating custom input layers.
@@ -59,29 +59,29 @@ int main(int argc, char** argv) try
     // Then the middle layers define the computation the network will do to transform the
     // input into whatever we want. Here we run the image through multiple convolutions,
     // ReLU units, max pooling operations, and then finally a fully connected layer that
     // converts the whole thing into just 10 numbers.
     //
     // Finally, the loss layer defines the relationship between the network outputs, our 10
     // numbers, and the labels in our dataset. Since we selected loss_multiclass_log it
     // means we want to do multiclass classification with our network. Moreover, the
     // number of network outputs (i.e. 10) is the number of possible labels. Whichever
     // network output is largest is the predicted label. So for example, if the first
     // network output is largest then the predicted digit is 0, if the last network output
     // is largest then the predicted digit is 9.
     using net_type = loss_multiclass_log<
                                 fc<10,
-                                relu<fc<84,
+                                elu<fc<84,
-                                relu<fc<120,
+                                elu<fc<120,
-                                max_pool<2,2,2,2,relu<con<16,5,5,1,1,
+                                max_pool<2,2,2,2,elu<con<16,5,5,1,1,
-                                max_pool<2,2,2,2,relu<con<6,5,5,1,1,
+                                max_pool<2,2,2,2,elu<con<6,5,5,1,1,
                                 input<matrix<unsigned char>>
                                 >>>>>>>>>>>>;
     // This net_type defines the entire network architecture. For example, the block
     // relu<fc<84,SUBNET>> means we take the output from the subnetwork, pass it through a
     // fully connected layer with 84 outputs, then apply ReLU. Similarly, a block of
     // max_pool<2,2,2,2,relu<con<16,5,5,1,1,SUBNET>>> means we apply 16 convolutions with a
     // 5x5 filter size and 1x1 stride to the output of a subnetwork, then apply ReLU, then
     // perform max pooling with a 2x2 window and 2x2 stride.
 
 
 
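For reference, the swap above replaces each ReLU activation with an ELU (dlib's `elu` layer). A standalone reference implementation of the two activations, assuming the conventional alpha = 1; this is illustration, not code from the commit:

    #include <cmath>

    // ELU is smooth at zero and saturates to -alpha for large negative inputs,
    // which keeps mean activations closer to zero than ReLU's hard cutoff.
    float elu(float x, float alpha = 1.0f)
    {
        return x > 0.0f ? x : alpha * (std::exp(x) - 1.0f);
    }

    float relu(float x)
    {
        return x > 0.0f ? x : 0.0f;
    }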
@@ -105,7 +105,7 @@ int main(int argc, char** argv) try
     // learning rate until the loss stops decreasing. Then it reduces the learning rate by
     // a factor of 10 and continues running until the loss stops decreasing again. It will
     // keep doing this until the learning rate has dropped below the min learning rate
     // defined above or the maximum number of epochs has been executed (defaulted to 10000).
     trainer.train(training_images, training_labels);
 
     // At this point our net object should have learned how to classify MNIST images. But
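A minimal sketch of the learning-rate schedule that comment describes; the real logic lives inside dlib's dnn_trainer, the plateau test below is a hypothetical stand-in, and the two rate values are only assumed to match what the example configures earlier:

    void train_until_loss_plateaus(double learning_rate);  // stand-in, not a real dlib call

    void run_schedule()
    {
        double learning_rate = 0.01;            // assumed initial rate set on the trainer
        const double min_learning_rate = 1e-5;  // assumed minimum rate set on the trainer
        while (learning_rate >= min_learning_rate)
        {
            train_until_loss_plateaus(learning_rate);  // the "loss stopped decreasing" test
            learning_rate /= 10;                       // then shrink by 10x and continue
        }
    }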
@@ -134,7 +134,7 @@ int main(int argc, char** argv) try
             ++num_right;
         else
             ++num_wrong;
 
     }
     cout << "training num_right: " << num_right << endl;
     cout << "training num_wrong: " << num_wrong << endl;
@@ -151,7 +151,7 @@ int main(int argc, char** argv) try
             ++num_right;
         else
             ++num_wrong;
 
     }
     cout << "testing num_right: " << num_right << endl;
     cout << "testing num_wrong: " << num_wrong << endl;
examples/dnn_mmod_train_find_cars_ex.cpp

@@ -1,6 +1,6 @@
 // The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
 /*
     This example shows how to train a CNN based object detector using dlib's
     loss_mmod loss layer. This loss layer implements the Max-Margin Object
     Detection loss as described in the paper:
         Max-Margin Object Detection by Davis E. King (http://arxiv.org/abs/1502.00046).
@@ -13,12 +13,12 @@
     example. So you should read dnn_introduction_ex.cpp and dnn_introduction2_ex.cpp
     before reading this example program. You should also read the introductory DNN+MMOD
     example dnn_mmod_ex.cpp as well before proceeding.
 
 
     This example is essentially a more complex version of dnn_mmod_ex.cpp. In it we train
     a detector that finds the rear ends of motor vehicles. I will also discuss some
     aspects of data preparation useful when training this kind of detector.
 
 */
 
 
@@ -35,7 +35,11 @@ template <long num_filters, typename SUBNET> using con5d = con<num_filters,5,5,2,2,SUBNET>;
 template <long num_filters, typename SUBNET> using con5 = con<num_filters,5,5,1,1,SUBNET>;
 template <typename SUBNET> using downsampler = relu<bn_con<con5d<32, relu<bn_con<con5d<32, relu<bn_con<con5d<16,SUBNET>>>>>>>>>;
 template <typename SUBNET> using rcon5 = relu<bn_con<con5<55,SUBNET>>>;
-using net_type = loss_mmod<con<1,9,9,1,1,rcon5<rcon5<rcon5<downsampler<input_rgb_image_pyramid<pyramid_down<6>>>>>>>>;
+// using net_type = loss_mmod<con<1,9,9,1,1,rcon5<rcon5<rcon5<downsampler<input_rgb_image_pyramid<pyramid_down<6>>>>>>>>;
+// scale1<sig<con<55,1,1,1,1,avg_pool_everything<tag1<
+using net_type = loss_mmod<con<1,9,9,1,1,
+                 scale_prev2<skip1<tag2<sig<con<55,1,1,1,1,avg_pool_everything<tag1<
+                 rcon5<rcon5<rcon5<downsampler<input_rgb_image_pyramid<pyramid_down<6>>>>>>>>>>>>>>>;
 
 
 // ----------------------------------------------------------------------------------------
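The replacement net_type above reads as a squeeze-and-excitation style global gate attached to the original backbone: tag1 marks the backbone output, the side branch pools it down to one value per channel (avg_pool_everything), squeezes it through a 1x1 con and a sigmoid, and skip1/scale_prev2 multiply that gate back onto the tagged feature maps. An illustrative generic alias built from the same dlib layers; the alias name and factoring are mine, not from the commit:

    #include <dlib/dnn.h>

    // Illustrative only: a reusable "global gate" written with the same layers
    // the new net_type composes inline. N must match the channel count of
    // SUBNET's output for the channel-wise scaling to line up (55 above).
    template <long N, typename SUBNET>
    using global_gate = dlib::scale_prev2<
                        dlib::skip1<
                        dlib::tag2<
                        dlib::sig<
                        dlib::con<N,1,1,1,1,
                        dlib::avg_pool_everything<
                        dlib::tag1<SUBNET>>>>>>>;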
@@ -107,21 +111,21 @@ int main(int argc, char** argv) try
     // image is implicitly assumed to be not a car, and the algorithm will use it as
     // negative training data. So every car must be labeled, either with a normal
     // rectangle or an "ignore" rectangle that tells MMOD to simply ignore it (i.e. neither
     // treat it as a thing to detect nor as negative training data).
     //
     // In our present case, many images contain very tiny cars in the distance, ones that
     // are essentially just dark smudges. It's not reasonable to expect the CNN
     // architecture we defined to detect such vehicles. However, I erred on the side of
     // having more complete annotations when creating the dataset. So when I labeled these
     // images I labeled many of these really difficult cases as vehicles to detect.
     //
     // So the first thing we are going to do is clean up our dataset a little bit. In
     // particular, we are going to mark boxes smaller than 35*35 pixels as ignore since
     // only really small and blurry cars appear at those sizes. We will also mark boxes
     // that are heavily overlapped by another box as ignore. We do this because we want to
     // allow for stronger non-maximum suppression logic in the learned detector, since that
     // will help make it easier to learn a good detector.
     //
     // To explain this non-max suppression idea further it's important to understand how
     // the detector works. Essentially, sliding window detectors scan all image locations
     // and ask "is there a car here?". If there really is a car in a specific location in
@@ -143,7 +147,7 @@ int main(int argc, char** argv) try
     // "close to" measure will be configured to allow detections to really overlap a whole
     // lot. On the other hand, if your dataset didn't contain any overlapped boxes at all,
     // then the non-max suppression logic would be configured to filter out any boxes that
     // overlapped at all, and thus would be performing a much stronger non-max suppression.
     //
     // Why does this matter? Well, remember that we want to avoid duplicate detections.
     // If non-max suppression just kills everything in a really wide area around a car then
@@ -183,8 +187,8 @@ int main(int argc, char** argv) try
             // really extreme aspect ratios. However, some datasets do, often because of
             // bad labeling. So it's a good idea to check for that and either eliminate
             // those boxes or set them to ignore. Although, this depends on your
             // application.
             //
             // For instance, if your dataset has boxes with an aspect ratio
             // of 10 then you should think about what that means for the network
             // architecture. Does the receptive field even cover the entirety of the box
@@ -196,13 +200,13 @@ int main(int argc, char** argv) try
             // errors, but are annotated in a sloppy and inconsistent way. Fixing those
             // errors and inconsistencies can often greatly improve models trained from
             // such data. It's almost always worth the time to try and improve your
             // training dataset.
             //
             // In any case, my point is that there are other types of dataset cleaning you
             // could put here. What exactly you need depends on your application. But you
             // should carefully consider it and not take your dataset as a given. The work
             // of creating a good detector is largely about creating a high quality
             // training dataset.
         }
     }
 
@@ -226,7 +230,7 @@ int main(int argc, char** argv) try
     // each of the sliding windows needs to be so as to be able to detect all the vehicles.
     // Since our dataset has basically these 3 different aspect ratios, it will decide to
     // use 3 different sliding windows. This means the final con layer in the network will
     // have 3 filters, one for each of these aspect ratios.
     //
     // Another thing to consider when setting the sliding window size is the "stride" of
     // your network. The network we defined above downsamples the image by a factor of 8x
@@ -237,7 +241,7 @@ int main(int argc, char** argv) try
     // pixels at a time when scanning. This is obviously a problem since 75% of the image
     // won't even be visited by the sliding window. So you need to set the window size to
     // be big enough relative to the stride of your network. In our case, the windows are
     // at least 30 pixels in length, so being moved by 8 pixel steps is fine.
     mmod_options options(boxes_train, 70, 30);
 
 
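A quick compile-time sanity check of the stride arithmetic in that comment (illustrative only):

    // The downsampler above stacks three stride-2 con5d layers, so the sliding
    // window advances in 2*2*2 = 8 pixel steps. The smallest window side passed
    // to mmod_options is 30 pixels, several strides wide, so no part of the
    // image is skipped between consecutive window positions.
    constexpr int network_stride  = 2 * 2 * 2;
    constexpr int min_window_side = 30;   // from mmod_options(boxes_train, 70, 30)
    static_assert(min_window_side > network_stride, "window must out-span the stride");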
@@ -247,22 +251,22 @@ int main(int argc, char** argv) try
     // also contained a lot of ignore boxes. Some of them are large boxes that encompass
     // large parts of an image and the intention is to have everything inside those boxes
     // be ignored. Therefore, we need to tell the MMOD algorithm to do that, which we do
     // by setting options.overlaps_ignore appropriately.
     //
     // But first, we need to understand exactly what this option does. The MMOD loss
     // is essentially counting the number of false alarms + missed detections produced by
     // the detector for each image. During training, the code is running the detector on
     // each image in a mini-batch and looking at its output and counting the number of
     // mistakes. The optimizer tries to find parameter settings that minimize the number
     // of detector mistakes.
     //
     // This overlaps_ignore option allows you to tell the loss that some outputs from the
     // detector should be totally ignored, as if they never happened. In particular, if a
     // detection overlaps a box in the training data with ignore==true then that detection
     // is ignored. This overlap is determined by calling
     // options.overlaps_ignore(the_detection, the_ignored_training_box). If it returns
     // true then that detection is ignored.
     //
     // You should read the documentation for test_box_overlap, the class type for
     // overlaps_ignore for full details. However, the gist is that the default behavior is
     // to only consider boxes as overlapping if their intersection over union is > 0.5.
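The default overlap rule the comment cites, intersection over union greater than 0.5, is easy to state directly. A sketch using dlib's rectangle type, not the actual test_box_overlap implementation (which also supports a coverage threshold):

    #include <dlib/geometry.h>

    // Sketch of "intersection over union > 0.5" for two boxes.
    bool iou_overlaps(const dlib::rectangle& a, const dlib::rectangle& b)
    {
        const double inter = a.intersect(b).area();
        const double uni   = a.area() + b.area() - inter;
        return uni > 0 && inter / uni > 0.5;
    }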
@@ -275,7 +279,7 @@ int main(int argc, char** argv) try
 
     net_type net(options);
 
     // The final layer of the network must be a con layer that contains
     // options.detector_windows.size() filters. This is because these final filters are
     // what perform the final "sliding window" detection in the network. For the dlib
     // vehicle dataset, there will be 3 sliding window detectors, so we will be setting
@@ -306,13 +310,13 @@ int main(int argc, char** argv) try
 
 
     std::vector<matrix<rgb_pixel>> mini_batch_samples;
     std::vector<std::vector<mmod_rect>> mini_batch_labels;
     random_cropper cropper;
     cropper.set_seed(time(0));
     cropper.set_chip_dims(350, 350);
     // Usually you want to give the cropper whatever min sizes you passed to the
     // mmod_options constructor, or very slightly smaller sizes, which is what we do here.
     cropper.set_min_object_size(69,28);
     cropper.set_max_rotation_degrees(2);
     dlib::rand rnd;
 
@@ -320,10 +324,10 @@ int main(int argc, char** argv) try
     cout << trainer << cropper << endl;
 
     int cnt = 1;
     // Run the trainer until the learning rate gets small.
     while(trainer.get_learning_rate() >= 1e-4)
     {
         // Every 30 mini-batches we do a testing mini-batch.
         if (cnt%30 != 0 || images_test.size() == 0)
         {
             cropper(87, images_train, boxes_train, mini_batch_samples, mini_batch_labels);
@@ -375,7 +379,7 @@ int main(int argc, char** argv) try
     cout << "\nsync_filename: " << sync_filename << endl;
     cout << "num training images: "<< images_train.size() << endl;
     cout << "training results: " << test_object_detection_function(net, images_train, boxes_train, test_box_overlap(), 0, options.overlaps_ignore);
     // Upsampling the data will allow the detector to find smaller cars. Recall that
     // we configured it to use a sliding window nominally 70 pixels in size. So upsampling
     // here will let it find things nominally 35 pixels in size. Although we include a
     // limit of 1800*1800 here which means "don't upsample an image if it's already larger
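The upsampling step this comment refers to presumably calls dlib's upsample_image_dataset; the invocation below is my reconstruction of what follows in the full example, not text from this diff:

    // Double every image (and its boxes) once, so a 70 pixel detector window
    // can hit objects nominally half that size, but leave images already
    // bigger than 1800*1800 pixels alone to bound memory use.
    upsample_image_dataset<pyramid_down<2>>(images_train, boxes_train, 1800*1800);
    upsample_image_dataset<pyramid_down<2>>(images_test,  boxes_test,  1800*1800);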
@@ -405,11 +409,11 @@ int main(int argc, char** argv) try
 
     Also, the training and testing accuracies were:
         num training images: 2217
         training results: 0.990738 0.736431 0.736073
         training upsampled results: 0.986837 0.937694 0.936912
         num testing images: 135
         testing results: 0.988827 0.471372 0.470806
         testing upsampled results: 0.987879 0.651132 0.650399
 */
 
     return 0;