Added a workaround for a bug in cuDNN 5.1 which causes cudnnGetConvolutionBackwardFilterAlgorithm() to pick invalid algorithms, resulting in cuDNN not working correctly.
2024-11-01 10:14:53 +08:00 · 2016-08-11 22:12:40 -04:00 · 2016-08-11 22:12:40 -04:00 · 02b209316a
commit 02b209316a
parent fde662b31e
2 changed files with 26 additions and 1 deletion
--- a/dlib/dnn/cudnn_dlibapi.cpp
+++ b/dlib/dnn/cudnn_dlibapi.cpp
@ -917,7 +917,21 @@ namespace dlib
                        dnn_prefer_fastest_algorithms()?CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST:CUDNN_CONVOLUTION_BWD_FILTER_NO_WORKSPACE,
                        std::numeric_limits<size_t>::max(),
                        &backward_filters_best_algo));
+                // cuDNN 5.1 has a bug that causes
+                // cudnnGetConvolutionBackwardFilterAlgorithm() to pick the winograd
+                // algorithm even for cases where cuDNN doesn't support it, leading to
+                // incorrect outputs.  So here we check if we are in a case where winograd
+                // isn't supported and manually overrule
+                // cudnnGetConvolutionBackwardFilterAlgorithm() by picking a safe
+                // algorithm.
+                if (dnn_prefer_fastest_algorithms() && 
+                    !(stride_x == 1 && stride_y == 1 && ((filters_nr==3&&filters_nc==3) || (filters_nr==5&&filters_nc==5)))
+                    )
+                {
+                    backward_filters_best_algo = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0;
+                }
                backward_filters_algo = backward_filters_best_algo;
+
                CHECK_CUDNN(cudnnGetConvolutionBackwardFilterWorkspaceSize( 
                        context(),
                        descriptor(data),
--- a/dlib/test/dnn.cpp
+++ b/dlib/test/dnn.cpp
@ -1592,7 +1592,7 @@ namespace
                "Runs tests on the deep neural network tools.")
        {}

-        void perform_test (
+        void run_tests (
        )
        {
            // make the tests repeatable
@ -1649,6 +1649,17 @@ namespace
            test_copy_tensor_cpu();
            test_concat();
        }
+
+        void perform_test()  // Test entry point: runs the whole run_tests() suite twice, once per algorithm preference.
+        {
+            dlog << LINFO << "NOW RUNNING TESTS WITH set_dnn_prefer_fastest_algorithms()";
+            set_dnn_prefer_fastest_algorithms();  // first pass: "prefer fastest" setting
+            run_tests();
+
+            dlog << LINFO << "NOW RUNNING TESTS WITH set_dnn_prefer_smallest_algorithms()";
+            set_dnn_prefer_smallest_algorithms();  // second pass: "prefer smallest" setting
+            run_tests();
+        }
    } a;
 }