From 45dd580bf4d2f267cd1d5d16ca6f556288eda3e1 Mon Sep 17 00:00:00 2001 From: Davis King Date: Wed, 2 Nov 2016 10:06:36 -0400 Subject: [PATCH] Wrote replacements for set_tensor() and scale_tensor() since the previous versions were calling into cuDNN and the cuDNN functions for doing this are horrifically slow (well over 100x slower than they should be), which is surprising since these functions are so trivial. --- dlib/dnn/cuda_dlib.cu | 32 ++++++++++++++++++++++++++++++++ dlib/dnn/cuda_dlib.h | 12 ++++++++++++ dlib/dnn/cudnn_dlibapi.cpp | 26 -------------------------- dlib/dnn/cudnn_dlibapi.h | 20 -------------------- dlib/dnn/tensor.h | 16 ++++++++++++++++ dlib/test/dnn.cpp | 8 ++++++++ 6 files changed, 68 insertions(+), 46 deletions(-) diff --git a/dlib/dnn/cuda_dlib.cu b/dlib/dnn/cuda_dlib.cu index e9f546bb6..36d7edc71 100644 --- a/dlib/dnn/cuda_dlib.cu +++ b/dlib/dnn/cuda_dlib.cu @@ -864,6 +864,38 @@ namespace dlib launch_kernel(_add_bias_gradient,max_jobs(grad.size()),grad.device(), gradient_input.device(), grad.size(), gradient_input.size()); } + // ---------------------------------------------------------------------------------------- + + __global__ void _set_tensor(float* out, size_t n, const float val) + { + for (auto i : grid_stride_range(0, n)) + out[i] = val; + } + + void set_tensor ( + tensor& t, + float value + ) + { + launch_kernel(_set_tensor, max_jobs(t.size()), t.device(), t.size(), value); + } + + // ---------------------------------------------------------------------------------------- + + __global__ void _scale_tensor(float* out, size_t n, const float val) + { + for (auto i : grid_stride_range(0, n)) + out[i] *= val; + } + + void scale_tensor ( + tensor& t, + float value + ) + { + launch_kernel(_scale_tensor, max_jobs(t.size()), t.device(), t.size(), value); + } + // ----------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------- 
diff --git a/dlib/dnn/cuda_dlib.h b/dlib/dnn/cuda_dlib.h index dd0690417..75c418d6d 100644 --- a/dlib/dnn/cuda_dlib.h +++ b/dlib/dnn/cuda_dlib.h @@ -141,6 +141,18 @@ namespace dlib const tensor& v2 ); + // ------------------------------------------------------------------------------------ + + void set_tensor ( + tensor& t, + float value + ); + + void scale_tensor ( + tensor& t, + float value + ); + // ------------------------------------------------------------------------------------ void multiply ( diff --git a/dlib/dnn/cudnn_dlibapi.cpp b/dlib/dnn/cudnn_dlibapi.cpp index 151d9c2ce..93d542500 100644 --- a/dlib/dnn/cudnn_dlibapi.cpp +++ b/dlib/dnn/cudnn_dlibapi.cpp @@ -289,32 +289,6 @@ namespace dlib dest.device())); } - void set_tensor ( - tensor& t, - float value - ) - { - if (t.size() == 0) - return; - CHECK_CUDNN(cudnnSetTensor(context(), - descriptor(t), - t.device_write_only(), - &value)); - } - - void scale_tensor ( - tensor& t, - float value - ) - { - if (t.size() == 0) - return; - CHECK_CUDNN(cudnnScaleTensor(context(), - descriptor(t), - t.device(), - &value)); - } - void assign_conv_bias_gradient ( tensor& grad, const tensor& gradient_input diff --git a/dlib/dnn/cudnn_dlibapi.h b/dlib/dnn/cudnn_dlibapi.h index 3e3aa3921..a4769c344 100644 --- a/dlib/dnn/cudnn_dlibapi.h +++ b/dlib/dnn/cudnn_dlibapi.h @@ -89,26 +89,6 @@ namespace dlib add into the dest tensor. !*/ - void set_tensor ( - tensor& t, - float value - ); - /*! - ensures - - sets all elements in t equal to value. - !*/ - - void scale_tensor ( - tensor& t, - float value - ); - /*! - ensures - - scales all elements of t by the given value. I.e. 
for all elements E in - t, this function performs: - - E = E*value - !*/ - // ------------------------------------------------------------------------------------ void assign_conv_bias_gradient ( diff --git a/dlib/dnn/tensor.h b/dlib/dnn/tensor.h index 46d35c601..5b842ba98 100644 --- a/dlib/dnn/tensor.h +++ b/dlib/dnn/tensor.h @@ -14,6 +14,22 @@ namespace dlib { + +// ---------------------------------------------------------------------------------------- + + namespace cuda + { + void set_tensor ( + tensor& t, + float value + ); + + void scale_tensor ( + tensor& t, + float value + ); + } + // ---------------------------------------------------------------------------------------- class tensor diff --git a/dlib/test/dnn.cpp b/dlib/test/dnn.cpp index 9161b6631..e4dda2599 100644 --- a/dlib/test/dnn.cpp +++ b/dlib/test/dnn.cpp @@ -339,6 +339,14 @@ namespace dlog << LINFO << mat(dest); matrix truth1(3,4), truth2(3,4); + truth1 = 2; + DLIB_TEST(max(abs(truth1-mat(src))) < 1e-5); + src *= 2; + truth1 = 4; + DLIB_TEST(max(abs(truth1-mat(src))) < 1e-5); + src = 2; + + truth1 = 7; truth2 = 7, 10, 7, 7, 7, 10, 7, 7,