merged

2024-11-01 10:14:53 +08:00 · 2016-04-25 08:27:00 -04:00 · 2016-04-25 08:27:00 -04:00 · 7c29fbb1fd
commit 7c29fbb1fd
parent 82295c105a 16583730d7
10 changed files with 131 additions and 9 deletions
--- a/dlib/dnn/cublas_dlibapi.cpp
+++ b/dlib/dnn/cublas_dlibapi.cpp
@ -25,7 +25,7 @@ static const char* cublas_get_error_string(cublasStatus_t s)

 // Check the return value of a call to the cuBLAS runtime for an error condition.
 #define CHECK_CUBLAS(call)                                                      \
-{                                                                              \
+do{                                                                              \
    const cublasStatus_t error = call;                                         \
    if (error != CUBLAS_STATUS_SUCCESS)                                        \
    {                                                                          \
@ -34,7 +34,7 @@ static const char* cublas_get_error_string(cublasStatus_t s)
        sout << "code: " << error << ", reason: " << cublas_get_error_string(error);\
        throw dlib::cublas_error(sout.str());                            \
    }                                                                          \
-}
+}while(false)

 namespace dlib
 {
--- a/dlib/dnn/cuda_utils.h
+++ b/dlib/dnn/cuda_utils.h
@ -15,7 +15,7 @@

 // Check the return value of a call to the CUDA runtime for an error condition.
 #define CHECK_CUDA(call)                                                       \
-{                                                                              \
+do{                                                                              \
    const cudaError_t error = call;                                            \
    if (error != cudaSuccess)                                                  \
    {                                                                          \
@ -24,7 +24,7 @@
        sout << "code: " << error << ", reason: " << cudaGetErrorString(error);\
        throw dlib::cuda_error(sout.str());                                          \
    }                                                                          \
-}
+}while(false)

 // ----------------------------------------------------------------------------------------

--- a/dlib/dnn/cudnn_dlibapi.cpp
+++ b/dlib/dnn/cudnn_dlibapi.cpp
@ -36,7 +36,7 @@ static const char* cudnn_get_error_string(cudnnStatus_t s)

 // Check the return value of a call to the cuDNN runtime for an error condition.
 #define CHECK_CUDNN(call)                                                      \
-{                                                                              \
+do{                                                                              \
    const cudnnStatus_t error = call;                                         \
    if (error != CUDNN_STATUS_SUCCESS)                                        \
    {                                                                          \
@ -45,7 +45,7 @@ static const char* cudnn_get_error_string(cudnnStatus_t s)
        sout << "code: " << error << ", reason: " << cudnn_get_error_string(error);\
        throw dlib::cudnn_error(sout.str());                            \
    }                                                                          \
-}
+}while(false)


 namespace dlib
--- a/dlib/dnn/curand_dlibapi.cpp
+++ b/dlib/dnn/curand_dlibapi.cpp
@ -24,7 +24,7 @@ static const char* curand_get_error_string(curandStatus_t s)

 // Check the return value of a call to the cuRAND runtime for an error condition.
 #define CHECK_CURAND(call)                                                      \
-{                                                                              \
+do{                                                                              \
    const curandStatus_t error = call;                                         \
    if (error != CURAND_STATUS_SUCCESS)                                        \
    {                                                                          \
@ -33,7 +33,7 @@ static const char* curand_get_error_string(curandStatus_t s)
        sout << "code: " << error << ", reason: " << curand_get_error_string(error);\
        throw dlib::curand_error(sout.str());                            \
    }                                                                          \
-}
+}while(false)

 namespace dlib
 {
--- a/dlib/dnn/gpu_data.cpp
+++ b/dlib/dnn/gpu_data.cpp
@ -10,11 +10,34 @@
 #include "gpu_data.h"
 #include <iostream>
 #include "cuda_utils.h"
+#include <cstring>


 namespace dlib
 {

+// ----------------------------------------------------------------------------------------
+
+    // Copies the contents of src into dest.  The cudaMemcpy direction is picked from
+    // what device_ready() reports for each object, so a copy between two objects
+    // whose device data is ready stays entirely on the device.
+    //
+    // requires
+    //     - dest.size() == src.size()  (checked with DLIB_CASSERT)
+    // throws
+    //     - dlib::cuda_error (raised by CHECK_CUDA) if the cudaMemcpy call fails.
+    void memcpy (
+        gpu_data& dest, 
+        const gpu_data& src
+    )
+    {
+        DLIB_CASSERT(dest.size() == src.size(), "");
+
+        // Empty buffers need no copy.  A self-copy is also a no-op and has to be
+        // skipped explicitly: otherwise the host/host branch below would call
+        // dest.host_write_only() and src.host() on the same object inside a single
+        // argument list, where the order of evaluation is unspecified.
+        // NOTE(review): host_write_only() presumably does not preserve the current
+        // contents -- confirm against gpu_data_abstract.h.
+        if (src.size() == 0 || &dest == &src)
+            return;
+
+        // copy the memory efficiently based on which copy is current in each object.
+        if (dest.device_ready() && src.device_ready())
+            CHECK_CUDA(cudaMemcpy(dest.device(), src.device(),          src.size()*sizeof(float), cudaMemcpyDeviceToDevice));
+        else if (!dest.device_ready() && src.device_ready())
+            CHECK_CUDA(cudaMemcpy(dest.host_write_only(), src.device(), src.size()*sizeof(float), cudaMemcpyDeviceToHost));
+        else if (dest.device_ready() && !src.device_ready())
+            CHECK_CUDA(cudaMemcpy(dest.device(), src.host(),            src.size()*sizeof(float), cudaMemcpyHostToDevice));
+        else 
+            CHECK_CUDA(cudaMemcpy(dest.host_write_only(), src.host(),   src.size()*sizeof(float), cudaMemcpyHostToHost));
+    }
+
 // ----------------------------------------------------------------------------------------

    void gpu_data::
--- a/dlib/dnn/gpu_data.h
+++ b/dlib/dnn/gpu_data.h
@ -5,6 +5,7 @@

 #include "gpu_data_abstract.h"
 #include <memory>
+#include <cstring>
 #include "cuda_errors.h"
 #include "../serialize.h"

@ -202,6 +203,18 @@ namespace dlib
            deserialize(data[i], in);
    }

+#ifdef DLIB_USE_CUDA
+    void memcpy (gpu_data& dest, const gpu_data& src);
+#else
+    // Host-only version of memcpy() used when dlib is built without CUDA: copies
+    // src.size() floats from src's host buffer into dest's host buffer.
+    //
+    // requires
+    //     - dest.size() == src.size()  (checked with DLIB_CASSERT)
+    inline void memcpy (gpu_data& dest, const gpu_data& src)
+    {
+        DLIB_CASSERT(dest.size() == src.size(), "");
+        // Nothing to copy for empty buffers.  A self-copy must also return early:
+        // it would hand std::memcpy identical source and destination ranges, and
+        // std::memcpy is undefined for overlapping objects.
+        if (src.size() == 0 || &dest == &src)
+            return;
+        std::memcpy(dest.host_write_only(), src.host(), sizeof(float)*src.size());
+    }
+#endif
+
 // ----------------------------------------------------------------------------------------

 }
--- a/dlib/dnn/gpu_data_abstract.h
+++ b/dlib/dnn/gpu_data_abstract.h
@ -208,6 +208,21 @@ namespace dlib
        provides serialization support
    !*/

+    void memcpy (
+        gpu_data& dest, 
+        const gpu_data& src
+    );
+    /*!
+        requires
+            - dest.size() == src.size()
+        ensures
+            - Copies the data in src to dest.  If the device data is current (i.e.
+              device_ready()==true) on both src and dest then the copy will happen entirely
+              on the device side.
+            - It doesn't matter what GPU device is selected by cudaSetDevice().  You can
+              always copy gpu_data objects to and from each other regardless.
+    !*/
+
 // ----------------------------------------------------------------------------------------

 }
--- a/dlib/dnn/tensor.h
+++ b/dlib/dnn/tensor.h
@ -55,7 +55,7 @@ namespace dlib
        tensor& operator= (float val)
        {
 #ifdef DLIB_USE_CUDA
-            // If you are using CUDA then presumably you will be mostly using tensor's on
+            // If you are using CUDA then presumably you will be mostly using tensors on
            // the GPU.  So unless you seem to be actively working with the host side's
            // data then we do this initialization on the device side since this avoids a
            // host to device transfer that would likely immediately follow.
@ -158,6 +158,15 @@ namespace dlib
        ) const = 0; 
 #endif

+        // Copies src's contents into dest by forwarding the objects returned by
+        // data() on each tensor to the memcpy() overload that accepts them.
+        friend void memcpy (
+            tensor& dest, 
+            const tensor& src
+        )
+        {
+            auto& dest_storage = dest.data();
+            const auto& src_storage = src.data();
+            memcpy(dest_storage, src_storage);
+        }
+
+
    protected:

        friend class alias_tensor;
--- a/dlib/dnn/tensor_abstract.h
+++ b/dlib/dnn/tensor_abstract.h
@ -298,6 +298,22 @@ namespace dlib
        tensor& operator=(tensor&& item); 
    };

+// ----------------------------------------------------------------------------------------
+
+    void memcpy (
+        tensor& dest, 
+        const tensor& src
+    );
+    /*!
+        requires
+            - dest.size() == src.size()
+        ensures
+            - Copies the data in src to dest.  If the device data is current on both src
+              and dest then the copy will happen entirely on the device side.
+            - It doesn't matter what GPU device is selected by cudaSetDevice().  You can
+              always copy tensor objects to and from each other regardless.
+    !*/
+
 // ----------------------------------------------------------------------------------------

    const matrix_exp mat (
--- a/dlib/test/dnn.cpp
+++ b/dlib/test/dnn.cpp
@ -439,6 +439,52 @@ namespace
            DLIB_TEST(max(abs(mat(dest)-pointwise_multiply(AA,mat(B)))) < 1e-6); 
        }

+        {
+            // Checks memcpy(tensor&, const tensor&).  A is the destination and B is
+            // the source; truth holds the expected values and is only read through
+            // mat().  Before each copy the host()/device() accessors are called on A
+            // and B, presumably to change which side holds the current data, so that
+            // the different copy directions get exercised.
+            //
+            // The copies read from B rather than truth: copying from truth would make
+            // the accessor calls on B irrelevant to the copy being tested.
+            resizable_tensor A, B, truth;
+            A.set_size(2,3,4,5);
+            truth.copy_size(A);
+            B.copy_size(A);
+
+            A = 4;
+            B = 1;
+            truth = 1;
+            // B and truth must match for the comparisons against truth below to be
+            // meaningful.
+            DLIB_TEST(max(abs(mat(B)- mat(truth))) < 1e-5);
+            memcpy(A, B);
+            DLIB_TEST(max(abs(mat(A)- mat(truth))) < 1e-5);
+
+            A = 4;
+            A.host();
+            B.host();
+            memcpy(A, B);
+            DLIB_TEST(max(abs(mat(A)- mat(truth))) < 1e-5);
+
+#ifdef DLIB_USE_CUDA
+            A = 4;
+            A.device();
+            B.host();
+            memcpy(A, B);
+            DLIB_TEST(max(abs(mat(A)- mat(truth))) < 1e-5);
+
+            A = 4;
+            A.device();
+            B.device();
+            memcpy(A, B);
+            DLIB_TEST(max(abs(mat(A)- mat(truth))) < 1e-5);
+
+            A = 4;
+            A.host();
+            B.device();
+            memcpy(A, B);
+            DLIB_TEST(max(abs(mat(A)- mat(truth))) < 1e-5);
+
+            A = 4;
+            A.host_write_only();
+            B.device();
+            memcpy(A, B);
+            DLIB_TEST(max(abs(mat(A)- mat(truth))) < 1e-5);
+#endif
+        }
+
        {
            resizable_tensor A, B;
            A.set_size(2,3,4,5);