Davis King 2016-04-25 08:27:00 -04:00
commit 7c29fbb1fd
10 changed files with 131 additions and 9 deletions

View File

@@ -25,7 +25,7 @@ static const char* cublas_get_error_string(cublasStatus_t s)
// Check the return value of a call to the cuBLAS runtime for an error condition.
#define CHECK_CUBLAS(call) \
-{ \
+do{ \
const cublasStatus_t error = call; \
if (error != CUBLAS_STATUS_SUCCESS) \
{ \
@@ -34,7 +34,7 @@ static const char* cublas_get_error_string(cublasStatus_t s)
sout << "code: " << error << ", reason: " << cublas_get_error_string(error);\
throw dlib::cublas_error(sout.str()); \
} \
-}
+}while(false)
namespace dlib
{
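The change above, repeated for CHECK_CUDA, CHECK_CUDNN, and CHECK_CURAND below, wraps each macro body in do{ ... }while(false) instead of bare braces. This is the standard C/C++ idiom for making a multi-statement macro behave as a single statement: the expansion demands exactly one trailing semicolon, so the macro composes safely with if/else. A minimal sketch of the failure mode the idiom prevents (the names below are hypothetical stand-ins, not from this commit):

#include <stdexcept>

// Brace-only body: the ';' a caller naturally writes after the macro
// becomes an extra empty statement, which detaches a following 'else'.
#define CHECK_BRACES(call)                                        \
{                                                                 \
    if ((call) != 0)                                              \
        throw std::runtime_error("call failed");                  \
}

// do{...}while(false) body: the expansion is a single statement that
// requires exactly one trailing ';', so it composes like a function call.
#define CHECK_DOWHILE(call)                                       \
do{                                                               \
    if ((call) != 0)                                              \
        throw std::runtime_error("call failed");                  \
}while(false)

int some_runtime_call() { return 0; }  // hypothetical status-returning call

void f(bool enabled)
{
    if (enabled)
        CHECK_DOWHILE(some_runtime_call());   // compiles
    else
        CHECK_DOWHILE(some_runtime_call());   // compiles

    // Written with CHECK_BRACES instead, the if/else above would not
    // compile: the ';' after the expanded '}' orphans the 'else'.
}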

View File

@@ -15,7 +15,7 @@
// Check the return value of a call to the CUDA runtime for an error condition.
#define CHECK_CUDA(call) \
-{ \
+do{ \
const cudaError_t error = call; \
if (error != cudaSuccess) \
{ \
@@ -24,7 +24,7 @@
sout << "code: " << error << ", reason: " << cudaGetErrorString(error);\
throw dlib::cuda_error(sout.str()); \
} \
-}
+}while(false)
// ----------------------------------------------------------------------------------------

View File

@@ -36,7 +36,7 @@ static const char* cudnn_get_error_string(cudnnStatus_t s)
// Check the return value of a call to the cuDNN runtime for an error condition.
#define CHECK_CUDNN(call) \
-{ \
+do{ \
const cudnnStatus_t error = call; \
if (error != CUDNN_STATUS_SUCCESS) \
{ \
@@ -45,7 +45,7 @@ static const char* cudnn_get_error_string(cudnnStatus_t s)
sout << "code: " << error << ", reason: " << cudnn_get_error_string(error);\
throw dlib::cudnn_error(sout.str()); \
} \
-}
+}while(false)
namespace dlib

View File

@@ -24,7 +24,7 @@ static const char* curand_get_error_string(curandStatus_t s)
// Check the return value of a call to the cuRAND runtime for an error condition.
#define CHECK_CURAND(call) \
-{ \
+do{ \
const curandStatus_t error = call; \
if (error != CURAND_STATUS_SUCCESS) \
{ \
@@ -33,7 +33,7 @@ static const char* curand_get_error_string(curandStatus_t s)
sout << "code: " << error << ", reason: " << curand_get_error_string(error);\
throw dlib::curand_error(sout.str()); \
} \
-}
+}while(false)
namespace dlib
{

View File

@@ -10,11 +10,34 @@
#include "gpu_data.h"
#include <iostream>
#include "cuda_utils.h"
#include <cstring>
namespace dlib
{
// ----------------------------------------------------------------------------------------
void memcpy (
gpu_data& dest,
const gpu_data& src
)
{
DLIB_CASSERT(dest.size() == src.size(), "");
if (src.size() == 0)
return;
// copy the memory efficiently based on which copy is current in each object.
if (dest.device_ready() && src.device_ready())
CHECK_CUDA(cudaMemcpy(dest.device(), src.device(), src.size()*sizeof(float), cudaMemcpyDeviceToDevice));
else if (!dest.device_ready() && src.device_ready())
CHECK_CUDA(cudaMemcpy(dest.host_write_only(), src.device(), src.size()*sizeof(float), cudaMemcpyDeviceToHost));
else if (dest.device_ready() && !src.device_ready())
CHECK_CUDA(cudaMemcpy(dest.device(), src.host(), src.size()*sizeof(float), cudaMemcpyHostToDevice));
else
CHECK_CUDA(cudaMemcpy(dest.host_write_only(), src.host(), src.size()*sizeof(float), cudaMemcpyHostToHost));
}
// ----------------------------------------------------------------------------------------
void gpu_data::

View File

@@ -5,6 +5,7 @@
#include "gpu_data_abstract.h"
#include <memory>
#include <cstring>
#include "cuda_errors.h"
#include "../serialize.h"
@@ -202,6 +203,18 @@ namespace dlib
deserialize(data[i], in);
}
#ifdef DLIB_USE_CUDA
void memcpy (gpu_data& dest, const gpu_data& src);
#else
inline void memcpy (gpu_data& dest, const gpu_data& src)
{
DLIB_CASSERT(dest.size() == src.size(), "");
if (src.size() == 0)
return;
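// dest's old contents are about to be overwritten, so host_write_only()
// avoids making dest's host copy current before the copy.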
std::memcpy(dest.host_write_only(), src.host(), sizeof(float)*src.size());
}
#endif
// ----------------------------------------------------------------------------------------
}

View File

@@ -208,6 +208,21 @@ namespace dlib
provides serialization support
!*/
void memcpy (
gpu_data& dest,
const gpu_data& src
);
/*!
requires
- dest.size() == src.size()
ensures
- Copies the data in src to dest. If the device data is current (i.e.
device_ready()==true) on both src and dest then the copy will happen entirely
on the device side.
- It doesn't matter what GPU device is selected by cudaSetDevice(). You can
always copy gpu_data objects to and from each other regardless.
!*/
// ----------------------------------------------------------------------------------------
}
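A short usage sketch of the contract above. This is illustrative only: the include path and sizes are assumptions, while dlib::memcpy, gpu_data, set_size(), size(), and host() are the names from this commit and the existing gpu_data interface.

#include <dlib/dnn/gpu_data.h>   // assumed header path for gpu_data

void gpu_data_copy_sketch()
{
    dlib::gpu_data src, dest;
    src.set_size(100);
    dest.set_size(100);                 // required: dest.size() == src.size()

    float* p = src.host();              // make the host copy current and fill it
    for (size_t i = 0; i < src.size(); ++i)
        p[i] = static_cast<float>(i);

    // Copies src into dest.  Had both device copies been current
    // (device_ready()==true), the transfer would stay on the GPU.
    dlib::memcpy(dest, src);
}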

View File

@@ -55,7 +55,7 @@ namespace dlib
tensor& operator= (float val)
{
#ifdef DLIB_USE_CUDA
-// If you are using CUDA then presumably you will be mostly using tensor's on
+// If you are using CUDA then presumably you will be mostly using tensors on
// the GPU. So unless you seem to be actively working with the host side's
// data then we do this initialization on the device side since this avoids a
// host to device transfer that would likely immediately follow.
@@ -158,6 +158,15 @@ namespace dlib
) const = 0;
#endif
friend void memcpy (
tensor& dest,
const tensor& src
)
{
memcpy(dest.data(), src.data());
}
protected:
friend class alias_tensor;

View File

@@ -298,6 +298,22 @@ namespace dlib
tensor& operator=(tensor&& item);
};
// ----------------------------------------------------------------------------------------
void memcpy (
tensor& dest,
const tensor& src
);
/*!
requires
- dest.size() == src.size()
ensures
- Copies the data in src to dest. If the device data is current on both src
and dest then the copy will happen entirely on the device side.
- It doesn't matter what GPU device is selected by cudaSetDevice(). You can
always copy tensor objects to and from each other regardless.
!*/
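The tensor overload (defined as a friend in tensor.h above) simply forwards to the gpu_data overload via each tensor's data() member, so the same device-aware behavior applies. A brief call-pattern sketch, mirroring the tests at the end of this commit; the include path is an assumption, the rest are existing dlib names:

#include <dlib/dnn.h>   // assumed umbrella header for the tensor types

void tensor_copy_sketch()
{
    dlib::resizable_tensor A, B;
    A.set_size(2,3,4,5);
    B.copy_size(A);       // required: B.size() == A.size()

    A = 4;                // may be initialized on the device (see tensor.h above)
    B = 1;

    dlib::memcpy(B, A);   // B now holds A's contents

    // Reading B.host() afterwards triggers at most one lazy
    // device-to-host transfer, handled by gpu_data.
}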
// ----------------------------------------------------------------------------------------
const matrix_exp mat (

View File

@@ -439,6 +439,52 @@ namespace
DLIB_TEST(max(abs(mat(dest)-pointwise_multiply(AA,mat(B)))) < 1e-6);
}
{
resizable_tensor A, B, truth;
A.set_size(2,3,4,5);
truth.copy_size(A);
B.copy_size(A);
A = 4;
B = 1;
truth = 1;
DLIB_TEST(max(abs(mat(B)- mat(truth))) < 1e-5);
memcpy(A, truth);
DLIB_TEST(max(abs(mat(A)- mat(truth))) < 1e-5);
A = 4;
A.host();
B.host();
memcpy(A, truth);
DLIB_TEST(max(abs(mat(A)- mat(truth))) < 1e-5);
#ifdef DLIB_USE_CUDA
A = 4;
A.device();
B.host();
memcpy(A, truth);
DLIB_TEST(max(abs(mat(A)- mat(truth))) < 1e-5);
A = 4;
A.device();
B.device();
memcpy(A, truth);
DLIB_TEST(max(abs(mat(A)- mat(truth))) < 1e-5);
A = 4;
A.host();
B.device();
memcpy(A, truth);
DLIB_TEST(max(abs(mat(A)- mat(truth))) < 1e-5);
A = 4;
A.host_write_only();
B.device();
memcpy(A, truth);
DLIB_TEST(max(abs(mat(A)- mat(truth))) < 1e-5);
#endif
}
{
resizable_tensor A, B;
A.set_size(2,3,4,5);