mirror of
https://github.com/davisking/dlib.git
synced 2024-11-01 10:14:53 +08:00
merged
This commit is contained in:
commit
7c29fbb1fd
@ -25,7 +25,7 @@ static const char* cublas_get_error_string(cublasStatus_t s)
|
||||
|
||||
// Check the return value of a call to the cuBLAS runtime for an error condition.
|
||||
#define CHECK_CUBLAS(call) \
|
||||
{ \
|
||||
do{ \
|
||||
const cublasStatus_t error = call; \
|
||||
if (error != CUBLAS_STATUS_SUCCESS) \
|
||||
{ \
|
||||
@ -34,7 +34,7 @@ static const char* cublas_get_error_string(cublasStatus_t s)
|
||||
sout << "code: " << error << ", reason: " << cublas_get_error_string(error);\
|
||||
throw dlib::cublas_error(sout.str()); \
|
||||
} \
|
||||
}
|
||||
}while(false)
|
||||
|
||||
namespace dlib
|
||||
{
|
||||
|
@ -15,7 +15,7 @@
|
||||
|
||||
// Check the return value of a call to the CUDA runtime for an error condition.
|
||||
#define CHECK_CUDA(call) \
|
||||
{ \
|
||||
do{ \
|
||||
const cudaError_t error = call; \
|
||||
if (error != cudaSuccess) \
|
||||
{ \
|
||||
@ -24,7 +24,7 @@
|
||||
sout << "code: " << error << ", reason: " << cudaGetErrorString(error);\
|
||||
throw dlib::cuda_error(sout.str()); \
|
||||
} \
|
||||
}
|
||||
}while(false)
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
|
@ -36,7 +36,7 @@ static const char* cudnn_get_error_string(cudnnStatus_t s)
|
||||
|
||||
// Check the return value of a call to the cuDNN runtime for an error condition.
|
||||
#define CHECK_CUDNN(call) \
|
||||
{ \
|
||||
do{ \
|
||||
const cudnnStatus_t error = call; \
|
||||
if (error != CUDNN_STATUS_SUCCESS) \
|
||||
{ \
|
||||
@ -45,7 +45,7 @@ static const char* cudnn_get_error_string(cudnnStatus_t s)
|
||||
sout << "code: " << error << ", reason: " << cudnn_get_error_string(error);\
|
||||
throw dlib::cudnn_error(sout.str()); \
|
||||
} \
|
||||
}
|
||||
}while(false)
|
||||
|
||||
|
||||
namespace dlib
|
||||
|
@ -24,7 +24,7 @@ static const char* curand_get_error_string(curandStatus_t s)
|
||||
|
||||
// Check the return value of a call to the cuRAND runtime for an error condition.
|
||||
#define CHECK_CURAND(call) \
|
||||
{ \
|
||||
do{ \
|
||||
const curandStatus_t error = call; \
|
||||
if (error != CURAND_STATUS_SUCCESS) \
|
||||
{ \
|
||||
@ -33,7 +33,7 @@ static const char* curand_get_error_string(curandStatus_t s)
|
||||
sout << "code: " << error << ", reason: " << curand_get_error_string(error);\
|
||||
throw dlib::curand_error(sout.str()); \
|
||||
} \
|
||||
}
|
||||
}while(false)
|
||||
|
||||
namespace dlib
|
||||
{
|
||||
|
@ -10,11 +10,34 @@
|
||||
#include "gpu_data.h"
|
||||
#include <iostream>
|
||||
#include "cuda_utils.h"
|
||||
#include <cstring>
|
||||
|
||||
|
||||
namespace dlib
|
||||
{
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
// Copies the floats held by src into dest.
//
// requires
//     - dest.size() == src.size()  (enforced with DLIB_CASSERT)
// ensures
//     - The cudaMemcpy direction is chosen from device_ready() on each object,
//       so a copy between two objects whose device data is current never
//       touches host memory.
//     - A zero-length copy and a copy of an object onto itself are no-ops.
void memcpy (
    gpu_data& dest,
    const gpu_data& src
)
{
    DLIB_CASSERT(dest.size() == src.size(), "");
    // Nothing to move for empty buffers.  A self-copy is also skipped: the data
    // is already in place, and this avoids handing cudaMemcpy identical
    // (overlapping) source and destination ranges.
    if (src.size() == 0 || &dest == &src)
        return;

    const auto num_bytes = src.size()*sizeof(float);

    // copy the memory efficiently based on which copy is current in each object.
    // Each branch is braced so the chain does not depend on how the CHECK_CUDA
    // macro expands.
    if (dest.device_ready() && src.device_ready())
    {
        CHECK_CUDA(cudaMemcpy(dest.device(), src.device(), num_bytes, cudaMemcpyDeviceToDevice));
    }
    else if (!dest.device_ready() && src.device_ready())
    {
        CHECK_CUDA(cudaMemcpy(dest.host_write_only(), src.device(), num_bytes, cudaMemcpyDeviceToHost));
    }
    else if (dest.device_ready() && !src.device_ready())
    {
        CHECK_CUDA(cudaMemcpy(dest.device(), src.host(), num_bytes, cudaMemcpyHostToDevice));
    }
    else
    {
        CHECK_CUDA(cudaMemcpy(dest.host_write_only(), src.host(), num_bytes, cudaMemcpyHostToHost));
    }
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
void gpu_data::
|
||||
|
@ -5,6 +5,7 @@
|
||||
|
||||
#include "gpu_data_abstract.h"
|
||||
#include <memory>
|
||||
#include <cstring>
|
||||
#include "cuda_errors.h"
|
||||
#include "../serialize.h"
|
||||
|
||||
@ -202,6 +203,18 @@ namespace dlib
|
||||
deserialize(data[i], in);
|
||||
}
|
||||
|
||||
#ifdef DLIB_USE_CUDA
|
||||
void memcpy (gpu_data& dest, const gpu_data& src);
|
||||
#else
|
||||
// Host-only version, used when DLIB_USE_CUDA is not defined: copies the floats
// held by src into dest with std::memcpy.
//
// requires
//     - dest.size() == src.size()  (enforced with DLIB_CASSERT)
// ensures
//     - A zero-length copy and a copy of an object onto itself are no-ops.
inline void memcpy (gpu_data& dest, const gpu_data& src)
{
    DLIB_CASSERT(dest.size() == src.size(), "");
    // std::memcpy has undefined behavior when source and destination overlap,
    // so a self-copy must return before reaching it.
    if (src.size() == 0 || &dest == &src)
        return;
    std::memcpy(dest.host_write_only(), src.host(), sizeof(float)*src.size());
}
|
||||
#endif
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
}
|
||||
|
@ -208,6 +208,21 @@ namespace dlib
|
||||
provides serialization support
|
||||
!*/
|
||||
|
||||
void memcpy (
|
||||
gpu_data& dest,
|
||||
const gpu_data& src
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- dest.size() == src.size()
|
||||
ensures
|
||||
- Copies the data in src to dest. If the device data is current (i.e.
|
||||
device_ready()==true) on both src and dest then the copy will happen entirely
|
||||
on the device side.
|
||||
- It doesn't matter what GPU device is selected by cudaSetDevice(). You can
|
||||
always copy gpu_data objects to and from each other regardless.
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
}
|
||||
|
@ -55,7 +55,7 @@ namespace dlib
|
||||
tensor& operator= (float val)
|
||||
{
|
||||
#ifdef DLIB_USE_CUDA
|
||||
// If you are using CUDA then presumably you will be mostly using tensor's on
|
||||
// If you are using CUDA then presumably you will be mostly using tensors on
|
||||
// the GPU. So unless you seem to be actively working with the host side's
|
||||
// data then we do this initialization on the device side since this avoids a
|
||||
// host to device transfer that would likely immediately follow.
|
||||
@ -158,6 +158,15 @@ namespace dlib
|
||||
) const = 0;
|
||||
#endif
|
||||
|
||||
// Copies the contents of src into dest by forwarding the objects returned by
// each tensor's data() to the memcpy() overload that accepts them.
friend void memcpy (
    tensor& dest,
    const tensor& src
)
{
    auto& dest_storage = dest.data();
    const auto& src_storage = src.data();
    memcpy(dest_storage, src_storage);
}
|
||||
|
||||
|
||||
protected:
|
||||
|
||||
friend class alias_tensor;
|
||||
|
@ -298,6 +298,22 @@ namespace dlib
|
||||
tensor& operator=(tensor&& item);
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
void memcpy (
|
||||
tensor& dest,
|
||||
const tensor& src
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- dest.size() == src.size()
|
||||
ensures
|
||||
- Copies the data in src to dest. If the device data is current on both src
|
||||
and dest then the copy will happen entirely on the device side.
|
||||
- It doesn't matter what GPU device is selected by cudaSetDevice(). You can
|
||||
always copy tensor objects to and from each other regardless.
|
||||
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
const matrix_exp mat (
|
||||
|
@ -439,6 +439,52 @@ namespace
|
||||
DLIB_TEST(max(abs(mat(dest)-pointwise_multiply(AA,mat(B)))) < 1e-6);
|
||||
}
|
||||
|
||||
{
|
||||
resizable_tensor A, B, truth;
|
||||
A.set_size(2,3,4,5);
|
||||
truth.copy_size(A);
|
||||
B.copy_size(A);
|
||||
|
||||
A = 4;
|
||||
B = 1;
|
||||
truth = 1;
|
||||
DLIB_TEST(max(abs(mat(B)- mat(truth))) < 1e-5);
|
||||
memcpy(A, truth);
|
||||
DLIB_TEST(max(abs(mat(A)- mat(truth))) < 1e-5);
|
||||
|
||||
A = 4;
|
||||
A.host();
|
||||
B.host();
|
||||
memcpy(A, truth);
|
||||
DLIB_TEST(max(abs(mat(A)- mat(truth))) < 1e-5);
|
||||
|
||||
#ifdef DLIB_USE_CUDA
|
||||
A = 4;
|
||||
A.device();
|
||||
B.host();
|
||||
memcpy(A, truth);
|
||||
DLIB_TEST(max(abs(mat(A)- mat(truth))) < 1e-5);
|
||||
|
||||
A = 4;
|
||||
A.device();
|
||||
B.device();
|
||||
memcpy(A, truth);
|
||||
DLIB_TEST(max(abs(mat(A)- mat(truth))) < 1e-5);
|
||||
|
||||
A = 4;
|
||||
A.host();
|
||||
B.device();
|
||||
memcpy(A, truth);
|
||||
DLIB_TEST(max(abs(mat(A)- mat(truth))) < 1e-5);
|
||||
|
||||
A = 4;
|
||||
A.host_write_only();
|
||||
B.device();
|
||||
memcpy(A, truth);
|
||||
DLIB_TEST(max(abs(mat(A)- mat(truth))) < 1e-5);
|
||||
#endif
|
||||
}
|
||||
|
||||
{
|
||||
resizable_tensor A, B;
|
||||
A.set_size(2,3,4,5);
|
||||
|
Loading…
Reference in New Issue
Block a user