mirror of
https://github.com/davisking/dlib.git
synced 2024-11-01 10:14:53 +08:00
Made tt::add() more general
This commit is contained in:
parent
a6c331cef3
commit
d7e4b88d99
@ -160,7 +160,8 @@ namespace dlib
|
||||
(have_same_dimensions(src, dest) ||
|
||||
(src.num_samples()==1 && src.k()==dest.k() && src.nr()==1 && src.nc()==1) ||
|
||||
(src.num_samples()==1 && src.k()==dest.k() && src.nr()==dest.nr() && src.nc()==dest.nc()) ||
|
||||
(src.num_samples()==1 && src.k()==1 && src.nr()==dest.nr() && src.nc()==dest.nc())) &&
|
||||
(src.num_samples()==1 && src.k()==1 && src.nr()==dest.nr() && src.nc()==dest.nc()) ||
|
||||
(src.num_samples()==dest.num_samples() && src.k()==1 && src.nr()==1 && src.nc()==1)) &&
|
||||
is_same_object(src,dest) == false ,
|
||||
"\n\t dest.num_samples(): " << dest.num_samples()
|
||||
<<"\n\t dest.k(): " << dest.k()
|
||||
|
@ -643,6 +643,38 @@ namespace dlib
|
||||
launch_kernel(_cuda_add_scaled,max_jobs(dest.size()),dest.device(), src.device(), dest.size(), scale);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
// Kernel: for every index idx in [0,size) produced by grid_stride_range, performs
//     dest[idx] = beta*dest[idx] + alpha*src[idx/stride]
// so each run of `stride` consecutive elements of dest is combined with a single
// element of src.
//
// Preconditions visible from the indexing below:
//   - stride != 0 (it is used as an integer divisor)
//   - dest has at least `size` elements
//   - src has at least (size-1)/stride + 1 elements when size > 0
// NOTE(review): dest and src are presumably device pointers supplied by the host
// wrapper -- confirm against the caller.
__global__ void _cuda_add_cv_to_all_columns(float beta, float* dest, float alpha, const float* src, size_t size, size_t stride)
{
    for (auto idx : grid_stride_range(0, size))
    {
        // Element of src shared by the whole run of `stride` outputs containing idx.
        const float src_val = src[idx/stride];
        const float old_val = dest[idx];
        dest[idx] = beta*old_val + alpha*src_val;
    }
}
|
||||
|
||||
// Kernel: for every index idx in [0,size) produced by grid_stride_range, performs
//     dest[idx] = alpha*src[idx/stride]
// The previous contents of dest are never read, only overwritten.
//
// Preconditions visible from the indexing below:
//   - stride != 0 (it is used as an integer divisor)
//   - dest has at least `size` elements
//   - src has at least (size-1)/stride + 1 elements when size > 0
// NOTE(review): dest and src are presumably device pointers supplied by the host
// wrapper -- confirm against the caller.
__global__ void _cuda_add_cv_to_all_columns_no_beta(float* dest, float alpha, const float* src, size_t size, size_t stride)
{
    for (auto idx : grid_stride_range(0, size))
    {
        // Element of src shared by the whole run of `stride` outputs containing idx.
        const float src_val = src[idx/stride];
        dest[idx] = alpha*src_val;
    }
}
|
||||
|
||||
// Host-side launcher.  Requires that src holds exactly one value per sample
// (src.size() == src.num_samples()) and that dest has the same number of samples
// as src.  Performs, for every element i of dest,
//     dest[i] = beta*dest[i] + alpha*src[i/stride]
// where stride = dest.size()/dest.num_samples(), i.e. the number of elements of
// dest per sample.
//
// When beta == 0 a separate kernel is launched that only writes dest and never
// reads its previous contents.
//
// An empty dest is a no-op.
void add_cv_to_all_columns(
    float beta,
    tensor& dest,
    float alpha,
    const tensor& src
)
{
    DLIB_CASSERT(dest.num_samples() == src.num_samples() && src.num_samples() == src.size());

    // Nothing to write.  Returning here also keeps the stride computation below
    // from dividing by zero: the assert above is satisfied when both tensors have
    // zero samples, and dest.size()/dest.num_samples() would then be 0/0.
    if (dest.size() == 0)
        return;

    // Number of dest elements that share one src value.  `auto` keeps the exact
    // type of the expression that is forwarded to launch_kernel.
    const auto stride = dest.size()/dest.num_samples();

    if (beta == 0)
        launch_kernel(_cuda_add_cv_to_all_columns_no_beta, max_jobs(dest.size()), dest.device(), alpha, src.device(), dest.size(), stride);
    else
        launch_kernel(_cuda_add_cv_to_all_columns, max_jobs(dest.size()), beta, dest.device(), alpha, src.device(), dest.size(), stride);
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
__global__ void _cuda_affine_transform5(
|
||||
|
@ -238,6 +238,13 @@ namespace dlib
|
||||
const tensor& src
|
||||
);
|
||||
|
||||
void add_cv_to_all_columns(
|
||||
float beta,
|
||||
tensor& dest,
|
||||
float alpha,
|
||||
const tensor& src
|
||||
);
|
||||
|
||||
// -----------------------------------------------------------------------------------
|
||||
|
||||
void affine_transform(
|
||||
|
@ -260,7 +260,8 @@ namespace dlib
|
||||
(have_same_dimensions(src, dest) ||
|
||||
(src.num_samples()==1 && src.k()==dest.k() && src.nr()==1 && src.nc()==1) ||
|
||||
(src.num_samples()==1 && src.k()==dest.k() && src.nr()==dest.nr() && src.nc()==dest.nc()) ||
|
||||
(src.num_samples()==1 && src.k()==1 && src.nr()==dest.nr() && src.nc()==dest.nc())) &&
|
||||
(src.num_samples()==1 && src.k()==1 && src.nr()==dest.nr() && src.nc()==dest.nc()) ||
|
||||
(src.num_samples()==dest.num_samples() && src.k()==1 && src.nr()==1 && src.nc()==1)) &&
|
||||
is_same_object(src,dest) == false ,
|
||||
"\n\t dest.num_samples(): " << dest.num_samples()
|
||||
<<"\n\t dest.k(): " << dest.k()
|
||||
@ -279,6 +280,11 @@ namespace dlib
|
||||
add_scaled(dest, alpha, src);
|
||||
return;
|
||||
}
|
||||
else if (src.num_samples()==dest.num_samples() && src.k()==1 && src.nr()==1 && src.nc()==1)
|
||||
{
|
||||
add_cv_to_all_columns(beta, dest, alpha, src);
|
||||
return;
|
||||
}
|
||||
|
||||
CHECK_CUDNN(cudnnAddTensor(context(),
|
||||
&alpha,
|
||||
|
@ -693,6 +693,7 @@ namespace dlib { namespace tt
|
||||
- src.num_samples()==1 && src.k()==dest.k() && src.nr()==1 && src.nc()==1
|
||||
- src.num_samples()==1 && src.k()==dest.k() && src.nr()==dest.nr() && src.nc()==dest.nc()
|
||||
- src.num_samples()==1 && src.k()==1 && src.nr()==dest.nr() && src.nc()==dest.nc()
|
||||
- src.num_samples()==dest.num_samples() && src.k()==1 && src.nr()==1 && src.nc()==1
|
||||
- is_same_object(src,dest) == false
|
||||
ensures
|
||||
- performs: dest = beta*dest + alpha*src
|
||||
|
@ -602,6 +602,35 @@ namespace
|
||||
#endif
|
||||
}
|
||||
|
||||
{
|
||||
resizable_tensor A(4,5), B(4);
|
||||
|
||||
tensor_rand rnd;
|
||||
rnd.fill_uniform(A);
|
||||
rnd.fill_uniform(B);
|
||||
|
||||
float alpha = 1.4;
|
||||
float beta = 0.5;
|
||||
|
||||
matrix<float> a(mat(A)), b(mat(B));
|
||||
for (long c = 0; c < a.nc(); ++c)
|
||||
{
|
||||
set_colm(a,c) = beta*colm(a,c) + alpha*b;
|
||||
}
|
||||
|
||||
tt::add(beta, A, alpha, B);
|
||||
DLIB_TEST_MSG(max(abs(mat(A)-a)) < 1e-6, max(abs(mat(A)-a)));
|
||||
|
||||
beta = 0;
|
||||
for (long c = 0; c < a.nc(); ++c)
|
||||
{
|
||||
set_colm(a,c) = beta*colm(a,c) + alpha*b;
|
||||
}
|
||||
|
||||
tt::add(beta, A, alpha, B);
|
||||
DLIB_TEST(max(abs(mat(A)-a)) < 1e-6);
|
||||
}
|
||||
|
||||
{
|
||||
resizable_tensor A, B;
|
||||
A.set_size(2,3,4,5);
|
||||
|
Loading…
Reference in New Issue
Block a user