mirror of
https://github.com/davisking/dlib.git
synced 2024-11-01 10:14:53 +08:00
Added affine_transform_range() and another overload of affine_transform()
This commit is contained in:
parent
15b2d7b5d8
commit
c3a74c7c1c
@ -385,6 +385,30 @@ namespace dlib
|
||||
d[i] = A*s1[i] + B*s2[i] + C*s3[i] + D;
|
||||
}
|
||||
|
||||
void affine_transform_range(
|
||||
size_t begin,
|
||||
size_t end,
|
||||
tensor& dest,
|
||||
const tensor& src1,
|
||||
const tensor& src2,
|
||||
const tensor& src3,
|
||||
const float A,
|
||||
const float B,
|
||||
const float C
|
||||
)
|
||||
{
|
||||
DLIB_CASSERT(dest.size()==src1.size(),"");
|
||||
DLIB_CASSERT(dest.size()==src2.size(),"");
|
||||
DLIB_CASSERT(dest.size()==src3.size(),"");
|
||||
DLIB_CASSERT(begin <= end && end <= dest.size(),"");
|
||||
const auto d = dest.host();
|
||||
const auto s1 = src1.host();
|
||||
const auto s2 = src2.host();
|
||||
const auto s3 = src3.host();
|
||||
for (size_t i = begin; i < end; ++i)
|
||||
d[i] = A*s1[i] + B*s2[i] + C*s3[i];
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------------
|
||||
|
||||
void affine_transform(
|
||||
|
@ -81,6 +81,18 @@ namespace dlib
|
||||
const float D
|
||||
);
|
||||
|
||||
// Prototype of affine_transform_range(): for every index i in the half open range
// [begin,end) it sets dest[i] = A*src1[i] + B*src2[i] + C*src3[i].  All four tensors
// must have the same size() and begin <= end <= dest.size() must hold.
// NOTE(review): the enclosing namespace is outside this hunk; presumably this is the
// declaration of the host (CPU) implementation -- confirm.
void affine_transform_range(
|
||||
size_t begin,
|
||||
size_t end,
|
||||
tensor& dest,
|
||||
const tensor& src1,
|
||||
const tensor& src2,
|
||||
const tensor& src3,
|
||||
const float A,
|
||||
const float B,
|
||||
const float C
|
||||
);
|
||||
|
||||
// -----------------------------------------------------------------------------------
|
||||
|
||||
void affine_transform(
|
||||
|
@ -504,6 +504,40 @@ namespace dlib
|
||||
src2.device(), src3.device(), dest.size(), A, B, C, D);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
// CUDA kernel behind the ranged affine transform.
//
// For each index idx that grid_stride_range(begin, end) hands to the calling
// thread, writes A*s1[idx] + B*s2[idx] + C*s3[idx] to d[idx].
//
//   d          - output buffer
//   s1, s2, s3 - input buffers, indexed with the same idx as d
//   begin, end - bounds passed straight to grid_stride_range()
//   A, B, C    - scalar weights applied to s1, s2, and s3 respectively
//
// None of the pointers is declared __restrict__, so this code makes no assumption
// that d is distinct from the inputs.
// NOTE(review): how indices are divided among threads is decided entirely by
// grid_stride_range, which is defined elsewhere.
__global__ void _cuda_affine_transform_range(
    float* d,
    const float* s1,
    const float* s2,
    const float* s3,
    size_t begin,
    size_t end,
    float A,
    float B,
    float C
)
{
    for (auto idx : grid_stride_range(begin, end))
        d[idx] = A*s1[idx] + B*s2[idx] + C*s3[idx];
}
|
||||
|
||||
|
||||
// Device-memory implementation of the ranged affine transform.
//
// Validates the arguments and then launches _cuda_affine_transform_range on the
// tensors' device() pointers so that, for i in [begin,end),
//     dest[i] = A*src1[i] + B*src2[i] + C*src3[i]
//
// Preconditions (each one is checked with DLIB_CASSERT):
//   - dest.size() equals src1.size(), src2.size(), and src3.size()
//   - begin <= end <= dest.size()
void affine_transform_range(
    size_t begin,
    size_t end,
    tensor& dest,
    const tensor& src1,
    const tensor& src2,
    const tensor& src3,
    const float A,
    const float B,
    const float C
)
{
    DLIB_CASSERT(dest.size()==src1.size(),"");
    DLIB_CASSERT(dest.size()==src2.size(),"");
    DLIB_CASSERT(dest.size()==src3.size(),"");
    DLIB_CASSERT(begin <= end && end <= dest.size(),"");

    // max_jobs is given the number of elements in the range, not the tensor size.
    const size_t num_elements = end - begin;
    launch_kernel(
        _cuda_affine_transform_range, max_jobs(num_elements),
        dest.device(), src1.device(), src2.device(), src3.device(),
        begin, end, A, B, C
    );
}
|
||||
|
||||
// -----------------------------------------------------------------------------------
|
||||
|
||||
__global__ void _cuda_affine_transform2(float* d, const float* s, size_t n, const float* A, const float* B)
|
||||
|
@ -164,6 +164,18 @@ namespace dlib
|
||||
const float D
|
||||
);
|
||||
|
||||
// Prototype of affine_transform_range(): for every index i in the half open range
// [begin,end) it sets dest[i] = A*src1[i] + B*src2[i] + C*src3[i].  All four tensors
// must have the same size() and begin <= end <= dest.size() must hold.
// NOTE(review): the enclosing namespace is outside this hunk; the comment that
// follows mentions cuda::add(), so this is presumably the declaration of the CUDA
// implementation -- confirm.
void affine_transform_range(
|
||||
size_t begin,
|
||||
size_t end,
|
||||
tensor& dest,
|
||||
const tensor& src1,
|
||||
const tensor& src2,
|
||||
const tensor& src3,
|
||||
const float A,
|
||||
const float B,
|
||||
const float C
|
||||
);
|
||||
|
||||
// Note that this function isn't in the tt:: namespace because add_scaled() is
|
||||
// called by cuda::add() so we don't need a tt:: version of add_scaled().
|
||||
void add_scaled(
|
||||
|
@ -240,6 +240,42 @@ namespace dlib { namespace tt
|
||||
#endif
|
||||
}
|
||||
|
||||
void affine_transform_range(
|
||||
size_t begin,
|
||||
size_t end,
|
||||
tensor& dest,
|
||||
const tensor& src1,
|
||||
const tensor& src2,
|
||||
const tensor& src3,
|
||||
const float A,
|
||||
const float B,
|
||||
const float C
|
||||
)
|
||||
{
|
||||
#ifdef DLIB_USE_CUDA
|
||||
cuda::affine_transform_range(begin, end, dest,src1,src2,src3,A,B,C);
|
||||
#else
|
||||
cpu::affine_transform_range(begin, end, dest,src1,src2,src3,A,B,C);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Three-source affine transform without a constant term:
//     dest = A*src1 + B*src2 + C*src3
// Implemented by running the ranged implementation over the whole tensor, i.e.
// over [0, dest.size()), on the CUDA or CPU backend depending on DLIB_USE_CUDA.
void affine_transform(
    tensor& dest,
    const tensor& src1,
    const tensor& src2,
    const tensor& src3,
    const float A,
    const float B,
    const float C
)
{
    // Upper bound of the full range; the lower bound is always 0.
    const auto num_elements = dest.size();
#ifdef DLIB_USE_CUDA
    cuda::affine_transform_range(0, num_elements, dest, src1, src2, src3, A, B, C);
#else
    cpu::affine_transform_range(0, num_elements, dest, src1, src2, src3, A, B, C);
#endif
}
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
void affine_transform(
|
||||
|
@ -229,13 +229,58 @@ namespace dlib { namespace tt
|
||||
const float D
|
||||
);
|
||||
/*!
|
||||
requires
|
||||
- dest.size()==src1.size()
|
||||
- dest.size()==src2.size()
|
||||
- dest.size()==src3.size()
|
||||
ensures
|
||||
- #dest == A*src1 + B*src2 + C*src3 + D
|
||||
!*/
|
||||
|
||||
void affine_transform(
|
||||
tensor& dest,
|
||||
const tensor& src1,
|
||||
const tensor& src2,
|
||||
const tensor& src3,
|
||||
const float A,
|
||||
const float B,
|
||||
const float C
|
||||
);
|
||||
/*!
    requires
        - dest.size()==src1.size()
        - dest.size()==src2.size()
        - dest.size()==src3.size()
    ensures
        - #dest == A*src1 + B*src2 + C*src3
        - This is the same as calling
          affine_transform_range(0, dest.size(), dest, src1, src2, src3, A, B, C),
          i.e. the ranged version applied to the entire tensor.
!*/
|
||||
|
||||
void affine_transform_range(
|
||||
size_t begin,
|
||||
size_t end,
|
||||
tensor& dest,
|
||||
const tensor& src1,
|
||||
const tensor& src2,
|
||||
const tensor& src3,
|
||||
const float A,
|
||||
const float B,
|
||||
const float C
|
||||
);
|
||||
/*!
    requires
        - dest.size()==src1.size()
        - dest.size()==src2.size()
        - dest.size()==src3.size()
        - begin <= end <= dest.size()
    ensures
        - This function operates much like
          affine_transform(dest,src1,src2,src3,A,B,C,0), except that it runs over only
          the half-open range [begin,end) rather than processing the entire tensor.
          Specifically, it does this:
            - for i in the range [begin, end):
                - #dest.host()[i] == A*src1.host()[i] + B*src2.host()[i] + C*src3.host()[i]
        - Elements of dest whose index lies outside [begin,end) are not assigned by
          this operation.
        - If begin == end then no element is assigned.
!*/
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
void affine_transform(
|
||||
|
Loading…
Reference in New Issue
Block a user