mirror of
https://github.com/davisking/dlib.git
synced 2024-11-01 10:14:53 +08:00
add support for cudnn 8.0
This commit is contained in:
parent
f8887d8cbb
commit
fe803b566f
@ -751,6 +751,31 @@ namespace dlib
|
|||||||
backward_filters_workspace.reset();
|
backward_filters_workspace.reset();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Given an array of cudnn algorithm performance results, like
|
||||||
|
// cudnnConvolutionFwdAlgoPerf_t, pick the best one to use.
|
||||||
|
template <typename T>
|
||||||
|
decltype(std::declval<T>().algo) pick_best_algorithm(const std::vector<T> &perf_results)
|
||||||
|
{
|
||||||
|
DLIB_CASSERT(!perf_results.empty());
|
||||||
|
CHECK_CUDNN(perf_results[0].status);
|
||||||
|
if (dnn_prefer_fastest_algorithms())
|
||||||
|
return perf_results[0].algo;
|
||||||
|
|
||||||
|
// Otherwise we find the algorithm that has a good status and uses the least amount
|
||||||
|
// of memory.
|
||||||
|
size_t best_memory = std::numeric_limits<size_t>::max();
|
||||||
|
decltype(std::declval<T>().algo) best_alg;
|
||||||
|
for (auto&& perf : perf_results)
|
||||||
|
{
|
||||||
|
if (perf.status == CUDNN_STATUS_SUCCESS && perf.memory < best_memory)
|
||||||
|
{
|
||||||
|
best_memory = perf.memory;
|
||||||
|
best_alg = perf.algo;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return best_alg;
|
||||||
|
}
|
||||||
|
|
||||||
void tensor_conv::
|
void tensor_conv::
|
||||||
setup(
|
setup(
|
||||||
const tensor& data,
|
const tensor& data,
|
||||||
@ -841,6 +866,25 @@ namespace dlib
|
|||||||
// Pick which forward algorithm we will use and allocate the necessary
|
// Pick which forward algorithm we will use and allocate the necessary
|
||||||
// workspace buffer.
|
// workspace buffer.
|
||||||
cudnnConvolutionFwdAlgo_t forward_best_algo;
|
cudnnConvolutionFwdAlgo_t forward_best_algo;
|
||||||
|
#if CUDNN_MAJOR >= 8
|
||||||
|
{
    // Ask cuDNN how many forward algorithms it may report so the results
    // buffer can be sized before the search runs.
    int max_algo_count = 0;
    CHECK_CUDNN(cudnnGetConvolutionForwardAlgorithmMaxCount(context(), &max_algo_count));

    std::vector<cudnnConvolutionFwdAlgoPerf_t> candidates(max_algo_count);
    int returned_count = 0;
    const auto filter_desc = (const cudnnFilterDescriptor_t)filter_handle;
    const auto conv_desc = (const cudnnConvolutionDescriptor_t)conv_handle;
    CHECK_CUDNN(cudnnFindConvolutionForwardAlgorithm(
            context(),
            descriptor(data),
            filter_desc,
            conv_desc,
            descriptor(dest_desc),
            max_algo_count,
            &returned_count,
            candidates.data()));

    // Keep only the entries cuDNN actually filled in, then let
    // pick_best_algorithm() choose among them.
    candidates.resize(returned_count);
    forward_best_algo = pick_best_algorithm(candidates);
}
|
||||||
|
#else
|
||||||
CHECK_CUDNN(cudnnGetConvolutionForwardAlgorithm(
|
CHECK_CUDNN(cudnnGetConvolutionForwardAlgorithm(
|
||||||
context(),
|
context(),
|
||||||
descriptor(data),
|
descriptor(data),
|
||||||
@ -850,6 +894,7 @@ namespace dlib
|
|||||||
dnn_prefer_fastest_algorithms()?CUDNN_CONVOLUTION_FWD_PREFER_FASTEST:CUDNN_CONVOLUTION_FWD_NO_WORKSPACE,
|
dnn_prefer_fastest_algorithms()?CUDNN_CONVOLUTION_FWD_PREFER_FASTEST:CUDNN_CONVOLUTION_FWD_NO_WORKSPACE,
|
||||||
std::numeric_limits<size_t>::max(),
|
std::numeric_limits<size_t>::max(),
|
||||||
&forward_best_algo));
|
&forward_best_algo));
|
||||||
|
#endif
|
||||||
forward_algo = forward_best_algo;
|
forward_algo = forward_best_algo;
|
||||||
CHECK_CUDNN(cudnnGetConvolutionForwardWorkspaceSize(
|
CHECK_CUDNN(cudnnGetConvolutionForwardWorkspaceSize(
|
||||||
context(),
|
context(),
|
||||||
@ -863,6 +908,25 @@ namespace dlib
|
|||||||
// Pick which backward data algorithm we will use and allocate the
|
// Pick which backward data algorithm we will use and allocate the
|
||||||
// necessary workspace buffer.
|
// necessary workspace buffer.
|
||||||
cudnnConvolutionBwdDataAlgo_t backward_data_best_algo;
|
cudnnConvolutionBwdDataAlgo_t backward_data_best_algo;
|
||||||
|
#if CUDNN_MAJOR >= 8
|
||||||
|
{
    // Size the results buffer with the max count for the *backward data*
    // algorithm family, since that is the family searched below.  (The
    // backward filter max-count query belongs to a different algorithm
    // family and must not be used here.)
    int num_possible_algorithms = 0;
    CHECK_CUDNN(cudnnGetConvolutionBackwardDataAlgorithmMaxCount(context(), &num_possible_algorithms));

    std::vector<cudnnConvolutionBwdDataAlgoPerf_t> perf_results(num_possible_algorithms);
    int num_algorithms = 0;
    CHECK_CUDNN(cudnnFindConvolutionBackwardDataAlgorithm(
            context(),
            (const cudnnFilterDescriptor_t)filter_handle,
            descriptor(dest_desc),
            (const cudnnConvolutionDescriptor_t)conv_handle,
            descriptor(data),
            num_possible_algorithms,
            &num_algorithms,
            perf_results.data()));

    // Keep only the entries cuDNN actually filled in, then let
    // pick_best_algorithm() choose among them.
    perf_results.resize(num_algorithms);
    backward_data_best_algo = pick_best_algorithm(perf_results);
}
|
||||||
|
#else
|
||||||
CHECK_CUDNN(cudnnGetConvolutionBackwardDataAlgorithm(
|
CHECK_CUDNN(cudnnGetConvolutionBackwardDataAlgorithm(
|
||||||
context(),
|
context(),
|
||||||
(const cudnnFilterDescriptor_t)filter_handle,
|
(const cudnnFilterDescriptor_t)filter_handle,
|
||||||
@ -872,6 +936,7 @@ namespace dlib
|
|||||||
dnn_prefer_fastest_algorithms()?CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST:CUDNN_CONVOLUTION_BWD_DATA_NO_WORKSPACE,
|
dnn_prefer_fastest_algorithms()?CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST:CUDNN_CONVOLUTION_BWD_DATA_NO_WORKSPACE,
|
||||||
std::numeric_limits<size_t>::max(),
|
std::numeric_limits<size_t>::max(),
|
||||||
&backward_data_best_algo));
|
&backward_data_best_algo));
|
||||||
|
#endif
|
||||||
backward_data_algo = backward_data_best_algo;
|
backward_data_algo = backward_data_best_algo;
|
||||||
|
|
||||||
CHECK_CUDNN(cudnnGetConvolutionBackwardDataWorkspaceSize(
|
CHECK_CUDNN(cudnnGetConvolutionBackwardDataWorkspaceSize(
|
||||||
@ -886,6 +951,25 @@ namespace dlib
|
|||||||
// Pick which backward filters algorithm we will use and allocate the
|
// Pick which backward filters algorithm we will use and allocate the
|
||||||
// necessary workspace buffer.
|
// necessary workspace buffer.
|
||||||
cudnnConvolutionBwdFilterAlgo_t backward_filters_best_algo;
|
cudnnConvolutionBwdFilterAlgo_t backward_filters_best_algo;
|
||||||
|
#if CUDNN_MAJOR >= 8
|
||||||
|
{
    // Ask cuDNN how many backward filter algorithms it may report so the
    // results buffer can be sized before the search runs.
    int max_algo_count = 0;
    CHECK_CUDNN(cudnnGetConvolutionBackwardFilterAlgorithmMaxCount(context(), &max_algo_count));

    std::vector<cudnnConvolutionBwdFilterAlgoPerf_t> candidates(max_algo_count);
    int returned_count = 0;
    const auto conv_desc = (const cudnnConvolutionDescriptor_t)conv_handle;
    const auto filter_desc = (const cudnnFilterDescriptor_t)filter_handle;
    CHECK_CUDNN(cudnnFindConvolutionBackwardFilterAlgorithm(
            context(),
            descriptor(data),
            descriptor(dest_desc),
            conv_desc,
            filter_desc,
            max_algo_count,
            &returned_count,
            candidates.data()));

    // Keep only the entries cuDNN actually filled in, then let
    // pick_best_algorithm() choose among them.
    candidates.resize(returned_count);
    backward_filters_best_algo = pick_best_algorithm(candidates);
}
|
||||||
|
#else
|
||||||
CHECK_CUDNN(cudnnGetConvolutionBackwardFilterAlgorithm(
|
CHECK_CUDNN(cudnnGetConvolutionBackwardFilterAlgorithm(
|
||||||
context(),
|
context(),
|
||||||
descriptor(data),
|
descriptor(data),
|
||||||
@ -895,6 +979,7 @@ namespace dlib
|
|||||||
dnn_prefer_fastest_algorithms()?CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST:CUDNN_CONVOLUTION_BWD_FILTER_NO_WORKSPACE,
|
dnn_prefer_fastest_algorithms()?CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST:CUDNN_CONVOLUTION_BWD_FILTER_NO_WORKSPACE,
|
||||||
std::numeric_limits<size_t>::max(),
|
std::numeric_limits<size_t>::max(),
|
||||||
&backward_filters_best_algo));
|
&backward_filters_best_algo));
|
||||||
|
#endif
|
||||||
// cuDNN 5.1 has a bug that causes
|
// cuDNN 5.1 has a bug that causes
|
||||||
// cudnnGetConvolutionBackwardFilterAlgorithm() to pick the winograd
|
// cudnnGetConvolutionBackwardFilterAlgorithm() to pick the winograd
|
||||||
// algorithm even for cases where cuDNN doesn't support it, leading to
|
// algorithm even for cases where cuDNN doesn't support it, leading to
|
||||||
|
Loading…
Reference in New Issue
Block a user