Problem: Error while calling cudnnGetConvolutionForwardWorkspaceSize( context(), descriptor(data), (const cudnnFilterDescriptor_t)filter_handle, (const cudnnConvolutionDescriptor_t)conv_handle, descriptor(dest_desc), (cudnnConvolutionFwdAlgo_t)forward_algo, &forward_workspace_size_in_bytes) in file C:\a\2\s\3rdparty\dlib\dlib\cuda\cudnn_dlibapi.cpp:1029. code: 9, reason: CUDNN_STATUS_NOT_SUPPORTED (#2532)

Solution: when this happens, select the best algorithms again — but this time bypass the cache
This commit is contained in:
Juha Reunanen 2022-03-03 14:18:35 +02:00 committed by GitHub
parent 5f7e19b785
commit bf427f56c7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 56 additions and 34 deletions

View File

@ -787,10 +787,11 @@ namespace dlib
void tensor_conv::
select_best_algorithms (
const tensor& data,
const tensor_descriptor& dest_desc
const tensor_descriptor& dest_desc,
allow_cache_use allow_cache_use
)
{
// Calling the cuDNN "find the best algorithm" functions are really slow. So we keep a
// Calling the cuDNN "find the best algorithm" functions is really slow. So we keep a
// cache that tells us what method was best for a particular configuration.
thread_local std::map<std::tuple<int,int,int,int,long,long>,
std::tuple<int,int,int>> config_to_algo_cache;
@ -799,7 +800,7 @@ namespace dlib
// the cache.
const auto cache_key = std::make_tuple(stride_y, stride_x, padding_y, padding_x, filters_nr, filters_nc);
const auto iter = config_to_algo_cache.find(cache_key);
if (iter != config_to_algo_cache.end())
if (iter != config_to_algo_cache.end() && allow_cache_use == allow_cache_use::yes)
{
std::tie(forward_algo, backward_data_algo, backward_filters_algo) = iter->second;
return;
@ -933,6 +934,40 @@ namespace dlib
config_to_algo_cache[cache_key] = std::make_tuple(forward_algo, backward_data_algo, backward_filters_algo);
}
void tensor_conv::
// Queries cuDNN for the scratch-workspace sizes required by the three
// convolution algorithms previously chosen by select_best_algorithms()
// (forward_algo, backward_data_algo, backward_filters_algo), and stores
// the results in the corresponding *_workspace_size_in_bytes members.
// NOTE(review): CHECK_CUDNN presumably throws dlib::cudnn_error on a
// non-success status — the caller relies on that to retry with a fresh
// (non-cached) algorithm selection when a cached choice turns out to be
// unsupported (CUDNN_STATUS_NOT_SUPPORTED).
update_convolution_data_workspace_sizes(
const tensor& data,
const tensor_descriptor& dest_desc
)
{
// Workspace needed by the forward convolution algorithm.
CHECK_CUDNN(cudnnGetConvolutionForwardWorkspaceSize(
context(),
descriptor(data),
(const cudnnFilterDescriptor_t)filter_handle,
(const cudnnConvolutionDescriptor_t)conv_handle,
descriptor(dest_desc),
(cudnnConvolutionFwdAlgo_t)forward_algo,
&forward_workspace_size_in_bytes));
// Workspace needed to compute gradients w.r.t. the input data.
// Note the argument order: dest_desc is the gradient source here,
// and `data` describes the output (the data gradient).
CHECK_CUDNN(cudnnGetConvolutionBackwardDataWorkspaceSize(
context(),
(const cudnnFilterDescriptor_t)filter_handle,
descriptor(dest_desc),
(const cudnnConvolutionDescriptor_t)conv_handle,
descriptor(data),
(cudnnConvolutionBwdDataAlgo_t)backward_data_algo,
&backward_data_workspace_size_in_bytes));
// Workspace needed to compute gradients w.r.t. the filters.
CHECK_CUDNN(cudnnGetConvolutionBackwardFilterWorkspaceSize(
context(),
descriptor(data),
descriptor(dest_desc),
(const cudnnConvolutionDescriptor_t)conv_handle,
(const cudnnFilterDescriptor_t)filter_handle,
(cudnnConvolutionBwdFilterAlgo_t)backward_filters_algo,
&backward_filters_workspace_size_in_bytes));
}
void tensor_conv::
setup(
const tensor& data,
@ -1021,36 +1056,18 @@ namespace dlib
tensor_descriptor dest_desc;
dest_desc.set_size(out_num_samples,out_k,out_nr,out_nc);
select_best_algorithms(data, dest_desc);
CHECK_CUDNN(cudnnGetConvolutionForwardWorkspaceSize(
context(),
descriptor(data),
(const cudnnFilterDescriptor_t)filter_handle,
(const cudnnConvolutionDescriptor_t)conv_handle,
descriptor(dest_desc),
(cudnnConvolutionFwdAlgo_t)forward_algo,
&forward_workspace_size_in_bytes));
CHECK_CUDNN(cudnnGetConvolutionBackwardDataWorkspaceSize(
context(),
(const cudnnFilterDescriptor_t)filter_handle,
descriptor(dest_desc),
(const cudnnConvolutionDescriptor_t)conv_handle,
descriptor(data),
(cudnnConvolutionBwdDataAlgo_t)backward_data_algo,
&backward_data_workspace_size_in_bytes));
CHECK_CUDNN(cudnnGetConvolutionBackwardFilterWorkspaceSize(
context(),
descriptor(data),
descriptor(dest_desc),
(const cudnnConvolutionDescriptor_t)conv_handle,
(const cudnnFilterDescriptor_t)filter_handle,
(cudnnConvolutionBwdFilterAlgo_t)backward_filters_algo,
&backward_filters_workspace_size_in_bytes));
try
{
select_best_algorithms(data, dest_desc, allow_cache_use::yes);
update_convolution_data_workspace_sizes(data, dest_desc);
}
catch (dlib::cudnn_error&)
{
// Sometimes the values stored in `config_to_algo_cache` do not quite work -
// so let's get a fresh estimate, instead of using a cached value.
select_best_algorithms(data, dest_desc, allow_cache_use::no);
update_convolution_data_workspace_sizes(data, dest_desc);
}
}
catch(...)
{

View File

@ -254,15 +254,20 @@ namespace dlib
int out_nr;
int out_nc;
enum class allow_cache_use { no, yes };
// sets the three _algo fields.
void select_best_algorithms(const tensor& data, const tensor_descriptor& dest_desc);
void select_best_algorithms(const tensor& data, const tensor_descriptor& dest_desc, allow_cache_use allow_cache_use);
int forward_algo;
int backward_data_algo;
int backward_filters_algo;
// sets the three _workspace_size_in_bytes fields.
void update_convolution_data_workspace_sizes(const tensor& data, const tensor_descriptor& dest_desc);
size_t forward_workspace_size_in_bytes;
size_t backward_data_workspace_size_in_bytes;
size_t backward_filters_workspace_size_in_bytes;
cuda_data_void_ptr forward_workspace;
cuda_data_void_ptr backward_data_workspace;
cuda_data_void_ptr backward_filters_workspace;