mirror of
https://github.com/davisking/dlib.git
synced 2024-11-01 10:14:53 +08:00
Problem: Error while calling cudnnGetConvolutionForwardWorkspaceSize( context(), descriptor(data), (const cudnnFilterDescriptor_t)filter_handle, (const cudnnConvolutionDescriptor_t)conv_handle, descriptor(dest_desc), (cudnnConvolutionFwdAlgo_t)forward_algo, &forward_workspace_size_in_bytes) in file C:\a\2\s\3rdparty\dlib\dlib\cuda\cudnn_dlibapi.cpp:1029. code: 9, reason: CUDNN_STATUS_NOT_SUPPORTED
(#2532)
Solution: when this error occurs, select the best algorithms again — but this time bypassing the algorithm cache, so a fresh (working) choice is made instead of reusing the cached one.
This commit is contained in:
parent
5f7e19b785
commit
bf427f56c7
@ -787,10 +787,11 @@ namespace dlib
|
||||
void tensor_conv::
|
||||
select_best_algorithms (
|
||||
const tensor& data,
|
||||
const tensor_descriptor& dest_desc
|
||||
const tensor_descriptor& dest_desc,
|
||||
allow_cache_use allow_cache_use
|
||||
)
|
||||
{
|
||||
// Calling the cuDNN "find the best algorithm" functions are really slow. So we keep a
|
||||
// Calling the cuDNN "find the best algorithm" functions is really slow. So we keep a
|
||||
// cache that tells us what method was best for a particular configuration.
|
||||
thread_local std::map<std::tuple<int,int,int,int,long,long>,
|
||||
std::tuple<int,int,int>> config_to_algo_cache;
|
||||
@ -799,7 +800,7 @@ namespace dlib
|
||||
// the cache.
|
||||
const auto cache_key = std::make_tuple(stride_y, stride_x, padding_y, padding_x, filters_nr, filters_nc);
|
||||
const auto iter = config_to_algo_cache.find(cache_key);
|
||||
if (iter != config_to_algo_cache.end())
|
||||
if (iter != config_to_algo_cache.end() && allow_cache_use == allow_cache_use::yes)
|
||||
{
|
||||
std::tie(forward_algo, backward_data_algo, backward_filters_algo) = iter->second;
|
||||
return;
|
||||
@ -933,6 +934,40 @@ namespace dlib
|
||||
config_to_algo_cache[cache_key] = std::make_tuple(forward_algo, backward_data_algo, backward_filters_algo);
|
||||
}
|
||||
|
||||
void tensor_conv::
|
||||
update_convolution_data_workspace_sizes(
|
||||
const tensor& data,
|
||||
const tensor_descriptor& dest_desc
|
||||
)
|
||||
{
|
||||
CHECK_CUDNN(cudnnGetConvolutionForwardWorkspaceSize(
|
||||
context(),
|
||||
descriptor(data),
|
||||
(const cudnnFilterDescriptor_t)filter_handle,
|
||||
(const cudnnConvolutionDescriptor_t)conv_handle,
|
||||
descriptor(dest_desc),
|
||||
(cudnnConvolutionFwdAlgo_t)forward_algo,
|
||||
&forward_workspace_size_in_bytes));
|
||||
|
||||
CHECK_CUDNN(cudnnGetConvolutionBackwardDataWorkspaceSize(
|
||||
context(),
|
||||
(const cudnnFilterDescriptor_t)filter_handle,
|
||||
descriptor(dest_desc),
|
||||
(const cudnnConvolutionDescriptor_t)conv_handle,
|
||||
descriptor(data),
|
||||
(cudnnConvolutionBwdDataAlgo_t)backward_data_algo,
|
||||
&backward_data_workspace_size_in_bytes));
|
||||
|
||||
CHECK_CUDNN(cudnnGetConvolutionBackwardFilterWorkspaceSize(
|
||||
context(),
|
||||
descriptor(data),
|
||||
descriptor(dest_desc),
|
||||
(const cudnnConvolutionDescriptor_t)conv_handle,
|
||||
(const cudnnFilterDescriptor_t)filter_handle,
|
||||
(cudnnConvolutionBwdFilterAlgo_t)backward_filters_algo,
|
||||
&backward_filters_workspace_size_in_bytes));
|
||||
}
|
||||
|
||||
void tensor_conv::
|
||||
setup(
|
||||
const tensor& data,
|
||||
@ -1021,36 +1056,18 @@ namespace dlib
|
||||
tensor_descriptor dest_desc;
|
||||
dest_desc.set_size(out_num_samples,out_k,out_nr,out_nc);
|
||||
|
||||
select_best_algorithms(data, dest_desc);
|
||||
|
||||
CHECK_CUDNN(cudnnGetConvolutionForwardWorkspaceSize(
|
||||
context(),
|
||||
descriptor(data),
|
||||
(const cudnnFilterDescriptor_t)filter_handle,
|
||||
(const cudnnConvolutionDescriptor_t)conv_handle,
|
||||
descriptor(dest_desc),
|
||||
(cudnnConvolutionFwdAlgo_t)forward_algo,
|
||||
&forward_workspace_size_in_bytes));
|
||||
|
||||
|
||||
CHECK_CUDNN(cudnnGetConvolutionBackwardDataWorkspaceSize(
|
||||
context(),
|
||||
(const cudnnFilterDescriptor_t)filter_handle,
|
||||
descriptor(dest_desc),
|
||||
(const cudnnConvolutionDescriptor_t)conv_handle,
|
||||
descriptor(data),
|
||||
(cudnnConvolutionBwdDataAlgo_t)backward_data_algo,
|
||||
&backward_data_workspace_size_in_bytes));
|
||||
|
||||
|
||||
CHECK_CUDNN(cudnnGetConvolutionBackwardFilterWorkspaceSize(
|
||||
context(),
|
||||
descriptor(data),
|
||||
descriptor(dest_desc),
|
||||
(const cudnnConvolutionDescriptor_t)conv_handle,
|
||||
(const cudnnFilterDescriptor_t)filter_handle,
|
||||
(cudnnConvolutionBwdFilterAlgo_t)backward_filters_algo,
|
||||
&backward_filters_workspace_size_in_bytes));
|
||||
try
|
||||
{
|
||||
select_best_algorithms(data, dest_desc, allow_cache_use::yes);
|
||||
update_convolution_data_workspace_sizes(data, dest_desc);
|
||||
}
|
||||
catch (dlib::cudnn_error&)
|
||||
{
|
||||
// Sometimes the values stored in `config_to_algo_cache` do not quite work -
|
||||
// so let's get a fresh estimate, instead of using a cached value.
|
||||
select_best_algorithms(data, dest_desc, allow_cache_use::no);
|
||||
update_convolution_data_workspace_sizes(data, dest_desc);
|
||||
}
|
||||
}
|
||||
catch(...)
|
||||
{
|
||||
|
@ -254,15 +254,20 @@ namespace dlib
|
||||
int out_nr;
|
||||
int out_nc;
|
||||
|
||||
enum class allow_cache_use { no, yes };
|
||||
|
||||
// sets the three _algo fields.
|
||||
void select_best_algorithms(const tensor& data, const tensor_descriptor& dest_desc);
|
||||
void select_best_algorithms(const tensor& data, const tensor_descriptor& dest_desc, allow_cache_use allow_cache_use);
|
||||
int forward_algo;
|
||||
int backward_data_algo;
|
||||
int backward_filters_algo;
|
||||
|
||||
// sets the three _workspace_size_in_bytes fields.
|
||||
void update_convolution_data_workspace_sizes(const tensor& data, const tensor_descriptor& dest_desc);
|
||||
size_t forward_workspace_size_in_bytes;
|
||||
size_t backward_data_workspace_size_in_bytes;
|
||||
size_t backward_filters_workspace_size_in_bytes;
|
||||
|
||||
cuda_data_void_ptr forward_workspace;
|
||||
cuda_data_void_ptr backward_data_workspace;
|
||||
cuda_data_void_ptr backward_filters_workspace;
|
||||
|
Loading…
Reference in New Issue
Block a user