Cleanup of cuda code.

This commit is contained in:
Davis King 2018-04-04 10:55:41 -04:00
parent 8073f4b164
commit 0ce6ed5beb
4 changed files with 26 additions and 4 deletions

View File

@@ -160,8 +160,12 @@ namespace dlib
cuda_data_void_ptr get(size_t size)
/*!
ensures
- This object will return the buffer of requested size of larger
- This object will return the buffer of requested size or larger.
- buffer.size() >= size
- Client code should not hold the returned cuda_data_void_ptr for long
durations, but instead should call get() whenever the buffer is
needed. Doing so ensures that multiple buffers are not kept around
in the event of a resize.
!*/
{
if (buffer.size() < size)

View File

@@ -160,12 +160,12 @@ namespace dlib
std::vector<std::weak_ptr<resizable_cuda_buffer>> buffers;
};
static std::shared_ptr<resizable_cuda_buffer> device_global_buffer()
std::shared_ptr<resizable_cuda_buffer> device_global_buffer()
{
thread_local cudnn_device_buffer buffer;
return buffer.get_buffer();
}
// ------------------------------------------------------------------------------------
class cudnn_activation_descriptor

View File

@@ -17,6 +17,24 @@ namespace dlib
namespace cuda
{
// ----------------------------------------------------------------------------------------
std::shared_ptr<resizable_cuda_buffer> device_global_buffer(
);
/*!
ensures
- Returns a pointer to a globally shared CUDA memory buffer on the
currently selected CUDA device. The buffer is also thread local, so
each host thread will get its own buffer. You can use this global buffer
as scratch space for CUDA computations that all take place on the default
stream. Using it in this way ensures that there aren't any race conditions
involving the use of the buffer.
- The global buffer is deallocated once all references to it are
destructed. It will be reallocated as required, so if you want to avoid
these reallocations, hold a copy of the shared_ptr returned by this
function.
!*/
// -----------------------------------------------------------------------------------
class tensor_descriptor

View File

@@ -366,7 +366,7 @@ namespace dlib
follows:
ensures
- calling clean() Causes this object to forget about everything except its
- calling clean() causes this object to forget about everything except its
parameters. This is useful if your layer caches information between
forward and backward passes and you want to clean out that cache
information before saving the network to disk.