More cleanup

Davis King 2018-04-04 11:30:16 -04:00
parent 0ce6ed5beb
commit bb8e0bc8b7
4 changed files with 69 additions and 67 deletions


@@ -59,6 +59,55 @@ namespace dlib
            }
        }

    // ------------------------------------------------------------------------------------

        class cudnn_device_buffer
        {
        public:
            // not copyable
            cudnn_device_buffer(const cudnn_device_buffer&) = delete;
            cudnn_device_buffer& operator=(const cudnn_device_buffer&) = delete;

            cudnn_device_buffer()
            {
                buffers.resize(16);
            }
            ~cudnn_device_buffer()
            {
            }

            std::shared_ptr<resizable_cuda_buffer> get_buffer (
            )
            {
                int new_device_id;
                CHECK_CUDA(cudaGetDevice(&new_device_id));
                // make room for more devices if needed
                if (new_device_id >= (long)buffers.size())
                    buffers.resize(new_device_id+16);

                // If we don't have a buffer already for this device then make one
                std::shared_ptr<resizable_cuda_buffer> buff = buffers[new_device_id].lock();
                if (!buff)
                {
                    buff = std::make_shared<resizable_cuda_buffer>();
                    buffers[new_device_id] = buff;
                }

                // Finally, return the buffer for the current device
                return buff;
            }

        private:

            std::vector<std::weak_ptr<resizable_cuda_buffer>> buffers;
        };

        std::shared_ptr<resizable_cuda_buffer> device_global_buffer()
        {
            thread_local cudnn_device_buffer buffer;
            return buffer.get_buffer();
        }

    // ------------------------------------------------------------------------------------

    }
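Design note: because the cudnn_device_buffer instance is thread_local and it caches per-device buffers through weak_ptr, each host thread gets an independent scratch buffer per CUDA device, and a device's buffer is freed as soon as no caller holds the shared_ptr. A minimal usage sketch follows; it is not part of this commit, the caller name is hypothetical, and it assumes resizable_cuda_buffer::get(size) returns a cuda_data_void_ptr covering at least size bytes:

    // Hypothetical caller, for illustration only.
    void scale_on_device(size_t n)
    {
        // Get this thread's scratch buffer for the currently selected device.
        auto scratch = dlib::cuda::device_global_buffer();
        // Ask for enough workspace; get() grows the allocation only if needed
        // and reuses it otherwise (assumed interface of resizable_cuda_buffer).
        dlib::cuda::cuda_data_void_ptr tmp = scratch->get(n*sizeof(float));
        // ... launch default-stream kernels that use tmp as workspace ...
    }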


@@ -179,6 +179,26 @@ namespace dlib
            cuda_data_void_ptr buffer;
        };

    // ----------------------------------------------------------------------------------------

        std::shared_ptr<resizable_cuda_buffer> device_global_buffer(
        );
        /*!
            ensures
                - Returns a pointer to a globally shared CUDA memory buffer on the
                  currently selected CUDA device.  The buffer is also thread local.  So
                  each host thread will get its own buffer.  You can use this global buffer
                  as scratch space for CUDA computations that all take place on the default
                  stream.  Using it in this way ensures that there aren't any race conditions
                  involving the use of the buffer.
                - The global buffer is deallocated once all references to it are
                  destructed.  It will be reallocated as required.  So if you want to avoid
                  these reallocations then hold a copy of the shared_ptr returned by this
                  function.
        !*/

    // ----------------------------------------------------------------------------------------

    }
}
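To illustrate the second bullet of the spec, here is a sketch, again not part of this commit and with a hypothetical class name, of how a long-lived object might keep a copy of the returned shared_ptr so the buffer is not repeatedly freed and reallocated between calls:

    // Hypothetical class, for illustration only (assumes <memory> and the
    // header declaring device_global_buffer are included).
    class my_layer
    {
    public:
        void forward()
        {
            // Keeping buf as a member holds a reference, so the global buffer
            // stays allocated across calls instead of being torn down each time.
            buf = dlib::cuda::device_global_buffer();
            // ... use *buf as default-stream scratch space ...
        }
    private:
        std::shared_ptr<dlib::cuda::resizable_cuda_buffer> buf;
    };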


@@ -117,55 +117,6 @@ namespace dlib
            thread_local cudnn_context c;
            return c.get_handle();
        }

    // ------------------------------------------------------------------------------------

        class cudnn_device_buffer
        {
        public:
            // not copyable
            cudnn_device_buffer(const cudnn_device_buffer&) = delete;
            cudnn_device_buffer& operator=(const cudnn_device_buffer&) = delete;

            cudnn_device_buffer()
            {
                buffers.resize(16);
            }
            ~cudnn_device_buffer()
            {
            }

            std::shared_ptr<resizable_cuda_buffer> get_buffer (
            )
            {
                int new_device_id;
                CHECK_CUDA(cudaGetDevice(&new_device_id));
                // make room for more devices if needed
                if (new_device_id >= (long)buffers.size())
                    buffers.resize(new_device_id+16);

                // If we don't have a buffer already for this device then make one
                std::shared_ptr<resizable_cuda_buffer> buff = buffers[new_device_id].lock();
                if (!buff)
                {
                    buff = std::make_shared<resizable_cuda_buffer>();
                    buffers[new_device_id] = buff;
                }

                // Finally, return the buffer for the current device
                return buff;
            }

        private:

            std::vector<std::weak_ptr<resizable_cuda_buffer>> buffers;
        };

        std::shared_ptr<resizable_cuda_buffer> device_global_buffer()
        {
            thread_local cudnn_device_buffer buffer;
            return buffer.get_buffer();
        }

    // ------------------------------------------------------------------------------------

        class cudnn_activation_descriptor


@@ -17,24 +17,6 @@ namespace dlib
    namespace cuda
    {

    // ----------------------------------------------------------------------------------------

        std::shared_ptr<resizable_cuda_buffer> device_global_buffer(
        );
        /*!
            ensures
                - Returns a pointer to a globally shared CUDA memory buffer on the
                  currently selected CUDA device.  The buffer is also thread local.  So
                  each host thread will get its own buffer.  You can use this global buffer
                  as scratch space for CUDA computations that all take place on the default
                  stream.  Using it in this way ensures that there aren't any race conditions
                  involving the use of the buffer.
                - The global buffer is deallocated once all references to it are
                  destructed.  It will be reallocated as required.  So if you want to avoid
                  these reallocations then hold a copy of the shared_ptr returned by this
                  function.
        !*/

    // -----------------------------------------------------------------------------------

        class tensor_descriptor