mirror of
https://github.com/davisking/dlib.git
synced 2024-11-01 10:14:53 +08:00
More cleanup
This commit is contained in:
parent
0ce6ed5beb
commit
bb8e0bc8b7
@ -59,6 +59,55 @@ namespace dlib
|
||||
}
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------------------
|
||||
|
||||
class cudnn_device_buffer
|
||||
{
|
||||
public:
|
||||
// not copyable
|
||||
cudnn_device_buffer(const cudnn_device_buffer&) = delete;
|
||||
cudnn_device_buffer& operator=(const cudnn_device_buffer&) = delete;
|
||||
|
||||
cudnn_device_buffer()
|
||||
{
|
||||
buffers.resize(16);
|
||||
}
|
||||
~cudnn_device_buffer()
|
||||
{
|
||||
}
|
||||
|
||||
std::shared_ptr<resizable_cuda_buffer> get_buffer (
|
||||
)
|
||||
{
|
||||
int new_device_id;
|
||||
CHECK_CUDA(cudaGetDevice(&new_device_id));
|
||||
// make room for more devices if needed
|
||||
if (new_device_id >= (long)buffers.size())
|
||||
buffers.resize(new_device_id+16);
|
||||
|
||||
// If we don't have a buffer already for this device then make one
|
||||
std::shared_ptr<resizable_cuda_buffer> buff = buffers[new_device_id].lock();
|
||||
if (!buff)
|
||||
{
|
||||
buff = std::make_shared<resizable_cuda_buffer>();
|
||||
buffers[new_device_id] = buff;
|
||||
}
|
||||
|
||||
// Finally, return the buffer for the current device
|
||||
return buff;
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
std::vector<std::weak_ptr<resizable_cuda_buffer>> buffers;
|
||||
};
|
||||
|
||||
std::shared_ptr<resizable_cuda_buffer> device_global_buffer()
|
||||
{
|
||||
thread_local cudnn_device_buffer buffer;
|
||||
return buffer.get_buffer();
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------------------
|
||||
|
||||
}
|
||||
|
@ -179,6 +179,26 @@ namespace dlib
|
||||
cuda_data_void_ptr buffer;
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
std::shared_ptr<resizable_cuda_buffer> device_global_buffer(
);
/*!
    ensures
        - Returns a pointer to a globally shared CUDA memory buffer on the
          currently selected CUDA device.  The buffer is also thread local, so
          each host thread will get its own buffer.  You can use this global
          buffer as scratch space for CUDA computations that all take place on
          the default stream.  Using it in this way ensures that there aren't
          any race conditions involving the use of the buffer.
        - The global buffer is deallocated once all references to it are
          destructed.  It will be reallocated as required.  So if you want to
          avoid these reallocations then hold a copy of the shared_ptr returned
          by this function.
!*/
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -117,55 +117,6 @@ namespace dlib
|
||||
thread_local cudnn_context c;
|
||||
return c.get_handle();
|
||||
}
|
||||
// ------------------------------------------------------------------------------------
|
||||
|
||||
class cudnn_device_buffer
|
||||
{
|
||||
public:
|
||||
// not copyable
|
||||
cudnn_device_buffer(const cudnn_device_buffer&) = delete;
|
||||
cudnn_device_buffer& operator=(const cudnn_device_buffer&) = delete;
|
||||
|
||||
cudnn_device_buffer()
|
||||
{
|
||||
buffers.resize(16);
|
||||
}
|
||||
~cudnn_device_buffer()
|
||||
{
|
||||
}
|
||||
|
||||
std::shared_ptr<resizable_cuda_buffer> get_buffer (
|
||||
)
|
||||
{
|
||||
int new_device_id;
|
||||
CHECK_CUDA(cudaGetDevice(&new_device_id));
|
||||
// make room for more devices if needed
|
||||
if (new_device_id >= (long)buffers.size())
|
||||
buffers.resize(new_device_id+16);
|
||||
|
||||
// If we don't have a buffer already for this device then make one
|
||||
std::shared_ptr<resizable_cuda_buffer> buff = buffers[new_device_id].lock();
|
||||
if (!buff)
|
||||
{
|
||||
buff = std::make_shared<resizable_cuda_buffer>();
|
||||
buffers[new_device_id] = buff;
|
||||
}
|
||||
|
||||
// Finally, return the buffer for the current device
|
||||
return buff;
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
std::vector<std::weak_ptr<resizable_cuda_buffer>> buffers;
|
||||
};
|
||||
|
||||
std::shared_ptr<resizable_cuda_buffer> device_global_buffer()
|
||||
{
|
||||
thread_local cudnn_device_buffer buffer;
|
||||
return buffer.get_buffer();
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------------------
|
||||
|
||||
class cudnn_activation_descriptor
|
||||
|
@ -17,24 +17,6 @@ namespace dlib
|
||||
namespace cuda
|
||||
{
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
std::shared_ptr<resizable_cuda_buffer> device_global_buffer(
);
/*!
    ensures
        - Returns a pointer to a globally shared CUDA memory buffer on the
          currently selected CUDA device.  The buffer is also thread local, so
          each host thread will get its own buffer.  You can use this global
          buffer as scratch space for CUDA computations that all take place on
          the default stream.  Using it in this way ensures that there aren't
          any race conditions involving the use of the buffer.
        - The global buffer is deallocated once all references to it are
          destructed.  It will be reallocated as required.  So if you want to
          avoid these reallocations then hold a copy of the shared_ptr returned
          by this function.
!*/
|
||||
|
||||
// -----------------------------------------------------------------------------------
|
||||
|
||||
class tensor_descriptor
|
||||
|
Loading…
Reference in New Issue
Block a user