Cleanup of cuda code.

This commit is contained in:
Davis King 2018-04-04 10:55:41 -04:00
parent 8073f4b164
commit 0ce6ed5beb
4 changed files with 26 additions and 4 deletions

View File

@ -160,8 +160,12 @@ namespace dlib
cuda_data_void_ptr get(size_t size)
/*!
ensures
- This object will return the buffer of requested size or larger.
- buffer.size() >= size
- Client code should not hold the returned cuda_data_void_ptr for long
durations, but instead should call get() whenever the buffer is
needed. Doing so ensures that multiple buffers are not kept around
in the event of a resize.
!*/
{
if (buffer.size() < size)

View File

@ -160,12 +160,12 @@ namespace dlib
std::vector<std::weak_ptr<resizable_cuda_buffer>> buffers;
};
std::shared_ptr<resizable_cuda_buffer> device_global_buffer()
static std::shared_ptr<resizable_cuda_buffer> device_global_buffer()
{
thread_local cudnn_device_buffer buffer;
return buffer.get_buffer();
}
// ------------------------------------------------------------------------------------
class cudnn_activation_descriptor

View File

@ -17,6 +17,24 @@ namespace dlib
namespace cuda
{
// ----------------------------------------------------------------------------------------
std::shared_ptr<resizable_cuda_buffer> device_global_buffer(
);
/*!
ensures
- Returns a pointer to a globally shared CUDA memory buffer on the
currently selected CUDA device. The buffer is also thread local. So
each host thread will get its own buffer. You can use this global buffer
as scratch space for CUDA computations that all take place on the default
stream. Using it in this way ensures that there aren't any race conditions
involving the use of the buffer.
- The global buffer is deallocated once all references to it are
destructed. It will be reallocated as required. So if you want to avoid
these reallocations then hold a copy of the shared_ptr returned by this
function.
!*/
// -----------------------------------------------------------------------------------
class tensor_descriptor

View File

@ -366,7 +366,7 @@ namespace dlib
follows:
ensures
- calling clean() causes this object to forget about everything except its
parameters. This is useful if your layer caches information between
forward and backward passes and you want to clean out that cache
information before saving the network to disk.