mirror of
https://github.com/davisking/dlib.git
synced 2024-11-01 10:14:53 +08:00
More cleanup
This commit is contained in:
parent
0ce6ed5beb
commit
bb8e0bc8b7
@ -59,6 +59,55 @@ namespace dlib
|
||||
}
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------------------
|
||||
|
||||
class cudnn_device_buffer
|
||||
{
|
||||
public:
|
||||
// not copyable
|
||||
cudnn_device_buffer(const cudnn_device_buffer&) = delete;
|
||||
cudnn_device_buffer& operator=(const cudnn_device_buffer&) = delete;
|
||||
|
||||
cudnn_device_buffer()
|
||||
{
|
||||
buffers.resize(16);
|
||||
}
|
||||
~cudnn_device_buffer()
|
||||
{
|
||||
}
|
||||
|
||||
std::shared_ptr<resizable_cuda_buffer> get_buffer (
|
||||
)
|
||||
{
|
||||
int new_device_id;
|
||||
CHECK_CUDA(cudaGetDevice(&new_device_id));
|
||||
// make room for more devices if needed
|
||||
if (new_device_id >= (long)buffers.size())
|
||||
buffers.resize(new_device_id+16);
|
||||
|
||||
// If we don't have a buffer already for this device then make one
|
||||
std::shared_ptr<resizable_cuda_buffer> buff = buffers[new_device_id].lock();
|
||||
if (!buff)
|
||||
{
|
||||
buff = std::make_shared<resizable_cuda_buffer>();
|
||||
buffers[new_device_id] = buff;
|
||||
}
|
||||
|
||||
// Finally, return the buffer for the current device
|
||||
return buff;
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
std::vector<std::weak_ptr<resizable_cuda_buffer>> buffers;
|
||||
};
|
||||
|
||||
std::shared_ptr<resizable_cuda_buffer> device_global_buffer()
|
||||
{
|
||||
thread_local cudnn_device_buffer buffer;
|
||||
return buffer.get_buffer();
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------------------
|
||||
|
||||
}
|
||||
|
@ -179,6 +179,26 @@ namespace dlib
|
||||
cuda_data_void_ptr buffer;
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
std::shared_ptr<resizable_cuda_buffer> device_global_buffer(
);
/*!
    ensures
        - Returns a pointer to a globally shared CUDA memory buffer on the
          currently selected CUDA device.  The buffer is also thread local, so
          each host thread will get its own buffer.  You can use this global
          buffer as scratch space for CUDA computations that all take place on
          the default stream.  Using it in this way ensures that there aren't
          any race conditions involving the use of the buffer.
        - The global buffer is deallocated once all references to it are
          destructed.  It will be reallocated as required.  So if you want to
          avoid these reallocations then hold a copy of the shared_ptr returned
          by this function.
!*/
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -117,55 +117,6 @@ namespace dlib
|
||||
thread_local cudnn_context c;
|
||||
return c.get_handle();
|
||||
}
|
||||
// ------------------------------------------------------------------------------------
|
||||
|
||||
class cudnn_device_buffer
|
||||
{
|
||||
public:
|
||||
// not copyable
|
||||
cudnn_device_buffer(const cudnn_device_buffer&) = delete;
|
||||
cudnn_device_buffer& operator=(const cudnn_device_buffer&) = delete;
|
||||
|
||||
cudnn_device_buffer()
|
||||
{
|
||||
buffers.resize(16);
|
||||
}
|
||||
~cudnn_device_buffer()
|
||||
{
|
||||
}
|
||||
|
||||
std::shared_ptr<resizable_cuda_buffer> get_buffer (
|
||||
)
|
||||
{
|
||||
int new_device_id;
|
||||
CHECK_CUDA(cudaGetDevice(&new_device_id));
|
||||
// make room for more devices if needed
|
||||
if (new_device_id >= (long)buffers.size())
|
||||
buffers.resize(new_device_id+16);
|
||||
|
||||
// If we don't have a buffer already for this device then make one
|
||||
std::shared_ptr<resizable_cuda_buffer> buff = buffers[new_device_id].lock();
|
||||
if (!buff)
|
||||
{
|
||||
buff = std::make_shared<resizable_cuda_buffer>();
|
||||
buffers[new_device_id] = buff;
|
||||
}
|
||||
|
||||
// Finally, return the buffer for the current device
|
||||
return buff;
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
std::vector<std::weak_ptr<resizable_cuda_buffer>> buffers;
|
||||
};
|
||||
|
||||
std::shared_ptr<resizable_cuda_buffer> device_global_buffer()
|
||||
{
|
||||
thread_local cudnn_device_buffer buffer;
|
||||
return buffer.get_buffer();
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------------------
|
||||
|
||||
class cudnn_activation_descriptor
|
||||
|
@ -17,24 +17,6 @@ namespace dlib
|
||||
namespace cuda
|
||||
{
|
||||
|
||||
// ----------------------------------------------------------------------------------------
|
||||
|
||||
std::shared_ptr<resizable_cuda_buffer> device_global_buffer(
);
/*!
    ensures
        - Returns a pointer to a globally shared CUDA memory buffer on the
          currently selected CUDA device.  The buffer is also thread local, so
          each host thread will get its own buffer.  You can use this global
          buffer as scratch space for CUDA computations that all take place on
          the default stream.  Using it in this way ensures that there aren't
          any race conditions involving the use of the buffer.
        - The global buffer is deallocated once all references to it are
          destructed.  It will be reallocated as required.  So if you want to
          avoid these reallocations then hold a copy of the shared_ptr returned
          by this function.
!*/
|
||||
|
||||
// -----------------------------------------------------------------------------------
|
||||
|
||||
class tensor_descriptor
|
||||
|
Loading…
Reference in New Issue
Block a user