updated example

2024-11-01 10:14:53 +08:00 · 2013-03-03 12:05:14 -05:00 · 2013-03-03 12:05:14 -05:00 · 649ed2f160
commit 649ed2f160
parent 1f8f1261e3
1 changed files with 204 additions and 43 deletions
--- a/examples/parallel_for_ex.cpp
+++ b/examples/parallel_for_ex.cpp
@@ -1,10 +1,15 @@
 // The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
 /*

-    This is an example illustrating the use of the parallel for loop
-    tools from the dlib C++ Library.
-
+    This is an example illustrating the use of the parallel for loop tools from the dlib
+    C++ Library.

+    Normally, a for loop executes the body of the loop in a serial manner.  This means
+    that, for example, if it takes 1 second to execute the body of the loop and the loop
+    body needs to execute 10 times then it will take 10 seconds to execute the entire loop.
+    However, on modern multi-core computers we have the opportunity to speed this up by
+    executing multiple steps of a for loop in parallel.  This example program will walk you
+    through a few examples showing how to do just that.  
 */


@@ -16,56 +21,212 @@
 using namespace dlib;
 using namespace std;

-struct function_object
+// ----------------------------------------------------------------------------------------
+
+void print(const std::vector<int>& vect)
 {
-    function_object( std::vector<int>& vect ) : vect1(vect) {}
-
-    std::vector<int>& vect1;
-
-    void operator() (long i) const
+    for (unsigned long i = 0; i < vect.size(); ++i)
    {
-        vect1[i] = i;
-        dlib::sleep(100);
+        cout << vect[i] << endl;
    }
-};
+    cout << "\n**************************************\n";
+}
+
+// ----------------------------------------------------------------------------------------
+
+void example_using_regular_non_parallel_loops();
+void example_using_lambda_functions();
+void example_without_using_lambda_functions();
+
+// ----------------------------------------------------------------------------------------

 int main()
 {
+    // We have 3 examples, each contained in a separate function.  Each example performs
+    // exactly the same computation; however, the last two do so using parallel for
+    // loops.  So the first example is here to show you what we are doing in terms of
+    // classical non-parallel for loops.  Then the next two examples will illustrate two
+    // ways to parallelize the for loops in C++.  The first, and simplest way, uses
+    // C++11 lambda functions.  Since lambda functions are a relatively recent addition to
+    // C++ we also show how to write parallel for loops without using lambda functions.
+    // This way, users who don't yet have access to a current C++ compiler can learn to
+    // write parallel for loops as well.

-    const unsigned long num_threads = 4;
-    
-    std::vector<int> vect1(10);
-    parallel_for(num_threads, 0, vect1.size(), function_object(vect1));
-
-    for (unsigned long i = 0; i < vect1.size(); ++i)
-        cout << vect1[i] << endl;
-    cout << "\n**************************************\n";
-
-    vect1.assign(10, -1);
-    parallel_for(num_threads, 1, 5, function_object(vect1));
-    for (unsigned long i = 0; i < vect1.size(); ++i)
-        cout << vect1[i] << endl;
-    cout << "\n**************************************\n";
-
-
-// uncomment this line if your compiler supports the new C++0x lambda functions
-#define COMPILER_SUPPORTS_CPP0X_LAMBDA_FUNCTIONS
-#ifdef COMPILER_SUPPORTS_CPP0X_LAMBDA_FUNCTIONS
-
-    std::vector<int> vect2(10);
-    parallel_for(num_threads, 0, vect2.size(), [&](long i){
-        vect2[i] = i;
-        dlib::sleep(100);
-    });
-
-    for (unsigned long i = 0; i < vect2.size(); ++i)
-        cout << vect2[i] << endl;
-
-#endif
-
+    example_using_regular_non_parallel_loops();
+    example_using_lambda_functions();
+    example_without_using_lambda_functions();
 }

+// ----------------------------------------------------------------------------------------
+
+void example_using_regular_non_parallel_loops()
+{
+    cout << "\nExample using regular non-parallel for loops\n" << endl;
+
+    std::vector<int> vect;
+
+    // put 10 elements into vect which are all equal to -1
+    vect.assign(10, -1);
+
+    // Now set each element equal to its index value.  We put a sleep call in here so that
+    // when we run the same thing with a parallel for loop later on you will be able to
+    // observe the speedup. 
+    for (unsigned long i = 0; i < vect.size(); ++i)
+    {
+        vect[i] = i;
+        dlib::sleep(1000); // sleep for 1 second
+    }
+    print(vect);



+    vect.assign(10, -1);
+    for (unsigned long i = 1; i < 5; ++i)
+    {
+        vect[i] = i;
+        dlib::sleep(1000);
+    }
+    print(vect);
+
+
+
+    int sum = 0;
+    vect.assign(10, 2);
+    for (unsigned long i = 0; i < vect.size(); ++i)
+    {
+        dlib::sleep(1000);
+        sum += vect[i];
+    }
+
+    cout << "sum: "<< sum << endl;
+}
+
+// ----------------------------------------------------------------------------------------
+
+void example_using_lambda_functions()
+{
+// Change the next line to #if 1 if your compiler supports the new C++11 lambda functions. 
+#if 0
+    cout << "\nExample using parallel for loops\n" << endl;
+
+    // This variable should be set to the number of processing cores on your computer since
+    // it determines the amount of parallelism in the for loop.  
+    const unsigned long num_threads = 10;
+
+    std::vector<int> vect;
+
+    vect.assign(10, -1);
+    parallel_for(num_threads, 0, vect.size(), [&](long i){
+        // The i variable is the loop counter as in a normal for loop.  So we simply need
+        // to place the body of the for loop right here and we get the same thing.  The
+        // range for the for loop is determined by the 2nd and 3rd arguments to
+        // parallel_for().
+        vect[i] = i;
+        dlib::sleep(1000);
+    });
+    print(vect);
+
+
+    vect.assign(10, -1);
+    parallel_for(num_threads, 1, 5, [&](long i){
+        vect[i] = i;
+        dlib::sleep(1000);
+    });
+    print(vect);
+
+
+    // Note that things become a little more complex if the loop bodies are not totally
+    // independent.  In the first two cases each iteration of the loop touched different
+    // memory locations, so we didn't need to use any kind of thread synchronization.
+    // However, in the summing loop we need to add some synchronization to protect the sum
+    // variable.  This is easily accomplished by creating a mutex and locking it before
+    // adding to sum.  More generally, you must ensure that the bodies of your parallel for
+    // loops are thread safe using whatever means is appropriate for your code.  Since a
+    // parallel for loop is implemented using threads, all the usual techniques for
+    // ensuring thread safety can be used. 
+    int sum = 0;
+    mutex m;
+    vect.assign(10, 2);
+    parallel_for(num_threads, 0, vect.size(), [&](long i){
+        // The sleep statements still execute in parallel.  
+        dlib::sleep(1000);
+
+        // Lock the m mutex.  The auto_mutex will automatically unlock at the closing }.
+        // This will ensure only one thread can execute the sum += vect[i] statement at
+        // a time.
+        auto_mutex lock(m);
+        sum += vect[i];
+    });
+
+    cout << "sum: "<< sum << endl;
+
+#endif
+}
+
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+//    The rest of this example program shows how to create parallel for loops without
+//    using lambda functions.  So the first thing we do is explicitly create function
+//    objects equivalent to the lambda functions we used.  Then we call parallel_for() 
+//    as done above.
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+
+struct function_object
+{
+    function_object( std::vector<int>& vect ) : vect(vect) {}
+
+    std::vector<int>& vect;
+
+    void operator() (long i) const
+    {
+        vect[i] = i;
+        dlib::sleep(1000); 
+    }
+};
+
+struct function_object_sum
+{
+    function_object_sum( const std::vector<int>& vect, int& sum_ ) : vect(vect), sum(sum_) {}
+
+    const std::vector<int>& vect;
+    int& sum;
+    mutex m;
+
+    void operator() (long i) const
+    {
+        dlib::sleep(1000); 
+        auto_mutex lock(m);
+        sum += vect[i];
+    }
+};
+
+void example_without_using_lambda_functions()
+{
+    // Again, note that this function does exactly the same thing as
+    // example_using_regular_non_parallel_loops() and example_using_lambda_functions().
+
+    cout << "\nExample using parallel for loops and no lambda functions\n" << endl;
+
+    const unsigned long num_threads = 10;
+    std::vector<int> vect;
+
+
+    vect.assign(10, -1); 
+    parallel_for(num_threads, 0, vect.size(), function_object(vect));
+    print(vect);
+
+
+    vect.assign(10, -1);
+    parallel_for(num_threads, 1, 5, function_object(vect));
+    print(vect);
+
+
+    int sum = 0;
+    vect.assign(10, 2);
+    parallel_for(num_threads, 0, vect.size(), function_object_sum(vect, sum));
+    cout << "sum: " << sum << endl;
+}
+
+// ----------------------------------------------------------------------------------------