From f9d3da11d05c94bccbc59fe9dc3bdf56c79653b2 Mon Sep 17 00:00:00 2001
From: Davis King <davis@dlib.net>
Date: Sat, 22 Feb 2014 15:52:28 -0500
Subject: [PATCH] Fleshed out example program.

---
 examples/learning_to_track_ex.cpp | 227 ++++++++++++++++++++++++++----
 1 file changed, 196 insertions(+), 31 deletions(-)

diff --git a/examples/learning_to_track_ex.cpp b/examples/learning_to_track_ex.cpp
index 41933039a..2d707fedf 100644
--- a/examples/learning_to_track_ex.cpp
+++ b/examples/learning_to_track_ex.cpp
@@ -1,6 +1,19 @@
 // The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
 /*
+    This example shows how you can use the dlib machine learning tools to make
+    an object tracker.  Depending on your tracking application there can be a
+    lot of components to a tracker.  However, a central element of many trackers
+    is the "detection to track" association step and this is the part of the
+    tracker we discuss in this example.  Therefore, in the code below we define
+    simple detection and track structures and then go through the steps needed
+    to learn, using training data, how to best associate detections to tracks.  
 
+    It should be noted that these tools are implemented essentially as wrappers
+    around the more general assignment learning tools present in dlib.  So if
+    you want to get an idea of how they work under the covers you should read
+    the assignment_learning_ex.cpp example program and its supporting
+    documentation.  However, to just use the learning-to-track tools you won't
+    need to understand these implementation details.
 */
 
 
@@ -13,21 +26,49 @@ using namespace dlib;
 
 // ----------------------------------------------------------------------------------------
 
-class detection
+struct detection 
 {
-
-public:
+    /*
+        When you use these tools you need to define two structures.  One represents a
+        detection and another a track.  In this example we call these structures detection
+        and track but you can name them however you like.  Moreover, You can put anything
+        you want in your detection structure.  The only requirement is that detection be
+        copyable and contain a public typedef named track_type that tells us the track type
+        meant for use with this detection object.
+    */
     typedef class track track_type;
 
+    
+    
+    // Again, note that this field is NOT REQUIRED by the dlib tools.  You can put whatever
+    // you want in your detection object.  Here we are including a column vector of
+    // measurements from the sensor that generated the detection.  In this example we don't
+    // have a real sensor so we will simulate a very basic one using a random number
+    // generator.   But the idea is that you should be able to use the contents of your
+    // detection to somehow tell which track it goes with.  So these numbers should contain
+    // some identifying information about the real world object that caused this detection.  
     matrix<double,0,1> measurements;
 };
 
-class track
-{
 
-public:
+struct track
+{
+    /*
+        Here we define our corresponding track object.  This object has more requirements
+        than the detection.  In particular, the dlib machine learning tools require it to
+        have the following elements:
+            - A typedef named feature_vector_type
+            - It should be copyable and default constructable
+            - The three functions: get_similarity_features(), update_track(), and propagate_track()
+    
+        Just like the detection object, you can also add any additional fields you like.
+        In this example we keep it simple and say that a track maintains only a copy of the
+        most recent sensor measurements it has seen and also a number telling us how long
+        it has been since the track was updated with a detection.
+    */
+
     // This type should be a dlib::matrix capable of storing column vectors or an
-    // unsorted sparse vector type as defined in dlib/svm/sparse_vector_abstract.h.
+    // unsorted sparse vector type such as std::vector<std::pair<unsigned long,double>>.
     typedef matrix<double,0,1> feature_vector_type;
 
     track()
@@ -35,25 +76,56 @@ public:
         time_since_last_association = 0;
     }
 
-    void get_similarity_features (
-        const detection& det,
-        feature_vector_type& feats
-    ) const
+    void get_similarity_features(const detection& det, feature_vector_type& feats) const
     {
+        /*
+            The get_similarity_features() function takes a detection and outputs a feature
+            vector that tells the machine learning tools how "similar" the detection is to
+            the track.  The idea here is to output a set of numbers (i.e. the contents of
+            feats) that can be used to decide if det should be associated with this track.
+            In this example we output the difference between the last sensor measurements
+            for this track and the detection's measurements.  This works since we expect
+            the sensor measurements to be relatively constant for each track because that's
+            how our simple sensor simulator in this example works.  However, in a real
+            world application it's likely to be much more complex.  But here we keep things
+            simple.
+
+            It should also be noted that get_similarity_features() must always output
+            feature vectors with the same number of dimensions.  Finally, the machine
+            learning tools are going to learn a linear function of feats and use that to
+            predict if det should associate to this track.  So try and define features that
+            you think would work in a linear function.  There are all kinds of ways to do
+            this.  If you want to get really clever about it you can even use kernel
+            methods like the empirical_kernel_map (see empirical_kernel_map_ex.cpp).  I
+            would start out with something simple first though.
+        */
         feats = abs(last_measurements - det.measurements);
     }
 
-    void update_track (
-        const detection& det
-    )
+    void update_track(const detection& det)
     {
+        /*
+            This function is called when the dlib tools have decided that det should be
+            associated with this track.  So the point of update_track() is to, as the name
+            suggests, update the track with the given detection.  In general, you can do
+            whatever you want in this function.  Here we simply record the last measurement
+            state and reset the time since last association.
+        */
         last_measurements = det.measurements;
         time_since_last_association = 0;
     }
 
-    void propagate_track (
-    )
+    void propagate_track()
     {
+        /*
+            This function is called when the dlib tools have decided, for the current time
+            step, that none of the available detections associate with this track.  So the
+            point of this function is to perform a track update without a detection.  To
+            say that another way.  Every time you ask the dlib tools to perform detection
+            to track association they will update each track by calling either
+            update_track() or propagate_track().  Which function they call depends on
+            whether or not a detection was associated to the track.
+        */
         ++time_since_last_association;
     }
 
@@ -63,8 +135,17 @@ public:
 
 // ----------------------------------------------------------------------------------------
 
-typedef std::vector<labeled_detection<detection> > detections_at_single_time_step;
-typedef std::vector<detections_at_single_time_step> track_history;
+/*
+    Now that we have defined our detection and track structures we are going to define our
+    sensor simulator.  In it we will imagine that there are num_objects things in the world
+    and those things generate detections from our sensor.  Moreover, each detection from
+    the sensor comes with a measurement vector with num_properties elements.  
+
+    So the first function, initialize_object_properties(), just randomly generates
+    num_objects and saves them in a global variable.  Then when we are generating
+    detections we will output copies of these objects that have been corrupted by a little
+    bit of random noise.
+*/
 
 dlib::rand rnd;
 const long num_objects = 4;
@@ -77,36 +158,68 @@ void initialize_object_properties()
         object_properties[i] = randm(num_properties,1,rnd);
 }
 
+// So here is our function that samples a detection from our simulated sensor.  You tell it
+// what object you want to sample a detection from and it returns a detection from that
+// object.
+detection sample_detection_from_sensor(unsigned long object_id)
+{
+    DLIB_CASSERT(object_id < num_objects, 
+        "You can't ask to sample a detection from an object that doesn't exist."); 
+    detection temp;
+    // Set the measurements equal to the object's true property values plus a little bit of
+    // noise.
+    temp.measurements = object_properties[object_id] + randm(num_properties,1,rnd)*0.1;
+    return temp;
+}
+
 // ----------------------------------------------------------------------------------------
 
+typedef std::vector<labeled_detection<detection> > detections_at_single_time_step;
+typedef std::vector<detections_at_single_time_step> track_history;
+
 track_history make_random_tracking_data_for_training()
 {
+    /*
+        Since we are using machine learning we need some training data.  This function
+        samples data from our sensor and creates labeled track histories.  In these track
+        histories, each detection is labeled with its true track ID.  The goal of the
+        machine learning tools will then be to learn to associate all the detections with
+        the same ID to the same track object.
+    */
+
     track_history data;
 
-
+    // At each time step we get a set of detections from the objects in the world.
+    // Simulate 100 time steps worth of data where there are 3 objects present. 
     const int num_time_steps = 100;
     for (int i = 0; i < num_time_steps; ++i)
     {
         detections_at_single_time_step dets(3);
-        dets[0].det.measurements = object_properties[0] + randm(num_properties,1,rnd)*0.1;
+        // sample a detection from object 0
+        dets[0].det = sample_detection_from_sensor(0);
         dets[0].label = 0;
 
-        dets[1].det.measurements = object_properties[1] + randm(num_properties,1,rnd)*0.1;
+        // sample a detection from object 1
+        dets[1].det = sample_detection_from_sensor(1);
         dets[1].label = 1;
 
-        dets[2].det.measurements = object_properties[2] + randm(num_properties,1,rnd)*0.1;
+        // sample a detection from object 2
+        dets[2].det = sample_detection_from_sensor(2);
         dets[2].label = 2;
 
         data.push_back(dets);
     }
 
+    // Now let's imagine object 1 and 2 are gone but a new object, object 3 has arrived.  
     for (int i = 0; i < num_time_steps; ++i)
     {
         detections_at_single_time_step dets(2);
-        dets[0].det.measurements = object_properties[0] + randm(num_properties,1,rnd)*0.1;
+        // sample a detection from object 0
+        dets[0].det = sample_detection_from_sensor(0);
         dets[0].label = 0;
 
-        dets[1].det.measurements = object_properties[3] + randm(num_properties,1,rnd)*0.1;
+        // sample a detection from object 3
+        dets[1].det = sample_detection_from_sensor(3);
         dets[1].label = 3;
 
         data.push_back(dets);
@@ -119,10 +232,17 @@ track_history make_random_tracking_data_for_training()
 
 std::vector<detection> make_random_detections(unsigned long num_dets)
 {
+    /*
+        Finally, when we test the tracker we learned we will need to sample regular old
+        unlabeled detections.  This function helps us do that.
+    */
+    DLIB_CASSERT(num_dets <= num_objects, 
+        "You can't ask for more detections than there are objects in our little simulation."); 
+
     std::vector<detection> dets(num_dets);
     for (unsigned long i = 0; i < dets.size(); ++i)
     {
-        dets[i].measurements = object_properties[i] + randm(num_properties,1,rnd)*0.1;
+        dets[i] = sample_detection_from_sensor(i);
     }
     return dets;
 }
@@ -134,6 +254,10 @@ int main()
     initialize_object_properties();
 
 
+    // Get some training data.  Here we sample 5 independent track histories.  In a real
+    // world problem you would get this kind of data by, for example, collecting data from
+    // your sensor on 5 separate days where you did an independent collection each day.
+    // You can train a model with just one track history but the more you have the better.
     std::vector<track_history> data;
     data.push_back(make_random_tracking_data_for_training());
     data.push_back(make_random_tracking_data_for_training());
@@ -143,18 +267,48 @@ int main()
 
 
     structural_track_association_trainer trainer;
-    trainer.be_verbose();
+    // Note that the machine learning tools have a parameter.  This is the usual SVM C
+    // parameter that controls the trade-off between trying to fit the training data or
+    // producing a "simpler" solution.  You need to try a few different values of this
+    // parameter to find out what setting works best for your problem (try values in the
+    // range 0.001 to 1000000).
     trainer.set_c(100);
-
+    // Now do the training.
     track_association_function<detection> assoc = trainer.train(data);
 
-    cout << "accuracy on training data: "<< test_track_association_function(assoc, data) << endl;
-    cout << "cross validation: "<< cross_validate_track_association_trainer(trainer, data, 5) << endl;
+    // We can test the accuracy of the learned association function on some track history
+    // data.  Here we test it on the data we trained on.  It outputs a single number that
+    // measures the fraction of detections which were correctly associated to their tracks.
+    // So a value of 1 indicates perfect tracking and a value of 0 indicates totally wrong
+    // tracking.
+    cout << "Association accuracy on training data: "<< test_track_association_function(assoc, data) << endl;
+    // It's very important to test the output of a machine learning method on data it
+    // wasn't trained on.  You can do that by calling test_track_association_function() on
+    // held out data.  You can also use cross-validation like so:
+    cout << "Association accuracy from 5-fold CV:   "<< cross_validate_track_association_trainer(trainer, data, 5) << endl;
+    // Unsurprisingly, the testing functions show that the assoc function we learned
+    // perfectly associates all detections to tracks in this easy data.
 
-    std::vector<detection> dets;
+
+
+
+    // OK.  So how do you use this assoc thing?  Let's use it to do some tracking!
+
+    // tracks contains all our current tracks.  Initially it is empty.
     std::vector<track> tracks;
-
     cout << "number of tracks: "<< tracks.size() << endl;
+
+    // Sample detections from 3 objects.
+    std::vector<detection> dets = make_random_detections(3);
+    // Calling assoc(), the function we just learned, performs the detection to track
+    // association.  It will also call each track's update_track() function with the
+    // associated detection.  For tracks that don't get a detection, it calls
+    // propagate_track(). 
+    assoc(tracks, dets);
+    // Now there are 3 things in tracks.
+    cout << "number of tracks: "<< tracks.size() << endl;
+
+    // Run the tracker for a few more time steps...
     dets = make_random_detections(3);
     assoc(tracks, dets);
     cout << "number of tracks: "<< tracks.size() << endl;
@@ -163,10 +317,15 @@ int main()
     assoc(tracks, dets);
     cout << "number of tracks: "<< tracks.size() << endl;
 
+    // Now another object has appeared!  There are 4 objects now.
     dets = make_random_detections(4);
     assoc(tracks, dets);
+    // Now there are 4 tracks instead of 3!
     cout << "number of tracks: "<< tracks.size() << endl;
 
+    // That 4th object just vanished.  Let's look at the time_since_last_association values
+    // for each track.  We will see that one of the tracks isn't getting updated with
+    // detections anymore since the object it corresponds to is no longer present.
     dets = make_random_detections(3);
     assoc(tracks, dets);
     cout << "number of tracks: "<< tracks.size() << endl;
@@ -180,10 +339,16 @@ int main()
         cout << "   time since last association: "<< tracks[i].time_since_last_association << endl;
 
 
+
+
+
+
+    // Finally, you can save your track_association_function to disk like so:
     ofstream fout("track_assoc.svm", ios::binary);
     serialize(assoc, fout);
     fout.close();
 
+    // And recall it from disk later like so:
     ifstream fin("track_assoc.svm", ios::binary);
     deserialize(assoc, fin);
 }