From ed22f0400aa3b8b3a3cea9d26367166660c898ed Mon Sep 17 00:00:00 2001
From: Davis King <davis@dlib.net>
Date: Wed, 2 Sep 2020 21:48:30 -0400
Subject: [PATCH] Make dnn_trainer use a robust statistic to determine if the
 loss is exploding and if it should backtrack. Previously we used only the
 non-robust version, and so would mistakenly not catch sequences of loss
 increase that begin with an extremely large value and then settle down to
 still large but less extreme values.

---
 dlib/dnn/trainer.h | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

diff --git a/dlib/dnn/trainer.h b/dlib/dnn/trainer.h
index 4ca3025f6..3243ae347 100644
--- a/dlib/dnn/trainer.h
+++ b/dlib/dnn/trainer.h
@@ -1108,29 +1108,21 @@ namespace dlib
             while (previous_loss_values_to_keep_until_disk_sync.size() > 2 * gradient_updates_since_last_sync)
                 previous_loss_values_to_keep_until_disk_sync.pop_front();
 
-            running_gradient g;
-
+            // Always retry if there are any nan values
             for (auto x : previous_loss_values_to_keep_until_disk_sync)
             {
-                // If we get a NaN value of loss assume things have gone horribly wrong and
-                // we should reload the state of the trainer.
                 if (std::isnan(x))
                     return true;
-
-                g.add(x);
             }
 
             // if we haven't seen much data yet then just say false.
             if (gradient_updates_since_last_sync < 30)
                 return false;
 
-            // if learning rate was changed from outside during training, for example
-            if (g.current_n() <= 2)
-                return false;
-
             // if the loss is very likely to be increasing then return true
-            const double prob = g.probability_gradient_greater_than(0);
-            if (prob > prob_loss_increasing_thresh)
+            const double prob1 = probability_values_are_increasing(previous_loss_values_to_keep_until_disk_sync);
+            const double prob2 = probability_values_are_increasing_robust(previous_loss_values_to_keep_until_disk_sync);
+            if (std::max(prob1, prob2) > prob_loss_increasing_thresh)
             {
                 // Exponentially decay the threshold towards 1 so that if we keep finding
                 // the loss to be increasing over and over we will make the test
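
For reference, here is a minimal standalone sketch (not part of the patch) of the
failure mode this change addresses. It uses the two helpers the patched code calls,
probability_values_are_increasing() and probability_values_are_increasing_robust();
the include path and the made-up loss trace are assumptions for illustration only:

#include <dlib/statistics/running_gradient.h>
#include <iostream>
#include <vector>

int main()
{
    // A loss trace like the one described in the commit message: one
    // extremely large initial value, followed by values that are still
    // large and steadily increasing.
    const std::vector<double> losses = {1e9, 5, 6, 7, 8, 9, 10, 11, 12, 13};

    // The plain statistic fits a trend through all the points, so the huge
    // first value drags the estimated slope negative and the ongoing
    // increase goes undetected (probability near 0).
    std::cout << "plain : "
              << dlib::probability_values_are_increasing(losses) << "\n";

    // The robust statistic discards the most extreme values before fitting,
    // so it sees the upward trend in the remaining values (probability
    // near 1).
    std::cout << "robust: "
              << dlib::probability_values_are_increasing_robust(losses) << "\n";

    // Taking std::max() of the two, as the patch does, catches both the
    // ordinary case and this outlier-masked case.
}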