From ed22f0400aa3b8b3a3cea9d26367166660c898ed Mon Sep 17 00:00:00 2001
From: Davis King <davis@dlib.net>
Date: Wed, 2 Sep 2020 21:48:30 -0400
Subject: [PATCH] Make dnn_trainer use a robust statistic to determine if the
 loss is exploding and if it should backtrack. Previously we used only the
 non-robust version, and so would mistakenly not catch sequences of loss
 increase that begin with an extremely large value and then settle down to
 still large but less extreme values.

---
 dlib/dnn/trainer.h | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

diff --git a/dlib/dnn/trainer.h b/dlib/dnn/trainer.h
index 4ca3025f6..3243ae347 100644
--- a/dlib/dnn/trainer.h
+++ b/dlib/dnn/trainer.h
@@ -1108,29 +1108,21 @@ namespace dlib
             while (previous_loss_values_to_keep_until_disk_sync.size() > 2 * gradient_updates_since_last_sync)
                 previous_loss_values_to_keep_until_disk_sync.pop_front();
 
-            running_gradient g;
-
+            // Always retry if there are any nan values
             for (auto x : previous_loss_values_to_keep_until_disk_sync)
             {
-                // If we get a NaN value of loss assume things have gone horribly wrong and
-                // we should reload the state of the trainer.
                 if (std::isnan(x))
                     return true;
-
-                g.add(x);
             }
 
             // if we haven't seen much data yet then just say false.
             if (gradient_updates_since_last_sync < 30)
                 return false;
 
-            // if learning rate was changed from outside during training, for example
-            if (g.current_n() <= 2)
-                return false;
-
             // if the loss is very likely to be increasing then return true
-            const double prob = g.probability_gradient_greater_than(0);
-            if (prob > prob_loss_increasing_thresh)
+            const double prob1 = probability_values_are_increasing(previous_loss_values_to_keep_until_disk_sync);
+            const double prob2 = probability_values_are_increasing_robust(previous_loss_values_to_keep_until_disk_sync);
+            if (std::max(prob1, prob2) > prob_loss_increasing_thresh)
             {
                 // Exponentially decay the threshold towards 1 so that if we keep finding
                 // the loss to be increasing over and over we will make the test
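
For reference, here is a minimal standalone sketch (not part of the patch) of the
failure mode this change addresses. It uses the two helpers the patched code calls,
probability_values_are_increasing() and probability_values_are_increasing_robust();
the include path and the made-up loss trace are assumptions for illustration only:

#include <dlib/statistics/running_gradient.h>
#include <iostream>
#include <vector>

int main()
{
    // A loss trace like the one described in the commit message: one
    // extremely large initial value, followed by values that are still
    // large and steadily increasing.
    const std::vector<double> losses = {1e9, 5, 6, 7, 8, 9, 10, 11, 12, 13};

    // The plain statistic fits a trend through all the points, so the huge
    // first value drags the estimated slope negative and the ongoing
    // increase goes undetected (probability near 0).
    std::cout << "plain : "
              << dlib::probability_values_are_increasing(losses) << "\n";

    // The robust statistic discards the most extreme values before fitting,
    // so it sees the upward trend in the remaining values (probability
    // near 1).
    std::cout << "robust: "
              << dlib::probability_values_are_increasing_robust(losses) << "\n";

    // Taking std::max() of the two, as the patch does, catches both the
    // ordinary case and this outlier-masked case.
}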