Mirror of https://github.com/davisking/dlib.git, synced 2024-11-01 10:14:53 +08:00
Make dnn_trainer use a robust statistic to determine if the loss is exploding and if it should backtrack.
Previously we used only the non-robust version, and so would mistakenly fail to catch sequences of loss increases that begin with an extremely large value and then settle down to still-large but less extreme values.
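As a rough illustration of the difference (not part of this commit), the sketch below compares the non-robust and robust checks on the kind of loss sequence described above. It assumes a dlib build that already contains probability_values_are_increasing() and probability_values_are_increasing_robust() from the parent commit and that they are reachable via <dlib/statistics/running_gradient.h>; the loss values themselves are made up for the example.

    #include <iostream>
    #include <vector>
    #include <dlib/statistics/running_gradient.h>

    int main()
    {
        // A loss sequence of the kind the commit message describes: one extremely
        // large value at the start, followed by values that are still large but far
        // less extreme and slowly creeping upward.
        const std::vector<double> losses = {1e9, 20, 21, 22, 23, 24, 25, 26, 27, 28};

        // Non-robust check: the trend fit is dominated by the huge first value, so
        // the sequence looks like it is decreasing overall and the reported
        // probability of increase is low.
        std::cout << "non-robust: "
                  << dlib::probability_values_are_increasing(losses) << "\n";

        // Robust check: the most extreme values are discarded before fitting, so the
        // steady upward drift of the remaining losses is detected.
        std::cout << "robust:     "
                  << dlib::probability_values_are_increasing_robust(losses) << "\n";
    }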
parent 0bb6ce36d8
commit ed22f0400a
@@ -1108,29 +1108,21 @@ namespace dlib
             while (previous_loss_values_to_keep_until_disk_sync.size() > 2 * gradient_updates_since_last_sync)
                 previous_loss_values_to_keep_until_disk_sync.pop_front();
 
-            running_gradient g;
-
             // Always retry if there are any nan values
             for (auto x : previous_loss_values_to_keep_until_disk_sync)
             {
                 // If we get a NaN value of loss assume things have gone horribly wrong and
                 // we should reload the state of the trainer.
                 if (std::isnan(x))
                     return true;
-
-                g.add(x);
             }
 
             // if we haven't seen much data yet then just say false.
             if (gradient_updates_since_last_sync < 30)
                 return false;
 
-            // if learning rate was changed from outside during training, for example
-            if (g.current_n() <= 2)
-                return false;
-
             // if the loss is very likely to be increasing then return true
-            const double prob = g.probability_gradient_greater_than(0);
-            if (prob > prob_loss_increasing_thresh)
+            const double prob1 = probability_values_are_increasing(previous_loss_values_to_keep_until_disk_sync);
+            const double prob2 = probability_values_are_increasing_robust(previous_loss_values_to_keep_until_disk_sync);
+            if (std::max(prob1, prob2) > prob_loss_increasing_thresh)
             {
                 // Exponentially decay the threshold towards 1 so that if we keep finding
                 // the loss to be increasing over and over we will make the test
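The truncated comment at the end of the hunk refers to the trainer's existing backtracking behavior: each time an increase is detected, prob_loss_increasing_thresh is pushed toward 1 so the test becomes progressively harder to pass. A generic sketch of such an exponential decay toward 1 (the update rule and constant below are assumptions for illustration, not dlib's actual code) looks like:

    // Hypothetical helper: move thresh a fixed fraction of its remaining distance
    // toward 1 each time the loss is judged to be increasing.
    inline double decay_threshold_toward_one(double thresh, double rate = 0.5)
    {
        return 1.0 - rate * (1.0 - thresh);  // e.g. 0.90 -> 0.95 -> 0.975 -> ...
    }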