Keep estimating steps_without_progress, even if steps_since_last_learning_rate_shrink < iter_without_progress_thresh

This commit is contained in:
Juha Reunanen 2019-11-12 14:37:43 +02:00
parent 3746b3c1d3
commit 9191ebc776

View File

@@ -697,10 +697,10 @@ namespace dlib
// Check if we should shrink the learning rate based on how the test // Check if we should shrink the learning rate based on how the test
// error has been doing lately. // error has been doing lately.
if (learning_rate_shrink != 1 && steps_since_last_learning_rate_shrink > iter_without_progress_thresh) if (learning_rate_shrink != 1)
{ {
test_steps_without_progress = count_steps_without_decrease(test_previous_loss_values); test_steps_without_progress = count_steps_without_decrease(test_previous_loss_values);
if (test_steps_without_progress >= test_iter_without_progress_thresh) if (test_steps_without_progress >= test_iter_without_progress_thresh && steps_since_last_learning_rate_shrink >= test_iter_without_progress_thresh)
{ {
test_steps_without_progress = count_steps_without_decrease_robust(test_previous_loss_values); test_steps_without_progress = count_steps_without_decrease_robust(test_previous_loss_values);
if (test_steps_without_progress >= test_iter_without_progress_thresh) if (test_steps_without_progress >= test_iter_without_progress_thresh)
@@ -809,13 +809,11 @@ namespace dlib
// have a "budget" that prevents us from calling // have a "budget" that prevents us from calling
// count_steps_without_decrease() every iteration. We do this because // count_steps_without_decrease() every iteration. We do this because
// it can be expensive to compute when previous_loss_values is large. // it can be expensive to compute when previous_loss_values is large.
if (gradient_check_budget > iter_without_progress_thresh if (gradient_check_budget > iter_without_progress_thresh && learning_rate_shrink != 1)
&& learning_rate_shrink != 1
&& steps_since_last_learning_rate_shrink > iter_without_progress_thresh)
{ {
gradient_check_budget = 0; gradient_check_budget = 0;
steps_without_progress = count_steps_without_decrease(previous_loss_values); steps_without_progress = count_steps_without_decrease(previous_loss_values);
if (steps_without_progress >= iter_without_progress_thresh) if (steps_without_progress >= iter_without_progress_thresh && steps_since_last_learning_rate_shrink >= iter_without_progress_thresh)
{ {
// Double check that we aren't seeing decrease. This second check // Double check that we aren't seeing decrease. This second check
// discards the top 10% largest values and checks again. We do // discards the top 10% largest values and checks again. We do