Made the DNN layer visiting routines more convenient.

Now the user doesn't have to supply a visitor capable of visiting all
layers, but instead just the ones they are interested in.  Also added
visit_computational_layers() and visit_computational_layers_range()
since those capture a very common use case more concisely than
visit_layers().  That is, users generally want to mess with the
computational layers specifically as those are the stateful layers.
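For example, counting the relu_ layers in a network now only takes a visitor that
accepts relu_ (a rough sketch; net stands for any dlib DNN object, mirroring the new
unit test and the dnn_trainer change in this commit):

    int num_relus = 0;
    visit_computational_layers(net, [&num_relus](relu_&) { ++num_relus; });

    // The index argument is optional for the parameter visitors as well.
    visit_layer_parameters(net, [](tensor& p) { /* inspect or modify p */ });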
Davis King 2020-09-05 18:33:04 -04:00
parent 7dcc7b4ebc
commit afe19fcb8b
5 changed files with 236 additions and 164 deletions

View File

@@ -3471,140 +3471,6 @@ namespace dlib
return impl_test_layer(l, 0.01);
}
// ----------------------------------------------------------------------------------------
namespace impl
{
template <size_t i, size_t num>
struct vlp_loop
{
template <typename T, typename U>
static typename std::enable_if<!is_add_layer<U>::value>::type invoke_functor(T&& , size_t& , U&& )
{
// intentionally left empty
}
template <typename T, typename U>
static typename std::enable_if<is_add_layer<U>::value>::type invoke_functor(T&& v , size_t& comp_i, U&& l )
{
v(comp_i, l.layer_details().get_layer_params());
++comp_i;
}
template <
typename net_type,
typename visitor
>
static void visit(
size_t comp_i,
net_type& net,
visitor&& v
)
{
invoke_functor(v, comp_i, layer<i>(net));
vlp_loop<i+1, num>::visit(comp_i, net,v);
}
};
template <size_t num>
struct vlp_loop<num,num>
{
template <
typename net_type,
typename visitor
>
static void visit(
size_t,
net_type&,
visitor&&
)
{
// Base case of recursion. Don't do anything.
}
};
}
template <
typename net_type,
typename visitor
>
void visit_layer_parameters(
net_type& net,
visitor v
)
{
size_t comp_i = 0;
impl::vlp_loop<0, net_type::num_layers>::visit(comp_i, net, v);
}
// ----------------------------------------------------------------------------------------
namespace impl
{
template <size_t i, size_t num>
struct vlpg_loop
{
template <typename T, typename U>
static typename std::enable_if<!is_add_layer<U>::value>::type invoke_functor(T&& , size_t& , U&& )
{
// intentionally left empty
}
template <typename T, typename U>
static typename std::enable_if<is_add_layer<U>::value>::type invoke_functor(T&& v , size_t& comp_i, U&& l )
{
v(comp_i, l.get_parameter_gradient());
++comp_i;
}
template <
typename net_type,
typename visitor
>
static void visit(
size_t comp_i,
net_type& net,
visitor&& v
)
{
invoke_functor(v, comp_i, layer<i>(net));
vlpg_loop<i+1, num>::visit(comp_i, net,v);
}
};
template <size_t num>
struct vlpg_loop<num,num>
{
template <
typename net_type,
typename visitor
>
static void visit(
size_t,
net_type&,
visitor&&
)
{
// Base case of recursion. Don't do anything.
}
};
}
template <
typename net_type,
typename visitor
>
void visit_layer_parameter_gradients(
net_type& net,
visitor v
)
{
size_t comp_i = 0;
impl::vlpg_loop<0, net_type::num_layers>::visit(comp_i, net, v);
}
// ----------------------------------------------------------------------------------------
namespace impl
@@ -3621,7 +3487,9 @@ namespace dlib
visitor&& v
)
{
v(i, layer<i>(net));
// Call whatever version of the visitor the user provided.
call_if_valid(v, i, layer<i>(net));
call_if_valid(v, layer<i>(net));
vl_loop<i+1, num>::visit(net,v);
}
};
@@ -3655,7 +3523,9 @@ namespace dlib
)
{
vl_loop_backwards<i+1, num>::visit(net,v);
v(i, layer<i>(net));
// Call whatever version of the visitor the user provided.
call_if_valid(v, i, layer<i>(net));
call_if_valid(v, layer<i>(net));
}
};
@@ -3751,7 +3621,7 @@ namespace dlib
visitor&& v
)
{
v(next_net);
call_if_valid(v, next_net);
vl_until_tag<i+1,tag_id>::visit(net,layer<i+1>(net),v);
}
@@ -3766,7 +3636,7 @@ namespace dlib
visitor&& v
)
{
v(next_net);
call_if_valid(v, next_net);
}
template <
@@ -3780,7 +3650,7 @@ namespace dlib
visitor&& v
)
{
v(next_net);
call_if_valid(v, next_net);
}
};
}
@@ -3798,6 +3668,137 @@ namespace dlib
impl::vl_until_tag<0,tag_id>::visit(net, net, v);
}
// ----------------------------------------------------------------------------------------
namespace impl
{
template <
typename visitor
>
class visitor_computational_layer
{
public:
explicit visitor_computational_layer(visitor& v) : v_(v) {}
template <typename T, typename U, typename E>
void operator()(size_t idx, add_layer<T,U,E>& l) const
{
// Call whatever version of the visitor the user provided.
call_if_valid(v_, idx, l.layer_details());
call_if_valid(v_, l.layer_details());
}
private:
visitor& v_;
};
}
template <
typename net_type,
typename visitor
>
void visit_computational_layers(
net_type& net,
visitor v
)
{
visit_layers(net, impl::visitor_computational_layer<visitor>(v));
}
template <
size_t begin,
size_t end,
typename net_type,
typename visitor
>
void visit_computational_layers_range(
net_type& net,
visitor v
)
{
visit_layers_range<begin,end>(net, impl::visitor_computational_layer<visitor>(v));
}
// ----------------------------------------------------------------------------------------
namespace impl
{
template <
typename visitor
>
class visit_layer_parameters
{
public:
explicit visit_layer_parameters(visitor& v) : v_(v) {}
template <typename layer>
void operator()(layer& l)
{
// Call whatever version of the visitor the user provided.
const bool visitor_called = call_if_valid(v_, computational_layer_idx, l.get_layer_params()) ||
call_if_valid(v_, l.get_layer_params());
DLIB_CASSERT(visitor_called, "A visitor function with an incorrect signature was given to visit_layer_parameters()");
++computational_layer_idx;
}
private:
size_t computational_layer_idx = 0;
visitor& v_;
};
}
template <
typename net_type,
typename visitor
>
void visit_layer_parameters(
net_type& net,
visitor v
)
{
visit_computational_layers(net, impl::visit_layer_parameters<visitor>(v));
}
// ----------------------------------------------------------------------------------------
namespace impl
{
template <
typename visitor
>
class visit_layer_parameter_gradients
{
public:
explicit visit_layer_parameter_gradients(visitor& v) : v_(v) {}
template <typename T, typename U, typename E>
void operator()(add_layer<T,U,E>& l)
{
// Call whatever version of the visitor the user provided.
const bool visitor_called = call_if_valid(v_, computational_layer_idx, l.get_parameter_gradient()) ||
call_if_valid(v_, l.get_parameter_gradient());
DLIB_CASSERT(visitor_called, "A visitor function with an incorrect signature was given to visit_layer_parameter_gradients()");
++computational_layer_idx;
}
private:
size_t computational_layer_idx = 0;
visitor& v_;
};
}
template <
typename net_type,
typename visitor
>
void visit_layer_parameter_gradients(
net_type& net,
visitor v
)
{
visit_layers(net, impl::visit_layer_parameter_gradients<visitor>(v));
}
// ----------------------------------------------------------------------------------------
}
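The relaxed visitor signatures above all go through call_if_valid(), which invokes the
functor only when it is callable with the supplied arguments and reports whether a call
happened. A minimal sketch of that dispatch idea, using a hypothetical stand-in named
maybe_call rather than dlib's actual definition:

    #include <utility>

    // Preferred overload: participates in overload resolution only when
    // f(args...) is a well formed expression.
    template <typename F, typename... Args>
    auto maybe_call_impl(int, F&& f, Args&&... args)
        -> decltype((void)std::forward<F>(f)(std::forward<Args>(args)...), true)
    {
        std::forward<F>(f)(std::forward<Args>(args)...);
        return true;  // the functor accepted this argument list
    }

    // Fallback overload: the argument list doesn't match, so do nothing.
    template <typename F, typename... Args>
    bool maybe_call_impl(long, F&&, Args&&...)
    {
        return false;
    }

    // Returns true if f was invoked with args..., false otherwise.
    template <typename F, typename... Args>
    bool maybe_call(F&& f, Args&&... args)
    {
        // The 0 argument prefers the int overload and falls back to long on SFINAE failure.
        return maybe_call_impl(0, std::forward<F>(f), std::forward<Args>(args)...);
    }

The wrappers in this file simply try the (index, layer) form and then the (layer) form in
turn, which is why a visitor only needs to provide the overload it actually cares about.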

View File

@@ -1676,6 +1676,8 @@ namespace dlib
add_tag_layer.
- v is a function object with a signature equivalent to:
v(size_t idx, tensor& t)
or:
v(tensor& t)
ensures
- Loops over all the computational layers (i.e. layers with parameters, as
opposed to loss, tag, or input layers) in net and passes their parameters to
@@ -1709,6 +1711,8 @@ namespace dlib
add_tag_layer.
- v is a function object with a signature equivalent to:
v(size_t idx, tensor& t)
or:
v(tensor& t)
ensures
- Loops over all the computational layers (i.e. layers with parameters, as
opposed to loss, tag, or input layers) in net and passes their parameter
@@ -1743,7 +1747,9 @@ namespace dlib
add_tag_layer.
- v is a function object with a signature equivalent to:
v(size_t idx, any_net_type& t)
That is, it must take a size_t and then any of the network types such as
or:
v(any_net_type& t)
That is, it takes an optional size_t and then any of the network types such as
add_layer, add_loss_layer, etc.
ensures
- Loops over all the layers in net and calls v() on them. To be specific, this
@@ -1767,7 +1773,9 @@ namespace dlib
add_tag_layer.
- v is a function object with a signature equivalent to:
v(size_t idx, any_net_type& t)
That is, it must take a size_t and then any of the network types such as
or:
v(any_net_type& t)
That is, it takes an optional size_t and then any of the network types such as
add_layer, add_loss_layer, etc.
ensures
- Loops over all the layers in net and calls v() on them. The loop happens in
@@ -1778,6 +1786,64 @@ namespace dlib
v(i-1, layer<i-1>(net));
!*/
// ----------------------------------------------------------------------------------------
template <
typename net_type,
typename visitor
>
void visit_computational_layers(
net_type& net,
visitor v
);
/*!
requires
- net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or
add_tag_layer.
- v is a function object with a signature equivalent to:
v(size_t idx, any_computational_layer& t)
or:
v(any_computational_layer& t)
That is, it takes an optional size_t and then any of the computational layers. E.g.
one of the layer types defined in dlib/dnn/layers_abstract.h like fc_ or con_.
ensures
- Loops over all the computational layers in net and calls v() on them. To be specific, this
function essentially performs the following:
for (size_t i = 0; i < net_type::num_layers; ++i)
if (layer<i>(net) is an add_layer type, i.e. it adds a computational layer)
v(i, layer<i>(net).layer_details());
!*/
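// A rough usage sketch (illustrative only, mirroring the updated unit test): change
// the alpha of every leaky_relu_ layer while ignoring all other layer types:
//   visit_computational_layers(net, [](leaky_relu_& l) { l = leaky_relu_(0.001f); });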
template <
size_t begin,
size_t end,
typename net_type,
typename visitor
>
void visit_computational_layers_range(
net_type& net,
visitor v
);
/*!
requires
- net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or
add_tag_layer.
- v is a function object with a signature equivalent to:
v(size_t idx, any_computational_layer& t)
or:
v(any_computational_layer& t)
That is, it takes an optional size_t and then any of the computational layers. E.g.
one of the layer types defined in dlib/dnn/layers_abstract.h like fc_ or con_.
ensures
- Loops over all the computational layers in the range [begin,end) in net and calls v()
on them. To be specific, this function essentially performs the following:
for (size_t i = begin; i < end; ++i)
if (layer<i>(net) is an add_layer type, i.e. it adds a computational layer)
v(i, layer<i>(net).layer_details());
!*/
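// A rough usage sketch (illustrative only, mirroring the updated example program): set
// the weight decay multiplier of just the first two computational layers:
//   visit_computational_layers_range<0, 2>(net, [](auto& l) {
//       set_weight_decay_multiplier(l, 1);
//   });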
// ----------------------------------------------------------------------------------------
template <
@@ -1796,13 +1862,14 @@ namespace dlib
add_tag_layer.
- v is a function object with a signature equivalent to:
v(size_t idx, any_net_type& t)
That is, it must take a size_t and then any of the network types such as
or:
v(any_net_type& t)
That is, it takes an optional size_t and then any of the network types such as
add_layer, add_loss_layer, etc.
- begin <= end <= net_type::num_layers
ensures
- Loops over the layers in the range [begin,end) in net and calls v() on them.
The loop happens in the reverse order of visit_layers(). To be specific,
this function essentially performs the following:
To be specific, this function essentially performs the following:
for (size_t i = begin; i < end; ++i)
v(i, layer<i>(net));
@@ -1824,7 +1891,9 @@ namespace dlib
add_tag_layer.
- v is a function object with a signature equivalent to:
v(size_t idx, any_net_type& t)
That is, it must take a size_t and then any of the network types such as
or:
v(any_net_type& t)
That is, it takes an optional size_t and then any of the network types such as
add_layer, add_loss_layer, etc.
- begin <= end <= net_type::num_layers
ensures

View File

@@ -667,7 +667,7 @@ namespace dlib
// periodically copy these tensors to all the other devices to make sure the
// different GPUs don't go out of sync.
std::vector<tensor*> reference_params;
visit_layer_parameters(devices[0]->net, [&](size_t, tensor& t) { reference_params.push_back(&t); });
visit_layer_parameters(devices[0]->net, [&](tensor& t) { reference_params.push_back(&t); });
// If no external thread pools vector was passed, then create one that will
// be automatically destructed as soon as the dnn_trainer object goes out of

View File

@@ -1995,14 +1995,14 @@ namespace
pres<res<res<res_down< // 2 prelu layers here
tag4<repeat<9,pres, // 9 groups, each containing 2 prelu layers
res_down<
res<
leaky_relu<res<
input<matrix<unsigned char>>
>>>>>>>>>>>;
>>>>>>>>>>>>;
net_type2 pnet;
DLIB_TEST_MSG(pnet.num_layers == 131, pnet.num_layers);
DLIB_TEST_MSG(pnet.num_computational_layers == 109, pnet.num_computational_layers);
DLIB_TEST_MSG(pnet.num_layers == 132, pnet.num_layers);
DLIB_TEST_MSG(pnet.num_computational_layers == 110, pnet.num_computational_layers);
std::vector<bool> hit(pnet.num_computational_layers, false);
size_t count = 0;
@@ -2017,6 +2017,14 @@ namespace
for (auto x : hit2)
DLIB_TEST(x);
DLIB_TEST(count == pnet.num_computational_layers);
int num_relus = 0;
visit_computational_layers(pnet, [&num_relus](relu_&) { ++num_relus; });
DLIB_TEST(num_relus == 10);
DLIB_TEST(layer<leaky_relu>(pnet).layer_details().get_alpha() == 0.01f);
visit_computational_layers(pnet, [](leaky_relu_& l) { l = leaky_relu_(0.001f); });
DLIB_TEST(layer<leaky_relu>(pnet).layer_details().get_alpha() == 0.001f);
}
float tensor_read_cpu(const tensor& t, long i, long k, long r, long c)

View File

@@ -47,16 +47,10 @@ public:
visitor_weight_decay_multiplier(double new_weight_decay_multiplier_) :
new_weight_decay_multiplier(new_weight_decay_multiplier_) {}
template<typename input_layer_type>
void operator()(size_t , input_layer_type& ) const
template <typename layer>
void operator()(layer& l) const
{
// ignore other layers
}
template <typename T, typename U, typename E>
void operator()(size_t , add_layer<T,U,E>& l) const
{
set_weight_decay_multiplier(l.layer_details(), new_weight_decay_multiplier);
set_weight_decay_multiplier(l, new_weight_decay_multiplier);
}
private:
@@ -98,7 +92,7 @@ int main() try
// We can use the visit_layers function to modify the weight decay of the entire
// network:
visit_layers(net, visitor_weight_decay_multiplier(0.001));
visit_computational_layers(net, visitor_weight_decay_multiplier(0.001));
// We can also use predefined visitors to affect the learning rate of the whole
// network.
@@ -109,14 +103,14 @@ int main() try
// visitor that is very similar to the one defined in this example.
// Usually, we want to freeze the network, except for the top layers:
visit_layers(net.subnet().subnet(), visitor_weight_decay_multiplier(0));
visit_computational_layers(net.subnet().subnet(), visitor_weight_decay_multiplier(0));
set_all_learning_rate_multipliers(net.subnet().subnet(), 0);
// Alternatively, we can use the visit_layers_range to modify only a specific set of
// layers:
visit_layers_range<0, 2>(net, visitor_weight_decay_multiplier(1));
visit_computational_layers_range<0, 2>(net, visitor_weight_decay_multiplier(1));
// Sometimes we might want to set the learning rate differently thoughout the network.
// Sometimes we might want to set the learning rate differently throughout the network.
// Here we show how to adjust the learning rate at the different convolutional
// blocks of ResNet50:
set_learning_rate_multipliers_range< 0, 2>(net, 1);
@@ -143,7 +137,7 @@ int main() try
// We can also print the number of parameters of the network:
cout << "number of network parameters: " << count_parameters(net) << endl;
// From this point on, we can finetune the new network using this pretrained backbone
// From this point on, we can fine-tune the new network using this pretrained backbone
// on another task, such as the one shown in dnn_metric_learning_on_images_ex.cpp.
return EXIT_SUCCESS;