Made the DNN layer visiting routines more convenient.

Now the user doesn't have to supply a visitor capable of visiting all
layers; the visitor only needs to handle the layer types they are
interested in.  Also added visit_computational_layers() and
visit_computational_layers_range() since those capture a very common use
case more concisely than visit_layers().  That is, users generally want to
work with the computational layers specifically, since those are the
stateful layers.
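
For illustration, a sketch of the new usage (assuming net is a dlib network
object containing relu_ and leaky_relu_ layers, as in the updated tests and
example program below):

    // Count the relu_ layers; every other layer type is simply skipped.
    int num_relus = 0;
    visit_computational_layers(net, [&num_relus](relu_&) { ++num_relus; });
    // Replace the parameters of every leaky_relu_ layer.
    visit_computational_layers(net, [](leaky_relu_& l) { l = leaky_relu_(0.001f); });
    // Range-restricted visiting works the same way.
    visit_computational_layers_range<0, 2>(net, visitor_weight_decay_multiplier(1));
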
pull/2162/head
Davis King 4 years ago
parent 7dcc7b4ebc
commit afe19fcb8b

@@ -3471,140 +3471,6 @@ namespace dlib
return impl_test_layer(l, 0.01);
}
// ----------------------------------------------------------------------------------------
namespace impl
{
template <size_t i, size_t num>
struct vlp_loop
{
template <typename T, typename U>
static typename std::enable_if<!is_add_layer<U>::value>::type invoke_functor(T&& , size_t& , U&& )
{
// intentionally left empty
}
template <typename T, typename U>
static typename std::enable_if<is_add_layer<U>::value>::type invoke_functor(T&& v , size_t& comp_i, U&& l )
{
v(comp_i, l.layer_details().get_layer_params());
++comp_i;
}
template <
typename net_type,
typename visitor
>
static void visit(
size_t comp_i,
net_type& net,
visitor&& v
)
{
invoke_functor(v, comp_i, layer<i>(net));
vlp_loop<i+1, num>::visit(comp_i, net,v);
}
};
template <size_t num>
struct vlp_loop<num,num>
{
template <
typename net_type,
typename visitor
>
static void visit(
size_t,
net_type&,
visitor&&
)
{
// Base case of recursion. Don't do anything.
}
};
}
template <
typename net_type,
typename visitor
>
void visit_layer_parameters(
net_type& net,
visitor v
)
{
size_t comp_i = 0;
impl::vlp_loop<0, net_type::num_layers>::visit(comp_i, net, v);
}
// ----------------------------------------------------------------------------------------
namespace impl
{
template <size_t i, size_t num>
struct vlpg_loop
{
template <typename T, typename U>
static typename std::enable_if<!is_add_layer<U>::value>::type invoke_functor(T&& , size_t& , U&& )
{
// intentionally left empty
}
template <typename T, typename U>
static typename std::enable_if<is_add_layer<U>::value>::type invoke_functor(T&& v , size_t& comp_i, U&& l )
{
v(comp_i, l.get_parameter_gradient());
++comp_i;
}
template <
typename net_type,
typename visitor
>
static void visit(
size_t comp_i,
net_type& net,
visitor&& v
)
{
invoke_functor(v, comp_i, layer<i>(net));
vlpg_loop<i+1, num>::visit(comp_i, net,v);
}
};
template <size_t num>
struct vlpg_loop<num,num>
{
template <
typename net_type,
typename visitor
>
static void visit(
size_t,
net_type&,
visitor&&
)
{
// Base case of recursion. Don't do anything.
}
};
}
template <
typename net_type,
typename visitor
>
void visit_layer_parameter_gradients(
net_type& net,
visitor v
)
{
size_t comp_i = 0;
impl::vlpg_loop<0, net_type::num_layers>::visit(comp_i, net, v);
}
// ----------------------------------------------------------------------------------------
namespace impl
@@ -3621,7 +3487,9 @@ namespace dlib
visitor&& v
)
{
-v(i, layer<i>(net));
+// Call whatever version of the visitor the user provided.
+call_if_valid(v, i, layer<i>(net));
+call_if_valid(v, layer<i>(net));
vl_loop<i+1, num>::visit(net,v);
}
};
@@ -3655,7 +3523,9 @@ namespace dlib
)
{
vl_loop_backwards<i+1, num>::visit(net,v);
-v(i, layer<i>(net));
+// Call whatever version of the visitor the user provided.
+call_if_valid(v, i, layer<i>(net));
+call_if_valid(v, layer<i>(net));
}
};
@@ -3751,7 +3621,7 @@ namespace dlib
visitor&& v
)
{
-v(next_net);
+call_if_valid(v, next_net);
vl_until_tag<i+1,tag_id>::visit(net,layer<i+1>(net),v);
}
@@ -3766,7 +3636,7 @@ namespace dlib
visitor&& v
)
{
-v(next_net);
+call_if_valid(v, next_net);
}
template <
@@ -3780,7 +3650,7 @@ namespace dlib
visitor&& v
)
{
-v(next_net);
+call_if_valid(v, next_net);
}
};
}
@@ -3798,6 +3668,137 @@ namespace dlib
impl::vl_until_tag<0,tag_id>::visit(net, net, v);
}
// ----------------------------------------------------------------------------------------
namespace impl
{
template <
typename visitor
>
class visitor_computational_layer
{
public:
explicit visitor_computational_layer(visitor& v) : v_(v) {}
template <typename T, typename U, typename E>
void operator()(size_t idx, add_layer<T,U,E>& l) const
{
// Call whatever version of the visitor the user provided.
call_if_valid(v_, idx, l.layer_details());
call_if_valid(v_, l.layer_details());
}
private:
visitor& v_;
};
}
template <
typename net_type,
typename visitor
>
void visit_computational_layers(
net_type& net,
visitor v
)
{
visit_layers(net, impl::visitor_computational_layer<visitor>(v));
}
template <
size_t begin,
size_t end,
typename net_type,
typename visitor
>
void visit_computational_layers_range(
net_type& net,
visitor v
)
{
visit_layers_range<begin,end>(net, impl::visitor_computational_layer<visitor>(v));
}
// ----------------------------------------------------------------------------------------
namespace impl
{
template <
typename visitor
>
class visit_layer_parameters
{
public:
explicit visit_layer_parameters(visitor& v) : v_(v) {}
template <typename layer>
void operator()(layer& l)
{
// Call whatever version of the visitor the user provided.
const bool visitor_called = call_if_valid(v_, computational_layer_idx, l.get_layer_params()) ||
call_if_valid(v_, l.get_layer_params());
DLIB_CASSERT(visitor_called, "A visitor function with an incorrect signature was given to visit_layer_parameters()");
++computational_layer_idx;
}
private:
size_t computational_layer_idx = 0;
visitor& v_;
};
}
template <
typename net_type,
typename visitor
>
void visit_layer_parameters(
net_type& net,
visitor v
)
{
visit_computational_layers(net, impl::visit_layer_parameters<visitor>(v));
}
// ----------------------------------------------------------------------------------------
namespace impl
{
template <
typename visitor
>
class visit_layer_parameter_gradients
{
public:
explicit visit_layer_parameter_gradients(visitor& v) : v_(v) {}
template <typename T, typename U, typename E>
void operator()(add_layer<T,U,E>& l)
{
// Call whatever version of the visitor the user provided.
const bool visitor_called = call_if_valid(v_, computational_layer_idx, l.get_parameter_gradient()) ||
call_if_valid(v_, l.get_parameter_gradient());
DLIB_CASSERT(visitor_called, "A visitor function with an incorrect signature was given to visit_layer_parameter_gradients()");
++computational_layer_idx;
}
private:
size_t computational_layer_idx = 0;
visitor& v_;
};
}
template <
typename net_type,
typename visitor
>
void visit_layer_parameter_gradients(
net_type& net,
visitor v
)
{
visit_layers(net, impl::visit_layer_parameter_gradients<visitor>(v));
}
// ----------------------------------------------------------------------------------------
}
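
The new visitors above all dispatch through dlib's call_if_valid() utility:
they first try invoking the user's function object with (index, layer) and then
with just the layer, so it only has to accept whichever form (and layer types)
it actually cares about.  A minimal sketch of how such a helper can be written
(an illustration of the idea only; dlib ships its own call_if_valid):

    #include <utility>

    namespace sketch
    {
        namespace detail
        {
            // Preferred overload: enabled only when f(args...) is a valid expression.
            template <typename F, typename... Args>
            auto try_call(int, F&& f, Args&&... args)
                -> decltype((void)std::forward<F>(f)(std::forward<Args>(args)...), bool())
            {
                std::forward<F>(f)(std::forward<Args>(args)...);
                return true;
            }

            // Fallback overload: chosen when the call above would not compile.
            template <typename F, typename... Args>
            bool try_call(long, F&&, Args&&...) { return false; }
        }

        // Calls f(args...) if that call is well formed and reports whether it ran.
        template <typename F, typename... Args>
        bool call_if_valid(F&& f, Args&&... args)
        {
            return detail::try_call(0, std::forward<F>(f), std::forward<Args>(args)...);
        }
    }

Because the preferred overload is selected only when the call expression
compiles, a visitor such as [](leaky_relu_& l){...} is silently skipped for
every layer it cannot accept, which is what makes the narrower visitor
signatures in this commit possible.
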

@@ -1676,6 +1676,8 @@ namespace dlib
add_tag_layer.
- v is a function object with a signature equivalent to:
v(size_t idx, tensor& t)
+or:
+v(tensor& t)
ensures
- Loops over all the computational layers (i.e. layers with parameters, as
opposed to loss, tag, or input layers) in net and passes their parameters to
@@ -1709,6 +1711,8 @@ namespace dlib
add_tag_layer.
- v is a function object with a signature equivalent to:
v(size_t idx, tensor& t)
+or:
+v(tensor& t)
ensures
- Loops over all the computational layers (i.e. layers with parameters, as
opposed to loss, tag, or input layers) in net and passes their parameter
@@ -1743,7 +1747,9 @@ namespace dlib
add_tag_layer.
- v is a function object with a signature equivalent to:
v(size_t idx, any_net_type& t)
-That is, it must take a size_t and then any of the network types such as
+or:
+v(any_net_type& t)
+That is, it takes an optional size_t and then any of the network types such as
add_layer, add_loss_layer, etc.
ensures
- Loops over all the layers in net and calls v() on them. To be specific, this
@@ -1767,7 +1773,9 @@ namespace dlib
add_tag_layer.
- v is a function object with a signature equivalent to:
v(size_t idx, any_net_type& t)
-That is, it must take a size_t and then any of the network types such as
+or:
+v(any_net_type& t)
+That is, it takes an optional size_t and then any of the network types such as
add_layer, add_loss_layer, etc.
ensures
- Loops over all the layers in net and calls v() on them. The loop happens in
@@ -1778,6 +1786,64 @@ namespace dlib
v(i-1, layer<i-1>(net));
!*/
// ----------------------------------------------------------------------------------------
template <
typename net_type,
typename visitor
>
void visit_computational_layers(
net_type& net,
visitor v
);
/*!
requires
- net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or
add_tag_layer.
- v is a function object with a signature equivalent to:
v(size_t idx, any_computational_layer& t)
or:
v(any_computational_layer& t)
That is, it takes an optional size_t and then any of the computational layers. E.g.
one of the layer types defined in dlib/dnn/layers_abstract.h like fc_ or con_.
ensures
- Loops over all the computational layers in net and calls v() on them. To be specific, this
function essentially performs the following:
for (size_t i = 0; i < net_type::num_layers; ++i)
if (layer<i>(net) is an add_layer type, i.e. it adds a computational layer)
v(i, layer<i>(net).layer_details());
!*/
template <
size_t begin,
size_t end,
typename net_type,
typename visitor
>
void visit_computational_layers_range(
net_type& net,
visitor v
);
/*!
requires
- net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or
add_tag_layer.
- v is a function object with a signature equivalent to:
v(size_t idx, any_computational_layer& t)
or:
v(any_computational_layer& t)
That is, it takes an optional size_t and then any of the computational layers. E.g.
one of the layer types defined in dlib/dnn/layers_abstract.h like fc_ or con_.
ensures
- Loops over all the computational layers in the range [begin,end) in net and calls v()
on them. To be specific, this function essentially performs the following:
for (size_t i = begin; i < end; ++i)
if (layer<i>(net) is an add_layer type, i.e. it adds a computational layer)
v(i, layer<i>(net).layer_details());
!*/
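
(For example, a usage sketch of the optional index-taking form, assuming a
network that contains relu_ layers:

    std::vector<size_t> relu_positions;
    visit_computational_layers(net, [&](size_t idx, relu_&) { relu_positions.push_back(idx); });

Layers the visitor cannot accept are simply skipped.)
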
// ----------------------------------------------------------------------------------------
template <
@@ -1796,13 +1862,14 @@ namespace dlib
add_tag_layer.
- v is a function object with a signature equivalent to:
v(size_t idx, any_net_type& t)
-That is, it must take a size_t and then any of the network types such as
+or:
+v(any_net_type& t)
+That is, it takes an optional size_t and then any of the network types such as
add_layer, add_loss_layer, etc.
- begin <= end <= net_type::num_layers
ensures
- Loops over the layers in the range [begin,end) in net and calls v() on them.
-The loop happens in the reverse order of visit_layers(). To be specific,
-this function essentially performs the following:
+To be specific, this function essentially performs the following:
for (size_t i = begin; i < end; ++i)
v(i, layer<i>(net));
@@ -1824,7 +1891,9 @@ namespace dlib
add_tag_layer.
- v is a function object with a signature equivalent to:
v(size_t idx, any_net_type& t)
-That is, it must take a size_t and then any of the network types such as
+or:
+v(any_net_type& t)
+That is, it takes an optional size_t and then any of the network types such as
add_layer, add_loss_layer, etc.
- begin <= end <= net_type::num_layers
ensures

@@ -667,7 +667,7 @@ namespace dlib
// periodically copy these tensors to all the other devices to make sure the
// different GPUs don't go out of sync.
std::vector<tensor*> reference_params;
-visit_layer_parameters(devices[0]->net, [&](size_t, tensor& t) { reference_params.push_back(&t); });
+visit_layer_parameters(devices[0]->net, [&](tensor& t) { reference_params.push_back(&t); });
// If no external thread pools vector was passed, then create one that will
// be automatically destructed as soon as the dnn_trainer object goes out of

@@ -1995,14 +1995,14 @@ namespace
pres<res<res<res_down< // 2 prelu layers here
tag4<repeat<9,pres, // 9 groups, each containing 2 prelu layers
res_down<
-res<
+leaky_relu<res<
input<matrix<unsigned char>>
->>>>>>>>>>>;
+>>>>>>>>>>>>;
net_type2 pnet;
-DLIB_TEST_MSG(pnet.num_layers == 131, pnet.num_layers);
+DLIB_TEST_MSG(pnet.num_layers == 132, pnet.num_layers);
-DLIB_TEST_MSG(pnet.num_computational_layers == 109, pnet.num_computational_layers);
+DLIB_TEST_MSG(pnet.num_computational_layers == 110, pnet.num_computational_layers);
std::vector<bool> hit(pnet.num_computational_layers, false);
size_t count = 0;
@@ -2017,6 +2017,14 @@ namespace
for (auto x : hit2)
DLIB_TEST(x);
DLIB_TEST(count == pnet.num_computational_layers);
int num_relus = 0;
visit_computational_layers(pnet, [&num_relus](relu_&) { ++num_relus; });
DLIB_TEST(num_relus == 10);
DLIB_TEST(layer<leaky_relu>(pnet).layer_details().get_alpha() == 0.01f);
visit_computational_layers(pnet, [](leaky_relu_& l) { l = leaky_relu_(0.001f); });
DLIB_TEST(layer<leaky_relu>(pnet).layer_details().get_alpha() == 0.001f);
}
float tensor_read_cpu(const tensor& t, long i, long k, long r, long c)

@@ -47,16 +47,10 @@ public:
visitor_weight_decay_multiplier(double new_weight_decay_multiplier_) :
new_weight_decay_multiplier(new_weight_decay_multiplier_) {}
-template<typename input_layer_type>
-void operator()(size_t , input_layer_type& ) const
-{
-// ignore other layers
-}
-template <typename T, typename U, typename E>
-void operator()(size_t , add_layer<T,U,E>& l) const
-{
-set_weight_decay_multiplier(l.layer_details(), new_weight_decay_multiplier);
-}
+template <typename layer>
+void operator()(layer& l) const
+{
+set_weight_decay_multiplier(l, new_weight_decay_multiplier);
+}
private:
@@ -98,7 +92,7 @@ int main() try
// We can use the visit_layers function to modify the weight decay of the entire
// network:
-visit_layers(net, visitor_weight_decay_multiplier(0.001));
+visit_computational_layers(net, visitor_weight_decay_multiplier(0.001));
// We can also use predefined visitors to affect the learning rate of the whole
// network.
@@ -109,14 +103,14 @@ int main() try
// visitor that is very similar to the one defined in this example.
// Usually, we want to freeze the network, except for the top layers:
-visit_layers(net.subnet().subnet(), visitor_weight_decay_multiplier(0));
+visit_computational_layers(net.subnet().subnet(), visitor_weight_decay_multiplier(0));
set_all_learning_rate_multipliers(net.subnet().subnet(), 0);
// Alternatively, we can use the visit_layers_range to modify only a specific set of
// layers:
-visit_layers_range<0, 2>(net, visitor_weight_decay_multiplier(1));
+visit_computational_layers_range<0, 2>(net, visitor_weight_decay_multiplier(1));
-// Sometimes we might want to set the learning rate differently thoughout the network.
+// Sometimes we might want to set the learning rate differently throughout the network.
// Here we show how to use adjust the learning rate at the different ResNet50's
// convolutional blocks:
set_learning_rate_multipliers_range< 0, 2>(net, 1);
@@ -143,7 +137,7 @@ int main() try
// We can also print the number of parameters of the network:
cout << "number of network parameters: " << count_parameters(net) << endl;
-// From this point on, we can finetune the new network using this pretrained backbone
+// From this point on, we can fine-tune the new network using this pretrained backbone
// on another task, such as the one showed in dnn_metric_learning_on_images_ex.cpp.
return EXIT_SUCCESS;
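
Since visit_computational_layers() hands each layer's layer_details() object
straight to the visitor, the simplified visitor_weight_decay_multiplier above
could likely also be written inline as a generic lambda (a sketch, assuming
C++14 generic lambdas are acceptable in this example program):

    visit_computational_layers(net, [](auto& l) { set_weight_decay_multiplier(l, 0.001); });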
