mirror of
https://github.com/davisking/dlib.git
synced 2024-11-01 10:14:53 +08:00
Made the DNN layer visiting routines more convenient.
Now the user doesn't have to supply a visitor capable of visiting all layers, but instead just the ones they are interested in. Also added visit_computational_layers() and visit_computational_layers_range() since those capture a very common use case more concisely than visit_layers(). That is, users generally want to mess with the computational layers specifically as those are the stateful layers.
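For example, with this change a visitor only needs to be callable for the layer types it actually cares about; every other layer is silently skipped. A minimal sketch (the relu_ and leaky_relu_ lambdas mirror the test added further down in this diff; the surrounding function and network type are hypothetical):

    #include <dlib/dnn.h>
    using namespace dlib;

    template <typename net_type>
    void tweak_activations(net_type& net)
    {
        // Count only the relu layers; all other computational layers are ignored
        // because the lambda isn't callable with them.
        int num_relus = 0;
        visit_computational_layers(net, [&num_relus](relu_&) { ++num_relus; });

        // Modify only the leaky_relu layers, leaving everything else untouched.
        visit_computational_layers(net, [](leaky_relu_& l) { l = leaky_relu_(0.001f); });
    }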
This commit is contained in:
parent 7dcc7b4ebc
commit afe19fcb8b

dlib/dnn/core.h (279 changed lines)
@@ -3471,140 +3471,6 @@ namespace dlib
         return impl_test_layer(l, 0.01);
     }
 
-// ----------------------------------------------------------------------------------------
-
-    namespace impl
-    {
-        template <size_t i, size_t num>
-        struct vlp_loop
-        {
-            template <typename T, typename U>
-            static typename std::enable_if<!is_add_layer<U>::value>::type invoke_functor(T&& , size_t& , U&& )
-            {
-                // intentionally left empty
-            }
-
-            template <typename T, typename U>
-            static typename std::enable_if<is_add_layer<U>::value>::type invoke_functor(T&& v , size_t& comp_i, U&& l )
-            {
-                v(comp_i, l.layer_details().get_layer_params());
-                ++comp_i;
-            }
-
-            template <
-                typename net_type,
-                typename visitor
-                >
-            static void visit(
-                size_t comp_i,
-                net_type& net,
-                visitor&& v
-            )
-            {
-                invoke_functor(v, comp_i, layer<i>(net));
-                vlp_loop<i+1, num>::visit(comp_i, net,v);
-            }
-        };
-
-        template <size_t num>
-        struct vlp_loop<num,num>
-        {
-            template <
-                typename net_type,
-                typename visitor
-                >
-            static void visit(
-                size_t,
-                net_type&,
-                visitor&&
-            )
-            {
-                // Base case of recursion. Don't do anything.
-            }
-        };
-
-    }
-
-    template <
-        typename net_type,
-        typename visitor
-        >
-    void visit_layer_parameters(
-        net_type& net,
-        visitor v
-    )
-    {
-        size_t comp_i = 0;
-        impl::vlp_loop<0, net_type::num_layers>::visit(comp_i, net, v);
-    }
-
-// ----------------------------------------------------------------------------------------
-
-    namespace impl
-    {
-        template <size_t i, size_t num>
-        struct vlpg_loop
-        {
-            template <typename T, typename U>
-            static typename std::enable_if<!is_add_layer<U>::value>::type invoke_functor(T&& , size_t& , U&& )
-            {
-                // intentionally left empty
-            }
-
-            template <typename T, typename U>
-            static typename std::enable_if<is_add_layer<U>::value>::type invoke_functor(T&& v , size_t& comp_i, U&& l )
-            {
-                v(comp_i, l.get_parameter_gradient());
-                ++comp_i;
-            }
-
-            template <
-                typename net_type,
-                typename visitor
-                >
-            static void visit(
-                size_t comp_i,
-                net_type& net,
-                visitor&& v
-            )
-            {
-                invoke_functor(v, comp_i, layer<i>(net));
-                vlpg_loop<i+1, num>::visit(comp_i, net,v);
-            }
-        };
-
-        template <size_t num>
-        struct vlpg_loop<num,num>
-        {
-            template <
-                typename net_type,
-                typename visitor
-                >
-            static void visit(
-                size_t,
-                net_type&,
-                visitor&&
-            )
-            {
-                // Base case of recursion. Don't do anything.
-            }
-        };
-
-    }
-
-    template <
-        typename net_type,
-        typename visitor
-        >
-    void visit_layer_parameter_gradients(
-        net_type& net,
-        visitor v
-    )
-    {
-        size_t comp_i = 0;
-        impl::vlpg_loop<0, net_type::num_layers>::visit(comp_i, net, v);
-    }
-
 // ----------------------------------------------------------------------------------------
 
     namespace impl
@@ -3621,7 +3487,9 @@ namespace dlib
                 visitor&& v
             )
             {
-                v(i, layer<i>(net));
+                // Call whatever version of the visitor the user provided.
+                call_if_valid(v, i, layer<i>(net));
+                call_if_valid(v, layer<i>(net));
                 vl_loop<i+1, num>::visit(net,v);
             }
         };
@@ -3655,7 +3523,9 @@ namespace dlib
             )
             {
                 vl_loop_backwards<i+1, num>::visit(net,v);
-                v(i, layer<i>(net));
+                // Call whatever version of the visitor the user provided.
+                call_if_valid(v, i, layer<i>(net));
+                call_if_valid(v, layer<i>(net));
             }
         };
 
@@ -3751,7 +3621,7 @@ namespace dlib
                 visitor&& v
            )
            {
-                v(next_net);
+                call_if_valid(v, next_net);
                 vl_until_tag<i+1,tag_id>::visit(net,layer<i+1>(net),v);
            }
 
@@ -3766,7 +3636,7 @@ namespace dlib
                 visitor&& v
            )
            {
-                v(next_net);
+                call_if_valid(v, next_net);
            }
 
            template <
@@ -3780,7 +3650,7 @@ namespace dlib
                 visitor&& v
            )
            {
-                v(next_net);
+                call_if_valid(v, next_net);
            }
        };
    }
@@ -3798,6 +3668,137 @@ namespace dlib
         impl::vl_until_tag<0,tag_id>::visit(net, net, v);
     }
 
+// ----------------------------------------------------------------------------------------
+
+    namespace impl
+    {
+        template <
+            typename visitor
+            >
+        class visitor_computational_layer
+        {
+        public:
+            explicit visitor_computational_layer(visitor& v) : v_(v) {}
+
+            template <typename T, typename U, typename E>
+            void operator()(size_t idx, add_layer<T,U,E>& l) const
+            {
+                // Call whatever version of the visitor the user provided.
+                call_if_valid(v_, idx, l.layer_details());
+                call_if_valid(v_, l.layer_details());
+            }
+        private:
+
+            visitor& v_;
+        };
+    }
+
+    template <
+        typename net_type,
+        typename visitor
+        >
+    void visit_computational_layers(
+        net_type& net,
+        visitor v
+    )
+    {
+        visit_layers(net, impl::visitor_computational_layer<visitor>(v));
+    }
+
+    template <
+        size_t begin,
+        size_t end,
+        typename net_type,
+        typename visitor
+        >
+    void visit_computational_layers_range(
+        net_type& net,
+        visitor v
+    )
+    {
+        visit_layers_range<begin,end>(net, impl::visitor_computational_layer<visitor>(v));
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    namespace impl
+    {
+        template <
+            typename visitor
+            >
+        class visit_layer_parameters
+        {
+        public:
+            explicit visit_layer_parameters(visitor& v) : v_(v) {}
+
+            template <typename layer>
+            void operator()(layer& l)
+            {
+                // Call whatever version of the visitor the user provided.
+                const bool visitor_called = call_if_valid(v_, computational_layer_idx, l.get_layer_params()) ||
+                                            call_if_valid(v_, l.get_layer_params());
+                DLIB_CASSERT(visitor_called, "A visitor function with an incorrect signature was given to visit_layer_parameters()");
+                ++computational_layer_idx;
+            }
+        private:
+
+            size_t computational_layer_idx = 0;
+            visitor& v_;
+        };
+    }
+
+    template <
+        typename net_type,
+        typename visitor
+        >
+    void visit_layer_parameters(
+        net_type& net,
+        visitor v
+    )
+    {
+        visit_computational_layers(net, impl::visit_layer_parameters<visitor>(v));
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    namespace impl
+    {
+        template <
+            typename visitor
+            >
+        class visit_layer_parameter_gradients
+        {
+        public:
+            explicit visit_layer_parameter_gradients(visitor& v) : v_(v) {}
+
+            template <typename T, typename U, typename E>
+            void operator()(add_layer<T,U,E>& l)
+            {
+                // Call whatever version of the visitor the user provided.
+                const bool visitor_called = call_if_valid(v_, computational_layer_idx, l.get_parameter_gradient()) ||
+                                            call_if_valid(v_, l.get_parameter_gradient());
+                DLIB_CASSERT(visitor_called, "A visitor function with an incorrect signature was given to visit_layer_parameter_gradients()");
+                ++computational_layer_idx;
+            }
+        private:
+
+            size_t computational_layer_idx = 0;
+            visitor& v_;
+        };
+    }
+
+    template <
+        typename net_type,
+        typename visitor
+        >
+    void visit_layer_parameter_gradients(
+        net_type& net,
+        visitor v
+    )
+    {
+        visit_layers(net, impl::visit_layer_parameter_gradients<visitor>(v));
+    }
+
 // ----------------------------------------------------------------------------------------
 
 }
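The reworked visit_layer_parameters() and visit_layer_parameter_gradients() above accept either visitor signature. A brief sketch (the tensor-only lambda mirrors the dnn_trainer change further down in this diff; the gradient-zeroing lambda is a hypothetical illustration that relies on dlib::tensor's assignment from a scalar):

    #include <dlib/dnn.h>
    #include <vector>
    using namespace dlib;

    template <typename net_type>
    void collect_params_and_zero_gradients(net_type& net)
    {
        // The visitor may now take just the parameter tensor...
        std::vector<tensor*> params;
        visit_layer_parameters(net, [&](tensor& t) { params.push_back(&t); });

        // ...or it may still take the computational layer index as well.
        visit_layer_parameter_gradients(net, [](size_t idx, tensor& grad) {
            (void)idx;  // index of the computational layer, available if needed
            grad = 0;   // set every element of the gradient tensor to zero
        });
    }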
@@ -1676,6 +1676,8 @@ namespace dlib
               add_tag_layer.
             - v is a function object with a signature equivalent to:
                 v(size_t idx, tensor& t)
+              or:
+                v(tensor& t)
         ensures
             - Loops over all the computational layers (i.e. layers with parameters, as
               opposed to loss, tag, or input layers) in net and passes their parameters to
@@ -1709,6 +1711,8 @@ namespace dlib
               add_tag_layer.
             - v is a function object with a signature equivalent to:
                 v(size_t idx, tensor& t)
+              or:
+                v(tensor& t)
         ensures
             - Loops over all the computational layers (i.e. layers with parameters, as
               opposed to loss, tag, or input layers) in net and passes their parameter
@@ -1743,7 +1747,9 @@ namespace dlib
               add_tag_layer.
             - v is a function object with a signature equivalent to:
                 v(size_t idx, any_net_type& t)
-              That is, it must take a size_t and then any of the network types such as
+              or:
+                v(any_net_type& t)
+              That is, it takes an optional size_t and then any of the network types such as
               add_layer, add_loss_layer, etc.
         ensures
             - Loops over all the layers in net and calls v() on them. To be specific, this
@@ -1767,7 +1773,9 @@ namespace dlib
               add_tag_layer.
             - v is a function object with a signature equivalent to:
                 v(size_t idx, any_net_type& t)
-              That is, it must take a size_t and then any of the network types such as
+              or:
+                v(any_net_type& t)
+              That is, it takes an optional size_t and then any of the network types such as
               add_layer, add_loss_layer, etc.
         ensures
             - Loops over all the layers in net and calls v() on them. The loop happens in
@@ -1778,6 +1786,64 @@ namespace dlib
                 v(i-1, layer<i-1>(net));
     !*/
 
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename net_type,
+        typename visitor
+        >
+    void visit_computational_layers(
+        net_type& net,
+        visitor v
+    );
+    /*!
+        requires
+            - net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or
+              add_tag_layer.
+            - v is a function object with a signature equivalent to:
+                v(size_t idx, any_computational_layer& t)
+              or:
+                v(any_computational_layer& t)
+              That is, it takes an optional size_t and then any of the computational layers. E.g.
+              one of the layer types defined in dlib/dnn/layers_abstract.h like fc_ or conv_.
+        ensures
+            - Loops over all the computational layers in net and calls v() on them. To be specific, this
+              function essentially performs the following:
+
+                for (size_t i = 0; i < net_type::num_layers; ++i)
+                    if (layer<i>(net) is an add_layer type, i.e. it adds a computational layer)
+                        v(i, layer<i>(net).layer_details());
+    !*/
+
+    template <
+        size_t begin,
+        size_t end,
+        typename net_type,
+        typename visitor
+        >
+    void visit_computational_layers_range(
+        net_type& net,
+        visitor v
+    );
+    /*!
+        requires
+            - net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or
+              add_tag_layer.
+            - v is a function object with a signature equivalent to:
+                v(size_t idx, any_computational_layer& t)
+              or:
+                v(any_computational_layer& t)
+              That is, it takes an optional size_t and then any of the computational layers. E.g.
+              one of the layer types defined in dlib/dnn/layers_abstract.h like fc_ or conv_.
+        ensures
+            - Loops over all the computational layers in the range [begin,end) in net and calls v()
+              on them. To be specific, this function essentially performs the following:
+
+                for (size_t i = begin; i < end; ++i)
+                    if (layer<i>(net) is an add_layer type, i.e. it adds a computational layer)
+                        v(i, layer<i>(net).layer_details());
+    !*/
+
 // ----------------------------------------------------------------------------------------
 
     template <
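A short usage sketch of the range overload documented above (assumes a network object named net with at least five layers; visitor_weight_decay_multiplier is the visitor from the example program changed near the end of this diff, and the printing lambda assumes <iostream> plus the operator<< that dlib's built-in layers provide):

    // Apply a visitor only to the computational layers with index in [0, 2).
    visit_computational_layers_range<0, 2>(net, visitor_weight_decay_multiplier(1));

    // A generic lambda is callable for every computational layer, so this prints
    // each computational layer in [0, 5) together with its index.
    visit_computational_layers_range<0, 5>(net, [](size_t idx, auto& l) {
        std::cout << "layer " << idx << ": " << l << std::endl;
    });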
@@ -1796,13 +1862,14 @@ namespace dlib
               add_tag_layer.
             - v is a function object with a signature equivalent to:
                 v(size_t idx, any_net_type& t)
-              That is, it must take a size_t and then any of the network types such as
+              or:
+                v(any_net_type& t)
+              That is, it takes an optional size_t and then any of the network types such as
               add_layer, add_loss_layer, etc.
             - begin <= end <= net_type::num_layers
         ensures
             - Loops over the layers in the range [begin,end) in net and calls v() on them.
-              The loop happens in the reverse order of visit_layers(). To be specific,
-              this function essentially performs the following:
+              To be specific, this function essentially performs the following:
 
                 for (size_t i = begin; i < end; ++i)
                     v(i, layer<i>(net));
@@ -1824,7 +1891,9 @@ namespace dlib
               add_tag_layer.
             - v is a function object with a signature equivalent to:
                 v(size_t idx, any_net_type& t)
-              That is, it must take a size_t and then any of the network types such as
+              or:
+                v(any_net_type& t)
+              That is, it takes an optional size_t and then any of the network types such as
               add_layer, add_loss_layer, etc.
             - begin <= end <= net_type::num_layers
         ensures
@@ -667,7 +667,7 @@ namespace dlib
             // periodically copy these tensors to all the other devices to make sure the
             // different GPUs don't go out of sync.
             std::vector<tensor*> reference_params;
-            visit_layer_parameters(devices[0]->net, [&](size_t, tensor& t) { reference_params.push_back(&t); });
+            visit_layer_parameters(devices[0]->net, [&](tensor& t) { reference_params.push_back(&t); });
 
             // If no external thread pools vector was passed, then create one that will
             // be automatically destructed as soon as the dnn_trainer object goes out of
@@ -1995,14 +1995,14 @@ namespace
             pres<res<res<res_down< // 2 prelu layers here
             tag4<repeat<9,pres,    // 9 groups, each containing 2 prelu layers
             res_down<
-            res<
+            leaky_relu<res<
             input<matrix<unsigned char>>
-            >>>>>>>>>>>;
+            >>>>>>>>>>>>;
 
         net_type2 pnet;
 
-        DLIB_TEST_MSG(pnet.num_layers == 131, pnet.num_layers);
-        DLIB_TEST_MSG(pnet.num_computational_layers == 109, pnet.num_computational_layers);
+        DLIB_TEST_MSG(pnet.num_layers == 132, pnet.num_layers);
+        DLIB_TEST_MSG(pnet.num_computational_layers == 110, pnet.num_computational_layers);
 
         std::vector<bool> hit(pnet.num_computational_layers, false);
         size_t count = 0;
@@ -2017,6 +2017,14 @@ namespace
         for (auto x : hit2)
             DLIB_TEST(x);
         DLIB_TEST(count == pnet.num_computational_layers);
+
+        int num_relus = 0;
+        visit_computational_layers(pnet, [&num_relus](relu_&) { ++num_relus; });
+        DLIB_TEST(num_relus == 10);
+
+        DLIB_TEST(layer<leaky_relu>(pnet).layer_details().get_alpha() == 0.01f);
+        visit_computational_layers(pnet, [](leaky_relu_& l) { l = leaky_relu_(0.001f); });
+        DLIB_TEST(layer<leaky_relu>(pnet).layer_details().get_alpha() == 0.001f);
     }
 
     float tensor_read_cpu(const tensor& t, long i, long k, long r, long c)
@@ -47,16 +47,10 @@ public:
     visitor_weight_decay_multiplier(double new_weight_decay_multiplier_) :
         new_weight_decay_multiplier(new_weight_decay_multiplier_) {}
 
-    template<typename input_layer_type>
-    void operator()(size_t , input_layer_type& ) const
+    template <typename layer>
+    void operator()(layer& l) const
     {
-        // ignore other layers
-    }
-
-    template <typename T, typename U, typename E>
-    void operator()(size_t , add_layer<T,U,E>& l) const
-    {
-        set_weight_decay_multiplier(l.layer_details(), new_weight_decay_multiplier);
+        set_weight_decay_multiplier(l, new_weight_decay_multiplier);
     }
 
 private:
@@ -98,7 +92,7 @@ int main() try
 
     // We can use the visit_layers function to modify the weight decay of the entire
    // network:
-    visit_layers(net, visitor_weight_decay_multiplier(0.001));
+    visit_computational_layers(net, visitor_weight_decay_multiplier(0.001));
 
     // We can also use predefined visitors to affect the learning rate of the whole
     // network.
@@ -109,14 +103,14 @@ int main() try
     // visitor that is very similar to the one defined in this example.
 
     // Usually, we want to freeze the network, except for the top layers:
-    visit_layers(net.subnet().subnet(), visitor_weight_decay_multiplier(0));
+    visit_computational_layers(net.subnet().subnet(), visitor_weight_decay_multiplier(0));
     set_all_learning_rate_multipliers(net.subnet().subnet(), 0);
 
     // Alternatively, we can use the visit_layers_range to modify only a specific set of
     // layers:
-    visit_layers_range<0, 2>(net, visitor_weight_decay_multiplier(1));
+    visit_computational_layers_range<0, 2>(net, visitor_weight_decay_multiplier(1));
 
-    // Sometimes we might want to set the learning rate differently thoughout the network.
+    // Sometimes we might want to set the learning rate differently throughout the network.
     // Here we show how to use adjust the learning rate at the different ResNet50's
     // convolutional blocks:
     set_learning_rate_multipliers_range< 0, 2>(net, 1);
@@ -143,7 +137,7 @@ int main() try
     // We can also print the number of parameters of the network:
     cout << "number of network parameters: " << count_parameters(net) << endl;
 
-    // From this point on, we can finetune the new network using this pretrained backbone
+    // From this point on, we can fine-tune the new network using this pretrained backbone
     // on another task, such as the one showed in dnn_metric_learning_on_images_ex.cpp.
 
     return EXIT_SUCCESS;