2020-02-07 20:59:36 +08:00
|
|
|
// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
|
|
|
|
/*
|
|
|
|
This is an example illustrating the use of the deep learning tools from the
|
|
|
|
dlib C++ Library. I'm assuming you have already read the dnn_introduction_ex.cpp and
|
|
|
|
the dnn_introduction2_ex.cpp examples. So in this example program I'm going to go
|
|
|
|
over a transfer learning example, which includes:
|
|
|
|
- Defining a layer visitor to modify the some network parameters for fine-tuning
|
|
|
|
- Using pretrained layers of a network for another task
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <dlib/dnn.h>
|
|
|
|
#include <iostream>
|
|
|
|
|
|
|
|
// This header file includes a generic definition of the most common ResNet architectures
|
|
|
|
#include "resnet.h"
|
|
|
|
|
|
|
|
using namespace std;
|
|
|
|
using namespace dlib;
|
|
|
|
|
|
|
|
// In this simple example we will show how to load a pretrained network and use it for a
|
|
|
|
// different task. In particular, we will load a ResNet50 trained on ImageNet, adjust
|
|
|
|
// some of its parameters and use it as a pretrained backbone for some metric learning
|
|
|
|
// task.
|
|
|
|
|
|
|
|
// Let's start by defining a network that will use the ResNet50 backbone from resnet.h
|
|
|
|
namespace model
|
|
|
|
{
|
|
|
|
template<template<typename> class BN>
|
|
|
|
using net_type = loss_metric<
|
|
|
|
fc_no_bias<128,
|
|
|
|
avg_pool_everything<
|
2020-03-10 09:21:04 +08:00
|
|
|
typename resnet::def<BN>::template backbone_50<
|
2020-02-07 20:59:36 +08:00
|
|
|
input_rgb_image
|
|
|
|
>>>>;
|
|
|
|
|
|
|
|
using train = net_type<bn_con>;
|
|
|
|
using infer = net_type<affine>;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Next, we define a layer visitor that will modify the weight decay of a network. The
|
|
|
|
// main interest of this class is to show how one can define custom visitors that modify
|
|
|
|
// some network parameters.
|
|
|
|
class visitor_weight_decay_multiplier
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
|
|
|
|
visitor_weight_decay_multiplier(double new_weight_decay_multiplier_) :
|
|
|
|
new_weight_decay_multiplier(new_weight_decay_multiplier_) {}
|
|
|
|
|
2020-09-06 06:33:04 +08:00
|
|
|
template <typename layer>
|
|
|
|
void operator()(layer& l) const
|
2020-02-07 20:59:36 +08:00
|
|
|
{
|
2020-09-06 06:33:04 +08:00
|
|
|
set_weight_decay_multiplier(l, new_weight_decay_multiplier);
|
2020-02-07 20:59:36 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
|
|
|
|
|
|
|
double new_weight_decay_multiplier;
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
int main() try
|
|
|
|
{
|
|
|
|
// Let's instantiate our network in train mode.
|
|
|
|
model::train net;
|
|
|
|
|
|
|
|
// We create a new scope so that resources from the loaded network are freed
|
|
|
|
// automatically when leaving the scope.
|
|
|
|
{
|
|
|
|
// Now, let's define the classic ResNet50 network and load the pretrained model on
|
|
|
|
// ImageNet.
|
2020-03-10 09:21:04 +08:00
|
|
|
resnet::train_50 resnet50;
|
2020-02-07 20:59:36 +08:00
|
|
|
std::vector<string> labels;
|
|
|
|
deserialize("resnet50_1000_imagenet_classifier.dnn") >> resnet50 >> labels;
|
|
|
|
|
|
|
|
// For transfer learning, we are only interested in the ResNet50's backbone, which
|
|
|
|
// lays below the loss and the fc layers, so we can extract it as:
|
|
|
|
auto backbone = std::move(resnet50.subnet().subnet());
|
|
|
|
|
|
|
|
// We can now assign ResNet50's backbone to our network skipping the different
|
|
|
|
// layers, in our case, the loss layer and the fc layer:
|
|
|
|
net.subnet().subnet() = backbone;
|
|
|
|
|
|
|
|
// An alternative way to use the pretrained network on a different
|
|
|
|
// network is to extract the relevant part of the network (we remove
|
|
|
|
// loss and fc layers), stack the new layers on top of it and assign
|
|
|
|
// the network.
|
|
|
|
using net_type = loss_metric<fc_no_bias<128, decltype(backbone)>>;
|
|
|
|
net_type net2;
|
|
|
|
net2.subnet().subnet() = backbone;
|
|
|
|
}
|
|
|
|
|
|
|
|
// We can use the visit_layers function to modify the weight decay of the entire
|
|
|
|
// network:
|
2020-09-06 06:33:04 +08:00
|
|
|
visit_computational_layers(net, visitor_weight_decay_multiplier(0.001));
|
2020-02-07 20:59:36 +08:00
|
|
|
|
|
|
|
// We can also use predefined visitors to affect the learning rate of the whole
|
|
|
|
// network.
|
|
|
|
set_all_learning_rate_multipliers(net, 0.5);
|
|
|
|
|
|
|
|
// Modifying the learning rates of a network is a common practice for fine tuning, for
|
|
|
|
// this reason it is already provided. However, it is implemented internally using a
|
|
|
|
// visitor that is very similar to the one defined in this example.
|
|
|
|
|
|
|
|
// Usually, we want to freeze the network, except for the top layers:
|
2020-09-06 06:33:04 +08:00
|
|
|
visit_computational_layers(net.subnet().subnet(), visitor_weight_decay_multiplier(0));
|
2020-02-07 20:59:36 +08:00
|
|
|
set_all_learning_rate_multipliers(net.subnet().subnet(), 0);
|
|
|
|
|
|
|
|
// Alternatively, we can use the visit_layers_range to modify only a specific set of
|
|
|
|
// layers:
|
2020-09-06 06:33:04 +08:00
|
|
|
visit_computational_layers_range<0, 2>(net, visitor_weight_decay_multiplier(1));
|
2020-02-07 20:59:36 +08:00
|
|
|
|
2020-09-06 06:33:04 +08:00
|
|
|
// Sometimes we might want to set the learning rate differently throughout the network.
|
2020-02-07 20:59:36 +08:00
|
|
|
// Here we show how to use adjust the learning rate at the different ResNet50's
|
|
|
|
// convolutional blocks:
|
|
|
|
set_learning_rate_multipliers_range< 0, 2>(net, 1);
|
|
|
|
set_learning_rate_multipliers_range< 2, 38>(net, 0.1);
|
|
|
|
set_learning_rate_multipliers_range< 38, 107>(net, 0.01);
|
|
|
|
set_learning_rate_multipliers_range<107, 154>(net, 0.001);
|
|
|
|
set_learning_rate_multipliers_range<154, 193>(net, 0.0001);
|
|
|
|
|
|
|
|
// Finally, we can check the results by printing the network. But before, if we
|
|
|
|
// forward an image through the network, we will see tensors shape at every layer.
|
|
|
|
matrix<rgb_pixel> image(224, 224);
|
|
|
|
assign_all_pixels(image, rgb_pixel(0, 0, 0));
|
|
|
|
std::vector<matrix<rgb_pixel>> minibatch(1, image);
|
|
|
|
resizable_tensor input;
|
|
|
|
net.to_tensor(minibatch.begin(), minibatch.end(), input);
|
2020-04-01 07:35:23 +08:00
|
|
|
net.forward(input);
|
2020-02-07 20:59:36 +08:00
|
|
|
cout << net << endl;
|
|
|
|
cout << "input size=(" <<
|
|
|
|
"num:" << input.num_samples() << ", " <<
|
|
|
|
"k:" << input.k() << ", " <<
|
|
|
|
"nr:" << input.nr() << ", "
|
|
|
|
"nc:" << input.nc() << ")" << endl;
|
|
|
|
|
|
|
|
// We can also print the number of parameters of the network:
|
|
|
|
cout << "number of network parameters: " << count_parameters(net) << endl;
|
|
|
|
|
2020-09-06 06:33:04 +08:00
|
|
|
// From this point on, we can fine-tune the new network using this pretrained backbone
|
2020-02-07 20:59:36 +08:00
|
|
|
// on another task, such as the one showed in dnn_metric_learning_on_images_ex.cpp.
|
|
|
|
|
|
|
|
return EXIT_SUCCESS;
|
|
|
|
}
|
|
|
|
catch (const serialization_error& e)
|
|
|
|
{
|
|
|
|
cout << e.what() << endl;
|
|
|
|
cout << "You need to download a copy of the file resnet50_1000_imagenet_classifier.dnn" << endl;
|
|
|
|
cout << "available at http://dlib.net/files/resnet50_1000_imagenet_classifier.dnn.bz2" << endl;
|
|
|
|
cout << endl;
|
|
|
|
return EXIT_FAILURE;
|
|
|
|
}
|
|
|
|
catch (const exception& e)
|
|
|
|
{
|
|
|
|
cout << e.what() << endl;
|
|
|
|
return EXIT_FAILURE;
|
|
|
|
}
|