// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt /* This is an example illustrating the use of the deep learning tools from the dlib C++ Library. I'm assuming you have already read the dnn_mnist_ex.cpp example. So in this example program I'm going to go over a number of more advanced parts of the API, including: - Using grp layer for constructing inception layer Inception layer is a kind of NN architecture for running sevelar convolution types on the same input area and joining all convolution results into one output. For further reading refer http://www.cs.unc.edu/~wliu/papers/GoogLeNet.pdf */ #include #include #include using namespace std; using namespace dlib; // Inception layer has some different convolutions inside // Here we define blocks as convolutions with different kernel size that we will use in // inception layer block. template using block_a1 = relu>; template using block_a2 = relu>>>; template using block_a3 = relu>>>; template using block_a4 = relu>>; // Here is inception layer definition. It uses different blocks to process input and returns combined output template using incept_a = inception4; // Network can have inception layers of different structure. // Here are blocks with different convolutions template using block_b1 = relu>; template using block_b2 = relu>; template using block_b3 = relu>>; // Here is inception layer definition. It uses different blocks to process input and returns combined output template using incept_b = inception3; // and then the network type is using net_type = loss_multiclass_log< fc<10, relu> >>>>>>>>; int main(int argc, char** argv) try { // This example is going to run on the MNIST dataset. if (argc != 2) { cout << "This example needs the MNIST dataset to run!" << endl; cout << "You can get MNIST from http://yann.lecun.com/exdb/mnist/" << endl; cout << "Download the 4 files that comprise the dataset, decompress them, and" << endl; cout << "put them in a folder. Then give that folder as input to this program." << endl; return 1; } std::vector> training_images; std::vector training_labels; std::vector> testing_images; std::vector testing_labels; load_mnist_dataset(argv[1], training_images, training_labels, testing_images, testing_labels); // The rest of the sample is identical to dnn_minst_ex // Create network of predefined type. net_type net; // And then train it using the MNIST data. The code below uses mini-batch stochastic // gradient descent with an initial learning rate of 0.01 to accomplish this. dnn_trainer trainer(net); trainer.set_learning_rate(0.01); trainer.set_min_learning_rate(0.00001); trainer.set_mini_batch_size(128); trainer.be_verbose(); // Since DNN training can take a long time, we can ask the trainer to save its state to // a file named "mnist_sync" every 20 seconds. This way, if we kill this program and // start it again it will begin where it left off rather than restarting the training // from scratch. This is because, when the program restarts, this call to // set_synchronization_file() will automatically reload the settings from mnist_sync if // the file exists. trainer.set_synchronization_file("inception_sync", std::chrono::seconds(20)); // Finally, this line begins training. By default, it runs SGD with our specified // learning rate until the loss stops decreasing. Then it reduces the learning rate by // a factor of 10 and continues running until the loss stops decreasing again. It will // keep doing this until the learning rate has dropped below the min learning rate // defined above or the maximum number of epochs as been executed (defaulted to 10000). trainer.train(training_images, training_labels); // At this point our net object should have learned how to classify MNIST images. But // before we try it out let's save it to disk. Note that, since the trainer has been // running images through the network, net will have a bunch of state in it related to // the last batch of images it processed (e.g. outputs from each layer). Since we // don't care about saving that kind of stuff to disk we can tell the network to forget // about that kind of transient data so that our file will be smaller. We do this by // "cleaning" the network before saving it. net.clean(); serialize("mnist_network_inception.dat") << net; // Now if we later wanted to recall the network from disk we can simply say: // deserialize("mnist_network.dat") >> net; // Now let's run the training images through the network. This statement runs all the // images through it and asks the loss layer to convert the network's raw output into // labels. In our case, these labels are the numbers between 0 and 9. std::vector predicted_labels = net(training_images); int num_right = 0; int num_wrong = 0; // And then let's see if it classified them correctly. for (size_t i = 0; i < training_images.size(); ++i) { if (predicted_labels[i] == training_labels[i]) ++num_right; else ++num_wrong; } cout << "training num_right: " << num_right << endl; cout << "training num_wrong: " << num_wrong << endl; cout << "training accuracy: " << num_right/(double)(num_right+num_wrong) << endl; // Let's also see if the network can correctly classify the testing images. Since // MNIST is an easy dataset, we should see at least 99% accuracy. predicted_labels = net(testing_images); num_right = 0; num_wrong = 0; for (size_t i = 0; i < testing_images.size(); ++i) { if (predicted_labels[i] == testing_labels[i]) ++num_right; else ++num_wrong; } cout << "testing num_right: " << num_right << endl; cout << "testing num_wrong: " << num_wrong << endl; cout << "testing accuracy: " << num_right/(double)(num_right+num_wrong) << endl; } catch(std::exception& e) { cout << e.what() << endl; }