dlib/examples/ffmpeg_webcam_face_pose_ex.cpp

// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
/*

    This example program shows how to find frontal human faces in an image and
    estimate their pose.  The pose takes the form of 68 landmarks.  These are
    points on the face such as the corners of the mouth, along the eyebrows, on
    the eyes, and so forth.  
    

    This example is essentially just a version of the face_landmark_detection_ex.cpp
    example modified to use dlib's demuxer object to read from a camera instead 
    of files.


    Finally, note that the face detector is fastest when compiled with at least
    SSE2 instructions enabled.  So if you are using a PC with an Intel or AMD
    chip then you should enable at least SSE2 instructions.  If you are using
    cmake to compile this program you can enable them by using one of the
    following commands when you create the build project:
        cmake path_to_dlib_root/examples -DUSE_SSE2_INSTRUCTIONS=ON
        cmake path_to_dlib_root/examples -DUSE_SSE4_INSTRUCTIONS=ON
        cmake path_to_dlib_root/examples -DUSE_AVX_INSTRUCTIONS=ON
    This will set the appropriate compiler options for GCC, clang, Visual
    Studio, or the Intel compiler.  If you are using another compiler then you
    need to consult your compiler's manual to determine how to enable these
    instructions.  Note that AVX is the fastest but requires a CPU from at least
    2011.  SSE4 is the next fastest and is supported by most current machines.  
*/

#include <dlib/image_processing/frontal_face_detector.h>
#include <dlib/image_processing/render_face_detections.h>
#include <dlib/image_processing.h>
#include <dlib/cmd_line_parser.h>
#include <dlib/gui_widgets.h>
#include <dlib/media.h>

using namespace dlib;
using namespace std;

int main(int argc, const char** argv)
{
    try
    {
        command_line_parser parser;
        parser.add_option("height",     "height of frames", 1);
        parser.add_option("width",      "width of frames", 1);
        parser.add_option("framerate",  "webcam desired framerate", 1);
        parser.set_group_name("Help Options");
        parser.add_option("h",          "alias of --help");
        parser.add_option("help",       "display this message and exit");

        parser.parse(argc, argv);
        const char* one_time_opts[] = {"height", "width", "framerate"};
        parser.check_one_time_options(one_time_opts);

        if (parser.option("h") || parser.option("help"))
        {
            parser.print_options();
            cout << "Please use `v4l2-ctl --list-formats-ext` to view all supported hardware formats\n";
            return 0;
        }

        ffmpeg::demuxer cap{[&]
        {
            ffmpeg::demuxer::args args;
            args.filepath               = "/dev/video0";
            args.args_image.h           = get_option(parser, "height", 0);
            args.args_image.w           = get_option(parser, "width",  0);
            args.args_image.framerate   = get_option(parser, "framerate", 0);
            return args;
        }()};

        if (!cap.is_open())
        {
            cerr << "Unable to connect to camera" << endl;
            return 1;
        }

        cout << "height  : " << cap.height() << '\n';
        cout << "width   : " << cap.width() << '\n';
        cout << "fps     : " << cap.fps() << '\n';
        
        image_window win;

        // Load face detection and pose estimation models.
        frontal_face_detector detector = get_frontal_face_detector();
        shape_predictor pose_model;
        deserialize("shape_predictor_68_face_landmarks.dat") >> pose_model;

        ffmpeg::frame frame;
        array2d<rgb_pixel> img;

        // Grab and process frames until the main window is closed by the user.
        while(cap.read(frame) && !win.is_closed())
        {
            // Convert the frame object into a dlib image object
            convert(frame, img);

            // Detect faces 
            std::vector<rectangle> faces = detector(img);
            // Find the pose of each face.
            std::vector<full_object_detection> shapes;
            for (unsigned long i = 0; i < faces.size(); ++i)
                shapes.push_back(pose_model(img, faces[i]));

            // Display it all on the screen
            win.clear_overlay();
            win.set_image(img);
            win.add_overlay(render_face_detections(shapes));
        }
    }
    catch(serialization_error& e)
    {
        cout << "You need dlib's default face landmarking model file to run this example." << endl;
        cout << "You can get it from the following URL: " << endl;
        cout << "   http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2" << endl;
        cout << endl << e.what() << endl;
    }
    catch(exception& e)
    {
        cout << e.what() << endl;
    }
}
FFMPEG wrappers: dlib::ffmpeg::decoder and dlib::ffmpeg::demuxer (#2707) * - added ffmpeg stuff to cmake * - added observer_ptr * ffmpeg utils * WIP * - added ffmpeg_decoder * config file for test data * another test file * install ffmpeg * added ffmpeg_demuxer * install all ffmpeg libraries * support older version of ffmpeg * simplified loop * - test converting to dlib object - added docs - support older ffmpeg * added convert() overload * added comment * only register stuff when API not deprecated * - fixed version issues - fixed decoding * added tests for ffmpeg_demuxer * removed unused code * test GIF * added docs * added audio test * test for audio * more tests * review changes * don't need observer_ptr * made deps public. I could be wrong but just in case. * - added some static asserts. Some areas of the code might do memcpy's on arrays of pixels. This requires the structures to be packed. Check this. - added convert() functions - changed default decoder options. By default, always decode to RGB and S16 audio - added convenience constructor to demuxer * - no longer need opencv * oops. I let that slip * - made a few functions public - more precise requires clauses * enhanced example * - avoid FFMPEG_INITIALIZED being optimized away at link time - added decoding example * - avoid -Wunused-parameter error * constexpr and noexcept correctness. This probably makes no difference to performance, BUT, it's what the core guidelines tell you to do. It does however demonstrate how complicated and unecessarily verbose C++ is becoming. Sigh, maybe one day i'll make the switch to something that doesn't make my eyes twitch. * - simplified metadata structure * hopefully more educational * added another example * ditto * typo * screen grab example * whoops * avoid -Wunused-parameter errors * ditto * - added methods to av_dict - print the demuxer format options that were not used - enhanced webcam_face_pose_ex.cpp so you can set webcam options * if height and width are specified, attempt to set video_size in format_options. Otherwise set the bilinear resizer. * updated docs * once again, the ffmpeg APIs do a lot for you. It's a matter of knowing which APIs to call. * made header-only * - some Werror thing * don't use type_safe_union * - templated sample type - reverted deep copy of AVFrame for frame copy constructor * - added is_pixel_type and is_pixel_check * unit tests for pixel traits * enhanced is_image_type type trait and added is_image_check * added unit tests for is_image_type * added pix_traits, improved convert() functions * bug fix * get rid of -Werror=unused-variable error * added a type alias * that's the last of the manual memcpys gone. We'using ffmpeg API everywhere now for copying frames to buffers and back * missing doc * set framerate for webcam * list input devices * oops. I was trying to make ffmpeg 5 happy but i've given up on ffmpeg v5 compatibility in this PR. Future PR. * enhanced the information provided by list_input_devices and list_output_devices * removed vscode settings.json file * - added a type trait for checking whether a type is complete. This is useful for writing type traits that check other types have type trait specializations. But also other useful things. For example, std::unique_ptr uses something similar to this. * Davis was keen to simply check pixel_traits is specialised. That's equivalent to checking pixel_traits<> is complete for some type * code review * juse use the void_t in dlib/type_traits.h * one liners * just need is_image_check * more tests for is_image_type * i think this is correct * removed printf * better docs * Keep opencv out of it * keep old face pose example, then add new one which uses dlib's ffmpeg wrappers * revert * revert * better docs * better docs --------- Co-authored-by: pf <pf@me> 2023-01-30 09:17:34 +08:00			`// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt`
			`/*`

			`This example program shows how to find frontal human faces in an image and`
			`estimate their pose. The pose takes the form of 68 landmarks. These are`
			`points on the face such as the corners of the mouth, along the eyebrows, on`
			`the eyes, and so forth.`


			`This example is essentially just a version of the face_landmark_detection_ex.cpp`
			`example modified to use dlib's demuxer object to read from a camera instead`
			`of files.`


			`Finally, note that the face detector is fastest when compiled with at least`
			`SSE2 instructions enabled. So if you are using a PC with an Intel or AMD`
			`chip then you should enable at least SSE2 instructions. If you are using`
			`cmake to compile this program you can enable them by using one of the`
			`following commands when you create the build project:`
			`cmake path_to_dlib_root/examples -DUSE_SSE2_INSTRUCTIONS=ON`
			`cmake path_to_dlib_root/examples -DUSE_SSE4_INSTRUCTIONS=ON`
			`cmake path_to_dlib_root/examples -DUSE_AVX_INSTRUCTIONS=ON`
			`This will set the appropriate compiler options for GCC, clang, Visual`
			`Studio, or the Intel compiler. If you are using another compiler then you`
			`need to consult your compiler's manual to determine how to enable these`
			`instructions. Note that AVX is the fastest but requires a CPU from at least`
			`2011. SSE4 is the next fastest and is supported by most current machines.`
			`*/`

			`#include <dlib/image_processing/frontal_face_detector.h>`
			`#include <dlib/image_processing/render_face_detections.h>`
			`#include <dlib/image_processing.h>`
			`#include <dlib/cmd_line_parser.h>`
			`#include <dlib/gui_widgets.h>`
			`#include <dlib/media.h>`

			`using namespace dlib;`
			`using namespace std;`

			`int main(int argc, const char** argv)`
			`{`
			`try`
			`{`
			`command_line_parser parser;`
			`parser.add_option("height", "height of frames", 1);`
			`parser.add_option("width", "width of frames", 1);`
			`parser.add_option("framerate", "webcam desired framerate", 1);`
			`parser.set_group_name("Help Options");`
			`parser.add_option("h", "alias of --help");`
			`parser.add_option("help", "display this message and exit");`

			`parser.parse(argc, argv);`
			`const char* one_time_opts[] = {"height", "width", "framerate"};`
			`parser.check_one_time_options(one_time_opts);`

			`if (parser.option("h") \|\| parser.option("help"))`
			`{`
			`parser.print_options();`
			cout << "Please use `v4l2-ctl --list-formats-ext` to view all supported hardware formats\n";
			`return 0;`
			`}`

			`ffmpeg::demuxer cap{[&]`
			`{`
			`ffmpeg::demuxer::args args;`
FFMPEG : misc + ffmpeg5 support (#2746) * - enhanced list_muxers() - added fail() error handling helper function - moved framerate setting to decoder_image_args * docs * oops * - don't use std::endl, use `\n` instead - use fail(). See, on average, it removes lines of code * convenient constructor for demuxer * ffmpeg5 support * added docs for == -1 * oops * grouping audio channel compatibility stuff together * more compatibility stuff * more channel abstractions * build with ffmpeg 5 * install assembler * cache the installation * cmake doesn't like using ~ in filepath * at some point this will work * i think i need to change the key * test FFmpeg-n5.1.3_try3 cache * bug fix * Update build_cpp.yml Giving this another go * Update build_cpp.yml Disable building documentation and CLI tools * Update CMakeLists.txt Fix cmake script when using 3.8.0 and expecting imported targets to work when there are link flags included * - use environment variables - on ubuntu 18 gcc7, use ffmpeg 3.2.18 * correct way of dereferencing variables ? * can't get variables to work * Revert "can't get variables to work" This reverts commit 5eef96a43ef9e04bc7780abfce75e1bb2f0ca25f. * Revert "correct way of dereferencing variables ?" This reverts commit e8ff95f5c6c317bc8f3e6dde4b017a533c6806b3. * Revert "- use environment variables" This reverts commit a6938333d555d64e6cddcb7482ab579827ba6dfb. * using ffmpeg 3.2.18 with ubuntu18 gcc7 * Update build_cpp.yml Disable ubuntu18 job for now. Hopefully no more cancelled jobs, then i can re-enable * Re-enabled ubuntu18 job. Hopefully this time it won't get cancelled * Fixed bad indentation * Can go in details namespace * Update dlib/CMakeLists.txt Co-authored-by: Davis E. King <davis685@gmail.com> * use details namespace * remove declaration. It's in details now * don't need get_channels_from_layout() --------- Co-authored-by: pf <pf@me> Co-authored-by: Davis E. King <davis685@gmail.com> 2023-03-30 10:12:47 +08:00			`args.filepath = "/dev/video0";`
			`args.args_image.h = get_option(parser, "height", 0);`
			`args.args_image.w = get_option(parser, "width", 0);`
			`args.args_image.framerate = get_option(parser, "framerate", 0);`
FFMPEG wrappers: dlib::ffmpeg::decoder and dlib::ffmpeg::demuxer (#2707) * - added ffmpeg stuff to cmake * - added observer_ptr * ffmpeg utils * WIP * - added ffmpeg_decoder * config file for test data * another test file * install ffmpeg * added ffmpeg_demuxer * install all ffmpeg libraries * support older version of ffmpeg * simplified loop * - test converting to dlib object - added docs - support older ffmpeg * added convert() overload * added comment * only register stuff when API not deprecated * - fixed version issues - fixed decoding * added tests for ffmpeg_demuxer * removed unused code * test GIF * added docs * added audio test * test for audio * more tests * review changes * don't need observer_ptr * made deps public. I could be wrong but just in case. * - added some static asserts. Some areas of the code might do memcpy's on arrays of pixels. This requires the structures to be packed. Check this. - added convert() functions - changed default decoder options. By default, always decode to RGB and S16 audio - added convenience constructor to demuxer * - no longer need opencv * oops. I let that slip * - made a few functions public - more precise requires clauses * enhanced example * - avoid FFMPEG_INITIALIZED being optimized away at link time - added decoding example * - avoid -Wunused-parameter error * constexpr and noexcept correctness. This probably makes no difference to performance, BUT, it's what the core guidelines tell you to do. It does however demonstrate how complicated and unecessarily verbose C++ is becoming. Sigh, maybe one day i'll make the switch to something that doesn't make my eyes twitch. * - simplified metadata structure * hopefully more educational * added another example * ditto * typo * screen grab example * whoops * avoid -Wunused-parameter errors * ditto * - added methods to av_dict - print the demuxer format options that were not used - enhanced webcam_face_pose_ex.cpp so you can set webcam options * if height and width are specified, attempt to set video_size in format_options. Otherwise set the bilinear resizer. * updated docs * once again, the ffmpeg APIs do a lot for you. It's a matter of knowing which APIs to call. * made header-only * - some Werror thing * don't use type_safe_union * - templated sample type - reverted deep copy of AVFrame for frame copy constructor * - added is_pixel_type and is_pixel_check * unit tests for pixel traits * enhanced is_image_type type trait and added is_image_check * added unit tests for is_image_type * added pix_traits, improved convert() functions * bug fix * get rid of -Werror=unused-variable error * added a type alias * that's the last of the manual memcpys gone. We'using ffmpeg API everywhere now for copying frames to buffers and back * missing doc * set framerate for webcam * list input devices * oops. I was trying to make ffmpeg 5 happy but i've given up on ffmpeg v5 compatibility in this PR. Future PR. * enhanced the information provided by list_input_devices and list_output_devices * removed vscode settings.json file * - added a type trait for checking whether a type is complete. This is useful for writing type traits that check other types have type trait specializations. But also other useful things. For example, std::unique_ptr uses something similar to this. * Davis was keen to simply check pixel_traits is specialised. That's equivalent to checking pixel_traits<> is complete for some type * code review * juse use the void_t in dlib/type_traits.h * one liners * just need is_image_check * more tests for is_image_type * i think this is correct * removed printf * better docs * Keep opencv out of it * keep old face pose example, then add new one which uses dlib's ffmpeg wrappers * revert * revert * better docs * better docs --------- Co-authored-by: pf <pf@me> 2023-01-30 09:17:34 +08:00			`return args;`
			`}()};`

			`if (!cap.is_open())`
			`{`
			`cerr << "Unable to connect to camera" << endl;`
			`return 1;`
			`}`

			`cout << "height : " << cap.height() << '\n';`
			`cout << "width : " << cap.width() << '\n';`
			`cout << "fps : " << cap.fps() << '\n';`

			`image_window win;`

			`// Load face detection and pose estimation models.`
			`frontal_face_detector detector = get_frontal_face_detector();`
			`shape_predictor pose_model;`
			`deserialize("shape_predictor_68_face_landmarks.dat") >> pose_model;`

			`ffmpeg::frame frame;`
			`array2d<rgb_pixel> img;`

			`// Grab and process frames until the main window is closed by the user.`
			`while(cap.read(frame) && !win.is_closed())`
			`{`
			`// Convert the frame object into a dlib image object`
			`convert(frame, img);`

			`// Detect faces`
			`std::vector<rectangle> faces = detector(img);`
			`// Find the pose of each face.`
			`std::vector<full_object_detection> shapes;`
			`for (unsigned long i = 0; i < faces.size(); ++i)`
			`shapes.push_back(pose_model(img, faces[i]));`

			`// Display it all on the screen`
			`win.clear_overlay();`
			`win.set_image(img);`
			`win.add_overlay(render_face_detections(shapes));`
			`}`
			`}`
			`catch(serialization_error& e)`
			`{`
			`cout << "You need dlib's default face landmarking model file to run this example." << endl;`
			`cout << "You can get it from the following URL: " << endl;`
			`cout << " http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2" << endl;`
			`cout << endl << e.what() << endl;`
			`}`
			`catch(exception& e)`
			`{`
			`cout << e.what() << endl;`
			`}`
			`}`