// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
/*
    This is an example illustrating the use of the ffmpeg wrappers,
    in this case the demuxer API.

    This is a pretty simple example. It loads a video file, and plots the image frames on a GUI window.
*/

#include <cstdio>
#include <chrono>
#include <dlib/media.h>
#include <dlib/gui_widgets.h>
#include <dlib/cmd_line_parser.h>

using namespace std;
using namespace dlib;
using namespace std::chrono;

int main(const int argc, const char** argv)
try
{
    command_line_parser parser;
    parser.add_option("i",       "input video", 1);
    parser.add_option("verbose", "enable all internal ffmpeg logging");
    parser.set_group_name("Help Options");
    parser.add_option("h",    "alias of --help");
    parser.add_option("help", "display this message and exit");

    parser.parse(argc, argv);
    const char* one_time_opts[] = {"i"};
    parser.check_one_time_options(one_time_opts);

    if (parser.option("h") || parser.option("help"))
    {
        parser.print_options();
        return 0;
    }

    if (parser.option("verbose"))
    {
        ffmpeg::logger_dlib_wrapper().set_level(LALL);
        ffmpeg::logger_ffmpeg().set_level(LALL);
    }

    const std::string filepath = get_option(parser, "i", "");

    image_window win;

    /*
        For simplicity we use the constructor which takes a filepath only. All other parameters
        are defaulted or guessed. Equivalently, we could have done:

            ffmpeg::demuxer::args args;
            args.filepath = filepath;
            ffmpeg::demuxer cap(args);

        Furthermore, we can set additional settings in args, for example:

            // This disables extracting and decoding images.
            // You may want to do this if you're only interested in extracting audio.
            args.enable_image = false;

            // This disables extracting and decoding audio.
            // You may want to do this if you don't care about audio in your video.
            // This saves processing time and you don't have to deal with audio frame objects in your code.
            args.enable_audio = false;

            // This will resize frames before presenting them to the user.
            // I.e. frames "returned" by demuxer::read() will have this height.
            // By default, the demuxer object does not resize frames.
            args.image_options.h = SOME_HEIGHT;

            // Same as above but for width.
            args.image_options.w = SOME_WIDTH;

            // By default, demuxer reformats frames from the default format in the encoded stream to RGB.
            // You can set this to AV_PIX_FMT_NONE and demuxer will leave frames in their default format.
            // This is likely to be AV_PIX_FMT_YUV420P.
            // However, you can set it to anything that FFMPEG supports, and frames will be presented
            // in that format.
            args.image_options.fmt = SOME_OTHER_PIXEL_FORMAT;

            // Same as above: by default, demuxer leaves audio frames in their default sample rate.
            // But the user can change this, and audio will be resampled to that rate.
            // Note, reducing the sample rate reduces the quality of the audio.
            // You can artificially upsample audio, but it won't make the quality any better.
            args.audio_options.sample_rate = SOME_SAMPLE_RATE;

            // You may want to do this if you want more or fewer channels.
            // Note, dlib only has one audio object, "audio_frame", which is stereo and uses the int16_t sample format.
            // So if you're going to use other layouts and sample formats, you won't be able to use audio_frame.
            // You will have to use ffmpeg::frame directly. Use with care and please visit ffmpeg's documentation.
            args.audio_options.channel_layout = SOME_OTHER_LAYOUT; // e.g. AV_CH_LAYOUT_MONO, AV_CH_LAYOUT_STEREO. See libavutil/channel_layout.h

            // This changes the default sample format.
            args.audio_options.fmt = SOME_OTHER_SAMPLE_FORMAT;

        A couple of sketches that build on these options are given in the comments at the end
        of this file.
    */

    ffmpeg::demuxer cap(filepath);
    if (!cap.is_open())
    {
        printf("%s is not a valid video file\n", filepath.c_str());
        return EXIT_FAILURE;
    }

    printf("Video properties:\n\n");
    printf("Estimated duration      : %f\n", cap.duration());
    printf("Video contains images   : %i\n", cap.video_enabled());
    if (cap.video_enabled())
    {
        printf("    height              : %i\n", cap.height());
        printf("    width               : %i\n", cap.width());
        printf("    pixel format        : %s\n", ffmpeg::get_pixel_fmt_str(cap.pixel_fmt()).c_str());
        printf("    fps                 : %f\n", cap.fps());
        printf("    nframes             : %d\n", cap.estimated_nframes());
        printf("    codec               : %s\n", cap.get_video_codec_name().c_str());
    }
    printf("Video contains audio    : %i\n", cap.audio_enabled());
    if (cap.audio_enabled())
    {
        printf("    sample rate         : %i\n", cap.sample_rate());
        printf("    channel layout      : %s\n", ffmpeg::get_channel_layout_str(cap.channel_layout()).c_str());
        printf("    sample format       : %s\n", ffmpeg::get_audio_fmt_str(cap.sample_fmt()).c_str());
        printf("    nchannels           : %i\n", cap.nchannels());
        printf("    estimated samples   : %i\n", cap.estimated_total_samples());
        printf("    codec               : %s\n", cap.get_audio_codec_name().c_str());
    }

    printf("\n\n");
    printf("Video metadata:\n");
    for (auto&& metadata : cap.get_metadata())
        printf("    key : %-32s ; val : %-32s\n", metadata.first.c_str(), metadata.second.c_str());

    ffmpeg::frame frame;
    array2d<rgb_pixel> img;

    size_t audio_samples{0};
    const auto start = high_resolution_clock::now();

    while (cap.read(frame))
    {
        // Only image frames are plotted. The demuxer converts images to RGB by default,
        // but we check the pixel format anyway before converting to a dlib image type.
        if (frame.is_image() && frame.pixfmt() == AV_PIX_FMT_RGB24)
        {
            convert(frame, img);
            win.set_image(img);
        }
        if (frame.is_audio())
        {
            audio_samples += frame.nsamples();
            printf("\r\tDecoding %zu samples", audio_samples);
            fflush(stdout);
        }
    }

    const auto stop = high_resolution_clock::now();
    printf("Ran in %f s\n", duration_cast<microseconds>(stop - start).count() * 1e-6);
    printf("\n");
    return EXIT_SUCCESS;
}
catch (const std::exception& e)
{
    printf("%s\n", e.what());
    return EXIT_FAILURE;
}
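
/*
    The sketch below consolidates a few of the demuxer::args settings described in the comment
    near the top of main() into one hypothetical configuration: an audio-only demuxer that
    resamples everything to 16 kHz mono. The 16000 and AV_CH_LAYOUT_MONO values are arbitrary
    example choices, not defaults. Note that with a mono layout you would work with
    ffmpeg::frame directly rather than dlib's stereo audio_frame, as explained above.

        ffmpeg::demuxer::args args;
        args.filepath                     = filepath;
        args.enable_image                 = false;              // don't extract or decode images
        args.audio_options.sample_rate    = 16000;              // resample audio to 16 kHz
        args.audio_options.channel_layout = AV_CH_LAYOUT_MONO;  // downmix to mono
        ffmpeg::demuxer cap(args);
*/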
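
/*
    Similarly, a rough sketch of how the read loop above could collect the decoded audio rather
    than just count samples. This assumes the demuxer is left with its default audio options
    (stereo, int16_t samples), that dlib provides a convert() overload from ffmpeg::frame to
    ffmpeg::audio_frame analogous to the image overload used in the loop above, and that
    audio_frame exposes its data as a vector named "samples"; check dlib/media.h for the exact
    API before relying on this.

        ffmpeg::frame frame;
        ffmpeg::audio_frame audio;
        std::vector<ffmpeg::audio_frame::sample> samples;   // accumulated stereo int16_t samples

        while (cap.read(frame))
        {
            if (frame.is_audio())
            {
                convert(frame, audio);
                samples.insert(samples.end(), audio.samples.begin(), audio.samples.end());
            }
        }
*/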