From eccb9675ffae443336cff9bb14125d4ac550b98f Mon Sep 17 00:00:00 2001 From: pfeatherstone <45853521+pfeatherstone@users.noreply.github.com> Date: Wed, 24 May 2023 12:46:30 +0100 Subject: [PATCH] [FFmpeg] added save_frame() and improved picking of codecs when not specified (#2800) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * WIP * - pick best codec - added save_frame() - fixed example * Add missing variable declaration * added extensions fixed tests * attempt to set quality as high as possible. lower expected PSNR * arrufat suggestion * refactoring --------- Co-authored-by: Your name Co-authored-by: pf Co-authored-by: Adrià Arrufat <1671644+arrufat@users.noreply.github.com> --- dlib/media/ffmpeg_demuxer.h | 2 +- dlib/media/ffmpeg_details.h | 41 ++++++++ dlib/media/ffmpeg_muxer.h | 137 +++++++++++++++++++------ dlib/test/ffmpeg.cpp | 46 +++++++++ examples/ffmpeg_file_to_speaker_ex.cpp | 4 +- 5 files changed, 195 insertions(+), 35 deletions(-) diff --git a/dlib/media/ffmpeg_demuxer.h b/dlib/media/ffmpeg_demuxer.h index ca317fec4..754ce3dcc 100644 --- a/dlib/media/ffmpeg_demuxer.h +++ b/dlib/media/ffmpeg_demuxer.h @@ -1627,7 +1627,7 @@ namespace dlib inline void load_frame(image_type& image, const std::string& file_name) { if (!demuxer({file_name, video_enabled, audio_disabled}).read(image)) - throw error("ffmpeg::load_frame: error while loading " + file_name); + throw error(EIMAGE_LOAD, "ffmpeg::load_frame: error while loading " + file_name); } // --------------------------------------------------------------------------------------------------- diff --git a/dlib/media/ffmpeg_details.h b/dlib/media/ffmpeg_details.h index 07f56fa0f..801b29e51 100644 --- a/dlib/media/ffmpeg_details.h +++ b/dlib/media/ffmpeg_details.h @@ -480,6 +480,47 @@ namespace dlib { namespace ffmpeg { namespace details } } +// --------------------------------------------------------------------------------------------------- + + inline AVCodecID 
pick_codec_from_filename(const std::string& filename) + { + const auto ext_pos = filename.find_last_of("."); + + if (ext_pos != std::string::npos) + { + const std::string ext = filename.substr(ext_pos + 1); + + if (ext == "png" || ext == "PNG") + return AV_CODEC_ID_PNG; + else if (ext == "jpeg" || ext == "jpg" || ext == "JPEG") + return AV_CODEC_ID_MJPEG; + else if (ext == "tiff") + return AV_CODEC_ID_TIFF; + else if (ext == "webp") + return AV_CODEC_ID_WEBP; + else if (ext == "bmp") + return AV_CODEC_ID_BMP; + else if (ext == "h264") + return AV_CODEC_ID_H264; + else if (ext == "h265" || ext == "hevc") + return AV_CODEC_ID_H265; + else if (ext == "aac") + return AV_CODEC_ID_AAC; + else if (ext == "ac3") + return AV_CODEC_ID_AC3; + else if (ext == "jls") + return AV_CODEC_ID_JPEGLS; + else if (ext == "jp2") + return AV_CODEC_ID_JPEG2000; +#if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(59, 37, 100) + else if (ext == "jxl") + return AV_CODEC_ID_JPEGXL; +#endif + } + + return AV_CODEC_ID_NONE; + } + // --------------------------------------------------------------------------------------------------- }}} diff --git a/dlib/media/ffmpeg_muxer.h b/dlib/media/ffmpeg_muxer.h index 7ce5b6d83..ddf2c2363 100644 --- a/dlib/media/ffmpeg_muxer.h +++ b/dlib/media/ffmpeg_muxer.h @@ -634,6 +634,24 @@ namespace dlib } st; }; +// --------------------------------------------------------------------------------------------------- + + template < + class image_type, + is_image_check = true + > + void save_frame( + const image_type& image, + const std::string& file_name, + const std::unordered_map& codec_options = {} + ); + /*! + requires + - image_type must be a type conforming to the generic image interface. + ensures + - encodes the image into the file pointed by file_name using options described in codec_options. 
+ !*/ + // --------------------------------------------------------------------------------------------------- } @@ -822,10 +840,64 @@ namespace dlib } #endif } - } // --------------------------------------------------------------------------------------------------- + inline bool check_codecs ( + const bool is_video, + const std::string& filename, + const AVOutputFormat* oformat, + encoder::args& args + ) + { + // Check the codec is supported by this muxer + const auto supported_codecs = list_codecs_for_muxer(oformat); + + const auto codec_supported = [&](AVCodecID id, const std::string& name) + { + return std::find_if(begin(supported_codecs), end(supported_codecs), [&](const auto& supported) { + return id != AV_CODEC_ID_NONE ? id == supported.codec_id : name == supported.codec_name; + }) != end(supported_codecs); + }; + + if (codec_supported(args.args_codec.codec, args.args_codec.codec_name)) + return true; + + logger_dlib_wrapper() << LWARN + << "Codec " << avcodec_get_name(args.args_codec.codec) << " or " << args.args_codec.codec_name + << " cannot be stored in this file"; + + // Pick codec based on file extension + args.args_codec.codec = pick_codec_from_filename(filename); + + if (codec_supported(args.args_codec.codec, "")) + { + logger_dlib_wrapper() << LWARN << "Picking codec " << avcodec_get_name(args.args_codec.codec); + return true; + } + + // Pick the default codec as suggested by FFmpeg + args.args_codec.codec = is_video ? 
oformat->video_codec : oformat->audio_codec; + + if (args.args_codec.codec != AV_CODEC_ID_NONE) + { + logger_dlib_wrapper() << LWARN << "Picking default codec " << avcodec_get_name(args.args_codec.codec); + return true; + } + + logger_dlib_wrapper() << LWARN + << "List of supported codecs for muxer " << oformat->name << " in this installation of ffmpeg:"; + + for (const auto& supported : supported_codecs) + logger_dlib_wrapper() << LWARN << " " << supported.codec_name; + + return false; + } + +// --------------------------------------------------------------------------------------------------- + + } + inline encoder::encoder( const args& a ) : args_(a) @@ -1188,37 +1260,8 @@ namespace dlib if (st.pFormatCtx->oformat->flags & AVFMT_GLOBALHEADER) args.args_codec.flags |= AV_CODEC_FLAG_GLOBAL_HEADER; - // Before we create the encoder, check the codec is supported by this muxer - const auto supported_codecs = list_codecs_for_muxer(st.pFormatCtx->oformat); - - if (std::find_if(begin(supported_codecs), end(supported_codecs), [&](const auto& supported) { - return args.args_codec.codec != AV_CODEC_ID_NONE ? - supported.codec_id == args.args_codec.codec : - supported.codec_name == args.args_codec.codec_name; - }) == end(supported_codecs)) - { - logger_dlib_wrapper() << LWARN - << "Codec " << avcodec_get_name(args.args_codec.codec) << " or " << args.args_codec.codec_name - << " cannot be stored in this file"; - - args.args_codec.codec = is_video ? 
st.pFormatCtx->oformat->video_codec : - st.pFormatCtx->oformat->audio_codec; - - if (args.args_codec.codec != AV_CODEC_ID_NONE) - { - logger_dlib_wrapper() << LWARN - << "Picking default codec " << avcodec_get_name(args.args_codec.codec); - } - else - { - logger_dlib_wrapper() << LWARN - << "List of supported codecs for muxer " << st.pFormatCtx->oformat->name << " in this installation of ffmpeg:"; - for (const auto& supported : supported_codecs) - logger_dlib_wrapper() << LWARN << " " << supported.codec_name; - - return false; - } - } + if (!check_codecs(is_video, st.args_.filepath, st.pFormatCtx->oformat, args)) + return false; // Codec is supported by muxer, so create encoder enc = encoder(args); @@ -1381,6 +1424,36 @@ namespace dlib inline AVCodecID muxer::get_audio_codec_id() const noexcept { return st.encoder_audio.get_codec_id(); } inline std::string muxer::get_audio_codec_name() const noexcept { return st.encoder_audio.get_codec_name(); } +// --------------------------------------------------------------------------------------------------- + + template < + class image_type, + is_image_check + > + inline void save_frame( + const image_type& image, + const std::string& file_name, + const std::unordered_map& codec_options + ) + { + muxer writer([&] { + muxer::args args; + args.filepath = file_name; + args.enable_image = true; + args.enable_audio = false; + args.args_image.h = num_rows(image); + args.args_image.w = num_columns(image); + args.args_image.framerate = 1; + args.args_image.fmt = pix_traits>::fmt; + args.args_image.codec_options = codec_options; + args.format_options["update"] = "1"; + return args; + }()); + + if (!writer.push(image)) + throw error(EIMAGE_SAVE, "ffmpeg::save_frame: error while saving " + file_name); + } + // --------------------------------------------------------------------------------------------------- } diff --git a/dlib/test/ffmpeg.cpp b/dlib/test/ffmpeg.cpp index aef78b2f6..d3fa27950 100644 --- a/dlib/test/ffmpeg.cpp +++ 
b/dlib/test/ffmpeg.cpp @@ -146,6 +146,21 @@ namespace DLIB_TEST_MSG(similarity > 25.0, "psnr " << similarity); } + template + void test_load_save_frame(const std::string& filename) + { + matrix img1, img2; + img1 = get_random_image(); + + save_frame(img1, filename, {{"qmin", "1"}, {"qmax", "1"}}); + load_frame(img2, filename); + + DLIB_TEST(img1.nr() == img2.nr()); + DLIB_TEST(img1.nc() == img2.nc()); + const double similarity = psnr(img1, img2); + DLIB_TEST_MSG(similarity > 20.0, "psnr " << similarity); + } + ////////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////// // DECODER @@ -784,6 +799,13 @@ namespace DLIB_TEST(samples <= (nsamples + rate)); } + const auto codec_supported = [](const AVCodecID id) + { + return std::find_if(begin(list_codecs()), end(list_codecs()), [=](const auto& supported) { + return supported.codec_id == id && supported.supports_encoding; + }) != end(list_codecs()); + }; + class video_tester : public tester { public: @@ -802,6 +824,30 @@ namespace test_frame(); test_frame(); test_frame(); + + if (codec_supported(AV_CODEC_ID_PNG)) + { + test_load_save_frame("dummy.png"); + test_load_save_frame("dummy.png"); + } + + if (codec_supported(AV_CODEC_ID_MJPEG)) + { + test_load_save_frame("dummy.jpg"); + test_load_save_frame("dummy.jpg"); + } + + if (codec_supported(AV_CODEC_ID_BMP)) + { + test_load_save_frame("dummy.bmp"); + test_load_save_frame("dummy.bmp"); + } + + if (codec_supported(AV_CODEC_ID_TIFF)) + { + test_load_save_frame("dummy.tiff"); + test_load_save_frame("dummy.tiff"); + } } dlib::file f(DLIB_FFMPEG_DATA); diff --git a/examples/ffmpeg_file_to_speaker_ex.cpp b/examples/ffmpeg_file_to_speaker_ex.cpp index 14705e42e..91a2da375 100644 --- a/examples/ffmpeg_file_to_speaker_ex.cpp +++ b/examples/ffmpeg_file_to_speaker_ex.cpp @@ -40,7 +40,7 @@ try const std::string filename = get_option(parser, 
"i", ""); const std::string device = get_option(parser, "o", "hw:0,0"); - const std::string codec = get_option(parser, "codec", "pcm_s16le"); + const std::string codec = get_option(parser, "codec", ""); if (device.empty()) { @@ -65,7 +65,7 @@ try args.enable_image = false; args.args_audio.codec_name = codec; args.args_audio.sample_rate = 44100; - args.args_audio.channel_layout = cap.channel_layout(); + args.args_audio.channel_layout = AV_CH_LAYOUT_STEREO; args.args_audio.fmt = cap.sample_fmt(); return args; }());