From 5b9ab23cc04bbb07b64f53ba82c3219a79ac3b80 Mon Sep 17 00:00:00 2001 From: pfeatherstone <45853521+pfeatherstone@users.noreply.github.com> Date: Thu, 18 May 2023 13:11:49 +0100 Subject: [PATCH] [FFmpeg] added push(image_type) methods to encoder and muxer (#2797) * added push() methods * Update dlib/media/ffmpeg_muxer.h * Update dlib/media/ffmpeg_muxer.h --------- Co-authored-by: pf Co-authored-by: Davis E. King --- dlib/media/ffmpeg_muxer.h | 79 +++++++++++++++++++++++++++++++++++ dlib/test/ffmpeg.cpp | 86 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 163 insertions(+), 2 deletions(-) diff --git a/dlib/media/ffmpeg_muxer.h b/dlib/media/ffmpeg_muxer.h index 2a75076f1..8d5af3e17 100644 --- a/dlib/media/ffmpeg_muxer.h +++ b/dlib/media/ffmpeg_muxer.h @@ -251,6 +251,33 @@ namespace dlib or an error occurred, in which case is_open() == false. !*/ + template < + class image_type, + class Callback, + is_image_check = true + > + bool push ( + const image_type& img, + Callback&& sink + ); + /*! + requires + - is_image_encoder() == true + - sink is set to a valid callback with signature bool(size_t, const char*) + for writing packet data. dlib/media/sink.h contains callback wrappers for + different buffer types. + - sink does not call encoder::push() or encoder::flush(), + i.e., the callback does not create a recursive loop. + ensures + - Encodes img using the constructor arguments, which may incur a resizing + operation if the image dimensions and pixel type don't match the codec. + - The sink callback may or may not be invoked as the underlying codec + can buffer if necessary. + - Returns true if successfully encoded, even if sink wasn't invoked. + - Returns false if either EOF, i.e. flush() has been previously called, + or an error occurred, in which case is_open() == false. + !*/ + template void flush ( Callback&& sink @@ -565,6 +592,23 @@ namespace dlib or an error occurred, in which case is_open() == false. 
!*/ + template < + class image_type, + is_image_check = true + > + bool push(const image_type& img); + /*! + requires + - is_image_encoder() == true + ensures + - Encodes img using the constructor arguments, which may incur a resizing + operation if the image dimensions and pixel type don't match the codec. + - Encodes and writes the encoded data to file/socket. + - Returns true if successfully encoded. + - Returns false if either EOF, i.e. flush() has been previously called, + or an error occurred, in which case is_open() == false. + !*/ + void flush(); /*! ensures @@ -1019,6 +1063,27 @@ namespace dlib return state != ENCODE_ERROR; } + template < + class image_type, + class Callback, + is_image_check + > + inline bool encoder::push ( + const image_type& img, + Callback&& sink + ) + { + // Unfortunately, FFmpeg assumes all data is over-aligned, and therefore, + // even though the API has facilities to convert img directly to a frame object, + // we cannot use it because it assumes the data in img is over-aligned, and of + // course, it is not. Shame. At some point, I'll do more digging to see if we + // can get around this without doing a brute force copy like below. 
+ using namespace details; + frame f; + convert(img, f); + return push(std::move(f), std::forward(sink)); + } + template inline void encoder::flush(Callback&& clb) { @@ -1268,6 +1333,20 @@ namespace dlib return false; } + template < + class image_type, + is_image_check + > + bool muxer::push(const image_type& img) + { + using namespace std; + using namespace details; + + return is_open() && + st.encoder_image.is_open() && + st.encoder_image.push(img, muxer_sink(st.pFormatCtx.get(), st.stream_id_video)); + } + inline void muxer::flush() { using namespace details; diff --git a/dlib/test/ffmpeg.cpp b/dlib/test/ffmpeg.cpp index 1ba82ef03..aef78b2f6 100644 --- a/dlib/test/ffmpeg.cpp +++ b/dlib/test/ffmpeg.cpp @@ -584,7 +584,82 @@ namespace ////////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////// - void test_muxer ( + template + void test_muxer1 ( + const std::string& filepath, + AVCodecID image_codec + ) + { + const std::string tmpfile = "dummy.avi"; + + // Load a video/audio as a source of frames + demuxer cap({filepath, video_enabled, audio_disabled}); + DLIB_TEST(cap.is_open()); + DLIB_TEST(cap.video_enabled()); + DLIB_TEST(!cap.audio_enabled()); + const int height = cap.height(); + const int width = cap.width(); + + // Open muxer + muxer writer([&] { + muxer::args args; + args.filepath = tmpfile; + args.enable_audio = false; + args.args_image.codec = image_codec; + args.args_image.h = cap.height(); + args.args_image.w = cap.width(); + args.args_image.framerate = cap.fps(); + args.args_image.fmt = AV_PIX_FMT_YUV420P; + return args; + }()); + + DLIB_TEST(writer.is_open()); + DLIB_TEST(!writer.audio_enabled()); + DLIB_TEST(writer.video_enabled()); + + DLIB_TEST(writer.get_video_codec_id() == image_codec); + DLIB_TEST(writer.height() == cap.height()); + DLIB_TEST(writer.width() == cap.width()); + + // Demux then remux + int 
nimages_demuxed{0}; + image_type img; + + while (cap.read(img)) + { + ++nimages_demuxed; + DLIB_TEST(img.nr() == height); + DLIB_TEST(img.nc() == width); + DLIB_TEST(writer.push(img)); + + if (nimages_demuxed % 10 == 0) + print_spinner(); + } + + writer.flush(); + + // Demux everything back + demuxer cap2(tmpfile); + DLIB_TEST(cap2.is_open()); + DLIB_TEST(cap2.video_enabled()); + DLIB_TEST(!cap2.audio_enabled()); + DLIB_TEST(cap2.get_video_codec_id() == image_codec); + DLIB_TEST(cap2.height() == height); + DLIB_TEST(cap2.width() == width); + + int nimages_muxed{0}; + + while (cap2.read(img)) + { + ++nimages_muxed; + if (nimages_muxed % 10 == 0) + print_spinner(); + } + + DLIB_TEST(nimages_muxed == nimages_demuxed); + } + + void test_muxer2 ( const std::string& filepath, AVCodecID image_codec, AVCodecID audio_codec @@ -802,7 +877,14 @@ namespace test_demuxer_full(filepath, nframes, height, width, sample_rate, has_video, has_audio); test_encoder(filepath, AV_CODEC_ID_MPEG4, AV_CODEC_ID_AC3); - test_muxer(filepath, AV_CODEC_ID_MPEG4, AV_CODEC_ID_AC3); + + if (has_video) + { + test_muxer1>(filepath, AV_CODEC_ID_MPEG4); + test_muxer1>(filepath, AV_CODEC_ID_MPEG4); + } + + test_muxer2(filepath, AV_CODEC_ID_MPEG4, AV_CODEC_ID_AC3); } } }