[FFmpeg] added push(image_type) methods to encoder and muxer (#2797)

* added push() methods * Update dlib/media/ffmpeg_muxer.h * Update dlib/media/ffmpeg_muxer.h --------- Co-authored-by: pf <pf@me> Co-authored-by: Davis E. King <davis685@gmail.com>
2024-11-01 10:14:53 +08:00 · 2023-05-18 13:11:49 +01:00 · 2023-05-18 13:11:49 +01:00 · 5b9ab23cc0
commit 5b9ab23cc0
parent dab9aa1fa5
2 changed files with 163 additions and 2 deletions
--- a/dlib/media/ffmpeg_muxer.h
+++ b/dlib/media/ffmpeg_muxer.h
@ -251,6 +251,33 @@ namespace dlib
                      or an error occurred, in which case is_open() == false.
            !*/

+            template <
+              class image_type,
+              class Callback,
+              is_image_check<image_type> = true
+            >
+            bool push (
+                const image_type& img,
+                Callback&& sink
+            );
+            /*!
+                requires
+                    - is_image_encoder() == true
+                    - sink is set to a valid callback with signature bool(size_t, const char*)
+                      for writing packet data. dlib/media/sink.h contains callback wrappers for
+                      different buffer types.
+                    - sink does not call encoder::push() or encoder::flush(),
+                      i.e., the callback does not create a recursive loop. 
+                ensures
+                    - Encodes img using the constructor arguments, which may incur a resizing
+                      operation if the image dimensions and pixel type don't match the codec. 
+                    - The sink callback may or may not be invoked as the underlying codec 
+                      can buffer if necessary.
+                    - Returns true if successfully encoded, even if sink wasn't invoked.
+                    - Returns false if either EOF, i.e. flush() has been previously called,
+                      or an error occurred, in which case is_open() == false.
+            !*/
+
            template <class Callback>
            void flush (
                Callback&& sink
@ -565,6 +592,23 @@ namespace dlib
                      or an error occurred, in which case is_open() == false.
            !*/

+            template <
+              class image_type,
+              is_image_check<image_type> = true
+            >
+            bool push(const image_type& img);
+            /*!
+                requires
+                    - is_image_encoder() == true
+                ensures
+                    - Encodes img using the constructor arguments, which may incur a resizing
+                      operation if the image dimensions and pixel type don't match the codec. 
+                    - Writes the resulting encoded data to the file/socket.
+                    - Returns true if successfully encoded.
+                    - Returns false if either EOF, i.e. flush() has been previously called,
+                      or an error occurred, in which case is_open() == false.
+            !*/
+
            void flush();
            /*!
                ensures
@ -1019,6 +1063,27 @@ namespace dlib
            return state != ENCODE_ERROR;
        }

+        template <
+            class image_type,
+            class Callback,
+            is_image_check<image_type>
+        >
+        inline bool encoder::push (
+            const image_type& img,
+            Callback&& sink
+        )
+        {
+            // Unfortunately, FFmpeg assumes all data is over-aligned, and therefore,
+            // even though the API has facilities to convert img directly to a frame object,
+            // we cannot use it because it assumes the data in img is over-aligned, and of 
+            // course, it is not. Shame. At some point, I'll do more digging to see if we
+            // can get around this without doing a brute force copy like below.
+            using namespace details;
+            frame f;
+            convert(img, f);
+            return push(std::move(f), std::forward<Callback>(sink));
+        }
+
        template <class Callback>
        inline void encoder::flush(Callback&& clb)
        {
@ -1268,6 +1333,20 @@ namespace dlib
            return false;
        }

+        template <
+            class image_type,
+            is_image_check<image_type>
+        >
+        bool muxer::push(const image_type& img)
+        {
+            using namespace std;
+            using namespace details;
+
+            return is_open() &&
+                   st.encoder_image.is_open() &&
+                   st.encoder_image.push(img, muxer_sink(st.pFormatCtx.get(), st.stream_id_video));
+        }
+
        inline void muxer::flush()
        {
            using namespace details;
--- a/dlib/test/ffmpeg.cpp
+++ b/dlib/test/ffmpeg.cpp
@ -584,7 +584,82 @@ namespace
 //////////////////////////////////////////////////////////////////////////////////////////////////////
 //////////////////////////////////////////////////////////////////////////////////////////////////////

-    void test_muxer (
+    template<class image_type>
+    void test_muxer1 (
+        const std::string& filepath,
+        AVCodecID image_codec
+    )
+    {
+        const std::string tmpfile = "dummy.avi";
+
+        // Open the input file as a source of video frames (audio is disabled)
+        demuxer cap({filepath, video_enabled, audio_disabled});
+        DLIB_TEST(cap.is_open());
+        DLIB_TEST(cap.video_enabled());
+        DLIB_TEST(!cap.audio_enabled());
+        const int height = cap.height();
+        const int width  = cap.width();
+
+        // Open muxer
+        muxer writer([&] {
+            muxer::args args;
+            args.filepath = tmpfile;
+            args.enable_audio = false;
+            args.args_image.codec        = image_codec;
+            args.args_image.h            = cap.height();
+            args.args_image.w            = cap.width();
+            args.args_image.framerate    = cap.fps();
+            args.args_image.fmt          = AV_PIX_FMT_YUV420P;
+            return args;
+        }());
+
+        DLIB_TEST(writer.is_open());
+        DLIB_TEST(!writer.audio_enabled());
+        DLIB_TEST(writer.video_enabled());
+
+        DLIB_TEST(writer.get_video_codec_id()   == image_codec);
+        DLIB_TEST(writer.height()               == cap.height());
+        DLIB_TEST(writer.width()                == cap.width());
+
+        // Demux then remux
+        int nimages_demuxed{0};
+        image_type img;
+
+        while (cap.read(img))
+        {
+            ++nimages_demuxed;
+            DLIB_TEST(img.nr() == height);
+            DLIB_TEST(img.nc() == width);
+            DLIB_TEST(writer.push(img));
+
+            if (nimages_demuxed % 10 == 0)
+                print_spinner();
+        }
+
+        writer.flush();
+
+        // Demux everything back
+        demuxer cap2(tmpfile);
+        DLIB_TEST(cap2.is_open());
+        DLIB_TEST(cap2.video_enabled());
+        DLIB_TEST(!cap2.audio_enabled());
+        DLIB_TEST(cap2.get_video_codec_id() == image_codec);
+        DLIB_TEST(cap2.height() == height);
+        DLIB_TEST(cap2.width()  == width);
+
+        int nimages_muxed{0};
+
+        while (cap2.read(img))
+        {
+            ++nimages_muxed;
+            if (nimages_muxed % 10 == 0)
+                print_spinner();
+        }
+
+        DLIB_TEST(nimages_muxed == nimages_demuxed);
+    }
+
+    void test_muxer2 (
        const std::string& filepath,
        AVCodecID image_codec,
        AVCodecID audio_codec
@ -802,7 +877,14 @@ namespace

                    test_demuxer_full(filepath, nframes, height, width, sample_rate, has_video, has_audio);
                    test_encoder(filepath, AV_CODEC_ID_MPEG4, AV_CODEC_ID_AC3);
-                    test_muxer(filepath, AV_CODEC_ID_MPEG4, AV_CODEC_ID_AC3);
+                    
+                    if (has_video)
+                    {
+                        test_muxer1<array2d<rgb_pixel>>(filepath, AV_CODEC_ID_MPEG4);
+                        test_muxer1<matrix<bgr_pixel>>(filepath, AV_CODEC_ID_MPEG4);
+                    }
+                    
+                    test_muxer2(filepath, AV_CODEC_ID_MPEG4, AV_CODEC_ID_AC3);
                }
            }
        }