FFmpeg : encoding (#2754)

* docs * callbacks for encoder * shorter video * shorter video * added is_byte type trait * leave muxer for next PR * added overloads for set_layout() and get_layout() in details namespace * unit test * example * build * overloads for ffmpeg < 5 * Update examples/ffmpeg_video_encoding_ex.cpp Co-authored-by: Adrià Arrufat <1671644+arrufat@users.noreply.github.com> * Update dlib/media/ffmpeg_abstract.h Co-authored-by: Davis E. King <davis685@gmail.com> * Update dlib/media/ffmpeg_abstract.h Co-authored-by: Davis E. King <davis685@gmail.com> * Update dlib/media/ffmpeg_abstract.h Co-authored-by: Davis E. King <davis685@gmail.com> * Update dlib/media/ffmpeg_abstract.h Co-authored-by: Davis E. King <davis685@gmail.com> * Update dlib/media/ffmpeg_abstract.h Co-authored-by: Davis E. King <davis685@gmail.com> * as per suggestion * remove requires clause * Update examples/ffmpeg_video_encoding_ex.cpp Co-authored-by: Davis E. King <davis685@gmail.com> * Update dlib/media/ffmpeg_abstract.h Co-authored-by: Davis E. King <davis685@gmail.com> * Update dlib/media/ffmpeg_abstract.h Co-authored-by: Davis E. King <davis685@gmail.com> * Update dlib/media/ffmpeg_abstract.h Co-authored-by: Davis E. King <davis685@gmail.com> * Update dlib/media/ffmpeg_muxer.h Co-authored-by: Davis E. King <davis685@gmail.com> * use dlib::logger * oops * Update dlib/media/ffmpeg_muxer.h Co-authored-by: Davis E. King <davis685@gmail.com> * Update dlib/media/ffmpeg_demuxer.h * Update dlib/media/ffmpeg_demuxer.h * Update dlib/media/ffmpeg_abstract.h --------- Co-authored-by: pf <pf@me> Co-authored-by: Davis E. King <davis685@gmail.com> Co-authored-by: Adrià Arrufat <1671644+arrufat@users.noreply.github.com>
2024-11-01 10:14:53 +08:00 · 2023-04-01 14:48:26 +01:00 · 2023-04-01 14:48:26 +01:00 · 937e07e90d
commit 937e07e90d
parent f586d0a552
13 changed files with 1313 additions and 78 deletions
--- a/dlib/media.h
+++ b/dlib/media.h
@ -11,5 +11,7 @@ static_assert(false, "This version of dlib isn't built with the FFMPEG wrappers"

 #include "media/ffmpeg_utils.h"
 #include "media/ffmpeg_demuxer.h"
+#include "media/ffmpeg_muxer.h"
+#include "media/sink.h"

 #endif // DLIB_MEDIA 
--- a/dlib/media/ffmpeg_abstract.h
+++ b/dlib/media/ffmpeg_abstract.h
@ -1018,7 +1018,323 @@ namespace dlib
            !*/
        };

-        // ---------------------------------------------------------------------------------------------------
+// ---------------------------------------------------------------------------------------------------
+
+        template <
+          class Byte, 
+          class Allocator,
+          std::enable_if_t<is_byte<Byte>::value, bool> = true
+        >
+        auto sink(std::vector<Byte, Allocator>& buf);
+        /*!
+            requires
+                - Byte must be a byte type, e.g. char, int8_t or uint8_t
+            ensures
+                - returns a function object with signature bool(std::size_t N, const char* data).  When
+                  called that function appends the first N bytes pointed to by data onto the end of buf.
+                - The returned function is valid only as long as buf exists.
+                - The function always returns true.        
+        !*/
+
+// ---------------------------------------------------------------------------------------------------
+
+        auto sink(std::ostream& out);
+        /*!
+            ensures
+                - returns a function object with signature bool(std::size_t N, const char* data).  When
+                  called that function writes the first N bytes pointed to by data to out.
+                - The returned function is valid only as long as out exists.
+                - The returned function returns out.good(), i.e. true if the write to the stream succeeded and false otherwise.
+        !*/
+
+// ---------------------------------------------------------------------------------------------------
+
+        struct encoder_image_args
+        {
+            /*!
+                WHAT THIS OBJECT REPRESENTS
+                    This class groups a set of arguments passed to the encoder and muxer classes.
+                    These must be set to non-zero or non-trivial values as they are used to configure 
+                    the underlying codec and optionally, an internal image scaler.
+                    Any frame that is pushed to encoder or muxer instances is resized to the codec's 
+                    pre-configured settings if its dimensions or pixel format don't match.
+                    For example, if the codec is configured to use height 512, width 384 and RGB format,
+                    using the variables below, and the frames already have these settings when pushed, 
+                    then no resizing is performed. If however they don't, then they are first resized. 
+            !*/
+
+            // Target height of codec.
+            int h{0};
+
+            // Target width of codec.
+            int w{0};
+            
+            // Target pixel format of codec.
+            AVPixelFormat fmt{AV_PIX_FMT_YUV420P};
+
+            // Target framerate of codec/muxer
+            int framerate{0};
+        };
+
+// ---------------------------------------------------------------------------------------------------
+
+        struct encoder_audio_args
+        {
+            /*!
+                WHAT THIS OBJECT REPRESENTS
+                    This class groups a set of arguments passed to the encoder and muxer classes.
+                    These must be set to non-zero or non-trivial values as they are used to configure 
+                    the underlying codec and optionally, an internal audio resampler.
+                    Any frame that is pushed to encoder or muxer instances is resampled to the codec's
+                    pre-configured settings if its sample format, sample rate or channel layout doesn't match.
+            !*/
+
+            // Target sample rate of codec
+            int sample_rate{0};
+
+            // Target channel layout of codec
+            uint64_t channel_layout{AV_CH_LAYOUT_STEREO};
+
+            // Target sample format of codec
+            AVSampleFormat fmt{AV_SAMPLE_FMT_S16};
+        };
+
+// ---------------------------------------------------------------------------------------------------
+
+        struct encoder_codec_args
+        {
+            /*!
+                WHAT THIS OBJECT REPRESENTS
+                    This class groups a set of arguments passed to the encoder and muxer classes.
+                    Some of these must be set to non-zero or non-trivial values as they are used 
+                    to configure the underlying codec. Others will only be used if non-zero or
+                    non-trivial.
+            !*/
+
+            // Codec ID used to configure the encoder. Either codec or codec_name MUST be set.
+            AVCodecID codec{AV_CODEC_ID_NONE};
+
+            // Codec name used to configure the encoder. This is used if codec == AV_CODEC_ID_NONE.
+            std::string codec_name;
+
+            // A dictionary of AVCodecContext and codec-private options. Used by "avcodec_open2()"
+            std::unordered_map<std::string, std::string> codec_options;
+
+            // Sets AVCodecContext::bit_rate if non-negative.
+            int64_t bitrate{-1};
+
+            // Sets AVCodecContext::gop_size if non-negative.
+            int gop_size{-1};
+
+            // OR-ed with AVCodecContext::flags if non-negative.
+            int flags{0};
+        };
+
+// ---------------------------------------------------------------------------------------------------
+
+        class encoder
+        {
+        public:
+            /*!
+                WHAT THIS OBJECT REPRESENTS
+                    This class is a libavcodec wrapper which encodes video or audio to raw memory.
+                    Note, if you are creating a media file, it is easier to use the muxer object
+                    as it also works with raw codec files like .h264 files.
+                    This class is suitable for example if you need to send raw packets over a socket
+                    or interface with another library that requires encoded data, not raw images
+                    or raw audio samples.
+            !*/
+
+            struct args
+            {
+                /*!
+                    WHAT THIS OBJECT REPRESENTS
+                        This holds constructor arguments for encoder.
+                !*/
+                encoder_codec_args args_codec;
+                encoder_image_args args_image;
+                encoder_audio_args args_audio;
+            };
+
+            encoder() = default;
+            /*!
+                ensures
+                    - is_open() == false
+            !*/
+
+            encoder(
+                const args& a,
+                std::function<bool(std::size_t, const char*)> sink
+            );
+            /*!
+                requires
+                    - Either a.args_codec.codec or a.args_codec.codec_name is set
+                    - Either a.args_image or a.args_audio is fully set
+                    - sink is set to a valid callback for writing packet data.
+                      dlib/media/sink.h contains callback wrappers for
+                      different buffer types.
+
+                ensures
+                    - Constructs encoder from args and sink
+                    - is_open() == true
+            !*/
+
+            encoder(encoder&& other) = default;
+            /*!
+                ensures
+                    - Move constructor
+                    - other is in an empty but otherwise valid state after move
+                    - other.is_open() == false after move
+            !*/
+
+            encoder& operator=(encoder&& other) = default;
+            /*!
+                ensures
+                    - Move assignment operator
+                    - other is in an empty but otherwise valid state after move
+                    - other.is_open() == false after move
+            !*/
+            
+            ~encoder();
+            /*!
+                ensures
+                    - Destructor
+                    - flush() is called if it hasn't been already
+            !*/
+
+            bool is_open() const noexcept;
+            /*!
+                ensures
+                    - Returns true if the codec is open and user may call push()
+            !*/
+
+            bool is_image_encoder() const noexcept;
+            /*!
+                ensures
+                    - Returns true if the codec is an image encoder.
+            !*/
+
+            bool is_audio_encoder() const noexcept;
+            /*!
+                ensures
+                    - Returns true if the codec is an audio encoder.
+            !*/
+
+            AVCodecID get_codec_id() const noexcept;
+            /*!
+                requires
+                    - is_open() == true
+                ensures
+                    - returns the codec id. See ffmpeg documentation or libavcodec/codec_id.h
+            !*/
+
+            std::string get_codec_name() const noexcept;
+            /*!
+                requires
+                    - is_open() == true
+                ensures
+                    - returns string representation of codec id.
+            !*/
+
+            int height() const noexcept;
+            /*!
+                requires
+                    - is_image_encoder() == true
+                ensures
+                    - returns the height of the configured codec, not necessarily the
+                      height of frames passed to push(frame)
+            !*/
+
+            int width() const noexcept;
+            /*!
+                requires
+                    - is_image_encoder() == true
+                ensures
+                    - returns the width of the configured codec, not necessarily the
+                      width of frames passed to push(frame)
+            !*/
+
+            AVPixelFormat pixel_fmt() const noexcept;
+            /*!
+                requires
+                    - is_image_encoder() == true
+                ensures
+                    - returns the pixel format of the configured codec, not necessarily the
+                      pixel format of frames passed to push(frame)
+            !*/
+
+            int fps() const noexcept;
+             /*!
+                requires
+                    - is_image_encoder() == true
+                ensures
+                    - returns the configured framerate of the codec.
+            !*/
+
+            int sample_rate() const noexcept;
+            /*!
+                requires
+                    - is_audio_encoder() == true
+                ensures
+                    - returns the sample rate of the configured codec, not necessarily the
+                      sample rate of frames passed to push(frame)
+            !*/
+
+            uint64_t channel_layout() const noexcept;
+            /*!
+                requires
+                    - is_audio_encoder() == true
+                ensures
+                    - returns the channel layout of the configured codec, not necessarily the
+                      channel layout of frames passed to push(frame).
+                      e.g. AV_CH_LAYOUT_STEREO, AV_CH_LAYOUT_MONO etc.
+            !*/
+
+            AVSampleFormat sample_fmt() const noexcept;
+            /*!
+                requires
+                    - is_audio_encoder() == true
+                ensures
+                    - returns the sample format of the configured codec, not necessarily the
+                      sample format of frames passed to push(frame)
+            !*/
+
+            int nchannels() const noexcept;
+            /*!
+                requires
+                    - is_audio_encoder() == true
+                ensures
+                    - returns the number of audio channels in the configured codec.
+            !*/
+
+            bool push(frame f);
+            /*!
+                requires
+                    - is_open() == true
+                    - if is_image_encoder() == true, then f.is_image() == true
+                    - if is_audio_encoder() == true, then f.is_audio() == true
+                    - flush() has not been called
+                ensures
+                    - If f does not have matching settings to the codec, it is either
+                      resized or resampled before being pushed to the codec and encoded.
+                    - The callback passed to the constructor may or may not be invoked
+                      as the underlying resampler, audio fifo and codec may buffer.
+                    - Returns true if successfully encoded, even if callback wasn't invoked.
+                    - Returns false if either EOF, i.e. flush() has been previously called,
+                      or an error occurred, in which case is_open() == false.
+            !*/
+
+            void flush();
+            /*!
+                ensures
+                    - Flushes the codec. Callback passed to constructor will likely be invoked.
+                    - is_open() == false
+                    - Becomes a no-op after the first time you call this.
+            !*/
+        };
+
+// ---------------------------------------------------------------------------------------------------
+
    }
 }

--- a/dlib/media/ffmpeg_demuxer.h
+++ b/dlib/media/ffmpeg_demuxer.h
@ -73,9 +73,10 @@ namespace dlib
                decoder_extractor() = default;

                decoder_extractor(
-                    const args& a,
-                    av_ptr<AVCodecContext> pCodecCtx_,
-                    const AVCodec* codec
+                    const args&             a,
+                    av_ptr<AVCodecContext>  pCodecCtx_,
+                    const AVCodec*          codec,
+                    std::shared_ptr<logger> log_
                );

                bool            is_open()           const noexcept;
@ -107,6 +108,7 @@ namespace dlib
                resizer                 resizer_image;
                resampler               resizer_audio;
                std::queue<frame>       frame_queue;
+                std::shared_ptr<logger> log;
            };
        }

@ -152,6 +154,7 @@ namespace dlib
            details::av_ptr<AVCodecParserContext>   parser;
            details::av_ptr<AVPacket>               packet;
            details::decoder_extractor              extractor;
+            std::shared_ptr<logger>                 log;
        };

 // ---------------------------------------------------------------------------------------------------
@ -233,6 +236,7 @@ namespace dlib
                int                                     stream_id_video{-1};
                int                                     stream_id_audio{-1};
                std::queue<frame>                       frame_queue;
+                std::shared_ptr<logger>                 log;
            } st;
        };

@ -256,10 +260,11 @@ namespace dlib
        namespace details
        {
            inline decoder_extractor::decoder_extractor(
-                const args& a,
-                av_ptr<AVCodecContext> pCodecCtx_,
-                const AVCodec* codec
-            )
+                const args&             a,
+                av_ptr<AVCodecContext>  pCodecCtx_,
+                const AVCodec*          codec,
+                std::shared_ptr<logger> log_
+            ) : log(log_)
            {
                args_   = a;
                avframe = make_avframe();
@ -274,7 +279,7 @@ namespace dlib

                if (ret < 0)
                {
-                    printf("avcodec_open2() failed : `%s`\n", get_av_error(ret).c_str());
+                    (*log) << LERROR << "avcodec_open2() failed : " << get_av_error(ret).c_str();
                    return;
                }
                
@ -353,7 +358,7 @@ namespace dlib
                    } else {
                        pCodecCtx = nullptr;
                        state   = EXTRACT_ERROR;
-                        printf("avcodec_send_packet() failed : `%s`\n", get_av_error(ret).c_str());
+                        (*log) << LERROR << "avcodec_send_packet() failed : " << get_av_error(ret);
                    }
                };

@ -373,7 +378,7 @@ namespace dlib
                    {
                        pCodecCtx = nullptr;
                        state   = EXTRACT_ERROR;
-                        printf("avcodec_receive_frame() failed : %i - `%s`\n", ret, get_av_error(ret).c_str());
+                        (*log) << LERROR << "avcodec_receive_frame() failed : " << get_av_error(ret);
                    }
                    else
                    {
@ -449,6 +454,7 @@ namespace dlib
 // ---------------------------------------------------------------------------------------------------

        inline decoder::decoder(const args &a)
+        : log(std::make_shared<logger>("ffmpeg::decoder"))
        {
            using namespace details;

@ -465,7 +471,12 @@ namespace dlib

            if (!pCodec)
            {
-                printf("Codec `%s` / `%s` not found\n", avcodec_get_name(a.args_codec.codec), a.args_codec.codec_name.c_str());
+                (*log) << LERROR 
+                    << "Codec "
+                    << avcodec_get_name(a.args_codec.codec)
+                    << " / "
+                    << a.args_codec.codec_name
+                    << " not found.";
                return;
            }

@ -473,14 +484,14 @@ namespace dlib

            if (!pCodecCtx)
            {
-                printf("avcodec_alloc_context3() failed to allocate codec context for `%s`\n", pCodec->name);
+                (*log) << LERROR << "avcodec_alloc_context3() failed to allocate codec context for " << pCodec->name;
                return;
            }

            if (pCodecCtx->codec_id == AV_CODEC_ID_AAC)
                pCodecCtx->strict_std_compliance = FF_COMPLIANCE_EXPERIMENTAL;

-            extractor = decoder_extractor{{a.args_codec, a.args_image, a.args_audio, pCodecCtx->time_base}, std::move(pCodecCtx), pCodec};
+            extractor = decoder_extractor{{a.args_codec, a.args_image, a.args_audio, pCodecCtx->time_base}, std::move(pCodecCtx), pCodec, log};
            if (!extractor.is_open())
                return;

@ -493,7 +504,7 @@ namespace dlib
                parser.reset(av_parser_init(pCodec->id));
                if (!parser)
                {
-                    printf("av_parser_init() failed codec `%s` not found\n", pCodec->name);
+                    (*log) << LERROR << "av_parser_init() failed codec " << pCodec->name << " not found";
                    return;
                }
            }
@ -539,7 +550,7 @@ namespace dlib
                    );

                    if (ret < 0)
-                        return fail(cerr, "AV : error while parsing encoded buffer");
+                        return fail(*log, "AV : error while parsing encoded buffer");

                    encoded  += ret;
                    nencoded -= ret;
@ -640,6 +651,8 @@ namespace dlib

        inline demuxer::demuxer(const args &a)
        {
+            st.log = std::make_shared<logger>("ffmpeg::demuxer");
+
            if (!open(a))
                st.pFormatCtx = nullptr;
        }
@ -718,7 +731,7 @@ namespace dlib
                                        opts.get());

            if (ret != 0)
-                return fail(cerr, "avformat_open_input() failed with error : ", get_av_error(ret));
+                return fail(*st.log, "avformat_open_input() failed with error : ", get_av_error(ret));

            if (opts.size() > 0)
            {
@ -732,7 +745,7 @@ namespace dlib
            ret = avformat_find_stream_info(st.pFormatCtx.get(), NULL);

            if (ret < 0)
-                return fail(cerr, "avformat_find_stream_info() failed with error : ", get_av_error(ret));
+                return fail(*st.log, "avformat_find_stream_info() failed with error : ", get_av_error(ret));

            const auto setup_stream = [&](bool is_video)
            {
@ -745,26 +758,26 @@ namespace dlib
                    return true; //You might be asking for both video and audio but only video is available. That's OK. Just provide video.

                else if (stream_id == AVERROR_DECODER_NOT_FOUND)
-                    return fail(cerr, "av_find_best_stream() : decoder not found for stream type : ", av_get_media_type_string(media_type));
+                    return fail(*st.log, "av_find_best_stream() : decoder not found for stream type : ", av_get_media_type_string(media_type));

                else if (stream_id < 0)
-                    return fail(cerr, "av_find_best_stream() failed : ", get_av_error(stream_id));
+                    return fail(*st.log, "av_find_best_stream() failed : ", get_av_error(stream_id));

                av_ptr<AVCodecContext> pCodecCtx{avcodec_alloc_context3(pCodec)};

                if (!pCodecCtx)
-                    return fail(cerr, "avcodec_alloc_context3() failed to allocate codec context for ", pCodec->name);
+                    return fail(*st.log, "avcodec_alloc_context3() failed to allocate codec context for ", pCodec->name);

                const int ret = avcodec_parameters_to_context(pCodecCtx.get(), st.pFormatCtx->streams[stream_id]->codecpar);
                if (ret < 0)
-                    return fail(cerr, "avcodec_parameters_to_context() failed : ", get_av_error(ret));
+                    return fail(*st.log, "avcodec_parameters_to_context() failed : ", get_av_error(ret));

                if (pCodecCtx->codec_type == AVMEDIA_TYPE_VIDEO)
                {
                    if (pCodecCtx->height   == 0 ||
                        pCodecCtx->width    == 0 ||
                        pCodecCtx->pix_fmt  == AV_PIX_FMT_NONE)
-                        return fail(cerr, "Codec parameters look wrong : (h,w,pixel_fmt) : (",
+                        return fail(*st.log, "Codec parameters look wrong : (h,w,pixel_fmt) : (",
                                pCodecCtx->height, ",",
                                pCodecCtx->width,  ",",
                                get_pixel_fmt_str(pCodecCtx->pix_fmt), ")");
@ -774,13 +787,13 @@ namespace dlib
                    if (pCodecCtx->sample_rate == 0 ||
                        pCodecCtx->sample_fmt  == AV_SAMPLE_FMT_NONE ||
                        details::channel_layout_empty(pCodecCtx.get()))
-                        return fail(cerr,"Codec parameters look wrong :",
+                        return fail(*st.log,"Codec parameters look wrong :",
                            " sample_rate : ", pCodecCtx->sample_rate,
                            " sample format : ", get_audio_fmt_str(pCodecCtx->sample_fmt),
                            " channel layout : ", details::get_channel_layout_str(pCodecCtx.get()));
                }
                else
-                    return fail(cerr,"Unrecognized media type ", pCodecCtx->codec_type);
+                    return fail(*st.log,"Unrecognized media type ", pCodecCtx->codec_type);

                if (is_video)
                {
@ -790,7 +803,7 @@ namespace dlib
                        args.args_image = st.args_.args_image;
                        args.time_base  = st.pFormatCtx->streams[stream_id]->time_base;
                        return args;
-                    }(), std::move(pCodecCtx), pCodec};
+                    }(), std::move(pCodecCtx), pCodec, st.log};

                    st.stream_id_video = stream_id;
                }
@ -802,7 +815,7 @@ namespace dlib
                        args.args_audio = st.args_.args_audio;
                        args.time_base  = st.pFormatCtx->streams[stream_id]->time_base;
                        return args;
-                    }(), std::move(pCodecCtx), pCodec};
+                    }(), std::move(pCodecCtx), pCodec, st.log};

                    st.stream_id_audio = stream_id;
                }
@ -817,7 +830,7 @@ namespace dlib
                return false;

            if (!st.channel_audio.is_open() && !st.channel_video.is_open())
-                return fail(cerr, "At least one of video and audio channels must be enabled");
+                return fail(*st.log, "At least one of video and audio channels must be enabled");

            populate_metadata();

@ -889,7 +902,7 @@ namespace dlib
                    return false;
   
                else if (ret < 0)
-                    return fail(cerr, "av_read_frame() failed : ", get_av_error(ret));
+                    return fail(*st.log, "av_read_frame() failed : ", get_av_error(ret));
 
                if (st.packet->stream_index == st.stream_id_video)
                    channel = &st.channel_video;
--- a/dlib/media/ffmpeg_muxer.h
+++ b/dlib/media/ffmpeg_muxer.h
@ -0,0 +1,543 @@
+// Copyright (C) 2023  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+
+#ifndef DLIB_VIDEO_MUXER
+#define DLIB_VIDEO_MUXER
+
+#include <queue>
+#include <functional>
+#include <unordered_map>
+#include "ffmpeg_utils.h"
+
+namespace dlib
+{
+    namespace ffmpeg
+    {
+// ---------------------------------------------------------------------------------------------------
+
+        // Video-specific settings for an encoder. encoder::open() refuses to open a video
+        // codec unless h, w and framerate are strictly positive and fmt is not AV_PIX_FMT_NONE.
+        struct encoder_image_args
+        {
+            // Height of the encoded frames.
+            int             h = 0;
+            // Width of the encoded frames.
+            int             w = 0;
+            // Pixel format handed to the codec (may be replaced if the codec does not support it).
+            AVPixelFormat   fmt = AV_PIX_FMT_YUV420P;
+            // Frames per second (may be replaced if the codec does not support it).
+            int             framerate = 0;
+        };
+
+// ---------------------------------------------------------------------------------------------------
+
+        // Audio-specific settings for an encoder. encoder::open() refuses to open an audio
+        // codec unless sample_rate and channel_layout are non-zero and fmt is not AV_SAMPLE_FMT_NONE.
+        struct encoder_audio_args
+        {
+            // Samples per second (may be replaced if the codec does not support it).
+            int             sample_rate = 0;
+            // Channel layout bitmask, e.g. AV_CH_LAYOUT_STEREO.
+            uint64_t        channel_layout = AV_CH_LAYOUT_STEREO;
+            // Sample format handed to the codec (may be replaced if the codec does not support it).
+            AVSampleFormat  fmt = AV_SAMPLE_FMT_S16;
+        };
+
+// ---------------------------------------------------------------------------------------------------
+
+        // Codec selection and generic codec options shared by audio and video encoders.
+        struct encoder_codec_args
+        {
+            // Codec to use. When this is AV_CODEC_ID_NONE, codec_name is consulted instead.
+            AVCodecID                                    codec = AV_CODEC_ID_NONE;
+            // Codec looked up by name; only used when `codec` is AV_CODEC_ID_NONE.
+            std::string                                  codec_name;
+            // Key/value options forwarded to avcodec_open2().
+            std::unordered_map<std::string, std::string> codec_options;
+            // Target bitrate; only applied when strictly positive.
+            int64_t                                      bitrate = -1;
+            // Group-of-pictures size; only applied when strictly positive.
+            int                                          gop_size = -1;
+            // Extra codec flags that are OR-ed into the codec context when set.
+            int                                          flags = 0;
+        };
+
+// ---------------------------------------------------------------------------------------------------
+
+        // Encodes frames (images or audio) with an FFmpeg codec and hands every encoded packet
+        // to a user supplied sink callback. The object is move-only; the destructor flushes the
+        // codec.
+        class encoder
+        {
+        public:
+            // Bundle of all the settings needed to open an encoder. Only the image or the audio
+            // part is used, depending on the media type of the selected codec.
+            struct args
+            {
+                encoder_codec_args args_codec;
+                encoder_image_args args_image;
+                encoder_audio_args args_audio;
+            };
+
+            encoder()                             = default;
+            encoder(encoder&& other)              = default;
+            encoder& operator=(encoder&& other)   = default;
+
+            // Opens the codec described by `a`. Encoded data is passed to `sink` as
+            // (number of bytes, pointer to bytes); the sink returns false to signal an error.
+            // Check is_open() afterwards to find out whether opening succeeded.
+            encoder(
+                const args& a,
+                std::function<bool(std::size_t, const char*)> sink
+            );
+
+            // Flushes the encoder.
+            ~encoder();
+
+            bool            is_open()           const noexcept;
+            bool            is_image_encoder()  const noexcept;
+            bool            is_audio_encoder()  const noexcept;
+            AVCodecID       get_codec_id()      const noexcept;
+            std::string     get_codec_name()    const noexcept;
+            /*! video properties !*/
+            int             height()            const noexcept;
+            int             width()             const noexcept;
+            AVPixelFormat   pixel_fmt()         const noexcept;
+            int             fps()               const noexcept;
+            /*! audio properties !*/
+            int             sample_rate()       const noexcept;
+            uint64_t        channel_layout()    const noexcept;
+            AVSampleFormat  sample_fmt()        const noexcept;
+            int             nchannels()         const noexcept;
+
+            // Converts `frame` to the codec's format, encodes it and forwards the resulting
+            // packets to the sink. Returns false if the encoder is closed or an error occurred.
+            bool push(frame frame);
+            // Pushes an empty frame through the encoder to drain it.
+            void flush();
+
+        private:
+            friend class muxer;
+
+            // Variant whose sink receives the raw codec context and packet, and which logs
+            // through `log_`.
+            encoder(
+                const args& a,
+                std::function<bool(AVCodecContext*,AVPacket*)> sink,
+                std::shared_ptr<logger> log_
+            );
+
+            bool open();
+
+            args                            args_;
+            bool                            open_{false};
+            details::av_ptr<AVCodecContext> pCodecCtx;
+            details::av_ptr<AVPacket>       packet;
+            // Running presentation timestamp: advances by one per image and by the number of
+            // samples per audio frame. Kept as 64 bits because a 32-bit counter overflows after
+            // roughly 12 hours of 48kHz audio.
+            int64_t                         next_pts{0};
+            details::resizer                resizer_image;
+            details::resampler              resizer_audio;
+            details::audio_fifo             fifo;
+            std::function<bool(AVCodecContext*,AVPacket*)> sink;
+            std::shared_ptr<logger>         log;
+        };
+
+// ---------------------------------------------------------------------------------------------------
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////////// DEFINITIONS /////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////////////////////////////////
+
+        // Small helpers for working with AVRational values.
+        // Exact comparison of numerator and denominator (no normalisation is performed).
+        inline bool operator==(const AVRational& a, const AVRational& b) {return a.num == b.num && a.den == b.den;}
+        inline bool operator!=(const AVRational& a, const AVRational& b) {return !(a == b);}
+        // Compares the truncated integer value of `a` with `framerate`; never equal when a.den <= 0.
+        inline bool operator==(const AVRational& a, int framerate)       {return a.den > 0 && (a.num / a.den) == framerate;}
+        inline bool operator!=(const AVRational& a, int framerate)       {return !(a == framerate);}
+        // Truncated integer value of `a`. Returns 0 instead of dividing by a zero denominator.
+        inline int  to_int(const AVRational& a)                          {return a.den != 0 ? a.num / a.den : 0;}
+        // Reciprocal of `a`.
+        inline AVRational inv(const AVRational& a)                       {return {a.den, a.num};}
+
+        // Checks the properties already set on pCodecCtx against the lists of values that
+        // pCodec advertises as supported. For every property whose requested value is not in
+        // the corresponding list, a message is logged at LINFO level and the property is
+        // replaced by the first entry of that list. Lists that the codec does not provide
+        // (null pointers) are not checked.
+        inline void check_properties(
+            const AVCodec*  pCodec,
+            AVCodecContext* pCodecCtx,
+            logger&         log
+        )
+        {
+            // Video properties
+            // The framerate is only checked when one has been set (i.e. it does not compare equal to 0).
+            if (pCodec->supported_framerates && pCodecCtx->framerate != 0)
+            {
+                bool framerate_supported = false;
+
+                // The list is scanned until a {0,0} entry is found.
+                for (int i = 0 ; pCodec->supported_framerates[i] != AVRational{0,0} ; i++)
+                {
+                    if (pCodecCtx->framerate == pCodec->supported_framerates[i])
+                    {
+                        framerate_supported = true;
+                        break;
+                    }
+                }
+
+                if (!framerate_supported)
+                {
+                    // Note: the framerates are logged as truncated integers.
+                    log << LINFO 
+                        << "Requested framerate "
+                        << pCodecCtx->framerate.num / pCodecCtx->framerate.den
+                        << " not supported. Changing to default "
+                        << pCodec->supported_framerates[0].num / pCodec->supported_framerates[0].den;
+
+                    pCodecCtx->framerate = pCodec->supported_framerates[0];
+                }
+            }
+
+            if (pCodec->pix_fmts)
+            {
+                bool pix_fmt_supported = false;
+
+                // The list is scanned until an AV_PIX_FMT_NONE entry is found.
+                for (int i = 0 ; pCodec->pix_fmts[i] != AV_PIX_FMT_NONE ; i++)
+                {
+                    if (pCodecCtx->pix_fmt == pCodec->pix_fmts[i])
+                    {
+                        pix_fmt_supported = true;
+                        break;
+                    }
+                }
+
+                if (!pix_fmt_supported)
+                {
+                    log << LINFO
+                        << "Requested pixel format "
+                        << av_get_pix_fmt_name(pCodecCtx->pix_fmt)
+                        << " not supported. Changing to default "
+                        << av_get_pix_fmt_name(pCodec->pix_fmts[0]);
+
+                    pCodecCtx->pix_fmt = pCodec->pix_fmts[0];
+                }
+            }
+
+            // Audio properties
+            if (pCodec->supported_samplerates)
+            {
+                bool sample_rate_supported = false;
+
+                // The list is scanned until a 0 entry is found.
+                for (int i = 0 ; pCodec->supported_samplerates[i] != 0 ; i++)
+                {
+                    if (pCodecCtx->sample_rate == pCodec->supported_samplerates[i])
+                    {
+                        sample_rate_supported = true;
+                        break;
+                    }
+                }
+
+                if (!sample_rate_supported)
+                {
+                    log << LINFO
+                        << "Requested sample rate "
+                        << pCodecCtx->sample_rate
+                        << " not supported. Changing to default "
+                        << pCodec->supported_samplerates[0];
+
+                    pCodecCtx->sample_rate = pCodec->supported_samplerates[0];
+                }
+            }
+
+            if (pCodec->sample_fmts)
+            {
+                bool sample_fmt_supported = false;
+
+                // The list is scanned until an AV_SAMPLE_FMT_NONE entry is found.
+                for (int i = 0 ; pCodec->sample_fmts[i] != AV_SAMPLE_FMT_NONE ; i++)
+                {
+                    if (pCodecCtx->sample_fmt == pCodec->sample_fmts[i])
+                    {
+                        sample_fmt_supported = true;
+                        break;
+                    }
+                }
+
+                if (!sample_fmt_supported)
+                {
+                    log << LINFO
+                        << "Requested sample format "
+                        << av_get_sample_fmt_name(pCodecCtx->sample_fmt)
+                        << " not supported. Changing to default "
+                        << av_get_sample_fmt_name(pCodec->sample_fmts[0]);
+
+                    pCodecCtx->sample_fmt = pCodec->sample_fmts[0];
+                }
+            }
+
+            // The channel layout check exists in two flavours: this branch works on the
+            // AVChannelLayout structs (ch_layout / ch_layouts), the #else branch on the
+            // uint64_t masks (channel_layout / channel_layouts).
+#if FF_API_OLD_CHANNEL_LAYOUT
+            if (pCodec->ch_layouts)
+            {
+                bool channel_layout_supported = false;
+
+                // The list is scanned until an entry fails av_channel_layout_check().
+                for (int i = 0 ; av_channel_layout_check(&pCodec->ch_layouts[i]) ; ++i)
+                {
+                    if (av_channel_layout_compare(&pCodecCtx->ch_layout, &pCodec->ch_layouts[i]) == 0)
+                    {
+                        channel_layout_supported = true;
+                        break;
+                    }
+                }
+
+                if (!channel_layout_supported)
+                {
+                    log << LINFO
+                        << "Channel layout "
+                        << details::get_channel_layout_str(pCodecCtx)
+                        << " not supported. Changing to default "
+                        << details::get_channel_layout_str(pCodec->ch_layouts[0]);
+
+                    av_channel_layout_copy(&pCodecCtx->ch_layout, &pCodec->ch_layouts[0]);
+                }
+            }
+#else
+            if (pCodec->channel_layouts)
+            {
+                bool channel_layout_supported = false;
+
+                // The list is scanned until a 0 entry is found.
+                for (int i = 0 ; pCodec->channel_layouts[i] != 0 ; i++)
+                {
+                    if (pCodecCtx->channel_layout == pCodec->channel_layouts[i])
+                    {
+                        channel_layout_supported = true;
+                        break;
+                    }
+                }
+
+                if (!channel_layout_supported)
+                {
+                    log << LINFO 
+                        << "Channel layout "
+                        << get_channel_layout_str(pCodecCtx->channel_layout)
+                        << " not supported. Changing to default "
+                        << get_channel_layout_str(pCodec->channel_layouts[0]);
+
+                    pCodecCtx->channel_layout = pCodec->channel_layouts[0];
+                }
+            }
+#endif
+        }
+
+        inline encoder::encoder(
+            const args &a,
+            std::function<bool(std::size_t, const char*)> sink
+        ) : encoder(a, [sink](AVCodecContext*, AVPacket* pkt) {
+                return sink(pkt->size, (const char*)pkt->data);
+            }, std::make_shared<logger>("ffmpeg::encoder"))
+        {
+        }
+
+        // Private constructor: stores the settings, the packet sink and the logger, then tries
+        // to open the codec.
+        inline encoder::encoder(
+            const args& a,
+            std::function<bool(AVCodecContext*,AVPacket*)> sink_,
+            std::shared_ptr<logger> log_
+        ) : args_(a),
+            sink(std::move(sink_)),
+            log(std::move(log_))
+        {
+            const bool opened = open();
+
+            // On failure drop whatever codec context open() may have allocated, so that the
+            // accessors testing pCodecCtx report a closed encoder.
+            if (!opened)
+                pCodecCtx = nullptr;
+        }
+
+        // Flushes the encoder before its members are destroyed.
+        inline encoder::~encoder()
+        {
+            this->flush();
+        }
+
+        // Looks up the requested codec, configures a codec context from args_, opens it and
+        // prepares the resizer/resampler (and, for audio, the fifo). Returns true on success.
+        // On failure an error is logged and false is returned.
+        inline bool encoder::open()
+        {
+            using namespace std;
+            using namespace details;
+
+            DLIB_CASSERT(sink != nullptr, "must provide an appriate sink callback");
+
+            const bool init = details::register_ffmpeg::get(); // This must be used somewhere otherwise compiler might optimize it away.
+
+            packet = make_avpacket();
+            const AVCodec* pCodec = nullptr;
+
+            // An explicit codec id takes precedence over a codec name.
+            if (args_.args_codec.codec != AV_CODEC_ID_NONE)
+                pCodec = init ? avcodec_find_encoder(args_.args_codec.codec) : nullptr;
+            else if (!args_.args_codec.codec_name.empty())
+                pCodec = init ? avcodec_find_encoder_by_name(args_.args_codec.codec_name.c_str()) : nullptr;
+
+            if (!pCodec)
+                return fail(*log, "Codec ",  avcodec_get_name(args_.args_codec.codec), " or ", args_.args_codec.codec_name, " not found");
+
+            pCodecCtx.reset(avcodec_alloc_context3(pCodec));
+            if (!pCodecCtx)
+                return fail(*log, "AV : failed to allocate codec context for ", pCodec->name, " : likely ran out of memory");
+
+            // Optional settings: values that were left at their defaults are not applied.
+            if (args_.args_codec.bitrate > 0)
+                pCodecCtx->bit_rate = args_.args_codec.bitrate;
+            if (args_.args_codec.gop_size > 0)
+                pCodecCtx->gop_size = args_.args_codec.gop_size;
+            // `flags` is a bitmask held in a signed int, so it is tested against zero rather
+            // than with "> 0": a mask with the top bit set (e.g. AV_CODEC_FLAG_CLOSED_GOP) is
+            // negative and would otherwise be silently dropped.
+            if (args_.args_codec.flags != 0)
+                pCodecCtx->flags |= args_.args_codec.flags;
+
+            if (pCodec->type == AVMEDIA_TYPE_VIDEO)
+            {
+                if (args_.args_image.h          <= 0               ||
+                    args_.args_image.w          <= 0               ||
+                    args_.args_image.fmt        == AV_PIX_FMT_NONE ||
+                    args_.args_image.framerate  <= 0)
+                {
+                    return fail(*log, pCodec->name, " is an image codec. height, width, fmt (pixel format) and framerate must be set");
+                }
+
+                pCodecCtx->height       = args_.args_image.h;
+                pCodecCtx->width        = args_.args_image.w;
+                pCodecCtx->pix_fmt      = args_.args_image.fmt;
+                pCodecCtx->framerate    = AVRational{args_.args_image.framerate, 1};
+                // May replace the pixel format and framerate by values the codec supports, so
+                // the time base is derived only afterwards.
+                check_properties(pCodec, pCodecCtx.get(), *log);
+                pCodecCtx->time_base    = inv(pCodecCtx->framerate);
+
+                //don't know what src options are, but at least dst options are set
+                resizer_image.reset(pCodecCtx->height, pCodecCtx->width, pCodecCtx->pix_fmt,
+                                    pCodecCtx->height, pCodecCtx->width, pCodecCtx->pix_fmt);
+            }
+            else if (pCodec->type == AVMEDIA_TYPE_AUDIO)
+            {
+                if (args_.args_audio.sample_rate <= 0 ||
+                    args_.args_audio.channel_layout <= 0 ||
+                    args_.args_audio.fmt == AV_SAMPLE_FMT_NONE)
+                {
+                    return fail(*log, pCodec->name, " is an audio codec. sample_rate, channel_layout and fmt (sample format) must be set");
+                }
+
+                pCodecCtx->sample_rate      = args_.args_audio.sample_rate;
+                pCodecCtx->sample_fmt       = args_.args_audio.fmt;
+                set_layout(pCodecCtx.get(), args_.args_audio.channel_layout);
+                // May replace sample rate, sample format and channel layout by values the codec
+                // supports, so the time base is derived only afterwards.
+                check_properties(pCodec, pCodecCtx.get(), *log);
+                pCodecCtx->time_base        = AVRational{ 1, pCodecCtx->sample_rate };
+
+                if (pCodecCtx->codec_id == AV_CODEC_ID_AAC) {
+                    pCodecCtx->strict_std_compliance = FF_COMPLIANCE_EXPERIMENTAL;
+                }
+
+                //don't know what src options are, but at least dst options are set
+                resizer_audio.reset(
+                        pCodecCtx->sample_rate, get_layout(pCodecCtx.get()), pCodecCtx->sample_fmt,
+                        pCodecCtx->sample_rate, get_layout(pCodecCtx.get()), pCodecCtx->sample_fmt
+                );
+            }
+
+            av_dict opt = args_.args_codec.codec_options;
+            const int ret = avcodec_open2(pCodecCtx.get(), pCodec, opt.get());
+            if (ret < 0)
+                return fail(*log, "avcodec_open2() failed : ", get_av_error(ret));
+
+            // The fifo is created after the codec is opened because it is sized with the
+            // codec context's frame_size.
+            if (pCodec->type == AVMEDIA_TYPE_AUDIO)
+            {
+                fifo = audio_fifo(pCodecCtx->frame_size,
+                                  pCodecCtx->sample_fmt,
+                                  get_nchannels(pCodecCtx.get()));
+            }
+
+            open_ = true;
+            return open_;
+        }
+
+        // The encoder is usable only when it was opened successfully, still owns a codec
+        // context and has a sink to deliver packets to.
+        inline bool encoder::is_open() const noexcept
+        {
+            return open_ && sink != nullptr && pCodecCtx != nullptr;
+        }
+
+        inline bool encoder::is_image_encoder() const noexcept
+        {
+            return pCodecCtx != nullptr && pCodecCtx->codec_type == AVMEDIA_TYPE_VIDEO;
+        }
+
+        inline bool encoder::is_audio_encoder() const noexcept
+        {
+            return pCodecCtx != nullptr && pCodecCtx->codec_type == AVMEDIA_TYPE_AUDIO;
+        }
+
+        // AV_CODEC_ID_NONE when there is no codec context.
+        inline AVCodecID encoder::get_codec_id() const noexcept
+        {
+            if (!pCodecCtx)
+                return AV_CODEC_ID_NONE;
+            return pCodecCtx->codec_id;
+        }
+
+        // "NONE" when there is no codec context.
+        inline std::string encoder::get_codec_name() const noexcept
+        {
+            if (!pCodecCtx)
+                return "NONE";
+            return avcodec_get_name(pCodecCtx->codec_id);
+        }
+
+        // 0 when there is no codec context.
+        inline int encoder::fps() const noexcept
+        {
+            if (!pCodecCtx)
+                return 0;
+            return to_int(pCodecCtx->framerate);
+        }
+
+        // The video properties are reported from the destination settings of the image resizer.
+        inline int encoder::height() const noexcept
+        {
+            return resizer_image.get_dst_h();
+        }
+
+        inline int encoder::width() const noexcept
+        {
+            return resizer_image.get_dst_w();
+        }
+
+        inline AVPixelFormat encoder::pixel_fmt() const noexcept
+        {
+            return resizer_image.get_dst_fmt();
+        }
+
+        // The audio properties are reported from the destination settings of the resampler.
+        inline int encoder::sample_rate() const noexcept
+        {
+            return resizer_audio.get_dst_rate();
+        }
+
+        inline uint64_t encoder::channel_layout() const noexcept
+        {
+            return resizer_audio.get_dst_layout();
+        }
+
+        inline AVSampleFormat encoder::sample_fmt() const noexcept
+        {
+            return resizer_audio.get_dst_fmt();
+        }
+
+        inline int encoder::nchannels() const noexcept
+        {
+            const uint64_t layout = channel_layout();
+            return details::get_nchannels(layout);
+        }
+
+        // States of the send-frame / receive-packet loop in encoder::push().
+        enum encoding_state
+        {
+            ENCODE_ERROR                        = -1,   // a failure occurred; stop
+            ENCODE_SEND_FRAME                   = 0,    // submit the current frame to the codec
+            ENCODE_READ_PACKET_THEN_DONE        = 1,    // read packets; finished once none are left
+            ENCODE_READ_PACKET_THEN_SEND_FRAME  = 2,    // read packets; then submit the frame again
+            ENCODE_DONE                         = 3     // the current frame is fully handled
+        };
+        
+        // Encodes one frame and forwards every packet the codec produces to the sink.
+        //
+        // Images are first passed through the image resizer. Audio frames are passed through
+        // the resampler and then through the audio fifo, which may return zero, one or
+        // several frames. Any other frame is forwarded to the codec as is; flush() relies on
+        // this.
+        //
+        // Returns false when the encoder is not open or when sending a frame, receiving a
+        // packet or the sink failed. Returns true otherwise, including when the audio fifo did
+        // not yet produce a frame to encode.
+        inline bool encoder::push(frame f_)
+        {
+            using namespace std::chrono;
+            using namespace details;
+
+            if (!is_open())
+                return false;
+
+            std::vector<frame> frames;
+
+            // Resize if image. Resample if audio. Push through audio fifo if necessary (some audio codecs requires fixed size frames)
+            if (f_.is_image())
+            {
+                resizer_image.resize(f_, f_);
+                frames.push_back(std::move(f_));
+            }
+            else if (f_.is_audio())
+            {
+                resizer_audio.resize(f_, f_);
+                frames = fifo.push_pull(std::move(f_));
+            }
+            else
+            {
+                // FLUSH
+                frames.push_back(std::move(f_));
+            }
+
+            // Set pts based on tracked state. Ignore timestamps for now
+            for (auto& f : frames)
+            {
+                // Frames without underlying data (the flush case) carry no pts.
+                if (f.f)
+                {
+                    f.f->pts = next_pts;
+                    next_pts += (f.is_image() ? 1 : f.nsamples());
+                }
+            }
+
+            // Submits a frame to the codec and selects the next state from the result.
+            const auto send_frame = [&](encoding_state& state, frame& f)
+            {
+                const int ret = avcodec_send_frame(pCodecCtx.get(), f.f.get());
+
+                if (ret >= 0) {
+                    state   = ENCODE_READ_PACKET_THEN_DONE;
+                } else if (ret == AVERROR(EAGAIN)) {
+                    // The frame was not accepted: read packets first, then send it again.
+                    state   = ENCODE_READ_PACKET_THEN_SEND_FRAME;
+                } else if (ret == AVERROR_EOF) {
+                    open_   = false;
+                    state   = ENCODE_DONE;
+                } else {
+                    open_   = false;
+                    state   = ENCODE_ERROR;
+                    (*log) << LERROR << "avcodec_send_frame() failed : " << get_av_error(ret);
+                }
+            };
+
+            // Reads one packet from the codec. On success the packet goes to the sink and the
+            // state is left unchanged, so the caller keeps reading until the codec reports
+            // EAGAIN or EOF. `resend` tells whether the frame still has to be sent once the
+            // codec has no more packets.
+            const auto recv_packet = [&](encoding_state& state, bool resend)
+            {
+                const int ret = avcodec_receive_packet(pCodecCtx.get(), packet.get());
+
+                if (ret == AVERROR(EAGAIN) && resend)
+                    state   = ENCODE_SEND_FRAME;
+                else if (ret == AVERROR(EAGAIN))
+                    state   = ENCODE_DONE;
+                else if (ret == AVERROR_EOF) {
+                    open_   = false;
+                    state   = ENCODE_DONE;
+                }
+                else if (ret < 0)
+                {
+                    open_   = false;
+                    state   = ENCODE_ERROR;
+                    (*log) << LERROR << "avcodec_receive_packet() failed : " << get_av_error(ret);
+                }
+                else
+                {
+                    // A sink that returns false closes the encoder.
+                    if (!sink(pCodecCtx.get(), packet.get()))
+                    {
+                        open_   = false;
+                        state   = ENCODE_ERROR;
+                    }
+                }
+            };
+
+            encoding_state state = ENCODE_SEND_FRAME;
+
+            // Stops early as soon as the encoder is closed (EOF, error or sink failure).
+            for (size_t i = 0 ; i < frames.size() && is_open() ; ++i)
+            {
+                state = ENCODE_SEND_FRAME;
+
+                while (state != ENCODE_DONE && state != ENCODE_ERROR)
+                {
+                    switch(state)
+                    {
+                        case ENCODE_SEND_FRAME:                     send_frame(state, frames[i]);   break;
+                        case ENCODE_READ_PACKET_THEN_DONE:          recv_packet(state, false);      break;
+                        case ENCODE_READ_PACKET_THEN_SEND_FRAME:    recv_packet(state, true);       break;
+                        default: break;
+                    }
+                }
+            }
+
+            return state != ENCODE_ERROR;
+        }
+
+        // Drains the encoder by pushing an empty frame through it. The result of push() is
+        // deliberately ignored.
+        inline void encoder::flush()
+        {
+            frame flush_marker{};
+            push(std::move(flush_marker));
+        }
+
+    }
+}
+
+#endif //DLIB_VIDEO_MUXER
--- a/dlib/media/ffmpeg_utils.h
+++ b/dlib/media/ffmpeg_utils.h
@ -22,6 +22,7 @@ static_assert(false, "This version of dlib isn't built with the FFMPEG wrappers"
 #include "../image_processing/generic_image.h"
 #include "../pixel.h"
 #include "../assert.h"
+#include "../logger.h"
 #include "ffmpeg_abstract.h"

 extern "C" {
@ -286,6 +287,7 @@ namespace dlib

            friend class details::resampler;
            friend class details::decoder_extractor;
+            friend class encoder;

            frame(
                int                                     h,
@ -414,14 +416,14 @@ namespace dlib
        namespace details
        {
            template<class... Args>
-            inline bool fail(std::ostream& out, Args&&... args)
+            inline bool fail(logger& out, Args&&... args)
            {
+                auto ret = out << LERROR;
 #ifdef __cpp_fold_expressions
-                ((out << args),...);
+                ((ret << args),...);
 #else
-                (void)std::initializer_list<int>{((out << args), 0)...};
+                (void)std::initializer_list<int>{((ret << args), 0)...};
 #endif
-                out << '\n';
                return false;
            }
        }
@ -550,11 +552,21 @@ namespace dlib
                return frame->ch_layout.u.mask;
            }

+            // Stores the channel layout given as a bitmask on a codec context that exposes
+            // `ch_layout`, converting the mask with convert_layout().
+            inline void set_layout(AVCodecContext* pCodecCtx, const uint64_t channel_layout)
+            {
+                pCodecCtx->ch_layout = convert_layout(channel_layout);
+            }
+
            inline void set_layout(AVFrame* frame, const uint64_t channel_layout)
            {
                frame->ch_layout = convert_layout(channel_layout);
            }

+            // Number of channels of a codec context that exposes `ch_layout`.
+            inline int get_nchannels(const AVCodecContext* pCodecCtx)
+            {
+                return pCodecCtx->ch_layout.nb_channels;
+            }
+
            inline int get_nchannels(const AVFrame* frame)
            {
                return frame->ch_layout.nb_channels;
@ -607,6 +619,11 @@ namespace dlib
                return frame->channel_layout;
            }

+            // Stores the channel layout bitmask on a codec context that exposes the
+            // `channel_layout` mask field.
+            inline void set_layout(AVCodecContext* pCodecCtx, const uint64_t channel_layout)
+            {
+                pCodecCtx->channel_layout = channel_layout;
+            }
+
            inline void set_layout(AVFrame* frame, const uint64_t channel_layout)
            {
                frame->channel_layout = channel_layout;
@ -617,6 +634,11 @@ namespace dlib
                return av_get_channel_layout_nb_channels(channel_layout);
            }

+            // Number of channels of a codec context that exposes the `channel_layout` mask
+            // field; defers to the overload taking the mask.
+            inline int get_nchannels(const AVCodecContext* pCodecCtx)
+            {
+                return get_nchannels(pCodecCtx->channel_layout);
+            }
+
            inline int get_nchannels(const AVFrame* frame)
            {
                return get_nchannels(frame->channel_layout);
--- a/dlib/media/sink.h
+++ b/dlib/media/sink.h
@ -0,0 +1,47 @@
+// Copyright (C) 2023  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+
+#ifndef DLIB_FFMPEG_SINK
+#define DLIB_FFMPEG_SINK
+
+#include <cstdint>
+#include <vector>
+#include <ostream>
+#include "../type_traits.h"
+
+namespace dlib
+{
+    namespace ffmpeg
+    {
+
+// ---------------------------------------------------------------------------------------------------
+
+        // Creates an encoder sink that appends every chunk of encoded data to `buf`.
+        // The returned callable refers to `buf` and must not be used after `buf` is destroyed.
+        // It always reports success.
+        template <
+          class Byte,
+          class Allocator,
+          std::enable_if_t<is_byte<Byte>::value, bool> = true
+        >
+        auto sink(std::vector<Byte, Allocator>& buf)
+        {
+            return [&buf](std::size_t ndata, const char* data) {
+                const char* const last = data + ndata;
+                buf.insert(buf.end(), data, last);
+                return true;
+            };
+        }
+
+// ---------------------------------------------------------------------------------------------------
+
+        // Creates an encoder sink that writes every chunk of encoded data to `out`.
+        // The returned callable refers to `out` and must not be used after `out` is destroyed.
+        // It reports failure when the stream is no longer in a good state after the write.
+        inline auto sink(std::ostream& out)
+        {
+            return [&out](std::size_t ndata, const char* data) {
+                out.write(data, ndata);
+                const bool still_good = out.good();
+                return still_good;
+            };
+        }
+
+// ---------------------------------------------------------------------------------------------------
+
+    }
+}
+
+#endif //DLIB_FFMPEG_SINK
--- a/dlib/test/ffmpeg.cpp
+++ b/dlib/test/ffmpeg.cpp
@ -33,21 +33,20 @@ namespace
        const int height        = dlib::get_option(cfg, "height", 0);
        const int width         = dlib::get_option(cfg, "width", 0);
        const int sample_rate   = dlib::get_option(cfg, "sample_rate", 0);
-        const bool is_audio     = sample_rate > 0;
+        const bool is_audio     = sample_rate > 0;   

-        decoder::args args;
-        args.args_codec.codec_name      = codec;
-        args.args_image.fmt             = AV_PIX_FMT_RGB24;
-        args.args_audio.fmt             = AV_SAMPLE_FMT_S16;
-        args.args_audio.channel_layout  = AV_CH_LAYOUT_MONO;
+        decoder dec([&] {
+            decoder::args args;
+            args.args_codec.codec_name      = codec;
+            args.args_image.fmt             = AV_PIX_FMT_RGB24;
+            args.args_audio.fmt             = AV_SAMPLE_FMT_S16;
+            args.args_audio.channel_layout  = AV_CH_LAYOUT_MONO;
+            return args;
+        }());

-        decoder dec(args);
        DLIB_TEST(dec.is_open());
        DLIB_TEST(dec.get_codec_name() == codec);
-        if (is_audio)
-            DLIB_TEST(dec.is_audio_decoder());
-        else
-            DLIB_TEST(dec.is_image_decoder());
+        DLIB_TEST(is_audio ? dec.is_audio_decoder() : dec.is_image_decoder());

        array2d<rgb_pixel>          img;
        ffmpeg::audio<int16_t, 1>   audio;
@ -126,11 +125,14 @@ namespace
        DLIB_TEST(!dec.is_open());
    }

-    void test_demuxer (
+    void test_demuxer_encoder_decoder (
        const std::string& filepath,
-        const dlib::config_reader& cfg
+        const dlib::config_reader& cfg,
+        AVCodecID image_codec,
+        AVCodecID audio_codec
    )
    {
+        const std::string tmpfile = "dummy.avi";
        const int nframes       = dlib::get_option(cfg, "nframes", 0);
        const int height        = dlib::get_option(cfg, "height", 0);
        const int width         = dlib::get_option(cfg, "width", 0);
@ -138,18 +140,23 @@ namespace
        const bool has_video    = height > 0 && width > 0 && nframes > 0;
        const bool has_audio    = sample_rate > 0;

-        demuxer::args args;
-        args.filepath       = filepath;
-        args.args_image.fmt = AV_PIX_FMT_RGB24;
-        args.args_audio.fmt = AV_SAMPLE_FMT_S16;
-
-        demuxer cap(args);
+        demuxer cap{[&] {
+            demuxer::args args;
+            args.filepath        = filepath;
+            args.args_image.fmt  = AV_PIX_FMT_RGB24;
+            args.args_audio.fmt  = AV_SAMPLE_FMT_S16;
+            return args;
+        }()};
+        
        DLIB_TEST(cap.is_open());
-        DLIB_TEST(cap.video_enabled() == has_video);
-        DLIB_TEST(cap.audio_enabled() == has_audio);
-        DLIB_TEST(cap.height() == height);
-        DLIB_TEST(cap.width() == width);
-        DLIB_TEST(cap.sample_rate() == sample_rate);
+        DLIB_TEST(cap.video_enabled()       == has_video);
+        DLIB_TEST(cap.audio_enabled()       == has_audio);
+        DLIB_TEST(cap.height()              == height);
+        DLIB_TEST(cap.width()               == width);
+        DLIB_TEST(cap.sample_rate()         == sample_rate);
+        // DLIB_TEST(cap.estimated_nframes()   == nframes); // This won't always work with ffmpeg v3. v4 onwards is fine
+        int estimated_samples_min = cap.estimated_total_samples() - cap.sample_rate(); // - 1s
+        int estimated_samples_max = cap.estimated_total_samples() + cap.sample_rate(); // + 1s

        if (has_video)
        {
@ -159,40 +166,130 @@ namespace
        {
            DLIB_TEST(cap.sample_fmt() == AV_SAMPLE_FMT_S16);
        }
+
+        // We're going to extract all frames, encode them, then decode them again
+        encoder enc_image, enc_audio;
+        decoder dec_image, dec_audio;
+        std::vector<uint8_t> buf_image, buf_audio;
+
+        if (has_video)
+        {
+            {
+                enc_image = encoder([&]{
+                    encoder::args args;
+                    args.args_codec.codec       = image_codec;
+                    args.args_image.h           = cap.height();
+                    args.args_image.w           = cap.width();
+                    args.args_image.framerate   = cap.fps();
+                    args.args_image.fmt         = AV_PIX_FMT_YUV420P;
+                    return args;
+                }(), sink(buf_image));
+
+                DLIB_TEST(enc_image.is_open());
+                DLIB_TEST(enc_image.is_image_encoder());
+                DLIB_TEST(enc_image.get_codec_id()  == image_codec);
+                DLIB_TEST(enc_image.height()        == cap.height());
+                DLIB_TEST(enc_image.width()         == cap.width());
+                print_spinner();
+            }
+
+            {
+                dec_image = decoder{[&]{
+                    decoder::args args;
+                    args.args_codec.codec  = enc_image.get_codec_id();
+                    args.args_image.h      = cap.height();
+                    args.args_image.w      = cap.width();
+                    args.args_image.fmt    = cap.pixel_fmt();
+                    return args;
+                }()};
+
+                DLIB_TEST(dec_image.is_open());
+                DLIB_TEST(dec_image.is_image_decoder());
+                DLIB_TEST(dec_image.get_codec_id() == enc_image.get_codec_id());
+                print_spinner();
+            }
+        }
+
+        if (has_audio)
+        {
+            {
+                enc_audio = encoder([&]{
+                    encoder::args args;
+                    args.args_codec.codec           = audio_codec;
+                    args.args_audio.sample_rate     = cap.sample_rate();
+                    args.args_audio.channel_layout  = cap.channel_layout();
+                    args.args_audio.fmt             = cap.sample_fmt();
+                    return args;
+                }(), sink(buf_audio));
+
+                DLIB_TEST(enc_audio.is_open());
+                DLIB_TEST(enc_audio.is_audio_encoder());
+                DLIB_TEST(enc_audio.get_codec_id() == audio_codec);
+                //You can't guarantee that the requested sample rate or sample format are supported.
+                //In which case, the object changes them to values that ARE supported. So we can't add
+                //tests that check the sample rate is set to what we asked for.
+                print_spinner();
+            }
+
+            {
+                dec_audio = decoder{[&]{
+                    decoder::args args;
+                    args.args_codec.codec           = enc_audio.get_codec_id();
+                    args.args_audio.sample_rate     = cap.sample_rate();
+                    args.args_audio.channel_layout  = cap.channel_layout();
+                    args.args_audio.fmt             = cap.sample_fmt();
+                    return args;
+                }()};
+
+                DLIB_TEST(dec_audio.is_open());
+                DLIB_TEST(dec_audio.is_audio_decoder());
+                DLIB_TEST(dec_audio.get_codec_id() == enc_audio.get_codec_id());
+                print_spinner();
+            }
+        }
        
        dlib::ffmpeg::frame frame, frame_copy;
        array2d<rgb_pixel>  img;
        audio<int16_t, 1>   audio1;
        audio<int16_t, 2>   audio2;
-        int                 count{0};
-        int                 nsamples{0};
+        int                 counter_images{0};
+        int                 counter_samples{0};
        int                 iteration{0};

        while (cap.read(frame))
        {
            if (frame.is_image())
            {
-                DLIB_TEST(frame.height() == height);
-                DLIB_TEST(frame.width() == width);
-                DLIB_TEST(frame.pixfmt() == AV_PIX_FMT_RGB24);
+                // Test frame 
+                DLIB_TEST(frame.height()    == height);
+                DLIB_TEST(frame.width()     == width);
+                DLIB_TEST(frame.pixfmt()    == AV_PIX_FMT_RGB24);
                convert(frame, img);

+                // Test frame -> dlib array
                DLIB_TEST(img.nr() == height);
                DLIB_TEST(img.nc() == width);
                convert(img, frame_copy);

-                DLIB_TEST(frame_copy.height() == frame.height());
-                DLIB_TEST(frame_copy.width() == frame.width());
-                DLIB_TEST(frame_copy.pixfmt() == frame.pixfmt());
+                // Test dlib array -> frame
+                DLIB_TEST(frame_copy.height()   == frame.height());
+                DLIB_TEST(frame_copy.width()    == frame.width());
+                DLIB_TEST(frame_copy.pixfmt()   == frame.pixfmt());
+
+                // Push to encoder
+                DLIB_TEST(enc_image.push(std::move(frame)));
                
-                ++count;
+                ++counter_images;
            }

            if (frame.is_audio())
            {
+                // Test frame 
                DLIB_TEST(frame.sample_rate() == sample_rate);
-                DLIB_TEST(frame.samplefmt() == AV_SAMPLE_FMT_S16);
+                DLIB_TEST(frame.samplefmt()   == AV_SAMPLE_FMT_S16);

+                // Test frame -> dlib array
+                // Test dlib array -> frame
                if (frame.nchannels() == 1)
                {
                    convert(frame, audio1);
@ -205,11 +302,16 @@ namespace
                }

                DLIB_TEST(frame.sample_rate() == sample_rate);
-                nsamples += frame.nsamples();
                DLIB_TEST(frame_copy.is_audio());
-                DLIB_TEST(frame_copy.sample_rate() == frame.sample_rate());
-                DLIB_TEST(frame_copy.samplefmt() == frame.samplefmt());
-                DLIB_TEST(frame_copy.nsamples() == frame.nsamples());
+                DLIB_TEST(frame_copy.sample_rate()  == frame.sample_rate());
+                DLIB_TEST(frame_copy.samplefmt()    == frame.samplefmt());
+                DLIB_TEST(frame_copy.nsamples()     == frame.nsamples());
+                DLIB_TEST(frame_copy.nchannels()    == frame.nchannels());
+
+                counter_samples += frame.nsamples();
+
+                // Push to encoder
+                DLIB_TEST(enc_audio.push(std::move(frame))); 
            }

            ++iteration;
@ -217,8 +319,59 @@ namespace
                print_spinner();
        }

-        DLIB_TEST(count == nframes);
+        DLIB_TEST(counter_images == nframes);
+        DLIB_TEST(counter_samples >= estimated_samples_min); //within 1 second
+        DLIB_TEST(counter_samples <= estimated_samples_max); //within 1 second
        DLIB_TEST(!cap.is_open());
+
+        enc_audio.flush();
+        enc_image.flush();
+
+        print_spinner();
+
+        // Decode encoded images
+        if (has_video)
+        {
+            DLIB_TEST(dec_image.push_encoded(buf_image.data(), buf_image.size()));
+            print_spinner();
+            dec_image.flush();
+            
+            counter_images = 0;
+            decoder_status status;
+
+            while ((status = dec_image.read(frame)) == DECODER_FRAME_AVAILABLE)
+            {
+                ++counter_images;
+                DLIB_TEST(frame.height()    == height);
+                DLIB_TEST(frame.width()     == width);
+                DLIB_TEST(frame.pixfmt()    == AV_PIX_FMT_RGB24);
+                print_spinner();
+            }
+
+            DLIB_TEST(counter_images == nframes);
+        }
+
+        // Decode encoded audio
+        if (has_audio)
+        {
+            DLIB_TEST(dec_audio.push_encoded(buf_audio.data(), buf_audio.size()));
+            print_spinner();
+            dec_audio.flush();
+            
+            counter_samples = 0;
+            decoder_status status;
+
+            while ((status = dec_audio.read(frame)) == DECODER_FRAME_AVAILABLE)
+            {
+                counter_samples += frame.nsamples();
+                DLIB_TEST(frame.sample_rate() == sample_rate);
+                DLIB_TEST(frame.samplefmt()   == AV_SAMPLE_FMT_S16);
+                print_spinner();
+            }
+
+            DLIB_TEST(counter_samples >= estimated_samples_min); //within 1 second
+            DLIB_TEST(counter_samples <= estimated_samples_max); //within 1 second
+        }
    }

    class video_tester : public tester
@ -260,11 +413,11 @@ namespace
                    const auto& sublock = video_file_block.block(block);
                    const std::string filepath = get_parent_directory(f).full_name() + "/" + sublock["file"];

-                    test_demuxer(filepath, sublock);
+                    test_demuxer_encoder_decoder(filepath, sublock, AV_CODEC_ID_MPEG4, AV_CODEC_ID_AC3);
                }
            }
        }
    } a;
 }

-#endif
+#endif
--- a/dlib/test/ffmpeg_data/MOT20-05-raw.mp4
+++ b/dlib/test/ffmpeg_data/MOT20-05-raw.mp4
--- a/dlib/test/ffmpeg_data/MOT20-05-raw_shorter.mp4
+++ b/dlib/test/ffmpeg_data/MOT20-05-raw_shorter.mp4
--- a/dlib/test/ffmpeg_data/details.cfg
+++ b/dlib/test/ffmpeg_data/details.cfg
@ -35,10 +35,10 @@ demuxing
 {
    file1 
    {
-        file        = MOT20-05-raw.mp4
+        file        = MOT20-05-raw_shorter.mp4
        width       = 826
        height      = 540
-        nframes     = 3315
+        nframes     = 752
        sample_rate = 0
    }

@ -59,4 +59,4 @@ demuxing
        nframes     = 0
        sample_rate = 16000
    }
-}
+}
--- a/dlib/type_traits.h
+++ b/dlib/type_traits.h
@ -97,6 +97,17 @@ namespace dlib

 // ----------------------------------------------------------------------------------------

+    template<class Byte>
+    using is_byte = std::integral_constant<bool, std::is_same<Byte,char>::value
+                                              || std::is_same<Byte,int8_t>::value
+                                              || std::is_same<Byte,uint8_t>::value
+#ifdef __cpp_lib_byte
+                                              || std::is_same<Byte,std::byte>::value
+#endif
+                                          >;
+
+// ----------------------------------------------------------------------------------------
+
    template< class T >
    using remove_cvref_t = std::remove_cv_t<std::remove_reference_t<T>>;

@ -203,4 +214,4 @@ namespace dlib
 
 }

-#endif //DLIB_TYPE_TRAITS_H_
+#endif //DLIB_TYPE_TRAITS_H_
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@ -239,6 +239,7 @@ if (DLIB_USE_FFMPEG AND NOT DLIB_NO_GUI_SUPPORT)
   add_example(ffmpeg_video_decoding_ex)
   add_example(ffmpeg_info_ex)
   add_example(ffmpeg_screen_grab_ex)
+   add_example(ffmpeg_video_encoding_ex)
 endif()

 if (DLIB_NO_GUI_SUPPORT)
--- a/examples/ffmpeg_video_encoding_ex.cpp
+++ b/examples/ffmpeg_video_encoding_ex.cpp
@ -0,0 +1,127 @@
+// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
+/*
+
+    This is an example illustrating the use of the ffmpeg wrappers, in this case the encoding API.
+
+    This is a pretty simple example. It loads a video file, extracts the images and
+    re-encodes them into a raw buffer using a user-specified codec.
+    
+    Please see the following examples on how to decode, demux, and get information on your installation of ffmpeg:
+        - ffmpeg_info_ex.cpp
+        - ffmpeg_video_decoding_ex.cpp
+        - ffmpeg_video_demuxing_ex.cpp
+*/
+
+#include <cstdio>
+#include <dlib/media.h>
+#include <dlib/cmd_line_parser.h>
+
+using namespace std;
+using namespace dlib;
+using namespace dlib::ffmpeg;
+
+int main(const int argc, const char** argv)
+try
+{
+    command_line_parser parser;
+    parser.add_option("i",      "input video", 1);
+    parser.add_option("codec",  "codec name. e.g. h264. Defaults to mpeg4", 1);
+    parser.add_option("height", "height of encoded stream. Defaults to whatever is in the video file", 1);
+    parser.add_option("width",  "width of encoded stream. Defaults to whatever is in the video file", 1);
+
+    parser.set_group_name("Help Options");
+    parser.add_option("h",      "alias of --help");
+    parser.add_option("help",   "display this message and exit");
+
+    parser.parse(argc, argv);
+    const char* one_time_opts[] = {"i", "codec", "height", "width"};
+    parser.check_one_time_options(one_time_opts);
+
+    if (parser.option("h") || parser.option("help"))
+    {
+        parser.print_options();
+        return 0;
+    }
+
+    if (!parser.option("i"))
+    {
+        cout << "Missing -i" << endl;
+        parser.print_options();
+        return 0;
+    }
+
+    const std::string filepath = parser.option("i").argument();
+
+    // Load input video.
+    // Note, this uses a convenience constructor which enables or disables audio and/or video.
+    demuxer cap({filepath, video_enabled, audio_disabled});
+
+    if (!cap.is_open() || !cap.video_enabled())
+    {
+        cout << "Failed to open " << filepath << endl;
+        return EXIT_FAILURE;
+    }
+
+    // This is a small functor that creates an encoder using the command line arguments
+    // and different types of output buffers using the convenient sink() overload.
+    const auto make_encoder = [&](auto& out) 
+    {
+        return encoder([&] {
+            encoder::args args;
+            args.args_codec.codec_name  = get_option(parser, "codec", "mpeg4");
+            args.args_image.h           = get_option(parser, "height", cap.height());
+            args.args_image.w           = get_option(parser, "width",  cap.width());
+            args.args_image.framerate   = cap.fps();
+            return args;
+        }(), sink(out));
+    };
+
+    // Encode to multiple different types of buffers.
+    std::vector<char>       buf1;
+    std::vector<int8_t>     buf2;
+    std::vector<uint8_t>    buf3;
+    std::ostringstream      buf4;
+    std::ofstream           buf5("encoded.dat", std::ios::binary);
+
+    // Different encoders for different buffers
+    auto enc1 = make_encoder(buf1);
+    auto enc2 = make_encoder(buf2);
+    auto enc3 = make_encoder(buf3);
+    auto enc4 = make_encoder(buf4);
+    auto enc5 = make_encoder(buf5);
+
+    frame f;
+    while (cap.read(f))
+    {
+        enc1.push(f);
+        enc2.push(f);
+        enc3.push(f);
+        enc4.push(f);
+        enc5.push(f);
+    }
+
+    // Flush all the encoders
+    // Note, encoder::~encoder calls flush()
+    // So if the encoders were going out of scope at this point, you wouldn't have to call flush()
+    // Also note, flush() becomes a no-op after the 1st time you call it. 
+    // Calling it more than once is safe but has no effect.
+    // After calling flush(), push() will always return false.
+    enc1.flush();
+    enc2.flush();
+    enc3.flush();
+    enc4.flush();
+    enc5.flush();
+
+    cout << "vector<char>       size " << buf1.size() << endl;
+    cout << "vector<int8_t>     size " << buf2.size() << endl;
+    cout << "vector<uint8_t>    size " << buf3.size() << endl;
+    cout << "ostringstream      size " << buf4.tellp() << endl;
+    cout << "ofstream           size " << buf5.tellp() << endl;
+
+    return EXIT_SUCCESS;
+}
+catch (const std::exception& e)
+{
+    cout << e.what() << endl;
+    return EXIT_FAILURE;
+}