[FFmpeg] added push(image_type) methods to encoder and muxer (#2797)

* added push() methods

* Update dlib/media/ffmpeg_muxer.h

* Update dlib/media/ffmpeg_muxer.h

---------

Co-authored-by: pf <pf@me>
Co-authored-by: Davis E. King <davis685@gmail.com>
This commit is contained in:
pfeatherstone 2023-05-18 13:11:49 +01:00 committed by GitHub
parent dab9aa1fa5
commit 5b9ab23cc0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 163 additions and 2 deletions

View File

@ -251,6 +251,33 @@ namespace dlib
or an error occurred, in which case is_open() == false. or an error occurred, in which case is_open() == false.
!*/ !*/
template <
class image_type,
class Callback,
is_image_check<image_type> = true
>
bool push (
const image_type& img,
Callback&& sink
);
/*!
    requires
        - is_image_encoder() == true
        - sink is a valid callback with signature bool(size_t, const char*)
          that receives the encoded packet data. dlib/media/sink.h contains
          callback wrappers for different buffer types.
        - sink does not call encoder::push() or encoder::flush(),
          i.e., the callback does not create a recursive loop.
    ensures
        - Convenience overload for image objects: img is first converted
          (copied) into an intermediate frame object, and that frame is then
          pushed through the encoder together with sink.
        - Encodes img using the constructor arguments, which may incur a resizing
          operation if the image dimensions and pixel type don't match the codec.
        - The sink callback may or may not be invoked, because the underlying
          codec can buffer data if necessary.
        - Returns true if successfully encoded, even if sink wasn't invoked.
        - Returns false if either EOF, i.e. flush() has been previously called,
          or an error occurred, in which case is_open() == false.
!*/
template <class Callback> template <class Callback>
void flush ( void flush (
Callback&& sink Callback&& sink
@ -565,6 +592,23 @@ namespace dlib
or an error occurred, in which case is_open() == false. or an error occurred, in which case is_open() == false.
!*/ !*/
template <
class image_type,
is_image_check<image_type> = true
>
bool push(const image_type& img);
/*!
    requires
        - is_image_encoder() == true
          NOTE(review): this clause names an encoder-style query; confirm
          whether the muxer-side precondition should instead be
          video_enabled() == true.
    ensures
        - Encodes img using the constructor arguments, which may incur a resizing
          operation if the image dimensions and pixel type don't match the codec.
        - Writes the resulting encoded data to the file/socket.
        - Returns true if successfully encoded.
        - Returns false, without encoding anything, if the muxer or its image
          encoder is not open.
        - Returns false if either EOF, i.e. flush() has been previously called,
          or an error occurred, in which case is_open() == false.
!*/
void flush(); void flush();
/*! /*!
ensures ensures
@ -1019,6 +1063,27 @@ namespace dlib
return state != ENCODE_ERROR; return state != ENCODE_ERROR;
} }
template <
class image_type,
class Callback,
is_image_check<image_type>
>
inline bool encoder::push (
const image_type& img,
Callback&& sink
)
{
// Unfortunately, FFmpeg assumes all data is over-aligned, and therefore,
// even though the API has facilities to convert img directly to a frame object,
// we cannot use it because it assumes the data in img is over-aligned, and of
// course, it is not. Shame. At some point, I'll more digging to see if we
// can get around this without doing a brute force copy like below.
using namespace details;
frame f;
convert(img, f);
return push(std::move(f), std::forward<Callback>(sink));
}
template <class Callback> template <class Callback>
inline void encoder::flush(Callback&& clb) inline void encoder::flush(Callback&& clb)
{ {
@ -1268,6 +1333,20 @@ namespace dlib
return false; return false;
} }
template <
    class image_type,
    is_image_check<image_type>
>
bool muxer::push(const image_type& img)
{
    using namespace std;
    using namespace details;

    // Nothing is encoded unless both the muxer and its image encoder are open.
    if (!is_open() || !st.encoder_image.is_open())
        return false;

    // Encode the image; packets produced by the encoder are handed to a sink
    // built from the format context and the video stream id.
    return st.encoder_image.push(img, muxer_sink(st.pFormatCtx.get(), st.stream_id_video));
}
inline void muxer::flush() inline void muxer::flush()
{ {
using namespace details; using namespace details;

View File

@ -584,7 +584,82 @@ namespace
////////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////////
void test_muxer ( template<class image_type>
void test_muxer1 (
const std::string& filepath,
AVCodecID image_codec
)
{
const std::string tmpfile = "dummy.avi";
// Load a video/audio as a source of frames
demuxer cap({filepath, video_enabled, audio_disabled});
DLIB_TEST(cap.is_open());
DLIB_TEST(cap.video_enabled());
DLIB_TEST(!cap.audio_enabled());
const int height = cap.height();
const int width = cap.width();
// Open muxer
muxer writer([&] {
muxer::args args;
args.filepath = tmpfile;
args.enable_audio = false;
args.args_image.codec = image_codec;
args.args_image.h = cap.height();
args.args_image.w = cap.width();
args.args_image.framerate = cap.fps();
args.args_image.fmt = AV_PIX_FMT_YUV420P;
return args;
}());
DLIB_TEST(writer.is_open());
DLIB_TEST(!writer.audio_enabled());
DLIB_TEST(writer.video_enabled());
DLIB_TEST(writer.get_video_codec_id() == image_codec);
DLIB_TEST(writer.height() == cap.height());
DLIB_TEST(writer.width() == cap.width());
// Demux then remux
int nimages_demuxed{0};
image_type img;
while (cap.read(img))
{
++nimages_demuxed;
DLIB_TEST(img.nr() == height);
DLIB_TEST(img.nc() == width);
DLIB_TEST(writer.push(img));
if (nimages_demuxed % 10 == 0)
print_spinner();
}
writer.flush();
// Demux everything back
demuxer cap2(tmpfile);
DLIB_TEST(cap2.is_open());
DLIB_TEST(cap2.video_enabled());
DLIB_TEST(!cap2.audio_enabled());
DLIB_TEST(cap2.get_video_codec_id() == image_codec);
DLIB_TEST(cap2.height() == height);
DLIB_TEST(cap2.width() == width);
int nimages_muxed{0};
while (cap2.read(img))
{
++nimages_muxed;
if (nimages_muxed % 10 == 0)
print_spinner();
}
DLIB_TEST(nimages_muxed == nimages_demuxed);
}
void test_muxer2 (
const std::string& filepath, const std::string& filepath,
AVCodecID image_codec, AVCodecID image_codec,
AVCodecID audio_codec AVCodecID audio_codec
@ -802,7 +877,14 @@ namespace
test_demuxer_full(filepath, nframes, height, width, sample_rate, has_video, has_audio); test_demuxer_full(filepath, nframes, height, width, sample_rate, has_video, has_audio);
test_encoder(filepath, AV_CODEC_ID_MPEG4, AV_CODEC_ID_AC3); test_encoder(filepath, AV_CODEC_ID_MPEG4, AV_CODEC_ID_AC3);
test_muxer(filepath, AV_CODEC_ID_MPEG4, AV_CODEC_ID_AC3);
if (has_video)
{
test_muxer1<array2d<rgb_pixel>>(filepath, AV_CODEC_ID_MPEG4);
test_muxer1<matrix<bgr_pixel>>(filepath, AV_CODEC_ID_MPEG4);
}
test_muxer2(filepath, AV_CODEC_ID_MPEG4, AV_CODEC_ID_AC3);
} }
} }
} }