FFmpeg : encoding (#2754)

* docs

* callbacks for encoder

* shorter video

* shorter video

* added is_byte type trait

* leave muxer for next PR

* added overloads for set_layout() and get_layout() in details namespace

* unit test

* example

* build

* overloads for ffmpeg < 5

* Update examples/ffmpeg_video_encoding_ex.cpp

Co-authored-by: Adrià Arrufat <1671644+arrufat@users.noreply.github.com>

* Update dlib/media/ffmpeg_abstract.h

Co-authored-by: Davis E. King <davis685@gmail.com>

* Update dlib/media/ffmpeg_abstract.h

Co-authored-by: Davis E. King <davis685@gmail.com>

* Update dlib/media/ffmpeg_abstract.h

Co-authored-by: Davis E. King <davis685@gmail.com>

* Update dlib/media/ffmpeg_abstract.h

Co-authored-by: Davis E. King <davis685@gmail.com>

* Update dlib/media/ffmpeg_abstract.h

Co-authored-by: Davis E. King <davis685@gmail.com>

* as per suggestion

* remove requires clause

* Update examples/ffmpeg_video_encoding_ex.cpp

Co-authored-by: Davis E. King <davis685@gmail.com>

* Update dlib/media/ffmpeg_abstract.h

Co-authored-by: Davis E. King <davis685@gmail.com>

* Update dlib/media/ffmpeg_abstract.h

Co-authored-by: Davis E. King <davis685@gmail.com>

* Update dlib/media/ffmpeg_abstract.h

Co-authored-by: Davis E. King <davis685@gmail.com>

* Update dlib/media/ffmpeg_muxer.h

Co-authored-by: Davis E. King <davis685@gmail.com>

* use dlib::logger

* oops

* Update dlib/media/ffmpeg_muxer.h

Co-authored-by: Davis E. King <davis685@gmail.com>

* Update dlib/media/ffmpeg_demuxer.h

* Update dlib/media/ffmpeg_demuxer.h

* Update dlib/media/ffmpeg_abstract.h

---------

Co-authored-by: pf <pf@me>
Co-authored-by: Davis E. King <davis685@gmail.com>
Co-authored-by: Adrià Arrufat <1671644+arrufat@users.noreply.github.com>
This commit is contained in:
pfeatherstone 2023-04-01 14:48:26 +01:00 committed by GitHub
parent f586d0a552
commit 937e07e90d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 1313 additions and 78 deletions

View File

@ -11,5 +11,7 @@ static_assert(false, "This version of dlib isn't built with the FFMPEG wrappers"
#include "media/ffmpeg_utils.h"
#include "media/ffmpeg_demuxer.h"
#include "media/ffmpeg_muxer.h"
#include "media/sink.h"
#endif // DLIB_MEDIA

View File

@ -1018,7 +1018,323 @@ namespace dlib
!*/
};
// ---------------------------------------------------------------------------------------------------
// ---------------------------------------------------------------------------------------------------
template <
class Byte,
class Allocator,
std::enable_if_t<is_byte<Byte>::value, bool> = true
>
auto sink(std::vector<Byte, Allocator>& buf);
/*!
requires
- Byte must be a byte type, e.g. char, int8_t or uint8_t
ensures
- returns a function object with signature bool(std::size_t N, const char* data). When
called that function appends the first N bytes pointed to by data onto the end of buf.
- The returned function is valid only as long as buf exists.
- The function always returns true.
!*/
// ---------------------------------------------------------------------------------------------------
auto sink(std::ostream& out);
/*!
ensures
- returns a function object with signature bool(std::size_t N, const char* data). When
called that function writes the first N bytes pointed to by data to out.
- The returned function is valid only as long as out exists.
- The function returns out.good(), i.e. true if the write to the stream succeeded and false otherwise.
!*/
// ---------------------------------------------------------------------------------------------------
struct encoder_image_args
{
/*!
WHAT THIS OBJECT REPRESENTS
This class groups a set of arguments passed to the encoder and muxer classes.
These must be set to non-zero or non-trivial values as they are used to configure
the underlying codec and optionally, an internal image scaler.
Any frame that is pushed to encoder or muxer instances is resized to the codec's
pre-configured settings if their dimensions or pixel format don't match.
For example, if the codec is configured to use height 512, width 384 and RGB format,
using the variables below, and the frames already have these settings when pushed,
then no resizing is performed. If however they don't, then they are first resized.
!*/
// Target height of codec.
int h{0};
// Target width of codec.
int w{0};
// Target pixel format of codec.
AVPixelFormat fmt{AV_PIX_FMT_YUV420P};
// Target framerate of codec/muxer
int framerate{0};
};
// ---------------------------------------------------------------------------------------------------
struct encoder_audio_args
{
/*!
WHAT THIS OBJECT REPRESENTS
This class groups a set of arguments passed to the encoder and muxer classes.
These must be set to non-zero or non-trivial values as they are used to configure
the underlying codec and optionally, an internal audio resampler.
Any frame that is pushed to encoder or muxer instances is resampled to the codec's
pre-configured settings if their sample format, sample rate or channel layout, don't match.
!*/
// Target sample rate of codec
int sample_rate{0};
// Target channel layout of codec
uint64_t channel_layout{AV_CH_LAYOUT_STEREO};
// Target sample format of codec
AVSampleFormat fmt{AV_SAMPLE_FMT_S16};
};
// ---------------------------------------------------------------------------------------------------
struct encoder_codec_args
{
/*!
WHAT THIS OBJECT REPRESENTS
This class groups a set of arguments passed to the encoder and muxer classes.
Some of these must be set to non-zero or non-trivial values as they are used
to configure the underlying codec. Others will only be used if non-zero or
non-trivial.
!*/
// Codec ID used to configure the encoder. Either codec or codec_name MUST be set.
AVCodecID codec{AV_CODEC_ID_NONE};
// Codec name used to configure the encoder. This is used if codec == AV_CODEC_ID_NONE.
std::string codec_name;
// A dictionary of AVCodecContext and codec-private options. Used by "avcodec_open2()"
std::unordered_map<std::string, std::string> codec_options;
// Sets AVCodecContext::bit_rate if positive. The default of -1 leaves it untouched.
int64_t bitrate{-1};
// Sets AVCodecContext::gop_size if positive. The default of -1 leaves it untouched.
int gop_size{-1};
// Extra flags OR-ed with AVCodecContext::flags. The default of 0 adds no flags.
int flags{0};
};
// ---------------------------------------------------------------------------------------------------
class encoder
{
public:
/*!
WHAT THIS OBJECT REPRESENTS
This class is a libavcodec wrapper which encodes video or audio to raw memory.
Note, if you are creating a media file, it is easier to use the muxer object
as it also works with raw codec files like .h264 files.
This class is suitable for example if you need to send raw packets over a socket
or interface with another library that requires encoded data, not raw images
or raw audio samples.
!*/
struct args
{
/*!
WHAT THIS OBJECT REPRESENTS
This holds constructor arguments for encoder.
!*/
encoder_codec_args args_codec;
encoder_image_args args_image;
encoder_audio_args args_audio;
};
encoder() = default;
/*!
ensures
- is_open() == false
!*/
encoder(
const args& a,
std::function<bool(std::size_t, const char*)> sink
);
/*!
requires
- a.args_codec.codec or a.args_codec.codec_name are set
- Either a.args_image or a.args_audio is fully set
- sink is set to a valid callback for writing packet data.
dlib/media/sink.h contains callback wrappers for
different buffer types.
ensures
- Constructs encoder from args and sink
- is_open() == true
!*/
encoder(encoder&& other) = default;
/*!
ensures
- Move constructor
- other is in an empty but otherwise valid state after move
- other.is_open() == false after move
!*/
encoder& operator=(encoder&& other) = default;
/*!
ensures
- Move assignment operator
- other is in an empty but otherwise valid state after move
- other.is_open() == false after move
!*/
~encoder();
/*!
ensures
- Destructor
- flush() is called if it hasn't been already
!*/
bool is_open() const noexcept;
/*!
ensures
- Returns true if the codec is open and user may call push()
!*/
bool is_image_encoder() const noexcept;
/*!
ensures
- Returns true if the codec is an image encoder.
!*/
bool is_audio_encoder() const noexcept;
/*!
ensures
- Returns true if the codec is an audio encoder.
!*/
AVCodecID get_codec_id() const noexcept;
/*!
requires
- is_open() == true
ensures
- returns the codec id. See ffmpeg documentation or libavcodec/codec_id.h
!*/
std::string get_codec_name() const noexcept;
/*!
requires
- is_open() == true
ensures
- returns string representation of codec id.
!*/
int height() const noexcept;
/*!
requires
- is_image_encoder() == true
ensures
- returns the height of the configured codec, not necessarily the
height of frames passed to push(frame)
!*/
int width() const noexcept;
/*!
requires
- is_image_encoder() == true
ensures
- returns the width of the configured codec, not necessarily the
width of frames passed to push(frame)
!*/
AVPixelFormat pixel_fmt() const noexcept;
/*!
requires
- is_image_encoder() == true
ensures
- returns the pixel format of the configured codec, not necessarily the
pixel format of frames passed to push(frame)
!*/
int fps() const noexcept;
/*!
requires
- is_image_encoder() == true
ensures
- returns the configured framerate of the codec.
!*/
int sample_rate() const noexcept;
/*!
requires
- is_audio_encoder() == true
ensures
- returns the sample rate of the configured codec, not necessarily the
sample rate of frames passed to push(frame)
!*/
uint64_t channel_layout() const noexcept;
/*!
requires
- is_audio_encoder() == true
ensures
- returns the channel layout of the configured codec, not necessarily the
channel layout of frames passed to push(frame).
e.g. AV_CH_LAYOUT_STEREO, AV_CH_LAYOUT_MONO etc.
!*/
AVSampleFormat sample_fmt() const noexcept;
/*!
requires
- is_audio_encoder() == true
ensures
- returns the sample format of the configured codec, not necessarily the
sample format of frames passed to push(frame)
!*/
int nchannels() const noexcept;
/*!
requires
- is_audio_encoder() == true
ensures
- returns the number of audio channels in the configured codec.
!*/
bool push(frame f);
/*!
requires
- is_open() == true
- if is_image_encoder() == true, then f.is_image() == true
- if is_audio_encoder() == true, then f.is_audio() == true
- flush() has not been called
ensures
- If f does not have matching settings to the codec, it is either
resized or resampled before being pushed to the codec and encoded.
- The callback passed to the constructor may or may not be invoked
as the underlying resampler, audio fifo and codec may buffer.
- Returns true if successfully encoded, even if callback wasn't invoked.
- Returns false if either EOF, i.e. flush() has been previously called,
or an error occurred, in which case is_open() == false.
!*/
void flush();
/*!
ensures
- Flushes the codec. Callback passed to constructor will likely be invoked.
- is_open() == false
- Becomes a no-op after the first time you call this.
!*/
};
// ---------------------------------------------------------------------------------------------------
}
}

View File

@ -73,9 +73,10 @@ namespace dlib
decoder_extractor() = default;
decoder_extractor(
const args& a,
av_ptr<AVCodecContext> pCodecCtx_,
const AVCodec* codec
const args& a,
av_ptr<AVCodecContext> pCodecCtx_,
const AVCodec* codec,
std::shared_ptr<logger> log_
);
bool is_open() const noexcept;
@ -107,6 +108,7 @@ namespace dlib
resizer resizer_image;
resampler resizer_audio;
std::queue<frame> frame_queue;
std::shared_ptr<logger> log;
};
}
@ -152,6 +154,7 @@ namespace dlib
details::av_ptr<AVCodecParserContext> parser;
details::av_ptr<AVPacket> packet;
details::decoder_extractor extractor;
std::shared_ptr<logger> log;
};
// ---------------------------------------------------------------------------------------------------
@ -233,6 +236,7 @@ namespace dlib
int stream_id_video{-1};
int stream_id_audio{-1};
std::queue<frame> frame_queue;
std::shared_ptr<logger> log;
} st;
};
@ -256,10 +260,11 @@ namespace dlib
namespace details
{
inline decoder_extractor::decoder_extractor(
const args& a,
av_ptr<AVCodecContext> pCodecCtx_,
const AVCodec* codec
)
const args& a,
av_ptr<AVCodecContext> pCodecCtx_,
const AVCodec* codec,
std::shared_ptr<logger> log_
) : log(log_)
{
args_ = a;
avframe = make_avframe();
@ -274,7 +279,7 @@ namespace dlib
if (ret < 0)
{
printf("avcodec_open2() failed : `%s`\n", get_av_error(ret).c_str());
(*log) << LERROR << "avcodec_open2() failed : " << get_av_error(ret).c_str();
return;
}
@ -353,7 +358,7 @@ namespace dlib
} else {
pCodecCtx = nullptr;
state = EXTRACT_ERROR;
printf("avcodec_send_packet() failed : `%s`\n", get_av_error(ret).c_str());
(*log) << LERROR << "avcodec_send_packet() failed : " << get_av_error(ret);
}
};
@ -373,7 +378,7 @@ namespace dlib
{
pCodecCtx = nullptr;
state = EXTRACT_ERROR;
printf("avcodec_receive_frame() failed : %i - `%s`\n", ret, get_av_error(ret).c_str());
(*log) << LERROR << "avcodec_receive_frame() failed : " << get_av_error(ret);
}
else
{
@ -449,6 +454,7 @@ namespace dlib
// ---------------------------------------------------------------------------------------------------
inline decoder::decoder(const args &a)
: log(std::make_shared<logger>("ffmpeg::decoder"))
{
using namespace details;
@ -465,7 +471,12 @@ namespace dlib
if (!pCodec)
{
printf("Codec `%s` / `%s` not found\n", avcodec_get_name(a.args_codec.codec), a.args_codec.codec_name.c_str());
(*log) << LERROR
<< "Codec "
<< avcodec_get_name(a.args_codec.codec)
<< " / "
<< a.args_codec.codec_name
<< " not found.";
return;
}
@ -473,14 +484,14 @@ namespace dlib
if (!pCodecCtx)
{
printf("avcodec_alloc_context3() failed to allocate codec context for `%s`\n", pCodec->name);
(*log) << LERROR << "avcodec_alloc_context3() failed to allocate codec context for " << pCodec->name;
return;
}
if (pCodecCtx->codec_id == AV_CODEC_ID_AAC)
pCodecCtx->strict_std_compliance = FF_COMPLIANCE_EXPERIMENTAL;
extractor = decoder_extractor{{a.args_codec, a.args_image, a.args_audio, pCodecCtx->time_base}, std::move(pCodecCtx), pCodec};
extractor = decoder_extractor{{a.args_codec, a.args_image, a.args_audio, pCodecCtx->time_base}, std::move(pCodecCtx), pCodec, log};
if (!extractor.is_open())
return;
@ -493,7 +504,7 @@ namespace dlib
parser.reset(av_parser_init(pCodec->id));
if (!parser)
{
printf("av_parser_init() failed codec `%s` not found\n", pCodec->name);
(*log) << LERROR << "av_parser_init() failed codec " << pCodec->name << " not found";
return;
}
}
@ -539,7 +550,7 @@ namespace dlib
);
if (ret < 0)
return fail(cerr, "AV : error while parsing encoded buffer");
return fail(*log, "AV : error while parsing encoded buffer");
encoded += ret;
nencoded -= ret;
@ -640,6 +651,8 @@ namespace dlib
inline demuxer::demuxer(const args &a)
{
st.log = std::make_shared<logger>("ffmpeg::demuxer");
if (!open(a))
st.pFormatCtx = nullptr;
}
@ -718,7 +731,7 @@ namespace dlib
opts.get());
if (ret != 0)
return fail(cerr, "avformat_open_input() failed with error : ", get_av_error(ret));
return fail(*st.log, "avformat_open_input() failed with error : ", get_av_error(ret));
if (opts.size() > 0)
{
@ -732,7 +745,7 @@ namespace dlib
ret = avformat_find_stream_info(st.pFormatCtx.get(), NULL);
if (ret < 0)
return fail(cerr, "avformat_find_stream_info() failed with error : ", get_av_error(ret));
return fail(*st.log, "avformat_find_stream_info() failed with error : ", get_av_error(ret));
const auto setup_stream = [&](bool is_video)
{
@ -745,26 +758,26 @@ namespace dlib
return true; //You might be asking for both video and audio but only video is available. That's OK. Just provide video.
else if (stream_id == AVERROR_DECODER_NOT_FOUND)
return fail(cerr, "av_find_best_stream() : decoder not found for stream type : ", av_get_media_type_string(media_type));
return fail(*st.log, "av_find_best_stream() : decoder not found for stream type : ", av_get_media_type_string(media_type));
else if (stream_id < 0)
return fail(cerr, "av_find_best_stream() failed : ", get_av_error(stream_id));
return fail(*st.log, "av_find_best_stream() failed : ", get_av_error(stream_id));
av_ptr<AVCodecContext> pCodecCtx{avcodec_alloc_context3(pCodec)};
if (!pCodecCtx)
return fail(cerr, "avcodec_alloc_context3() failed to allocate codec context for ", pCodec->name);
return fail(*st.log, "avcodec_alloc_context3() failed to allocate codec context for ", pCodec->name);
const int ret = avcodec_parameters_to_context(pCodecCtx.get(), st.pFormatCtx->streams[stream_id]->codecpar);
if (ret < 0)
return fail(cerr, "avcodec_parameters_to_context() failed : ", get_av_error(ret));
return fail(*st.log, "avcodec_parameters_to_context() failed : ", get_av_error(ret));
if (pCodecCtx->codec_type == AVMEDIA_TYPE_VIDEO)
{
if (pCodecCtx->height == 0 ||
pCodecCtx->width == 0 ||
pCodecCtx->pix_fmt == AV_PIX_FMT_NONE)
return fail(cerr, "Codec parameters look wrong : (h,w,pixel_fmt) : (",
return fail(*st.log, "Codec parameters look wrong : (h,w,pixel_fmt) : (",
pCodecCtx->height, ",",
pCodecCtx->width, ",",
get_pixel_fmt_str(pCodecCtx->pix_fmt), ")");
@ -774,13 +787,13 @@ namespace dlib
if (pCodecCtx->sample_rate == 0 ||
pCodecCtx->sample_fmt == AV_SAMPLE_FMT_NONE ||
details::channel_layout_empty(pCodecCtx.get()))
return fail(cerr,"Codec parameters look wrong :",
return fail(*st.log,"Codec parameters look wrong :",
" sample_rate : ", pCodecCtx->sample_rate,
" sample format : ", get_audio_fmt_str(pCodecCtx->sample_fmt),
" channel layout : ", details::get_channel_layout_str(pCodecCtx.get()));
}
else
return fail(cerr,"Unrecognized media type ", pCodecCtx->codec_type);
return fail(*st.log,"Unrecognized media type ", pCodecCtx->codec_type);
if (is_video)
{
@ -790,7 +803,7 @@ namespace dlib
args.args_image = st.args_.args_image;
args.time_base = st.pFormatCtx->streams[stream_id]->time_base;
return args;
}(), std::move(pCodecCtx), pCodec};
}(), std::move(pCodecCtx), pCodec, st.log};
st.stream_id_video = stream_id;
}
@ -802,7 +815,7 @@ namespace dlib
args.args_audio = st.args_.args_audio;
args.time_base = st.pFormatCtx->streams[stream_id]->time_base;
return args;
}(), std::move(pCodecCtx), pCodec};
}(), std::move(pCodecCtx), pCodec, st.log};
st.stream_id_audio = stream_id;
}
@ -817,7 +830,7 @@ namespace dlib
return false;
if (!st.channel_audio.is_open() && !st.channel_video.is_open())
return fail(cerr, "At least one of video and audio channels must be enabled");
return fail(*st.log, "At least one of video and audio channels must be enabled");
populate_metadata();
@ -889,7 +902,7 @@ namespace dlib
return false;
else if (ret < 0)
return fail(cerr, "av_read_frame() failed : ", get_av_error(ret));
return fail(*st.log, "av_read_frame() failed : ", get_av_error(ret));
if (st.packet->stream_index == st.stream_id_video)
channel = &st.channel_video;

543
dlib/media/ffmpeg_muxer.h Normal file
View File

@ -0,0 +1,543 @@
// Copyright (C) 2023 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_VIDEO_MUXER
#define DLIB_VIDEO_MUXER
#include <queue>
#include <functional>
#include <unordered_map>
#include "ffmpeg_utils.h"
namespace dlib
{
namespace ffmpeg
{
// ---------------------------------------------------------------------------------------------------
// Target image settings used to configure a video codec.
// encoder::open() rejects a video codec unless h, w and framerate are
// positive and fmt is not AV_PIX_FMT_NONE.
struct encoder_image_args
{
// Target height of codec.
int h{0};
// Target width of codec.
int w{0};
// Target pixel format of codec.
AVPixelFormat fmt{AV_PIX_FMT_YUV420P};
// Target framerate of codec/muxer.
int framerate{0};
};
// ---------------------------------------------------------------------------------------------------
// Target audio settings used to configure an audio codec.
// encoder::open() rejects an audio codec unless sample_rate is positive,
// channel_layout is non-zero and fmt is not AV_SAMPLE_FMT_NONE.
struct encoder_audio_args
{
// Target sample rate of codec.
int sample_rate{0};
// Target channel layout of codec, e.g. AV_CH_LAYOUT_STEREO.
uint64_t channel_layout{AV_CH_LAYOUT_STEREO};
// Target sample format of codec.
AVSampleFormat fmt{AV_SAMPLE_FMT_S16};
};
// ---------------------------------------------------------------------------------------------------
// Codec selection and tuning options read by encoder::open().
struct encoder_codec_args
{
// Codec ID used to look up the encoder. Either codec or codec_name must be set.
AVCodecID codec{AV_CODEC_ID_NONE};
// Codec name used to look up the encoder when codec == AV_CODEC_ID_NONE.
std::string codec_name;
// Options handed to avcodec_open2() when the codec is opened.
std::unordered_map<std::string, std::string> codec_options;
// Copied to AVCodecContext::bit_rate when positive; -1 (the default) leaves it untouched.
int64_t bitrate{-1};
// Copied to AVCodecContext::gop_size when positive; -1 (the default) leaves it untouched.
int gop_size{-1};
// Extra flags OR-ed into AVCodecContext::flags; 0 (the default) adds none.
int flags{0};
};
// ---------------------------------------------------------------------------------------------------
class encoder
{
public:
    /*!
        WHAT THIS OBJECT REPRESENTS
            A libavcodec wrapper that encodes images or audio frames and hands
            every encoded packet to a user supplied sink callback.
            The object is move-only.
    !*/

    struct args
    {
        /*!
            WHAT THIS OBJECT REPRESENTS
                Constructor arguments for encoder. args_codec selects the codec;
                args_image is used by video codecs and args_audio by audio codecs.
        !*/
        encoder_codec_args args_codec;
        encoder_image_args args_image;
        encoder_audio_args args_audio;
    };

    // Creates a closed encoder: is_open() == false.
    encoder() = default;
    encoder(encoder&& other) = default;
    encoder& operator=(encoder&& other) = default;

    // Opens the codec described by a. Every encoded packet is passed to sink
    // as (size in bytes, pointer to data). If opening fails the failure is
    // logged and is_open() == false.
    encoder(
        const args& a,
        std::function<bool(std::size_t, const char*)> sink
    );

    // Calls flush().
    ~encoder();

    // True while the codec is open and push() may be called.
    bool is_open() const noexcept;
    bool is_image_encoder() const noexcept;
    bool is_audio_encoder() const noexcept;
    // Return AV_CODEC_ID_NONE / "NONE" when there is no codec context.
    AVCodecID get_codec_id() const noexcept;
    std::string get_codec_name() const noexcept;

    /*! video properties !*/
    int height() const noexcept;
    int width() const noexcept;
    AVPixelFormat pixel_fmt() const noexcept;
    int fps() const noexcept;

    /*! audio properties !*/
    int sample_rate() const noexcept;
    uint64_t channel_layout() const noexcept;
    AVSampleFormat sample_fmt() const noexcept;
    int nchannels() const noexcept;

    // Resizes/resamples frame to the codec's settings if needed, then encodes it.
    // Returns false if the encoder is not open or an error occurred.
    bool push(frame frame);

    // Pushes an empty frame, which drains the codec.
    void flush();

private:
    friend class muxer;

    // Used by the public constructor and by muxer: the sink receives the codec
    // context together with the raw AVPacket, and the logger is supplied by
    // the caller.
    encoder(
        const args& a,
        std::function<bool(AVCodecContext*,AVPacket*)> sink,
        std::shared_ptr<logger> log_
    );

    bool open();

    args args_;
    bool open_{false};
    details::av_ptr<AVCodecContext> pCodecCtx;
    details::av_ptr<AVPacket> packet;
    // Timestamp given to the next frame sent to the codec. push() advances it
    // by 1 per image and by nsamples() per audio frame. It is 64-bit, like
    // AVFrame::pts, so that long audio streams cannot overflow it.
    int64_t next_pts{0};
    details::resizer resizer_image;
    details::resampler resizer_audio;
    // Re-chunks resampled audio before it reaches the codec (see push()).
    details::audio_fifo fifo;
    std::function<bool(AVCodecContext*,AVPacket*)> sink;
    std::shared_ptr<logger> log;
};
// ---------------------------------------------------------------------------------------------------
//////////////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////// DEFINITIONS /////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////////
// Two rationals are equal only when numerator and denominator both match;
// no reduction is performed, so 1/2 and 2/4 compare unequal.
inline bool operator==(const AVRational& a, const AVRational& b)
{
    if (a.num != b.num)
        return false;
    return a.den == b.den;
}

inline bool operator!=(const AVRational& a, const AVRational& b)
{
    return a.num != b.num || a.den != b.den;
}

// Compares the truncated integer value of a with framerate.
// A rational whose denominator is not positive never compares equal.
inline bool operator==(const AVRational& a, int framerate)
{
    if (a.den <= 0)
        return false;
    const int whole = a.num / a.den;
    return whole == framerate;
}

inline bool operator!=(const AVRational& a, int framerate)
{
    if (a == framerate)
        return false;
    return true;
}
// Returns a truncated to an integer (num / den).
// A rational with a zero denominator yields 0 instead of dividing by zero.
inline int to_int(const AVRational& a)
{
    return a.den != 0 ? a.num / a.den : 0;
}
// Returns the reciprocal of a, i.e. numerator and denominator swapped.
inline AVRational inv(const AVRational& a)
{
    AVRational swapped{a.den, a.num};
    return swapped;
}
inline void check_properties(
const AVCodec* pCodec,
AVCodecContext* pCodecCtx,
logger& log
)
{
// Video properties
if (pCodec->supported_framerates && pCodecCtx->framerate != 0)
{
bool framerate_supported = false;
for (int i = 0 ; pCodec->supported_framerates[i] != AVRational{0,0} ; i++)
{
if (pCodecCtx->framerate == pCodec->supported_framerates[i])
{
framerate_supported = true;
break;
}
}
if (!framerate_supported)
{
log << LINFO
<< "Requested framerate "
<< pCodecCtx->framerate.num / pCodecCtx->framerate.den
<< " not supported. Changing to default "
<< pCodec->supported_framerates[0].num / pCodec->supported_framerates[0].den;
pCodecCtx->framerate = pCodec->supported_framerates[0];
}
}
if (pCodec->pix_fmts)
{
bool pix_fmt_supported = false;
for (int i = 0 ; pCodec->pix_fmts[i] != AV_PIX_FMT_NONE ; i++)
{
if (pCodecCtx->pix_fmt == pCodec->pix_fmts[i])
{
pix_fmt_supported = true;
break;
}
}
if (!pix_fmt_supported)
{
log << LINFO
<< "Requested pixel format "
<< av_get_pix_fmt_name(pCodecCtx->pix_fmt)
<< " not supported. Changing to default "
<< av_get_pix_fmt_name(pCodec->pix_fmts[0]);
pCodecCtx->pix_fmt = pCodec->pix_fmts[0];
}
}
// Audio properties
if (pCodec->supported_samplerates)
{
bool sample_rate_supported = false;
for (int i = 0 ; pCodec->supported_samplerates[i] != 0 ; i++)
{
if (pCodecCtx->sample_rate == pCodec->supported_samplerates[i])
{
sample_rate_supported = true;
break;
}
}
if (!sample_rate_supported)
{
log << LINFO
<< "Requested sample rate "
<< pCodecCtx->sample_rate
<< " not supported. Changing to default "
<< pCodec->supported_samplerates[0];
pCodecCtx->sample_rate = pCodec->supported_samplerates[0];
}
}
if (pCodec->sample_fmts)
{
bool sample_fmt_supported = false;
for (int i = 0 ; pCodec->sample_fmts[i] != AV_SAMPLE_FMT_NONE ; i++)
{
if (pCodecCtx->sample_fmt == pCodec->sample_fmts[i])
{
sample_fmt_supported = true;
break;
}
}
if (!sample_fmt_supported)
{
log << LINFO
<< "Requested sample format "
<< av_get_sample_fmt_name(pCodecCtx->sample_fmt)
<< " not supported. Changing to default "
<< av_get_sample_fmt_name(pCodec->sample_fmts[0]);
pCodecCtx->sample_fmt = pCodec->sample_fmts[0];
}
}
#if FF_API_OLD_CHANNEL_LAYOUT
if (pCodec->ch_layouts)
{
bool channel_layout_supported = false;
for (int i = 0 ; av_channel_layout_check(&pCodec->ch_layouts[i]) ; ++i)
{
if (av_channel_layout_compare(&pCodecCtx->ch_layout, &pCodec->ch_layouts[i]) == 0)
{
channel_layout_supported = true;
break;
}
}
if (!channel_layout_supported)
{
log << LINFO
<< "Channel layout "
<< details::get_channel_layout_str(pCodecCtx)
<< " not supported. Changing to default "
<< details::get_channel_layout_str(pCodec->ch_layouts[0]);
av_channel_layout_copy(&pCodecCtx->ch_layout, &pCodec->ch_layouts[0]);
}
}
#else
if (pCodec->channel_layouts)
{
bool channel_layout_supported = false;
for (int i = 0 ; pCodec->channel_layouts[i] != 0 ; i++)
{
if (pCodecCtx->channel_layout == pCodec->channel_layouts[i])
{
channel_layout_supported = true;
break;
}
}
if (!channel_layout_supported)
{
log << LINFO
<< "Channel layout "
<< get_channel_layout_str(pCodecCtx->channel_layout)
<< " not supported. Changing to default "
<< get_channel_layout_str(pCodec->channel_layouts[0]);
pCodecCtx->channel_layout = pCodec->channel_layouts[0];
}
}
#endif
}
// Public constructor: adapts the user's (size, bytes) sink to the internal
// (codec context, packet) callback, creates the "ffmpeg::encoder" logger and
// delegates to the private constructor.
inline encoder::encoder(
    const args &a,
    std::function<bool(std::size_t, const char*)> sink
)
: encoder(
    a,
    [sink](AVCodecContext*, AVPacket* pkt)
    {
        const char* bytes = reinterpret_cast<const char*>(pkt->data);
        return sink(pkt->size, bytes);
    },
    std::make_shared<logger>("ffmpeg::encoder")
  )
{
}
// Private constructor: stores the arguments, packet sink and logger, then
// tries to open the codec. If open() fails the codec context is dropped so
// that the object reports is_open() == false.
inline encoder::encoder(
    const args& a,
    std::function<bool(AVCodecContext*,AVPacket*)> sink_,
    std::shared_ptr<logger> log_
)
: args_(a),
  sink(std::move(sink_)),
  log(log_)
{
    const bool opened = open();
    if (!opened)
        pCodecCtx = nullptr;
}
// Flushes the codec before the encoder is destroyed, so packets it is still
// holding are passed to the sink. flush() goes through push(), which does
// nothing when the encoder is not open.
inline encoder::~encoder()
{
flush();
}
// Finds the encoder requested in args_, configures a codec context from the
// codec, image and audio arguments, and opens it.
//
// Returns true on success. On failure the reason is logged through fail() and
// false is returned; the calling constructor then drops the codec context.
inline bool encoder::open()
{
    using namespace std;
    using namespace details;

    DLIB_CASSERT(sink != nullptr, "must provide an appriate sink callback");

    const bool init = details::register_ffmpeg::get(); // This must be used somewhere otherwise compiler might optimize it away.

    packet = make_avpacket();

    // Look the encoder up by id first; fall back to the name only when no id was given.
    const AVCodec* pCodec = nullptr;
    if (args_.args_codec.codec != AV_CODEC_ID_NONE)
        pCodec = init ? avcodec_find_encoder(args_.args_codec.codec) : nullptr;
    else if (!args_.args_codec.codec_name.empty())
        pCodec = init ? avcodec_find_encoder_by_name(args_.args_codec.codec_name.c_str()) : nullptr;

    if (!pCodec)
        return fail(*log, "Codec ", avcodec_get_name(args_.args_codec.codec), " or ", args_.args_codec.codec_name, " not found");

    pCodecCtx.reset(avcodec_alloc_context3(pCodec));
    if (!pCodecCtx)
        return fail(*log, "AV : failed to allocate codec context for ", pCodec->name, " : likely ran out of memory");

    // Optional settings: only applied when the user moved them off their defaults.
    if (args_.args_codec.bitrate > 0)
        pCodecCtx->bit_rate = args_.args_codec.bitrate;
    if (args_.args_codec.gop_size > 0)
        pCodecCtx->gop_size = args_.args_codec.gop_size;
    // Test against 0 rather than "> 0": a flag set whose top bit is set
    // (e.g. AV_CODEC_FLAG_CLOSED_GOP) is negative when stored in an int and
    // would otherwise be silently ignored.
    if (args_.args_codec.flags != 0)
        pCodecCtx->flags |= args_.args_codec.flags;

    if (pCodec->type == AVMEDIA_TYPE_VIDEO)
    {
        if (args_.args_image.h <= 0 ||
            args_.args_image.w <= 0 ||
            args_.args_image.fmt == AV_PIX_FMT_NONE ||
            args_.args_image.framerate <= 0)
        {
            return fail(*log, pCodec->name, " is an image codec. height, width, fmt (pixel format) and framerate must be set");
        }

        pCodecCtx->height    = args_.args_image.h;
        pCodecCtx->width     = args_.args_image.w;
        pCodecCtx->pix_fmt   = args_.args_image.fmt;
        pCodecCtx->framerate = AVRational{args_.args_image.framerate, 1};
        // May replace the framerate / pixel format with values the codec supports,
        // so the time base is derived afterwards.
        check_properties(pCodec, pCodecCtx.get(), *log);
        pCodecCtx->time_base = inv(pCodecCtx->framerate);

        //don't know what src options are, but at least dst options are set
        resizer_image.reset(pCodecCtx->height, pCodecCtx->width, pCodecCtx->pix_fmt,
                            pCodecCtx->height, pCodecCtx->width, pCodecCtx->pix_fmt);
    }
    else if (pCodec->type == AVMEDIA_TYPE_AUDIO)
    {
        if (args_.args_audio.sample_rate <= 0 ||
            args_.args_audio.channel_layout == 0 ||
            args_.args_audio.fmt == AV_SAMPLE_FMT_NONE)
        {
            return fail(*log, pCodec->name, " is an audio codec. sample_rate, channel_layout and fmt (sample format) must be set");
        }

        pCodecCtx->sample_rate = args_.args_audio.sample_rate;
        pCodecCtx->sample_fmt  = args_.args_audio.fmt;
        set_layout(pCodecCtx.get(), args_.args_audio.channel_layout);
        // May replace sample rate / format / layout with values the codec supports,
        // so the time base is derived afterwards.
        check_properties(pCodec, pCodecCtx.get(), *log);
        pCodecCtx->time_base = AVRational{ 1, pCodecCtx->sample_rate };

        if (pCodecCtx->codec_id == AV_CODEC_ID_AAC) {
            pCodecCtx->strict_std_compliance = FF_COMPLIANCE_EXPERIMENTAL;
        }

        //don't know what src options are, but at least dst options are set
        resizer_audio.reset(
            pCodecCtx->sample_rate, get_layout(pCodecCtx.get()), pCodecCtx->sample_fmt,
            pCodecCtx->sample_rate, get_layout(pCodecCtx.get()), pCodecCtx->sample_fmt
        );
    }
    // NOTE(review): a codec that is neither video nor audio falls through and
    // is opened without any media-specific configuration - confirm this is intended.

    av_dict opt = args_.args_codec.codec_options;
    const int ret = avcodec_open2(pCodecCtx.get(), pCodec, opt.get());
    if (ret < 0)
        return fail(*log, "avcodec_open2() failed : ", get_av_error(ret));

    if (pCodec->type == AVMEDIA_TYPE_AUDIO)
    {
        // frame_size is only read after avcodec_open2(), so the fifo is built here.
        fifo = audio_fifo(pCodecCtx->frame_size,
                          pCodecCtx->sample_fmt,
                          get_nchannels(pCodecCtx.get()));
    }

    open_ = true;
    return open_;
}
inline bool encoder::is_open() const noexcept { return pCodecCtx != nullptr && sink != nullptr && open_; }
inline bool encoder::is_image_encoder() const noexcept { return pCodecCtx && pCodecCtx->codec_type == AVMEDIA_TYPE_VIDEO; }
inline bool encoder::is_audio_encoder() const noexcept { return pCodecCtx && pCodecCtx->codec_type == AVMEDIA_TYPE_AUDIO; }
inline AVCodecID encoder::get_codec_id() const noexcept { return pCodecCtx ? pCodecCtx->codec_id : AV_CODEC_ID_NONE; }
inline std::string encoder::get_codec_name() const noexcept { return pCodecCtx ? avcodec_get_name(pCodecCtx->codec_id) : "NONE"; }
inline int encoder::fps() const noexcept { return pCodecCtx ? to_int(pCodecCtx->framerate) : 0; }
inline int encoder::height() const noexcept { return resizer_image.get_dst_h(); }
inline int encoder::width() const noexcept { return resizer_image.get_dst_w(); }
inline AVPixelFormat encoder::pixel_fmt() const noexcept { return resizer_image.get_dst_fmt(); }
inline int encoder::sample_rate() const noexcept { return resizer_audio.get_dst_rate(); }
inline uint64_t encoder::channel_layout() const noexcept { return resizer_audio.get_dst_layout(); }
inline AVSampleFormat encoder::sample_fmt() const noexcept { return resizer_audio.get_dst_fmt(); }
inline int encoder::nchannels() const noexcept { return details::get_nchannels(channel_layout()); }
// States of the send/receive loop that encoder::push() runs for each frame.
enum encoding_state
{
// Hand the current frame to the codec.
ENCODE_SEND_FRAME,
// The codec accepted the frame: read packets until it has none left, then finish.
ENCODE_READ_PACKET_THEN_DONE,
// The codec answered EAGAIN to the frame: read packets, then offer the same frame again.
ENCODE_READ_PACKET_THEN_SEND_FRAME,
// Finished with the current frame.
ENCODE_DONE,
// A libav call or the sink failed; push() returns false.
ENCODE_ERROR = -1
};
// Encodes one frame.
//
// Images are resized and audio is resampled to the codec's settings. Audio is
// additionally routed through the fifo, which may hand back zero or more
// frames. A frame that is neither image nor audio (an empty frame) is sent
// as-is, which drains the codec.
//
// Returns false when the encoder is not open or when a libav call or the sink
// fails; returns true otherwise, even if no packet reached the sink.
inline bool encoder::push(frame f_)
{
    using namespace details;

    if (!is_open())
        return false;

    std::vector<frame> pending;

    // Resize if image. Resample if audio. Push through audio fifo if necessary (some audio codecs requires fixed size frames)
    if (f_.is_image())
    {
        resizer_image.resize(f_, f_);
        pending.push_back(std::move(f_));
    }
    else if (f_.is_audio())
    {
        resizer_audio.resize(f_, f_);
        pending = fifo.push_pull(std::move(f_));
    }
    else
    {
        // FLUSH
        pending.push_back(std::move(f_));
    }

    // Set pts based on tracked state. Ignore timestamps for now
    for (auto& fr : pending)
    {
        if (!fr.f)
            continue;
        fr.f->pts = next_pts;
        next_pts += (fr.is_image() ? 1 : fr.nsamples());
    }

    // Offers one frame to the codec and returns the state to continue with.
    const auto submit = [&](frame& fr) -> encoding_state
    {
        const int ret = avcodec_send_frame(pCodecCtx.get(), fr.f.get());

        if (ret >= 0)
            return ENCODE_READ_PACKET_THEN_DONE;

        if (ret == AVERROR(EAGAIN))
            return ENCODE_READ_PACKET_THEN_SEND_FRAME;

        // Anything else closes the encoder: EOF quietly, other errors with a log entry.
        open_ = false;
        if (ret == AVERROR_EOF)
            return ENCODE_DONE;

        (*log) << LERROR << "avcodec_send_frame() failed : " << get_av_error(ret);
        return ENCODE_ERROR;
    };

    // Reads one packet from the codec and forwards it to the sink.
    // Returns `current` unchanged after a delivered packet so the caller keeps reading.
    const auto drain = [&](encoding_state current, bool resend) -> encoding_state
    {
        const int ret = avcodec_receive_packet(pCodecCtx.get(), packet.get());

        if (ret == AVERROR(EAGAIN))
            return resend ? ENCODE_SEND_FRAME : ENCODE_DONE;

        if (ret == AVERROR_EOF)
        {
            open_ = false;
            return ENCODE_DONE;
        }

        if (ret < 0)
        {
            open_ = false;
            (*log) << LERROR << "avcodec_receive_packet() failed : " << get_av_error(ret);
            return ENCODE_ERROR;
        }

        if (!sink(pCodecCtx.get(), packet.get()))
        {
            open_ = false;
            return ENCODE_ERROR;
        }

        return current;
    };

    encoding_state state = ENCODE_SEND_FRAME;

    for (auto& fr : pending)
    {
        // A previous frame may have closed the encoder (EOF or error).
        if (!is_open())
            break;

        state = ENCODE_SEND_FRAME;

        while (state != ENCODE_DONE && state != ENCODE_ERROR)
        {
            if (state == ENCODE_SEND_FRAME)
                state = submit(fr);
            else if (state == ENCODE_READ_PACKET_THEN_DONE)
                state = drain(state, false);
            else if (state == ENCODE_READ_PACKET_THEN_SEND_FRAME)
                state = drain(state, true);
        }
    }

    return state != ENCODE_ERROR;
}
// Drains the encoder by pushing a default-constructed frame. push() hands a frame that is
// neither an image nor audio straight to the codec (its "FLUSH" branch).
// The boolean result of push() is discarded.
inline void encoder::flush()
{
push(frame{});
}
}
}
#endif //DLIB_VIDEO_MUXER

View File

@ -22,6 +22,7 @@ static_assert(false, "This version of dlib isn't built with the FFMPEG wrappers"
#include "../image_processing/generic_image.h"
#include "../pixel.h"
#include "../assert.h"
#include "../logger.h"
#include "ffmpeg_abstract.h"
extern "C" {
@ -286,6 +287,7 @@ namespace dlib
friend class details::resampler;
friend class details::decoder_extractor;
friend class encoder;
frame(
int h,
@ -414,14 +416,14 @@ namespace dlib
namespace details
{
template<class... Args>
inline bool fail(std::ostream& out, Args&&... args)
inline bool fail(logger& out, Args&&... args)
{
auto ret = out << LERROR;
#ifdef __cpp_fold_expressions
((out << args),...);
((ret << args),...);
#else
(void)std::initializer_list<int>{((out << args), 0)...};
(void)std::initializer_list<int>{((ret << args), 0)...};
#endif
out << '\n';
return false;
}
}
@ -550,11 +552,21 @@ namespace dlib
return frame->ch_layout.u.mask;
}
// Variant for FFmpeg builds where the channel layout is stored in a `ch_layout` member.
// Stores the layout given as a 64-bit value into the codec context, after converting it
// with convert_layout().
inline void set_layout(AVCodecContext* pCodecCtx, const uint64_t channel_layout)
{
pCodecCtx->ch_layout = convert_layout(channel_layout);
}
// Same as above, but stores the converted layout into a frame.
inline void set_layout(AVFrame* frame, const uint64_t channel_layout)
{
frame->ch_layout = convert_layout(channel_layout);
}
// Returns the channel count recorded in the codec context's ch_layout.
inline int get_nchannels(const AVCodecContext* pCodecCtx)
{
return pCodecCtx->ch_layout.nb_channels;
}
inline int get_nchannels(const AVFrame* frame)
{
return frame->ch_layout.nb_channels;
@ -607,6 +619,11 @@ namespace dlib
return frame->channel_layout;
}
// Variant for FFmpeg builds where the channel layout is a plain `channel_layout` field.
// Stores the given 64-bit layout value directly into the codec context.
inline void set_layout(AVCodecContext* pCodecCtx, const uint64_t channel_layout)
{
pCodecCtx->channel_layout = channel_layout;
}
inline void set_layout(AVFrame* frame, const uint64_t channel_layout)
{
frame->channel_layout = channel_layout;
@ -617,6 +634,11 @@ namespace dlib
return av_get_channel_layout_nb_channels(channel_layout);
}
// Returns the number of channels of the codec context's `channel_layout` field, by
// forwarding that value to another get_nchannels overload.
// NOTE(review): presumably the overload taking the layout as an integer - confirm.
inline int get_nchannels(const AVCodecContext* pCodecCtx)
{
return get_nchannels(pCodecCtx->channel_layout);
}
inline int get_nchannels(const AVFrame* frame)
{
return get_nchannels(frame->channel_layout);

47
dlib/media/sink.h Normal file
View File

@ -0,0 +1,47 @@
// Copyright (C) 2023 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_FFMPEG_SINK
#define DLIB_FFMPEG_SINK
#include <cstdint>
#include <vector>
#include <ostream>
#include "../type_traits.h"
namespace dlib
{
namespace ffmpeg
{
// ---------------------------------------------------------------------------------------------------
// Builds a callback that appends encoded bytes to a byte vector.
//
// buf : destination buffer. The returned callable keeps a reference to it, so buf must
//       outlive the callable.
//
// The returned callable has the form bool(std::size_t ndata, const char* data). It appends
// the ndata bytes starting at data to the end of buf and always returns true.
// Only participates in overload resolution when is_byte<Byte>::value is true.
template <
  class Byte,
  class Allocator,
  std::enable_if_t<is_byte<Byte>::value, bool> = true
>
auto sink(std::vector<Byte, Allocator>& buf)
{
    return [&buf](std::size_t ndata, const char* data) {
        // View the input as Byte rather than char. Inserting a char range relies on an
        // implicit char -> Byte conversion, which does not exist for std::byte, so that
        // instantiation would fail to compile even though is_byte accepts it.
        const Byte* const first = reinterpret_cast<const Byte*>(data);
        buf.insert(buf.end(), first, first + ndata);
        return true;
    };
}
// ---------------------------------------------------------------------------------------------------
// Builds a callback that writes encoded bytes to an output stream.
//
// out : destination stream. The returned callable keeps a reference to it, so out must
//       outlive the callable.
//
// The returned callable has the form bool(std::size_t ndata, const char* data). It writes
// ndata bytes starting at data to out and returns out.good() as observed after the write.
inline auto sink(std::ostream& out)
{
    return [&out](std::size_t ndata, const char* data) -> bool {
        out.write(data, static_cast<std::streamsize>(ndata));
        const bool stream_ok = out.good();
        return stream_ok;
    };
}
// ---------------------------------------------------------------------------------------------------
}
}
#endif //DLIB_FFMPEG_SINK

View File

@ -33,21 +33,20 @@ namespace
const int height = dlib::get_option(cfg, "height", 0);
const int width = dlib::get_option(cfg, "width", 0);
const int sample_rate = dlib::get_option(cfg, "sample_rate", 0);
const bool is_audio = sample_rate > 0;
const bool is_audio = sample_rate > 0;
decoder::args args;
args.args_codec.codec_name = codec;
args.args_image.fmt = AV_PIX_FMT_RGB24;
args.args_audio.fmt = AV_SAMPLE_FMT_S16;
args.args_audio.channel_layout = AV_CH_LAYOUT_MONO;
decoder dec([&] {
decoder::args args;
args.args_codec.codec_name = codec;
args.args_image.fmt = AV_PIX_FMT_RGB24;
args.args_audio.fmt = AV_SAMPLE_FMT_S16;
args.args_audio.channel_layout = AV_CH_LAYOUT_MONO;
return args;
}());
decoder dec(args);
DLIB_TEST(dec.is_open());
DLIB_TEST(dec.get_codec_name() == codec);
if (is_audio)
DLIB_TEST(dec.is_audio_decoder());
else
DLIB_TEST(dec.is_image_decoder());
DLIB_TEST(is_audio ? dec.is_audio_decoder() : dec.is_image_decoder());
array2d<rgb_pixel> img;
ffmpeg::audio<int16_t, 1> audio;
@ -126,11 +125,14 @@ namespace
DLIB_TEST(!dec.is_open());
}
void test_demuxer (
void test_demuxer_encoder_decoder (
const std::string& filepath,
const dlib::config_reader& cfg
const dlib::config_reader& cfg,
AVCodecID image_codec,
AVCodecID audio_codec
)
{
const std::string tmpfile = "dummy.avi";
const int nframes = dlib::get_option(cfg, "nframes", 0);
const int height = dlib::get_option(cfg, "height", 0);
const int width = dlib::get_option(cfg, "width", 0);
@ -138,18 +140,23 @@ namespace
const bool has_video = height > 0 && width > 0 && nframes > 0;
const bool has_audio = sample_rate > 0;
demuxer::args args;
args.filepath = filepath;
args.args_image.fmt = AV_PIX_FMT_RGB24;
args.args_audio.fmt = AV_SAMPLE_FMT_S16;
demuxer cap(args);
demuxer cap{[&] {
demuxer::args args;
args.filepath = filepath;
args.args_image.fmt = AV_PIX_FMT_RGB24;
args.args_audio.fmt = AV_SAMPLE_FMT_S16;
return args;
}()};
DLIB_TEST(cap.is_open());
DLIB_TEST(cap.video_enabled() == has_video);
DLIB_TEST(cap.audio_enabled() == has_audio);
DLIB_TEST(cap.height() == height);
DLIB_TEST(cap.width() == width);
DLIB_TEST(cap.sample_rate() == sample_rate);
DLIB_TEST(cap.video_enabled() == has_video);
DLIB_TEST(cap.audio_enabled() == has_audio);
DLIB_TEST(cap.height() == height);
DLIB_TEST(cap.width() == width);
DLIB_TEST(cap.sample_rate() == sample_rate);
// DLIB_TEST(cap.estimated_nframes() == nframes); // This won't always work with ffmpeg v3. v4 onwards is fine
int estimated_samples_min = cap.estimated_total_samples() - cap.sample_rate(); // - 1s
int estimated_samples_max = cap.estimated_total_samples() + cap.sample_rate(); // + 1s
if (has_video)
{
@ -159,40 +166,130 @@ namespace
{
DLIB_TEST(cap.sample_fmt() == AV_SAMPLE_FMT_S16);
}
// We're going to extract all frames, encode them, then decode them again
encoder enc_image, enc_audio;
decoder dec_image, dec_audio;
std::vector<uint8_t> buf_image, buf_audio;
if (has_video)
{
{
enc_image = encoder([&]{
encoder::args args;
args.args_codec.codec = image_codec;
args.args_image.h = cap.height();
args.args_image.w = cap.width();
args.args_image.framerate = cap.fps();
args.args_image.fmt = AV_PIX_FMT_YUV420P;
return args;
}(), sink(buf_image));
DLIB_TEST(enc_image.is_open());
DLIB_TEST(enc_image.is_image_encoder());
DLIB_TEST(enc_image.get_codec_id() == image_codec);
DLIB_TEST(enc_image.height() == cap.height());
DLIB_TEST(enc_image.width() == cap.width());
print_spinner();
}
{
dec_image = decoder{[&]{
decoder::args args;
args.args_codec.codec = enc_image.get_codec_id();
args.args_image.h = cap.height();
args.args_image.w = cap.width();
args.args_image.fmt = cap.pixel_fmt();
return args;
}()};
DLIB_TEST(dec_image.is_open());
DLIB_TEST(dec_image.is_image_decoder());
DLIB_TEST(dec_image.get_codec_id() == enc_image.get_codec_id());
print_spinner();
}
}
if (has_audio)
{
{
enc_audio = encoder([&]{
encoder::args args;
args.args_codec.codec = audio_codec;
args.args_audio.sample_rate = cap.sample_rate();
args.args_audio.channel_layout = cap.channel_layout();
args.args_audio.fmt = cap.sample_fmt();
return args;
}(), sink(buf_audio));
DLIB_TEST(enc_audio.is_open());
DLIB_TEST(enc_audio.is_audio_encoder());
DLIB_TEST(enc_audio.get_codec_id() == audio_codec);
//You can't guarantee that the requested sample rate or sample format are supported.
//In which case, the object changes them to values that ARE supported. So we can't add
//tests that check the sample rate is set to what we asked for.
print_spinner();
}
{
dec_audio = decoder{[&]{
decoder::args args;
args.args_codec.codec = enc_audio.get_codec_id();
args.args_audio.sample_rate = cap.sample_rate();
args.args_audio.channel_layout = cap.channel_layout();
args.args_audio.fmt = cap.sample_fmt();
return args;
}()};
DLIB_TEST(dec_audio.is_open());
DLIB_TEST(dec_audio.is_audio_decoder());
DLIB_TEST(dec_audio.get_codec_id() == enc_audio.get_codec_id());
print_spinner();
}
}
dlib::ffmpeg::frame frame, frame_copy;
array2d<rgb_pixel> img;
audio<int16_t, 1> audio1;
audio<int16_t, 2> audio2;
int count{0};
int nsamples{0};
int counter_images{0};
int counter_samples{0};
int iteration{0};
while (cap.read(frame))
{
if (frame.is_image())
{
DLIB_TEST(frame.height() == height);
DLIB_TEST(frame.width() == width);
DLIB_TEST(frame.pixfmt() == AV_PIX_FMT_RGB24);
// Test frame
DLIB_TEST(frame.height() == height);
DLIB_TEST(frame.width() == width);
DLIB_TEST(frame.pixfmt() == AV_PIX_FMT_RGB24);
convert(frame, img);
// Test frame -> dlib array
DLIB_TEST(img.nr() == height);
DLIB_TEST(img.nc() == width);
convert(img, frame_copy);
DLIB_TEST(frame_copy.height() == frame.height());
DLIB_TEST(frame_copy.width() == frame.width());
DLIB_TEST(frame_copy.pixfmt() == frame.pixfmt());
// Test dlib array -> frame
DLIB_TEST(frame_copy.height() == frame.height());
DLIB_TEST(frame_copy.width() == frame.width());
DLIB_TEST(frame_copy.pixfmt() == frame.pixfmt());
// Push to encoder
DLIB_TEST(enc_image.push(std::move(frame)));
++count;
++counter_images;
}
if (frame.is_audio())
{
// Test frame
DLIB_TEST(frame.sample_rate() == sample_rate);
DLIB_TEST(frame.samplefmt() == AV_SAMPLE_FMT_S16);
DLIB_TEST(frame.samplefmt() == AV_SAMPLE_FMT_S16);
// Test frame -> dlib array
// Test dlib array -> frame
if (frame.nchannels() == 1)
{
convert(frame, audio1);
@ -205,11 +302,16 @@ namespace
}
DLIB_TEST(frame.sample_rate() == sample_rate);
nsamples += frame.nsamples();
DLIB_TEST(frame_copy.is_audio());
DLIB_TEST(frame_copy.sample_rate() == frame.sample_rate());
DLIB_TEST(frame_copy.samplefmt() == frame.samplefmt());
DLIB_TEST(frame_copy.nsamples() == frame.nsamples());
DLIB_TEST(frame_copy.sample_rate() == frame.sample_rate());
DLIB_TEST(frame_copy.samplefmt() == frame.samplefmt());
DLIB_TEST(frame_copy.nsamples() == frame.nsamples());
DLIB_TEST(frame_copy.nchannels() == frame.nchannels());
counter_samples += frame.nsamples();
// Push to encoder
DLIB_TEST(enc_audio.push(std::move(frame)));
}
++iteration;
@ -217,8 +319,59 @@ namespace
print_spinner();
}
DLIB_TEST(count == nframes);
DLIB_TEST(counter_images == nframes);
DLIB_TEST(counter_samples >= estimated_samples_min); //within 1 second
DLIB_TEST(counter_samples <= estimated_samples_max); //within 1 second
DLIB_TEST(!cap.is_open());
enc_audio.flush();
enc_image.flush();
print_spinner();
// Decode encoded images
if (has_video)
{
DLIB_TEST(dec_image.push_encoded(buf_image.data(), buf_image.size()));
print_spinner();
dec_image.flush();
counter_images = 0;
decoder_status status;
while ((status = dec_image.read(frame)) == DECODER_FRAME_AVAILABLE)
{
++counter_images;
DLIB_TEST(frame.height() == height);
DLIB_TEST(frame.width() == width);
DLIB_TEST(frame.pixfmt() == AV_PIX_FMT_RGB24);
print_spinner();
}
DLIB_TEST(counter_images == nframes);
}
// Decode encoded audio
if (has_audio)
{
DLIB_TEST(dec_audio.push_encoded(buf_audio.data(), buf_audio.size()));
print_spinner();
dec_audio.flush();
counter_samples = 0;
decoder_status status;
while ((status = dec_audio.read(frame)) == DECODER_FRAME_AVAILABLE)
{
counter_samples += frame.nsamples();
DLIB_TEST(frame.sample_rate() == sample_rate);
DLIB_TEST(frame.samplefmt() == AV_SAMPLE_FMT_S16);
print_spinner();
}
DLIB_TEST(counter_samples >= estimated_samples_min); //within 1 second
DLIB_TEST(counter_samples <= estimated_samples_max); //within 1 second
}
}
class video_tester : public tester
@ -260,11 +413,11 @@ namespace
const auto& sublock = video_file_block.block(block);
const std::string filepath = get_parent_directory(f).full_name() + "/" + sublock["file"];
test_demuxer(filepath, sublock);
test_demuxer_encoder_decoder(filepath, sublock, AV_CODEC_ID_MPEG4, AV_CODEC_ID_AC3);
}
}
}
} a;
}
#endif
#endif

Binary file not shown.

View File

@ -35,10 +35,10 @@ demuxing
{
file1
{
file = MOT20-05-raw.mp4
file = MOT20-05-raw_shorter.mp4
width = 826
height = 540
nframes = 3315
nframes = 752
sample_rate = 0
}
@ -59,4 +59,4 @@ demuxing
nframes = 0
sample_rate = 16000
}
}
}

View File

@ -97,6 +97,17 @@ namespace dlib
// ----------------------------------------------------------------------------------------
// Compile-time predicate: is_byte<T>::value is true exactly when T is char, int8_t or
// uint8_t, or std::byte when the standard library advertises it through __cpp_lib_byte.
// cv-qualified and reference types are not stripped before the comparison.
template<class T>
using is_byte = std::integral_constant<
    bool,
       std::is_same<T, char>::value
    || std::is_same<T, int8_t>::value
    || std::is_same<T, uint8_t>::value
#ifdef __cpp_lib_byte
    || std::is_same<T, std::byte>::value
#endif
>;
// ----------------------------------------------------------------------------------------
// Strips a reference first, then top-level const/volatile, from T.
template< class T >
using remove_cvref_t = typename std::remove_cv<
    typename std::remove_reference<T>::type
>::type;
@ -203,4 +214,4 @@ namespace dlib
}
#endif //DLIB_TYPE_TRAITS_H_
#endif //DLIB_TYPE_TRAITS_H_

View File

@ -239,6 +239,7 @@ if (DLIB_USE_FFMPEG AND NOT DLIB_NO_GUI_SUPPORT)
add_example(ffmpeg_video_decoding_ex)
add_example(ffmpeg_info_ex)
add_example(ffmpeg_screen_grab_ex)
add_example(ffmpeg_video_encoding_ex)
endif()
if (DLIB_NO_GUI_SUPPORT)

View File

@ -0,0 +1,127 @@
// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
/*
This is an example illustrating the use of the ffmpeg wrappers, in this case the encoding API.
This is a pretty simple example. It loads a video file, extracts the images and
re-encodes them into a raw buffer using a user-specified codec.
Please see the following examples on how to decode, demux, and get information on your installation of ffmpeg:
- ffmpeg_info_ex.cpp
- ffmpeg_video_decoding_ex.cpp
- ffmpeg_video_demuxing_ex.cpp
*/
#include <cstdio>
#include <dlib/media.h>
#include <dlib/cmd_line_parser.h>
using namespace std;
using namespace dlib;
using namespace dlib::ffmpeg;
// Example entry point: reads the images of a video file and re-encodes them into five
// different kinds of output buffer, then prints how many bytes each one received.
//
// Command line options:
//   -i        input video (required)
//   --codec   codec name, defaults to mpeg4
//   --height  height of the encoded stream, defaults to the input's height
//   --width   width of the encoded stream, defaults to the input's width
//   -h/--help print the options and exit
//
// Returns EXIT_SUCCESS on success or after printing help. Returns EXIT_FAILURE when -i is
// missing, when the input cannot be opened as a video, or when an exception is thrown.
int main(const int argc, const char** argv)
try
{
    command_line_parser parser;
    parser.add_option("i", "input video", 1);
    parser.add_option("codec", "codec name. e.g. h264. Defaults to mpeg4", 1);
    parser.add_option("height", "height of encoded stream. Defaults to whatever is in the video file", 1);
    parser.add_option("width", "width of encoded stream. Defaults to whatever is in the video file", 1);

    parser.set_group_name("Help Options");
    parser.add_option("h", "alias of --help");
    parser.add_option("help", "display this message and exit");

    parser.parse(argc, argv);
    const char* one_time_opts[] = {"i", "codec", "height", "width"};
    parser.check_one_time_options(one_time_opts);

    if (parser.option("h") || parser.option("help"))
    {
        parser.print_options();
        return EXIT_SUCCESS;
    }

    if (!parser.option("i"))
    {
        // The input file is mandatory, so report its absence as a failure to the caller.
        cout << "Missing -i" << endl;
        parser.print_options();
        return EXIT_FAILURE;
    }

    const std::string filepath = parser.option("i").argument();

    // Load input video.
    // Note, this uses a convenient constructor which (dis)enables audio and/or video.
    demuxer cap({filepath, video_enabled, audio_disabled});

    if (!cap.is_open() || !cap.video_enabled())
    {
        cout << "Failed to open " << filepath << endl;
        return EXIT_FAILURE;
    }

    // This is a small functor that creates an encoder using the command line arguments
    // and different types of output buffers using the convenient sink() overload.
    const auto make_encoder = [&](auto& out)
    {
        return encoder([&] {
            encoder::args args;
            args.args_codec.codec_name  = get_option(parser, "codec", "mpeg4");
            args.args_image.h           = get_option(parser, "height", cap.height());
            args.args_image.w           = get_option(parser, "width",  cap.width());
            args.args_image.framerate   = cap.fps();
            return args;
        }(), sink(out));
    };

    // Encode to multiple different types of buffers.
    std::vector<char>       buf1;
    std::vector<int8_t>     buf2;
    std::vector<uint8_t>    buf3;
    std::ostringstream      buf4;
    std::ofstream           buf5("encoded.dat", std::ios::binary);

    // Different encoders for different buffers
    auto enc1 = make_encoder(buf1);
    auto enc2 = make_encoder(buf2);
    auto enc3 = make_encoder(buf3);
    auto enc4 = make_encoder(buf4);
    auto enc5 = make_encoder(buf5);

    // Every decoded frame is pushed (by copy) to each of the five encoders.
    frame f;
    while (cap.read(f))
    {
        enc1.push(f);
        enc2.push(f);
        enc3.push(f);
        enc4.push(f);
        enc5.push(f);
    }

    // Flush all the encoders
    // Note, encoder::~encoder calls flush()
    // So if the encoders were going out of scope at this point, you wouldn't have to call flush()
    // Also note, flush() becomes a no-op after the 1st time you call it.
    // Calling it more than once is safe but has no effect.
    // After calling flush(), push() will always return false.
    enc1.flush();
    enc2.flush();
    enc3.flush();
    enc4.flush();
    enc5.flush();

    cout << "vector<char> size " << buf1.size() << endl;
    cout << "vector<int8_t> size " << buf2.size() << endl;
    cout << "vector<uint8_t> size " << buf3.size() << endl;
    cout << "ostringstream size " << buf4.tellp() << endl;
    cout << "ofstream size " << buf5.tellp() << endl;

    return EXIT_SUCCESS;
}
catch (const std::exception& e)
{
    cout << e.what() << endl;
    return EXIT_FAILURE;
}