[FFmpeg] added save_frame() and improved picking of codecs when not specified (#2800)

* WIP

* - pick best codec
- added save_frame()
- fixed example

* Add missing variable declaration

* added extensions
fixed tests

* attempt to set quality as high as possible. lower expected PSNR

* arrufat suggestion

* refactoring

---------

Co-authored-by: Your name <you@example.com>
Co-authored-by: pf <pf@me>
Co-authored-by: Adrià Arrufat <1671644+arrufat@users.noreply.github.com>
pull/2806/head
pfeatherstone 1 year ago committed by GitHub
parent b86871241b
commit eccb9675ff
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -1627,7 +1627,7 @@ namespace dlib
inline void load_frame(image_type& image, const std::string& file_name)
{
if (!demuxer({file_name, video_enabled, audio_disabled}).read(image))
throw error("ffmpeg::load_frame: error while loading " + file_name);
throw error(EIMAGE_LOAD, "ffmpeg::load_frame: error while loading " + file_name);
}
// ---------------------------------------------------------------------------------------------------

@ -480,6 +480,47 @@ namespace dlib { namespace ffmpeg { namespace details
}
}
// ---------------------------------------------------------------------------------------------------
// Guesses a codec from the extension of filename (the text after the last '.').
//   - The comparison is ASCII case-insensitive, so "a.JPG", "a.Jpg" and "a.jpg"
//     all map to the same codec.
//   - Returns AV_CODEC_ID_NONE when filename has no '.' or when the extension is
//     not one of the extensions listed below.
inline AVCodecID pick_codec_from_filename(const std::string& filename)
{
    const auto ext_pos = filename.find_last_of('.');
    if (ext_pos == std::string::npos)
        return AV_CODEC_ID_NONE;

    // Lower-case the extension by hand (ASCII only). This keeps the function free
    // of locale effects and of any header beyond <string>.
    std::string ext = filename.substr(ext_pos + 1);
    for (char& c : ext)
    {
        if (c >= 'A' && c <= 'Z')
            c = static_cast<char>(c - 'A' + 'a');
    }

    // Still image codecs
    if (ext == "png")
        return AV_CODEC_ID_PNG;
    if (ext == "jpeg" || ext == "jpg")
        return AV_CODEC_ID_MJPEG;
    if (ext == "tiff" || ext == "tif")
        return AV_CODEC_ID_TIFF;
    if (ext == "webp")
        return AV_CODEC_ID_WEBP;
    if (ext == "bmp")
        return AV_CODEC_ID_BMP;
    if (ext == "jls")
        return AV_CODEC_ID_JPEGLS;
    if (ext == "jp2")
        return AV_CODEC_ID_JPEG2000;
#if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(59, 37, 100)
    // AV_CODEC_ID_JPEGXL is only referenced for libavcodec >= 59.37.100.
    if (ext == "jxl")
        return AV_CODEC_ID_JPEGXL;
#endif

    // Raw video elementary streams
    if (ext == "h264")
        return AV_CODEC_ID_H264;
    if (ext == "h265" || ext == "hevc")
        return AV_CODEC_ID_H265;

    // Raw audio elementary streams
    if (ext == "aac")
        return AV_CODEC_ID_AAC;
    if (ext == "ac3")
        return AV_CODEC_ID_AC3;

    return AV_CODEC_ID_NONE;
}
// ---------------------------------------------------------------------------------------------------
}}}

@ -634,6 +634,24 @@ namespace dlib
} st;
};
// ---------------------------------------------------------------------------------------------------
template <
class image_type,
is_image_check<image_type> = true
>
void save_frame(
const image_type& image,
const std::string& file_name,
const std::unordered_map<std::string, std::string>& codec_options = {}
);
/*!
    requires
        - image_type must be a type conforming to the generic image interface.
    ensures
        - Encodes image as a single frame and writes it to the file named by file_name.
        - codec_options is a set of key/value string pairs that is handed to the image
          encoder as its codec options. It defaults to an empty set.
        - The caller does not name a codec here. NOTE(review): the codec is presumably
          derived by the muxer from file_name (e.g. from its extension) - confirm
          against the muxer implementation.
    throws
        - error (with type EIMAGE_SAVE) if the frame could not be pushed to the file.
!*/
// ---------------------------------------------------------------------------------------------------
}
@ -822,10 +840,64 @@ namespace dlib
}
#endif
}
}
// ---------------------------------------------------------------------------------------------------
// Makes sure args.args_codec refers to a codec that the muxer oformat can store.
// When the requested codec is not accepted, args.args_codec.codec is overwritten,
// first with a codec guessed from filename's extension and, failing that, with the
// muxer's own default video/audio codec (selected by is_video).
// Returns false only when none of these yields a codec; in that case the codecs
// available for this muxer are logged.
inline bool check_codecs (
    const bool is_video,
    const std::string& filename,
    const AVOutputFormat* oformat,
    encoder::args& args
)
{
    const auto muxer_codecs = list_codecs_for_muxer(oformat);

    // A codec is looked up by id when an id is given, and by name otherwise.
    const auto muxer_accepts = [&](AVCodecID id, const std::string& name)
    {
        if (id != AV_CODEC_ID_NONE)
        {
            return std::any_of(begin(muxer_codecs), end(muxer_codecs),
                               [&](const auto& entry) { return id == entry.codec_id; });
        }

        return std::any_of(begin(muxer_codecs), end(muxer_codecs),
                           [&](const auto& entry) { return name == entry.codec_name; });
    };

    auto& requested = args.args_codec;

    // 1) The codec the caller asked for
    if (muxer_accepts(requested.codec, requested.codec_name))
        return true;

    logger_dlib_wrapper() << LWARN
        << "Codec " << avcodec_get_name(requested.codec) << " or " << requested.codec_name
        << " cannot be stored in this file";

    // 2) A codec derived from the file extension
    requested.codec = pick_codec_from_filename(filename);

    if (muxer_accepts(requested.codec, ""))
    {
        logger_dlib_wrapper() << LWARN << "Picking codec " << avcodec_get_name(requested.codec);
        return true;
    }

    // 3) The default codec suggested by FFmpeg for this muxer
    requested.codec = is_video ? oformat->video_codec : oformat->audio_codec;

    if (requested.codec == AV_CODEC_ID_NONE)
    {
        // Nothing usable: tell the user what this muxer could have stored.
        logger_dlib_wrapper() << LWARN
            << "List of supported codecs for muxer " << oformat->name << " in this installation of ffmpeg:";

        for (const auto& entry : muxer_codecs)
            logger_dlib_wrapper() << LWARN << "    " << entry.codec_name;

        return false;
    }

    logger_dlib_wrapper() << LWARN << "Picking default codec " << avcodec_get_name(requested.codec);
    return true;
}
// ---------------------------------------------------------------------------------------------------
}
inline encoder::encoder(
const args& a
) : args_(a)
@ -1188,37 +1260,8 @@ namespace dlib
if (st.pFormatCtx->oformat->flags & AVFMT_GLOBALHEADER)
args.args_codec.flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
// Before we create the encoder, check the codec is supported by this muxer
const auto supported_codecs = list_codecs_for_muxer(st.pFormatCtx->oformat);
if (std::find_if(begin(supported_codecs), end(supported_codecs), [&](const auto& supported) {
return args.args_codec.codec != AV_CODEC_ID_NONE ?
supported.codec_id == args.args_codec.codec :
supported.codec_name == args.args_codec.codec_name;
}) == end(supported_codecs))
{
logger_dlib_wrapper() << LWARN
<< "Codec " << avcodec_get_name(args.args_codec.codec) << " or " << args.args_codec.codec_name
<< " cannot be stored in this file";
args.args_codec.codec = is_video ? st.pFormatCtx->oformat->video_codec :
st.pFormatCtx->oformat->audio_codec;
if (args.args_codec.codec != AV_CODEC_ID_NONE)
{
logger_dlib_wrapper() << LWARN
<< "Picking default codec " << avcodec_get_name(args.args_codec.codec);
}
else
{
logger_dlib_wrapper() << LWARN
<< "List of supported codecs for muxer " << st.pFormatCtx->oformat->name << " in this installation of ffmpeg:";
for (const auto& supported : supported_codecs)
logger_dlib_wrapper() << LWARN << " " << supported.codec_name;
return false;
}
}
if (!check_codecs(is_video, st.args_.filepath, st.pFormatCtx->oformat, args))
return false;
// Codec is supported by muxer, so create encoder
enc = encoder(args);
@ -1381,6 +1424,36 @@ namespace dlib
inline AVCodecID muxer::get_audio_codec_id() const noexcept { return st.encoder_audio.get_codec_id(); }
inline std::string muxer::get_audio_codec_name() const noexcept { return st.encoder_audio.get_codec_name(); }
// ---------------------------------------------------------------------------------------------------
template <
class image_type,
is_image_check<image_type>
>
inline void save_frame(
const image_type& image,
const std::string& file_name,
const std::unordered_map<std::string, std::string>& codec_options
)
{
muxer writer([&] {
muxer::args args;
args.filepath = file_name;
args.enable_image = true;
args.enable_audio = false;
args.args_image.h = num_rows(image);
args.args_image.w = num_columns(image);
args.args_image.framerate = 1;
args.args_image.fmt = pix_traits<pixel_type_t<image_type>>::fmt;
args.args_image.codec_options = codec_options;
args.format_options["update"] = "1";
return args;
}());
if (!writer.push(image))
throw error(EIMAGE_SAVE, "ffmpeg::save_frame: error while saving " + file_name);
}
// ---------------------------------------------------------------------------------------------------
}

@ -146,6 +146,21 @@ namespace
DLIB_TEST_MSG(similarity > 25.0, "psnr " << similarity);
}
// Round-trip test for save_frame()/load_frame(): writes a random image of
// pixel_type to filename, reads it back, and checks that the dimensions match
// and that the PSNR between the two images is above 20.
// The codec is not named here; the test cases pass file names whose extension
// identifies the format.
template<class pixel_type>
void test_load_save_frame(const std::string& filename)
{
matrix<pixel_type> img1, img2;
img1 = get_random_image<pixel_type>();
// qmin = qmax = 1 is an attempt to request the highest quality the encoder offers
// (presumably ignored by codecs that have no such option - TODO confirm).
save_frame(img1, filename, {{"qmin", "1"}, {"qmax", "1"}});
load_frame(img2, filename);
DLIB_TEST(img1.nr() == img2.nr());
DLIB_TEST(img1.nc() == img2.nc());
// Lossy codecs will not reproduce the random image exactly, so the images are
// compared by PSNR rather than pixel by pixel.
const double similarity = psnr(img1, img2);
DLIB_TEST_MSG(similarity > 20.0, "psnr " << similarity);
}
//////////////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////////
// DECODER
@ -784,6 +799,13 @@ namespace
DLIB_TEST(samples <= (nsamples + rate));
}
// Returns true when list_codecs() contains an entry for codec id that is marked
// as supporting encoding.
// The codec list is bound once: both iterators are then guaranteed to belong to
// the same container (even if list_codecs() were to return by value), and the
// list is not fetched three times per query.
const auto codec_supported = [](const AVCodecID id)
{
    const auto& codecs = list_codecs();

    return std::any_of(begin(codecs), end(codecs), [=](const auto& supported) {
        return supported.codec_id == id && supported.supports_encoding;
    });
};
class video_tester : public tester
{
public:
@ -802,6 +824,30 @@ namespace
test_frame<bgr_pixel>();
test_frame<rgb_alpha_pixel>();
test_frame<bgr_alpha_pixel>();
if (codec_supported(AV_CODEC_ID_PNG))
{
test_load_save_frame<rgb_pixel>("dummy.png");
test_load_save_frame<bgr_pixel>("dummy.png");
}
if (codec_supported(AV_CODEC_ID_MJPEG))
{
test_load_save_frame<rgb_pixel>("dummy.jpg");
test_load_save_frame<bgr_pixel>("dummy.jpg");
}
if (codec_supported(AV_CODEC_ID_BMP))
{
test_load_save_frame<rgb_pixel>("dummy.bmp");
test_load_save_frame<bgr_pixel>("dummy.bmp");
}
if (codec_supported(AV_CODEC_ID_TIFF))
{
test_load_save_frame<rgb_pixel>("dummy.tiff");
test_load_save_frame<bgr_pixel>("dummy.tiff");
}
}
dlib::file f(DLIB_FFMPEG_DATA);

@ -40,7 +40,7 @@ try
const std::string filename = get_option(parser, "i", "");
const std::string device = get_option(parser, "o", "hw:0,0");
const std::string codec = get_option(parser, "codec", "pcm_s16le");
const std::string codec = get_option(parser, "codec", "");
if (device.empty())
{
@ -65,7 +65,7 @@ try
args.enable_image = false;
args.args_audio.codec_name = codec;
args.args_audio.sample_rate = 44100;
args.args_audio.channel_layout = cap.channel_layout();
args.args_audio.channel_layout = AV_CH_LAYOUT_STEREO;
args.args_audio.fmt = cap.sample_fmt();
return args;
}());

Loading…
Cancel
Save