| /* |
| * Copyright (c) 2019 Guo Yejun |
| * |
| * This file is part of FFmpeg. |
| * |
| * FFmpeg is free software; you can redistribute it and/or |
| * modify it under the terms of the GNU Lesser General Public |
| * License as published by the Free Software Foundation; either |
| * version 2.1 of the License, or (at your option) any later version. |
| * |
| * FFmpeg is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| * Lesser General Public License for more details. |
| * |
| * You should have received a copy of the GNU Lesser General Public |
| * License along with FFmpeg; if not, write to the Free Software |
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| */ |
| |
| /** |
| * @file |
| * implementing a generic image processing filter using deep learning networks. |
| */ |
| |
| #include "libavformat/avio.h" |
| #include "libavutil/opt.h" |
| #include "libavutil/pixdesc.h" |
| #include "libavutil/avassert.h" |
| #include "libavutil/imgutils.h" |
| #include "avfilter.h" |
| #include "dnn_interface.h" |
| #include "formats.h" |
| #include "internal.h" |
| #include "libswscale/swscale.h" |
| |
| typedef struct DnnProcessingContext { |
| const AVClass *class; |
| |
| char *model_filename; |
| DNNBackendType backend_type; |
| char *model_inputname; |
| char *model_outputname; |
| char *backend_options; |
| |
| DNNModule *dnn_module; |
| DNNModel *model; |
| |
    struct SwsContext *sws_uv_scale; // chroma scaler, used when the model changes the frame size
    int sws_uv_height;               // chroma plane height of the input frames
| } DnnProcessingContext; |
| |
| #define OFFSET(x) offsetof(DnnProcessingContext, x) |
| #define FLAGS AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM |
| static const AVOption dnn_processing_options[] = { |
| { "dnn_backend", "DNN backend", OFFSET(backend_type), AV_OPT_TYPE_INT, { .i64 = 0 }, INT_MIN, INT_MAX, FLAGS, "backend" }, |
| { "native", "native backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = 0 }, 0, 0, FLAGS, "backend" }, |
| #if (CONFIG_LIBTENSORFLOW == 1) |
| { "tensorflow", "tensorflow backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, 0, 0, FLAGS, "backend" }, |
| #endif |
| #if (CONFIG_LIBOPENVINO == 1) |
| { "openvino", "openvino backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = 2 }, 0, 0, FLAGS, "backend" }, |
| #endif |
| { "model", "path to model file", OFFSET(model_filename), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS }, |
| { "input", "input name of the model", OFFSET(model_inputname), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS }, |
| { "output", "output name of the model", OFFSET(model_outputname), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS }, |
| { "options", "backend options", OFFSET(backend_options), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS }, |
| { NULL } |
| }; |
| |
| AVFILTER_DEFINE_CLASS(dnn_processing); |
| |
| static av_cold int init(AVFilterContext *context) |
| { |
| DnnProcessingContext *ctx = context->priv; |
| |
| if (!ctx->model_filename) { |
| av_log(ctx, AV_LOG_ERROR, "model file for network is not specified\n"); |
| return AVERROR(EINVAL); |
| } |
| if (!ctx->model_inputname) { |
| av_log(ctx, AV_LOG_ERROR, "input name of the model network is not specified\n"); |
| return AVERROR(EINVAL); |
| } |
| if (!ctx->model_outputname) { |
| av_log(ctx, AV_LOG_ERROR, "output name of the model network is not specified\n"); |
| return AVERROR(EINVAL); |
| } |
| |
| ctx->dnn_module = ff_get_dnn_module(ctx->backend_type); |
| if (!ctx->dnn_module) { |
| av_log(ctx, AV_LOG_ERROR, "could not create DNN module for requested backend\n"); |
| return AVERROR(ENOMEM); |
| } |
| if (!ctx->dnn_module->load_model) { |
| av_log(ctx, AV_LOG_ERROR, "load_model for network is not specified\n"); |
| return AVERROR(EINVAL); |
| } |
| |
    ctx->model = (ctx->dnn_module->load_model)(ctx->model_filename, ctx->backend_options, context);
| if (!ctx->model) { |
| av_log(ctx, AV_LOG_ERROR, "could not load DNN model\n"); |
| return AVERROR(EINVAL); |
| } |
| |
| return 0; |
| } |
| |
| static int query_formats(AVFilterContext *context) |
| { |
| static const enum AVPixelFormat pix_fmts[] = { |
| AV_PIX_FMT_RGB24, AV_PIX_FMT_BGR24, |
| AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAYF32, |
| AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P, |
| AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV411P, |
| AV_PIX_FMT_NONE |
| }; |
| AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts); |
| return ff_set_common_formats(context, fmts_list); |
| } |
| |
#define LOG_FORMAT_CHANNEL_MISMATCH()                       \
    av_log(ctx, AV_LOG_ERROR,                               \
           "the frame's format %s does not match "          \
           "the model input channel %d\n",                  \
           av_get_pix_fmt_name(fmt),                        \
           model_input->channels)
| |
/* Check that the negotiated input link is compatible with the model's input:
 * matching width/height when the model fixes them, float32 data type, and a
 * channel count that agrees with the pixel format. */
static int check_modelinput_inlink(const DNNData *model_input, const AVFilterLink *inlink)
| { |
| AVFilterContext *ctx = inlink->dst; |
| enum AVPixelFormat fmt = inlink->format; |
| |
    // by design, this filter does not rescale; an explicit scale filter must be
    // inserted before it when the model requires a fixed frame size
| if (model_input->height != -1 && model_input->height != inlink->h) { |
| av_log(ctx, AV_LOG_ERROR, "the model requires frame height %d but got %d\n", |
| model_input->height, inlink->h); |
| return AVERROR(EIO); |
| } |
| if (model_input->width != -1 && model_input->width != inlink->w) { |
| av_log(ctx, AV_LOG_ERROR, "the model requires frame width %d but got %d\n", |
| model_input->width, inlink->w); |
| return AVERROR(EIO); |
| } |
| if (model_input->dt != DNN_FLOAT) { |
| av_log(ctx, AV_LOG_ERROR, "only support dnn models with input data type as float32.\n"); |
| return AVERROR(EIO); |
| } |
| |
| switch (fmt) { |
| case AV_PIX_FMT_RGB24: |
| case AV_PIX_FMT_BGR24: |
| if (model_input->channels != 3) { |
| LOG_FORMAT_CHANNEL_MISMATCH(); |
| return AVERROR(EIO); |
| } |
| return 0; |
    case AV_PIX_FMT_GRAY8:
    case AV_PIX_FMT_GRAYF32:
| case AV_PIX_FMT_YUV420P: |
| case AV_PIX_FMT_YUV422P: |
| case AV_PIX_FMT_YUV444P: |
| case AV_PIX_FMT_YUV410P: |
| case AV_PIX_FMT_YUV411P: |
| if (model_input->channels != 1) { |
| LOG_FORMAT_CHANNEL_MISMATCH(); |
| return AVERROR(EIO); |
| } |
| return 0; |
| default: |
| av_log(ctx, AV_LOG_ERROR, "%s not supported.\n", av_get_pix_fmt_name(fmt)); |
| return AVERROR(EIO); |
| } |
| |
| return 0; |
| } |
| |
/* Called once the input link is configured; validate the link against the
 * model's expected input. */
static int config_input(AVFilterLink *inlink)
| { |
| AVFilterContext *context = inlink->dst; |
| DnnProcessingContext *ctx = context->priv; |
| DNNReturnType result; |
    DNNData model_input;
| |
| result = ctx->model->get_input(ctx->model->model, &model_input, ctx->model_inputname); |
| if (result != DNN_SUCCESS) { |
| av_log(ctx, AV_LOG_ERROR, "could not get input from the model\n"); |
| return AVERROR(EIO); |
| } |
| |
    return check_modelinput_inlink(&model_input, inlink);
| } |
| |
| static av_always_inline int isPlanarYUV(enum AVPixelFormat pix_fmt) |
| { |
| const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt); |
| av_assert0(desc); |
| return !(desc->flags & AV_PIX_FMT_FLAG_RGB) && desc->nb_components == 3; |
| } |
| |
/* If the model changes the frame size, set up a swscale context to rescale
 * the chroma planes; the luma plane is produced by the model itself. */
static int prepare_uv_scale(AVFilterLink *outlink)
| { |
| AVFilterContext *context = outlink->src; |
| DnnProcessingContext *ctx = context->priv; |
| AVFilterLink *inlink = context->inputs[0]; |
| enum AVPixelFormat fmt = inlink->format; |
| |
| if (isPlanarYUV(fmt)) { |
| if (inlink->w != outlink->w || inlink->h != outlink->h) { |
| const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt); |
| int sws_src_h = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h); |
| int sws_src_w = AV_CEIL_RSHIFT(inlink->w, desc->log2_chroma_w); |
| int sws_dst_h = AV_CEIL_RSHIFT(outlink->h, desc->log2_chroma_h); |
| int sws_dst_w = AV_CEIL_RSHIFT(outlink->w, desc->log2_chroma_w); |
            ctx->sws_uv_scale = sws_getContext(sws_src_w, sws_src_h, AV_PIX_FMT_GRAY8,
                                               sws_dst_w, sws_dst_h, AV_PIX_FMT_GRAY8,
                                               SWS_BICUBIC, NULL, NULL, NULL);
            if (!ctx->sws_uv_scale)
                return AVERROR(ENOMEM);
            ctx->sws_uv_height = sws_src_h;
| } |
| } |
| |
| return 0; |
| } |
| |
/* Query the model for the output frame size (it may differ from the input,
 * e.g. for a super resolution model) and configure the output link
 * accordingly. */
static int config_output(AVFilterLink *outlink)
| { |
| AVFilterContext *context = outlink->src; |
| DnnProcessingContext *ctx = context->priv; |
| DNNReturnType result; |
| AVFilterLink *inlink = context->inputs[0]; |
| |
    // do a trial run, since the DNN model may change the frame size
| result = ctx->model->get_output(ctx->model->model, ctx->model_inputname, inlink->w, inlink->h, |
| ctx->model_outputname, &outlink->w, &outlink->h); |
| if (result != DNN_SUCCESS) { |
| av_log(ctx, AV_LOG_ERROR, "could not get output from the model\n"); |
| return AVERROR(EIO); |
| } |
| |
    return prepare_uv_scale(outlink);
}
| |
| static int copy_uv_planes(DnnProcessingContext *ctx, AVFrame *out, const AVFrame *in) |
| { |
| const AVPixFmtDescriptor *desc; |
| int uv_height; |
| |
| if (!ctx->sws_uv_scale) { |
| av_assert0(in->height == out->height && in->width == out->width); |
| desc = av_pix_fmt_desc_get(in->format); |
| uv_height = AV_CEIL_RSHIFT(in->height, desc->log2_chroma_h); |
| for (int i = 1; i < 3; ++i) { |
| int bytewidth = av_image_get_linesize(in->format, in->width, i); |
| av_image_copy_plane(out->data[i], out->linesize[i], |
| in->data[i], in->linesize[i], |
| bytewidth, uv_height); |
| } |
| } else { |
| sws_scale(ctx->sws_uv_scale, (const uint8_t **)(in->data + 1), in->linesize + 1, |
| 0, ctx->sws_uv_height, out->data + 1, out->linesize + 1); |
| sws_scale(ctx->sws_uv_scale, (const uint8_t **)(in->data + 2), in->linesize + 2, |
| 0, ctx->sws_uv_height, out->data + 2, out->linesize + 2); |
| } |
| |
| return 0; |
| } |
| |
/* Run one frame through the network and send the result downstream. */
static int filter_frame(AVFilterLink *inlink, AVFrame *in)
| { |
| AVFilterContext *context = inlink->dst; |
| AVFilterLink *outlink = context->outputs[0]; |
| DnnProcessingContext *ctx = context->priv; |
| DNNReturnType dnn_result; |
| AVFrame *out; |
| |
| out = ff_get_video_buffer(outlink, outlink->w, outlink->h); |
| if (!out) { |
| av_frame_free(&in); |
| return AVERROR(ENOMEM); |
| } |
| av_frame_copy_props(out, in); |
| |
| dnn_result = (ctx->dnn_module->execute_model)(ctx->model, ctx->model_inputname, in, |
| (const char **)&ctx->model_outputname, 1, out); |
    if (dnn_result != DNN_SUCCESS) {
| av_log(ctx, AV_LOG_ERROR, "failed to execute model\n"); |
| av_frame_free(&in); |
| av_frame_free(&out); |
| return AVERROR(EIO); |
| } |
| |
| if (isPlanarYUV(in->format)) |
| copy_uv_planes(ctx, out, in); |
| |
| av_frame_free(&in); |
| return ff_filter_frame(outlink, out); |
| } |
| |
static av_cold void uninit(AVFilterContext *context)
{
    DnnProcessingContext *ctx = context->priv;

    sws_freeContext(ctx->sws_uv_scale);

    if (ctx->dnn_module)
        (ctx->dnn_module->free_model)(&ctx->model);

    av_freep(&ctx->dnn_module);
}
| |
| static const AVFilterPad dnn_processing_inputs[] = { |
| { |
| .name = "default", |
| .type = AVMEDIA_TYPE_VIDEO, |
| .config_props = config_input, |
| .filter_frame = filter_frame, |
| }, |
| { NULL } |
| }; |
| |
| static const AVFilterPad dnn_processing_outputs[] = { |
| { |
| .name = "default", |
| .type = AVMEDIA_TYPE_VIDEO, |
| .config_props = config_output, |
| }, |
| { NULL } |
| }; |
| |
| AVFilter ff_vf_dnn_processing = { |
| .name = "dnn_processing", |
| .description = NULL_IF_CONFIG_SMALL("Apply DNN processing filter to the input."), |
| .priv_size = sizeof(DnnProcessingContext), |
| .init = init, |
| .uninit = uninit, |
| .query_formats = query_formats, |
| .inputs = dnn_processing_inputs, |
| .outputs = dnn_processing_outputs, |
| .priv_class = &dnn_processing_class, |
| }; |