libavfilter/dnn/dnn_backend_native.h - manifest_repos/ffmpeg - Git at Google

 /*
  * Copyright (c) 2018 Sergey Lavrushkin
  *
  * This file is part of FFmpeg.
  *
  * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
  * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */

 /**
  * @file
  * DNN inference functions interface for native backend.
  */


 #ifndef AVFILTER_DNN_DNN_BACKEND_NATIVE_H
 #define AVFILTER_DNN_DNN_BACKEND_NATIVE_H

 #include "../dnn_interface.h"
 #include "libavformat/avio.h"
 #include "libavutil/opt.h"

 /**
  * the enum value of DNNLayerType should not be changed,
  * the same values are used in convert_from_tensorflow.py
  * and, it is used to index the layer execution/load function pointer.
  */
 typedef enum {
     DLT_INPUT = 0,
     DLT_CONV2D = 1,
     DLT_DEPTH_TO_SPACE = 2,
     DLT_MIRROR_PAD = 3,
     DLT_MAXIMUM = 4,
     DLT_MATH_BINARY = 5,
     DLT_MATH_UNARY = 6,
     DLT_AVG_POOL = 7,
     DLT_DENSE = 8,
     DLT_COUNT
 } DNNLayerType;

 typedef enum {DOT_INPUT = 1, DOT_OUTPUT = 2, DOT_INTERMEDIATE = DOT_INPUT | DOT_OUTPUT} DNNOperandType;
 typedef enum {VALID, SAME, SAME_CLAMP_TO_EDGE} DNNPaddingParam;
 typedef enum {RELU, TANH, SIGMOID, NONE, LEAKY_RELU} DNNActivationFunc;

 typedef struct Layer{
     DNNLayerType type;
     /**
      * a layer can have multiple inputs and one output.
      * 4 is just a big enough number for input operands (increase it if necessary),
      * do not use 'int32_t *input_operand_indexes', so we don't worry about mem leaks.
      */
     int32_t input_operand_indexes[4];
     int32_t output_operand_index;
     void *params;
 } Layer;

 typedef struct DnnOperand{
     /**
      * there are two memory layouts, NHWC or NCHW, so we use dims,
      * dims[0] is Number.
      */
     int32_t dims[4];

     /**
      * input/output/intermediate operand of the network
      */
     DNNOperandType type;

     /**
      * support different kinds of data type such as float, half float, int8 etc,
      * first support float now.
      */
     DNNDataType data_type;

     /**
      * NHWC if 1, otherwise NCHW.
      * let's first support NHWC only, this flag is for extensive usage.
      */
     int8_t isNHWC;

     /**
      * to avoid possible memory leak, do not use char *name
      */
     char name[128];

     /**
      * data pointer with data length in bytes.
      * usedNumbersLeft is only valid for intermediate operand,
      * it means how many layers still depend on this operand,
      * todo: the memory can be reused when usedNumbersLeft is zero.
      */
     void *data;
     int32_t length;
     int32_t usedNumbersLeft;
 }DnnOperand;

 typedef struct InputParams{
     int height, width, channels;
 } InputParams;

 typedef struct NativeOptions{
     uint32_t conv2d_threads;
 } NativeOptions;

 typedef struct NativeContext {
     const AVClass *class;
     NativeOptions options;
 } NativeContext;

 // Represents simple feed-forward convolutional network.
 typedef struct NativeModel{
     NativeContext ctx;
     DNNModel *model;
     Layer *layers;
     int32_t layers_num;
     DnnOperand *operands;
     int32_t operands_num;
 } NativeModel;

 DNNModel *ff_dnn_load_model_native(const char *model_filename, const char *options, void *userdata);

 DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, const char *input_name, AVFrame *in_frame,
                                           const char **output_names, uint32_t nb_output, AVFrame *out_frame);

 void ff_dnn_free_model_native(DNNModel **model);

 // NOTE: User must check for error (return value <= 0) to handle
 // case like integer overflow.
 int32_t calculate_operand_data_length(const DnnOperand *oprd);
 int32_t calculate_operand_dims_count(const DnnOperand *oprd);
 #endif
	/*
	* Copyright (c) 2018 Sergey Lavrushkin
	*
	* This file is part of FFmpeg.
	*
	* FFmpeg is free software; you can redistribute it and/or
	* modify it under the terms of the GNU Lesser General Public
	* License as published by the Free Software Foundation; either
	* version 2.1 of the License, or (at your option) any later version.
	*
	* FFmpeg is distributed in the hope that it will be useful,
	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	* Lesser General Public License for more details.
	*
	* You should have received a copy of the GNU Lesser General Public
	* License along with FFmpeg; if not, write to the Free Software
	* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
	*/

	/**
	* @file
	* DNN inference functions interface for native backend.
	*/


	#ifndef AVFILTER_DNN_DNN_BACKEND_NATIVE_H
	#define AVFILTER_DNN_DNN_BACKEND_NATIVE_H

	#include "../dnn_interface.h"
	#include "libavformat/avio.h"
	#include "libavutil/opt.h"

	/**
	* the enum value of DNNLayerType should not be changed,
	* the same values are used in convert_from_tensorflow.py
	* and, it is used to index the layer execution/load function pointer.
	*/
	typedef enum {
	DLT_INPUT = 0,
	DLT_CONV2D = 1,
	DLT_DEPTH_TO_SPACE = 2,
	DLT_MIRROR_PAD = 3,
	DLT_MAXIMUM = 4,
	DLT_MATH_BINARY = 5,
	DLT_MATH_UNARY = 6,
	DLT_AVG_POOL = 7,
	DLT_DENSE = 8,
	DLT_COUNT
	} DNNLayerType;

	typedef enum {DOT_INPUT = 1, DOT_OUTPUT = 2, DOT_INTERMEDIATE = DOT_INPUT \| DOT_OUTPUT} DNNOperandType;
	typedef enum {VALID, SAME, SAME_CLAMP_TO_EDGE} DNNPaddingParam;
	typedef enum {RELU, TANH, SIGMOID, NONE, LEAKY_RELU} DNNActivationFunc;

	typedef struct Layer{
	DNNLayerType type;
	/**
	* a layer can have multiple inputs and one output.
	* 4 is just a big enough number for input operands (increase it if necessary),
	* do not use 'int32_t *input_operand_indexes', so we don't worry about mem leaks.
	*/
	int32_t input_operand_indexes[4];
	int32_t output_operand_index;
	void *params;
	} Layer;

	typedef struct DnnOperand{
	/**
	* there are two memory layouts, NHWC or NCHW, so we use dims,
	* dims[0] is Number.
	*/
	int32_t dims[4];

	/**
	* input/output/intermediate operand of the network
	*/
	DNNOperandType type;

	/**
	* support different kinds of data type such as float, half float, int8 etc,
	* first support float now.
	*/
	DNNDataType data_type;

	/**
	* NHWC if 1, otherwise NCHW.
	* let's first support NHWC only, this flag is for extensive usage.
	*/
	int8_t isNHWC;

	/**
	* to avoid possible memory leak, do not use char *name
	*/
	char name[128];

	/**
	* data pointer with data length in bytes.
	* usedNumbersLeft is only valid for intermediate operand,
	* it means how many layers still depend on this operand,
	* todo: the memory can be reused when usedNumbersLeft is zero.
	*/
	void *data;
	int32_t length;
	int32_t usedNumbersLeft;
	}DnnOperand;

	typedef struct InputParams{
	int height, width, channels;
	} InputParams;

	typedef struct NativeOptions{
	uint32_t conv2d_threads;
	} NativeOptions;

	typedef struct NativeContext {
	const AVClass *class;
	NativeOptions options;
	} NativeContext;

	// Represents simple feed-forward convolutional network.
	typedef struct NativeModel{
	NativeContext ctx;
	DNNModel *model;
	Layer *layers;
	int32_t layers_num;
	DnnOperand *operands;
	int32_t operands_num;
	} NativeModel;

	DNNModel ff_dnn_load_model_native(const char model_filename, const char options, void userdata);

	DNNReturnType ff_dnn_execute_model_native(const DNNModel model, const char input_name, AVFrame *in_frame,
	const char *output_names, uint32_t nb_output, AVFrame out_frame);

	void ff_dnn_free_model_native(DNNModel **model);

	// NOTE: User must check for error (return value <= 0) to handle
	// case like integer overflow.
	int32_t calculate_operand_data_length(const DnnOperand *oprd);
	int32_t calculate_operand_dims_count(const DnnOperand *oprd);
	#endif