libavfilter/af_volumedetect.c - manifest_repos/ffmpeg - Git at Google

 /*
  * Copyright (c) 2012 Nicolas George
  *
  * This file is part of FFmpeg.
  *
  * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public License
  * as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
  * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public License
  * along with FFmpeg; if not, write to the Free Software Foundation, Inc.,
  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */

 #include "libavutil/channel_layout.h"
 #include "libavutil/avassert.h"
 #include "audio.h"
 #include "avfilter.h"
 #include "internal.h"

 /**
  * Private state of the volumedetect filter: a running histogram of the
  * 16-bit sample values accumulated by filter_frame() and reported by
  * print_stats().
  */
 typedef struct VolDetectContext {
     /**
      * Number of samples at each PCM value.
      * histogram[0x8000 + i] is the number of samples at value i.
      * The extra element is there for symmetry: int16_t values only reach
      * indices 0..0xFFFF, but print_stats() probes both
      * histogram[0x8000 - max_volume] and histogram[0x8000 + max_volume]
      * starting at max_volume = 0x8000, so index 0x10000 must exist.
      */
     uint64_t histogram[0x10001];
 } VolDetectContext;

 /**
  * Declare the formats supported by the filter: signed 16-bit samples
  * (packed or planar) with any channel count.
  *
  * @param ctx filter instance whose common formats are being set
  * @return the result of ff_set_common_formats() on success, AVERROR(ENOMEM)
  *         if one of the lists cannot be allocated, or the negative error
  *         returned by ff_set_common_channel_layouts()
  */
 static int query_formats(AVFilterContext *ctx)
 {
     static const enum AVSampleFormat sample_fmts[] = {
         AV_SAMPLE_FMT_S16,
         AV_SAMPLE_FMT_S16P,
         AV_SAMPLE_FMT_NONE
     };
     AVFilterFormats *formats;
     AVFilterChannelLayouts *layouts;
     int ret;

     /*
      * Each list is created right before it is handed to ff_set_common_*(),
      * so an early return never leaves a previously allocated list behind.
      * (Allocating the sample format list first made it leak whenever the
      * channel layout steps below failed.)
      */
     layouts = ff_all_channel_counts();
     if (!layouts)
         return AVERROR(ENOMEM);
     ret = ff_set_common_channel_layouts(ctx, layouts);
     if (ret < 0)
         return ret;

     formats = ff_make_format_list(sample_fmts);
     if (!formats)
         return AVERROR(ENOMEM);

     return ff_set_common_formats(ctx, formats);
 }

 /**
  * Account every sample of an incoming frame in the histogram, then pass the
  * frame on unchanged to the first output.
  *
  * @param inlink  input link the frame arrived on
  * @param samples frame holding 16-bit samples
  * @return the value returned by ff_filter_frame()
  */
 static int filter_frame(AVFilterLink *inlink, AVFrame *samples)
 {
     AVFilterContext *filter = inlink->dst;
     VolDetectContext *state = filter->priv;
     int plane_count = samples->channels;
     int plane_len   = samples->nb_samples;
     int p, n;

     /* Non-planar data: every channel is read from plane 0, so walk a
      * single plane of nb_samples * channels values. */
     if (!av_sample_fmt_is_planar(samples->format)) {
         plane_len   = samples->nb_samples * samples->channels;
         plane_count = 1;
     }

     for (p = 0; p < plane_count; p++) {
         const int16_t *data = (const int16_t *)samples->extended_data[p];

         /* Offset by 0x8000 so the signed value maps to a valid index. */
         for (n = 0; n < plane_len; n++)
             state->histogram[data[n] + 0x8000]++;
     }

     return ff_filter_frame(filter->outputs[0], samples);
 }

 #define MAX_DB 91

 static inline double logdb(uint64_t v)
 {
     double d = v / (double)(0x8000 * 0x8000);
     if (!v)
         return MAX_DB;
     return -log10(d) * 10;
 }

 /**
  * Log, at AV_LOG_INFO level, the statistics derived from the histogram:
  * the number of samples, the mean volume, the maximum volume and the
  * loudest 1 dB histogram buckets.
  *
  * Nothing beyond the sample count is printed when no sample was recorded.
  *
  * @param ctx filter instance whose private VolDetectContext is reported
  */
 static void print_stats(AVFilterContext *ctx)
 {
     VolDetectContext *vd = ctx->priv;
     int i, max_volume, shift;
     uint64_t nb_samples = 0, power = 0, nb_samples_shift = 0, sum = 0;
     uint64_t histdb[MAX_DB + 1] = { 0 };

     for (i = 0; i < 0x10000; i++)
         nb_samples += vd->histogram[i];
     /* The counters are uint64_t, so they are printed with PRIu64. */
     av_log(ctx, AV_LOG_INFO, "n_samples: %"PRIu64"\n", nb_samples);
     if (!nb_samples)
         return;

     /* If nb_samples > 1<<34, there is a risk of overflow in the
        multiplication or the sum: shift all histogram values to avoid that.
        The total number of samples must be recomputed to avoid rounding
        errors. */
     shift = av_log2(nb_samples >> 33);
     for (i = 0; i < 0x10000; i++) {
         nb_samples_shift += vd->histogram[i] >> shift;
         power += (i - 0x8000) * (i - 0x8000) * (vd->histogram[i] >> shift);
     }
     if (!nb_samples_shift)
         return;
     /* Mean of the squared sample values, rounded to nearest. */
     power = (power + nb_samples_shift / 2) / nb_samples_shift;
     av_assert0(power <= 0x8000 * 0x8000);
     av_log(ctx, AV_LOG_INFO, "mean_volume: %.1f dB\n", -logdb(power));

     /* Largest magnitude, positive or negative, that was seen at least once. */
     max_volume = 0x8000;
     while (max_volume > 0 && !vd->histogram[0x8000 + max_volume] &&
                              !vd->histogram[0x8000 - max_volume])
         max_volume--;
     av_log(ctx, AV_LOG_INFO, "max_volume: %.1f dB\n", -logdb(max_volume * max_volume));

     /* Regroup the per-value counts into buckets of 1 dB. */
     for (i = 0; i < 0x10000; i++)
         histdb[(int)logdb((i - 0x8000) * (i - 0x8000))] += vd->histogram[i];
     /* Skip the empty buckets at the loud end... */
     for (i = 0; i <= MAX_DB && !histdb[i]; i++);
     /* ...then print buckets until the ones already printed hold
        nb_samples / 1000 samples. */
     for (; i <= MAX_DB && sum < nb_samples / 1000; i++) {
         av_log(ctx, AV_LOG_INFO, "histogram_%ddb: %"PRIu64"\n", i, histdb[i]);
         sum += histdb[i];
     }
 }

 /**
  * Teardown callback (registered as .uninit in ff_af_volumedetect): logs the
  * statistics collected so far through print_stats().
  */
 static av_cold void uninit(AVFilterContext *ctx)
 {
     print_stats(ctx);
 }

 /* Input pads: one audio pad whose frames are handled by filter_frame().
  * The list ends with a { NULL } entry. */
 static const AVFilterPad volumedetect_inputs[] = {
     {
         .name         = "default",
         .type         = AVMEDIA_TYPE_AUDIO,
         .filter_frame = filter_frame,
     },
     { NULL }
 };

 /* Output pads: one audio pad; filter_frame() forwards each input frame to
  * it. The list ends with a { NULL } entry. */
 static const AVFilterPad volumedetect_outputs[] = {
     {
         .name = "default",
         .type = AVMEDIA_TYPE_AUDIO,
     },
     { NULL }
 };

 /* Filter definition for "volumedetect": ties the private context size, the
  * format negotiation and teardown callbacks, and the pad lists together. */
 AVFilter ff_af_volumedetect = {
     .name          = "volumedetect",
     .description   = NULL_IF_CONFIG_SMALL("Detect audio volume."),
     .priv_size     = sizeof(VolDetectContext),
     .query_formats = query_formats,
     .uninit        = uninit,
     .inputs        = volumedetect_inputs,
     .outputs       = volumedetect_outputs,
 };
	/*
	* Copyright (c) 2012 Nicolas George
	*
	* This file is part of FFmpeg.
	*
	* FFmpeg is free software; you can redistribute it and/or
	* modify it under the terms of the GNU Lesser General Public License
	* as published by the Free Software Foundation; either
	* version 2.1 of the License, or (at your option) any later version.
	*
	* FFmpeg is distributed in the hope that it will be useful,
	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	* GNU Lesser General Public License for more details.
	*
	* You should have received a copy of the GNU Lesser General Public License
	* along with FFmpeg; if not, write to the Free Software Foundation, Inc.,
	* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
	*/

	#include "libavutil/channel_layout.h"
	#include "libavutil/avassert.h"
	#include "audio.h"
	#include "avfilter.h"
	#include "internal.h"

	/**
	 * Private state of the volumedetect filter: a running histogram of the
	 * 16-bit sample values accumulated by filter_frame() and reported by
	 * print_stats().
	 */
	typedef struct VolDetectContext {
	/**
	* Number of samples at each PCM value.
	* histogram[0x8000 + i] is the number of samples at value i.
	* The extra element is there for symmetry: int16_t values only reach
	* indices 0..0xFFFF, but print_stats() probes both
	* histogram[0x8000 - max_volume] and histogram[0x8000 + max_volume]
	* starting at max_volume = 0x8000, so index 0x10000 must exist.
	*/
	uint64_t histogram[0x10001];
	} VolDetectContext;

	/**
	 * Declare the formats supported by the filter: signed 16-bit samples
	 * (packed or planar) with any channel count.
	 *
	 * @param ctx filter instance whose common formats are being set
	 * @return the result of ff_set_common_formats() on success,
	 *         AVERROR(ENOMEM) if one of the lists cannot be allocated, or the
	 *         negative error returned by ff_set_common_channel_layouts()
	 */
	static int query_formats(AVFilterContext *ctx)
	{
	    static const enum AVSampleFormat sample_fmts[] = {
	        AV_SAMPLE_FMT_S16,
	        AV_SAMPLE_FMT_S16P,
	        AV_SAMPLE_FMT_NONE
	    };
	    AVFilterFormats *formats;
	    AVFilterChannelLayouts *layouts;
	    int ret;

	    /*
	     * Each list is created right before it is handed to
	     * ff_set_common_*(), so an early return never leaves a previously
	     * allocated list behind. (Allocating the sample format list first
	     * made it leak whenever the channel layout steps below failed.)
	     */
	    layouts = ff_all_channel_counts();
	    if (!layouts)
	        return AVERROR(ENOMEM);
	    ret = ff_set_common_channel_layouts(ctx, layouts);
	    if (ret < 0)
	        return ret;

	    formats = ff_make_format_list(sample_fmts);
	    if (!formats)
	        return AVERROR(ENOMEM);

	    return ff_set_common_formats(ctx, formats);
	}

	/**
	 * Account every sample of an incoming frame in the histogram, then pass
	 * the frame on unchanged to the first output.
	 *
	 * Both parameters are pointers: the body dereferences them with "->" and
	 * forwards the frame to ff_filter_frame().
	 *
	 * @param inlink  input link the frame arrived on
	 * @param samples frame holding 16-bit samples
	 * @return the value returned by ff_filter_frame()
	 */
	static int filter_frame(AVFilterLink *inlink, AVFrame *samples)
	{
	    AVFilterContext *ctx = inlink->dst;
	    VolDetectContext *vd = ctx->priv;
	    int nb_samples  = samples->nb_samples;
	    int nb_channels = samples->channels;
	    int nb_planes   = nb_channels;
	    int plane, i;
	    int16_t *pcm;

	    /* Non-planar data: every channel is read from plane 0, so walk a
	     * single plane of nb_samples * nb_channels values. */
	    if (!av_sample_fmt_is_planar(samples->format)) {
	        nb_samples *= nb_channels;
	        nb_planes = 1;
	    }
	    for (plane = 0; plane < nb_planes; plane++) {
	        pcm = (int16_t *)samples->extended_data[plane];
	        /* Offset by 0x8000 so the signed value maps to a valid index. */
	        for (i = 0; i < nb_samples; i++)
	            vd->histogram[pcm[i] + 0x8000]++;
	    }

	    return ff_filter_frame(inlink->dst->outputs[0], samples);
	}

	#define MAX_DB 91

	static inline double logdb(uint64_t v)
	{
	double d = v / (double)(0x8000 * 0x8000);
	if (!v)
	return MAX_DB;
	return -log10(d) * 10;
	}

	/**
	 * Log, at AV_LOG_INFO level, the statistics derived from the histogram:
	 * the number of samples, the mean volume, the maximum volume and the
	 * loudest 1 dB histogram buckets.
	 *
	 * Nothing beyond the sample count is printed when no sample was recorded.
	 *
	 * @param ctx filter instance whose private VolDetectContext is reported
	 */
	static void print_stats(AVFilterContext *ctx)
	{
	    VolDetectContext *vd = ctx->priv;
	    int i, max_volume, shift;
	    uint64_t nb_samples = 0, power = 0, nb_samples_shift = 0, sum = 0;
	    uint64_t histdb[MAX_DB + 1] = { 0 };

	    for (i = 0; i < 0x10000; i++)
	        nb_samples += vd->histogram[i];
	    /* The counters are uint64_t, so they are printed with PRIu64. */
	    av_log(ctx, AV_LOG_INFO, "n_samples: %"PRIu64"\n", nb_samples);
	    if (!nb_samples)
	        return;

	    /* If nb_samples > 1<<34, there is a risk of overflow in the
	       multiplication or the sum: shift all histogram values to avoid that.
	       The total number of samples must be recomputed to avoid rounding
	       errors. */
	    shift = av_log2(nb_samples >> 33);
	    for (i = 0; i < 0x10000; i++) {
	        nb_samples_shift += vd->histogram[i] >> shift;
	        power += (i - 0x8000) * (i - 0x8000) * (vd->histogram[i] >> shift);
	    }
	    if (!nb_samples_shift)
	        return;
	    /* Mean of the squared sample values, rounded to nearest. */
	    power = (power + nb_samples_shift / 2) / nb_samples_shift;
	    av_assert0(power <= 0x8000 * 0x8000);
	    av_log(ctx, AV_LOG_INFO, "mean_volume: %.1f dB\n", -logdb(power));

	    /* Largest magnitude, positive or negative, seen at least once. */
	    max_volume = 0x8000;
	    while (max_volume > 0 && !vd->histogram[0x8000 + max_volume] &&
	                             !vd->histogram[0x8000 - max_volume])
	        max_volume--;
	    av_log(ctx, AV_LOG_INFO, "max_volume: %.1f dB\n", -logdb(max_volume * max_volume));

	    /* Regroup the per-value counts into buckets of 1 dB. */
	    for (i = 0; i < 0x10000; i++)
	        histdb[(int)logdb((i - 0x8000) * (i - 0x8000))] += vd->histogram[i];
	    /* Skip the empty buckets at the loud end... */
	    for (i = 0; i <= MAX_DB && !histdb[i]; i++);
	    /* ...then print buckets until the ones already printed hold
	       nb_samples / 1000 samples. */
	    for (; i <= MAX_DB && sum < nb_samples / 1000; i++) {
	        av_log(ctx, AV_LOG_INFO, "histogram_%ddb: %"PRIu64"\n", i, histdb[i]);
	        sum += histdb[i];
	    }
	}

	/**
	 * Teardown callback (registered as .uninit in ff_af_volumedetect): logs
	 * the statistics collected so far through print_stats().
	 */
	static av_cold void uninit(AVFilterContext *ctx)
	{
	print_stats(ctx);
	}

	/* Input pads: one audio pad whose frames are handled by filter_frame().
	 * The list ends with a { NULL } entry. */
	static const AVFilterPad volumedetect_inputs[] = {
	{
	.name = "default",
	.type = AVMEDIA_TYPE_AUDIO,
	.filter_frame = filter_frame,
	},
	{ NULL }
	};

	/* Output pads: one audio pad; filter_frame() forwards each input frame
	 * to it. The list ends with a { NULL } entry. */
	static const AVFilterPad volumedetect_outputs[] = {
	{
	.name = "default",
	.type = AVMEDIA_TYPE_AUDIO,
	},
	{ NULL }
	};

	/* Filter definition for "volumedetect": ties the private context size,
	 * the format negotiation and teardown callbacks, and the pad lists
	 * together. */
	AVFilter ff_af_volumedetect = {
	.name = "volumedetect",
	.description = NULL_IF_CONFIG_SMALL("Detect audio volume."),
	.priv_size = sizeof(VolDetectContext),
	.query_formats = query_formats,
	.uninit = uninit,
	.inputs = volumedetect_inputs,
	.outputs = volumedetect_outputs,
	};