blob: b7db5bb27f7fdae5e30c03def066999947dd36e0 [file] [log] [blame]
/*
* Copyright (C) 2011, Google Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*/
#include "third_party/blink/renderer/modules/webaudio/wave_shaper_dsp_kernel.h"

#include <algorithm>
#include <cmath>
#include <memory>

#include "build/build_config.h"
#include "third_party/blink/renderer/platform/audio/audio_utilities.h"
#include "third_party/blink/renderer/platform/audio/vector_math.h"
#include "third_party/blink/renderer/platform/wtf/threading.h"
namespace blink {
WaveShaperDSPKernel::WaveShaperDSPKernel(WaveShaperProcessor* processor)
: AudioDSPKernel(processor),
tail_time_(0),
// 4 times render size to handle 4x oversampling.
virtual_index_(4 * audio_utilities::kRenderQuantumFrames),
index_(4 * audio_utilities::kRenderQuantumFrames),
v1_(4 * audio_utilities::kRenderQuantumFrames),
v2_(4 * audio_utilities::kRenderQuantumFrames),
f_(4 * audio_utilities::kRenderQuantumFrames) {
if (processor->Oversample() != WaveShaperProcessor::kOverSampleNone)
LazyInitializeOversampling();
}
// Allocates the temporary buffers and the two up/down sampler pairs used by
// the 2x and 4x oversampling paths. Does nothing when |temp_buffer_| already
// exists, so it can be called repeatedly.
void WaveShaperDSPKernel::LazyInitializeOversampling() {
  // |temp_buffer_| is created first below, so its presence marks the whole
  // set of oversampling objects as initialized.
  if (temp_buffer_) {
    return;
  }

  const auto quantum_frames = audio_utilities::kRenderQuantumFrames;
  const auto frames_2x = quantum_frames * 2;
  const auto frames_4x = quantum_frames * 4;

  temp_buffer_ = std::make_unique<AudioFloatArray>(frames_2x);
  temp_buffer2_ = std::make_unique<AudioFloatArray>(frames_4x);

  // First stage: 1x <-> 2x.
  up_sampler_ = std::make_unique<UpSampler>(quantum_frames);
  down_sampler_ = std::make_unique<DownSampler>(frames_2x);

  // Second stage: 2x <-> 4x.
  up_sampler2_ = std::make_unique<UpSampler>(frames_2x);
  down_sampler2_ = std::make_unique<DownSampler>(frames_4x);
}
// Shapes |frames_to_process| samples from |source| into |destination|,
// dispatching to the plain, 2x or 4x routine according to the processor's
// current oversample setting.
void WaveShaperDSPKernel::Process(const float* source,
                                  float* destination,
                                  uint32_t frames_to_process) {
  const auto oversample_mode = GetWaveShaperProcessor()->Oversample();

  switch (oversample_mode) {
    case WaveShaperProcessor::kOverSampleNone:
      ProcessCurve(source, destination, frames_to_process);
      return;

    case WaveShaperProcessor::kOverSample2x:
      ProcessCurve2x(source, destination, frames_to_process);
      return;

    case WaveShaperProcessor::kOverSample4x:
      ProcessCurve4x(source, destination, frames_to_process);
      return;

    default:
      NOTREACHED();
  }
}
// Returns the shaped value for a single sample.
//
// |input| is mapped onto the curve so that -1 corresponds to curve_data[0],
// +1 to curve_data[curve_length - 1], and 0 to the center of the curve. The
// result is the linear interpolation between the two curve points that
// bracket that position. Inputs below -1 yield the first curve value and
// inputs at or above +1 yield the last one. A NaN |input| is propagated as
// NaN.
//
// |curve_data| must point to at least |curve_length| values and
// |curve_length| must be at least 1, since curve_data[0] and
// curve_data[curve_length - 1] are read without further checks.
double WaveShaperDSPKernel::WaveShaperCurveValue(float input,
                                                 const float* curve_data,
                                                 int curve_length) const {
  // A NaN position would fail both range checks below and end up being
  // converted to an integer index, which is undefined behavior. Propagate the
  // NaN instead of indexing with it.
  if (std::isnan(input)) {
    return input;
  }

  const int last_index = curve_length - 1;
  const double virtual_index = 0.5 * (input + 1) * last_index;

  if (virtual_index >= last_index) {
    // input >= 1, so use the last curve value.
    return curve_data[last_index];
  }

  // Written as a negated comparison so that a NaN position (infinite input
  // multiplied by a zero |last_index|) is caught here too and never reaches
  // the integer conversion below.
  if (!(virtual_index >= 0)) {
    // input < -1, so use curve[0].
    return curve_data[0];
  }

  // General case: 0 <= virtual_index < curve_length - 1, so both index1 and
  // index1 + 1 are valid positions in the curve.
  const unsigned index1 = static_cast<unsigned>(virtual_index);
  const unsigned index2 = index1 + 1;
  const double interpolation_factor = virtual_index - index1;
  const double value1 = curve_data[index1];
  const double value2 = curve_data[index2];

  return (1.0 - interpolation_factor) * value1 + interpolation_factor * value2;
}
// Block version of the waveshaper: for each of the first |frames_to_process|
// samples of |source|, computes the position on the curve corresponding to
// the sample and writes the linearly interpolated curve value to
// |destination|.
//
// |curve_data| points to |curve_length| curve values. A sample of -1 maps to
// curve_data[0] and +1 maps to curve_data[curve_length - 1]; the computed
// position is clipped to that range. |frames_to_process| must not exceed the
// size of the member scratch arrays (DCHECKed below).
//
// |source| is only read by the first vector operation and |destination| is
// only written by the last one; ProcessCurve2x/4x in this file pass the same
// buffer for both.
void WaveShaperDSPKernel::WaveShaperCurveValues(float* destination,
const float* source,
uint32_t frames_to_process,
const float* curve_data,
int curve_length) const {
DCHECK_LE(frames_to_process, virtual_index_.size());
// Index into the array computed from the source value.
float* virtual_index = virtual_index_.Data();
// virtual_index[k] =
// clampTo(0.5 * (source[k] + 1) * (curve_length - 1),
// 0.0f,
// static_cast<float>(curve_length - 1))
// Add 1 to source, putting the result in virtual_index.
vector_math::Vsadd(source, 1, 1, virtual_index, 1, frames_to_process);
// Scale virtual_index in place by (curve_length - 1) / 2.
vector_math::Vsmul(virtual_index, 1, 0.5 * (curve_length - 1), virtual_index,
1, frames_to_process);
// Clip virtual_index, in place.
vector_math::Vclip(virtual_index, 1, 0, curve_length - 1, virtual_index, 1,
frames_to_process);
// index = floor(virtual_index)
DCHECK_LE(frames_to_process, index_.size());
float* index = index_.Data();
// v1 and v2 hold the curve_data values at the two curve positions that
// bracket each source sample (index1 and index1 + 1). Both use member
// scratch arrays.
DCHECK_LE(frames_to_process, v1_.size());
DCHECK_LE(frames_to_process, v2_.size());
float* v1 = v1_.Data();
float* v2 = v2_.Data();
// Interpolation factor: virtual_index - index.
DCHECK_LE(frames_to_process, f_.size());
float* f = f_.Data();
int max_index = curve_length - 1;
// k counts the frames handled so far. The SIMD loops below (when compiled
// in) advance it in steps of 4; the scalar loop after them picks up from
// wherever they stopped.
unsigned k = 0;
#if defined(ARCH_CPU_X86_FAMILY)
{
int loop_limit = frames_to_process / 4;
// one = 1
__m128i one = _mm_set1_epi32(1);
// Do 4 elements at a time
for (int loop = 0; loop < loop_limit; ++loop, k += 4) {
// v = virtual_index[k]
__m128 v = _mm_loadu_ps(virtual_index + k);
// index1 = static_cast<int>(v);
__m128i index1 = _mm_cvttps_epi32(v);
// v = static_cast<float>(index1) and save result to index[k:k+3]
v = _mm_cvtepi32_ps(index1);
_mm_storeu_ps(&index[k], v);
// index2 = index1 + 1;
__m128i index2 = _mm_add_epi32(index1, one);
// Convert index1/index2 to arrays of 32-bit int values that are our
// array indices to use to get the curve data.
int32_t* i1 = reinterpret_cast<int32_t*>(&index1);
int32_t* i2 = reinterpret_cast<int32_t*>(&index2);
// Get the curve_data values and save them in v1 and v2,
// carefully clamping the indices. If the input is NaN, index1
// could be 0x80000000. (Unlike the NEON path, the value stored in
// index[k] above is not clamped.)
v1[k] = curve_data[clampTo(i1[0], 0, max_index)];
v2[k] = curve_data[clampTo(i2[0], 0, max_index)];
v1[k + 1] = curve_data[clampTo(i1[1], 0, max_index)];
v2[k + 1] = curve_data[clampTo(i2[1], 0, max_index)];
v1[k + 2] = curve_data[clampTo(i1[2], 0, max_index)];
v2[k + 2] = curve_data[clampTo(i2[2], 0, max_index)];
v1[k + 3] = curve_data[clampTo(i1[3], 0, max_index)];
v2[k + 3] = curve_data[clampTo(i2[3], 0, max_index)];
}
}
#elif defined(CPU_ARM_NEON)
{
int loop_limit = frames_to_process / 4;
// Neon constants:
// zero = 0
// one = 1
// max = max_index
int32x4_t zero = vdupq_n_s32(0);
int32x4_t one = vdupq_n_s32(1);
int32x4_t max = vdupq_n_s32(max_index);
// Do 4 elements at a time
for (int loop = 0; loop < loop_limit; ++loop, k += 4) {
// v = virtual_index
float32x4_t v = vld1q_f32(virtual_index + k);
// index1 = static_cast<int32_t>(v), then clamp to a valid index range for
// curve_data
int32x4_t index1 = vcvtq_s32_f32(v);
index1 = vmaxq_s32(vminq_s32(index1, max), zero);
// v = static_cast<float>(index1) and save it away for later use.
v = vcvtq_f32_s32(index1);
vst1q_f32(&index[k], v);
// index2 = index1 + 1, then clamp to a valid range for curve_data.
int32x4_t index2 = vaddq_s32(index1, one);
index2 = vmaxq_s32(vminq_s32(index2, max), zero);
// Save index1/2 so we can get the individual parts. Aligned to
// 16 bytes for vst1q instruction.
int32_t i1[4] __attribute__((aligned(16)));
int32_t i2[4] __attribute__((aligned(16)));
vst1q_s32(i1, index1);
vst1q_s32(i2, index2);
// Get curve elements corresponding to the indices.
v1[k] = curve_data[i1[0]];
v2[k] = curve_data[i2[0]];
v1[k + 1] = curve_data[i1[1]];
v2[k + 1] = curve_data[i2[1]];
v1[k + 2] = curve_data[i1[2]];
v2[k + 2] = curve_data[i2[2]];
v1[k + 3] = curve_data[i1[3]];
v2[k + 3] = curve_data[i2[3]];
}
}
#endif
// Scalar path: compute index1 and load the corresponding curve_data for
// every frame not already handled above. That is all frames when no SIMD
// path is compiled in, and otherwise the trailing frames_to_process % 4.
for (; k < frames_to_process; ++k) {
unsigned index1 =
clampTo(static_cast<unsigned>(virtual_index[k]), 0, max_index);
unsigned index2 = clampTo(index1 + 1, 0, max_index);
index[k] = index1;
v1[k] = curve_data[index1];
v2[k] = curve_data[index2];
}
// f[k] = virtual_index[k] - index[k]
vector_math::Vsub(virtual_index, 1, index, 1, f, 1, frames_to_process);
// Do the linear interpolation of the curve data:
// destination[k] = v1[k] + f[k]*(v2[k] - v1[k])
//
// 1. v2[k] = v2[k] - v1[k]
// 2. v2[k] = f[k]*v2[k] = f[k]*(v2[k] - v1[k])
// 3. destination[k] = v1[k] + v2[k]
// = v1[k] + f[k]*(v2[k] - v1[k])
vector_math::Vsub(v2, 1, v1, 1, v2, 1, frames_to_process);
vector_math::Vmul(f, 1, v2, 1, v2, 1, frames_to_process);
vector_math::Vadd(v2, 1, v1, 1, destination, 1, frames_to_process);
}
// Applies the processor's current curve to |frames_to_process| samples of
// |source|, writing the result to |destination|. When no curve is set, or the
// curve has no data, the input is copied to the output unchanged.
//
// |source| and |destination| may be the same buffer; ProcessCurve2x() and
// ProcessCurve4x() call this function in place.
void WaveShaperDSPKernel::ProcessCurve(const float* source,
                                       float* destination,
                                       uint32_t frames_to_process) {
  DCHECK(source);
  DCHECK(destination);
  DCHECK(GetWaveShaperProcessor());

  Vector<float>* curve = GetWaveShaperProcessor()->Curve();
  if (!curve) {
    // Act as "straight wire" pass-through if no curve is set. memmove rather
    // than memcpy because the oversampling paths pass identical source and
    // destination pointers, and memcpy is undefined for overlapping buffers.
    memmove(destination, source, sizeof(float) * frames_to_process);
    return;
  }

  const float* curve_data = curve->data();
  const int curve_length = curve->size();

  DCHECK(curve_data);

  if (!curve_data || !curve_length) {
    // Nothing to look up; pass the signal through (see the note on memmove
    // above).
    memmove(destination, source, sizeof(float) * frames_to_process);
    return;
  }

  // Apply waveshaping curve.
  WaveShaperCurveValues(destination, source, frames_to_process, curve_data,
                        curve_length);
}
// 2x oversampled shaping: up-samples one render quantum from |source| into
// |temp_buffer_|, applies the curve in place at the doubled rate, then
// down-samples the result into |destination|.
void WaveShaperDSPKernel::ProcessCurve2x(const float* source,
                                         float* destination,
                                         uint32_t frames_to_process) {
  DCHECK_EQ(frames_to_process, audio_utilities::kRenderQuantumFrames);

  const uint32_t frames_2x = frames_to_process * 2;
  float* const buffer_2x = temp_buffer_->Data();

  up_sampler_->Process(source, buffer_2x, frames_to_process);

  // The curve is applied to the 2x data, reading and writing the same buffer.
  ProcessCurve(buffer_2x, buffer_2x, frames_2x);

  down_sampler_->Process(buffer_2x, destination, frames_2x);
}
// 4x oversampled shaping: up-samples one render quantum from |source| in two
// stages (via |temp_buffer_|, then |temp_buffer2_|), applies the curve in
// place at the 4x rate, then down-samples in two stages into |destination|.
void WaveShaperDSPKernel::ProcessCurve4x(const float* source,
                                         float* destination,
                                         uint32_t frames_to_process) {
  DCHECK_EQ(frames_to_process, audio_utilities::kRenderQuantumFrames);

  const uint32_t frames_2x = frames_to_process * 2;
  const uint32_t frames_4x = frames_to_process * 4;
  float* const buffer_2x = temp_buffer_->Data();
  float* const buffer_4x = temp_buffer2_->Data();

  // Up: 1x -> 2x -> 4x.
  up_sampler_->Process(source, buffer_2x, frames_to_process);
  up_sampler2_->Process(buffer_2x, buffer_4x, frames_2x);

  // The curve is applied to the 4x data, reading and writing the same buffer.
  ProcessCurve(buffer_4x, buffer_4x, frames_4x);

  // Down: 4x -> 2x -> 1x.
  down_sampler2_->Process(buffer_4x, buffer_2x, frames_4x);
  down_sampler_->Process(buffer_2x, destination, frames_2x);
}
// Resets all four up/down samplers. Does nothing when |up_sampler_| has not
// been created.
void WaveShaperDSPKernel::Reset() {
  // Only |up_sampler_| is tested; LazyInitializeOversampling() creates the
  // four samplers together.
  if (!up_sampler_) {
    return;
  }

  up_sampler_->Reset();
  down_sampler_->Reset();
  up_sampler2_->Reset();
  down_sampler2_->Reset();
}
// Reports whether tail processing is required. This kernel answers true
// unconditionally, including when TailTime() and LatencyTime() are both zero.
bool WaveShaperDSPKernel::RequiresTailProcessing() const {
  return true;
}
// Returns the stored tail time, |tail_time_|, which the constructor
// initializes to 0.
double WaveShaperDSPKernel::TailTime() const {
  return tail_time_;
}
// Returns the latency introduced by the active oversampling mode, computed as
// the total sampler latency in frames divided by SampleRate(). Without
// oversampling the latency is zero.
double WaveShaperDSPKernel::LatencyTime() const {
  // The processor is reached through a non-const pointer to this kernel
  // (presumably because GetWaveShaperProcessor() is not const-qualified).
  auto* mutable_this = const_cast<WaveShaperDSPKernel*>(this);
  const auto oversample_mode =
      mutable_this->GetWaveShaperProcessor()->Oversample();

  size_t total_latency_frames = 0;

  switch (oversample_mode) {
    case WaveShaperProcessor::kOverSampleNone:
      break;

    case WaveShaperProcessor::kOverSample2x:
      total_latency_frames += up_sampler_->LatencyFrames();
      total_latency_frames += down_sampler_->LatencyFrames();
      break;

    case WaveShaperProcessor::kOverSample4x: {
      // First stage (1x <-> 2x) latency.
      size_t first_stage_frames = 0;
      first_stage_frames += up_sampler_->LatencyFrames();
      first_stage_frames += down_sampler_->LatencyFrames();

      // Second stage (2x <-> 4x) latency, halved to bring it back down to
      // the regular sample rate.
      const size_t second_stage_frames =
          (up_sampler2_->LatencyFrames() + down_sampler2_->LatencyFrames()) /
          2;

      total_latency_frames += first_stage_frames;
      total_latency_frames += second_stage_frames;
      break;
    }

    default:
      NOTREACHED();
  }

  return static_cast<double>(total_latency_frames) / SampleRate();
}
} // namespace blink