/**
 * VoiceLevelingProcessor — AudioWorkletProcessor that implements
 * broadcast-grade per-speaker automatic gain control (AGC).
 *
 * ═══════════════════════════════════════════════════════════════════
 *  DSP DESIGN NOTES
 * ═══════════════════════════════════════════════════════════════════
 *
 * This processor mimics WebRTC's Gain Controller 2 (AGC2) behaviour
 * using a lightweight algorithm suitable for real-time voice in an
 * AudioWorklet thread.
 *
 * Pipeline (per 128-sample render quantum ≈ 2.67 ms @ 48 kHz):
 *
 *   1. RMS level estimation   (short-term envelope, channel 0 only)
 *   2. Silence gate           (freeze or decay gain below noise floor)
 *   3. Target gain compute    (desired dBFS → linear gain)
 *   4. Gain clamp             (max boost ceiling, −6 dB attenuation floor)
 *   5. Gain smoothing         (exponential attack / release)
 *   6. Soft-clip limiter      (prevent digital overs)
 *
 * Key properties:
 *   • No per-frame allocation — all state is pre-allocated.
 *   • Synchronous processing — no message passing in hot path.
 *   • Uses Float32 throughout — native AudioWorklet format.
 *   • 128-sample quantum fits within 10 ms at 48 kHz (2.67 ms).
 *
 * The processor receives configuration via AudioWorkletNode.port
 * messages and applies them on the next render quantum.
 *
 * ═══════════════════════════════════════════════════════════════════
 */

/* ──────────────────────────────────────────────────────────────── */
/*  Constants                                                       */
/* ──────────────────────────────────────────────────────────────── */

/** Processor name registered with `registerProcessor`. */
const PROCESSOR_NAME = 'VoiceLevelingProcessor';

/**
 * Web Audio render quantum size — the number of samples processed
 * in each call to `process()`. The smoothing coefficients are derived
 * from this value, so they assume one gain update per 128 samples.
 */
const RENDER_QUANTUM_FRAMES = 128;

/**
 * Minimum RMS level (linear) below which the input is considered
 * silence. No new gain target is computed while the signal is this
 * quiet. Roughly −60 dBFS.
 */
const DEFAULT_SILENCE_THRESHOLD = 0.001;

/**
 * The target RMS level in dBFS. −18 dBFS is a comfortable
 * conversational loudness for headphone listening.
 */
const DEFAULT_TARGET_DBFS = -18;

/** Default maximum gain boost in dB. */
const DEFAULT_MAX_GAIN_DB = 12;

/** Soft-clip ceiling — prevents digital overs. */
const SOFT_CLIP_THRESHOLD = 0.95;

/**
 * Lowest gain the leveler will ever request (linear, ≈ −6 dB).
 * Loud signals are pulled down, but never by more than this.
 */
const MIN_GAIN_LINEAR = 0.5;

/**
 * Speed presets: attack and release time constants (seconds).
 *
 * Attack  = how fast gain *decreases* when a loud signal arrives.
 * Release = how fast gain *increases* when the signal gets quieter.
 *
 * Asymmetric: fast attack prevents clipping, slow release sounds
 * natural and avoids "pumping".
 */
const SPEED_PRESETS = Object.freeze({
  slow: Object.freeze({ attack: 0.015, release: 0.800 }),
  medium: Object.freeze({ attack: 0.010, release: 0.400 }),
  fast: Object.freeze({ attack: 0.005, release: 0.150 }),
});

/**
 * AGC strength presets: scale the computed gain adjustment.
 * 1.0 = full correction toward target; lower = gentler leveling.
 */
const STRENGTH_PRESETS = Object.freeze({
  low: 0.5,
  medium: 0.75,
  high: 1.0,
});

/**
 * Time constant (seconds) used to decay the gain toward 1.0 (unity)
 * during silence. The decay only runs when the noise gate is enabled;
 * with the gate disabled the gain is held at its last value instead.
 * Decaying prevents the gain from sitting at a large value after long
 * silence and then blasting when speech resumes.
 */
const SILENCE_DECAY_TC = 2.0;

/* ──────────────────────────────────────────────────────────────── */
/*  Helpers                                                         */
/* ──────────────────────────────────────────────────────────────── */

/**
 * Convert decibels to linear gain.
 *
 * @param {number} db Level in dB.
 * @returns {number} Linear gain factor.
 */
function dbToLinear(db) {
  return Math.pow(10, db / 20);
}

/**
 * Convert linear amplitude to dBFS.
 *
 * @param {number} linear Linear amplitude.
 * @returns {number} Level in dBFS; −Infinity for zero or negative input.
 */
function linearToDb(linear) {
  if (linear <= 0) return -Infinity;
  return 20 * Math.log10(linear);
}

/**
 * Compute the exponential smoothing coefficient (α) for a given
 * time constant and **frame rate** (not sample rate!).
 *
 * Because the envelope / gain update runs once per render quantum
 * (128 samples), the rate passed here must be frames-per-second
 * (sampleRate / 128), NOT samples-per-second. Using the raw
 * sampleRate would produce absurdly small α values, making the
 * AGC appear frozen.
 *
 *   α = 1 − e^(−1 / (tc * fps))
 *
 * Larger α → faster response.
 *
 * @param {number} tc  Time constant in seconds.
 * @param {number} fps Frame rate (render quanta per second).
 * @returns {number} Smoothing coefficient (0–1); 1 when `tc` ≤ 0.
 */
function timeConstantToAlpha(tc, fps) {
  if (tc <= 0) return 1.0;
  return 1.0 - Math.exp(-1.0 / (tc * fps));
}

/**
 * Report whether `key` names an entry defined directly on a preset table.
 *
 * A plain `table[key] != null` lookup also matches inherited members
 * such as `constructor` or `toString`, which would install a function
 * as a numeric parameter and turn the gain into NaN.
 *
 * @param {object} table Preset table.
 * @param {unknown} key  Candidate preset name from a message.
 * @returns {boolean} `true` only for own string keys of `table`.
 */
function hasPreset(table, key) {
  return typeof key === 'string' && Object.prototype.hasOwnProperty.call(table, key);
}

/**
 * Report whether a message value is a number that can be clamped.
 *
 * NaN is rejected because `Math.min`/`Math.max` propagate it, so it
 * would bypass the range clamp. ±Infinity is accepted and clamps to
 * the nearest bound.
 *
 * @param {unknown} value Candidate value from a message.
 * @returns {boolean}
 */
function isUsableNumber(value) {
  return typeof value === 'number' && !Number.isNaN(value);
}

/**
 * Allocate a Float32Array, backed by a SharedArrayBuffer when the
 * environment exposes one and by ordinary memory otherwise.
 *
 * @param {number} length Number of elements.
 * @returns {Float32Array}
 */
function allocateBuffer(length) {
  try {
    if (typeof SharedArrayBuffer !== 'undefined') {
      return new Float32Array(new SharedArrayBuffer(length * 4));
    }
  } catch {
    // Deliberate best-effort: if the shared allocation fails, use the
    // regular allocation below rather than failing construction.
  }
  return new Float32Array(length);
}

/**
 * Soft-clip function (tanh-based) that prevents digital overs
 * while preserving signal shape.
 *
 * Below the threshold the signal passes through unchanged.
 * Above it, tanh compression is applied symmetrically.
 *
 * @param {number} sample Input sample.
 * @returns {number} Clipped sample, with magnitude at most 1.
 */
function softClip(sample) {
  const abs = Math.abs(sample);
  if (abs <= SOFT_CLIP_THRESHOLD) return sample;

  const sign = sample >= 0 ? 1 : -1;
  const headroom = 1 - SOFT_CLIP_THRESHOLD;
  // Map (threshold..∞) → (threshold..1) using tanh
  const excess = (abs - SOFT_CLIP_THRESHOLD) / headroom;
  return sign * (SOFT_CLIP_THRESHOLD + headroom * Math.tanh(excess));
}

/**
 * Copy every input channel to the matching output channel unchanged.
 * Channels missing on either side are skipped.
 *
 * @param {Float32Array[]} input  Input channels.
 * @param {Float32Array[]} output Output channels.
 */
function copyChannels(input, output) {
  for (let ch = 0; ch < input.length; ch++) {
    const src = input[ch];
    const dst = output[ch];
    if (!src || !dst) continue;
    for (let i = 0; i < src.length; i++) {
      dst[i] = src[i];
    }
  }
}

/**
 * Write `softClip(sample * gain)` for every input channel into the
 * matching output channel. Channels missing on either side are skipped.
 *
 * @param {Float32Array[]} input  Input channels.
 * @param {Float32Array[]} output Output channels.
 * @param {number} gain           Linear gain shared by all channels.
 */
function applyGainToChannels(input, output, gain) {
  for (let ch = 0; ch < input.length; ch++) {
    const src = input[ch];
    const dst = output[ch];
    if (!src || !dst) continue;
    for (let i = 0; i < src.length; i++) {
      dst[i] = softClip(src[i] * gain);
    }
  }
}

/* ──────────────────────────────────────────────────────────────── */
/*  Processor                                                       */
/* ──────────────────────────────────────────────────────────────── */

class VoiceLevelingProcessor extends AudioWorkletProcessor {
  /* ── State ──────────────────────────────────────────────────── */

  /** Whether processing is enabled (bypass when false). */
  _enabled = true;

  /** Target loudness in dBFS. */
  _targetDbfs = DEFAULT_TARGET_DBFS;

  /** Maximum gain boost in dB. */
  _maxGainDb = DEFAULT_MAX_GAIN_DB;

  /** Linear ceiling for the gain multiplier. */
  _maxGainLinear = dbToLinear(DEFAULT_MAX_GAIN_DB);

  /** AGC strength factor (0–1). Scales the gain correction. */
  _strength = STRENGTH_PRESETS.medium;

  /** Whether silence decays the gain toward unity instead of freezing it. */
  _noiseGateEnabled = false;

  /** RMS threshold below which input is treated as silence. */
  _silenceThreshold = DEFAULT_SILENCE_THRESHOLD;

  /** Attack smoothing coefficient. */
  _alphaAttack = 0;

  /** Release smoothing coefficient. */
  _alphaRelease = 0;

  /** Silence decay smoothing coefficient. */
  _alphaSilenceDecay = 0;

  /**
   * Running mean-square envelope (squared domain, so the sqrt is
   * taken once per quantum). Smoothed with a one-pole filter.
   */
  _envelopeSq = 0;

  /** Current applied gain (linear). Smoothed toward target. */
  _currentGain = 1.0;

  /**
   * Pre-allocated buffer of one render quantum (128 samples).
   * NOTE(review): nothing in this file reads or writes it at present.
   */
  _scratchBuffer = allocateBuffer(RENDER_QUANTUM_FRAMES);

  /* ── Constructor ────────────────────────────────────────────── */

  constructor(options) {
    super(options);

    // Compute smoothing coefficients from default speed
    this._applySpeed('medium');

    // Listen for configuration changes from the main thread.
    this.port.onmessage = (event) => this._handleMessage(event.data);
  }

  /* ── Configuration ──────────────────────────────────────────── */

  /**
   * Handle a configuration message from the main thread.
   *
   * Accepted keys (each optional; anything else is ignored):
   *   enabled    : boolean
   *   targetDbfs : number, clamped to -30 … -12
   *   maxGainDb  : number, clamped to 3 … 20
   *   strength   : 'low' | 'medium' | 'high'
   *   speed      : 'slow' | 'medium' | 'fast'
   *   noiseGate  : boolean
   *
   * NaN numbers and unknown preset names are ignored so that a
   * malformed message cannot push NaN into the gain computation.
   *
   * @param {object} msg
   */
  _handleMessage(msg) {
    if (msg == null || typeof msg !== 'object') return;

    if (typeof msg.enabled === 'boolean') {
      this._enabled = msg.enabled;
      if (!msg.enabled) {
        // Reset gain to unity on disable so re-enabling starts clean
        this._currentGain = 1.0;
        this._envelopeSq = 0;
      }
    }

    if (isUsableNumber(msg.targetDbfs)) {
      this._targetDbfs = Math.max(-30, Math.min(-12, msg.targetDbfs));
    }

    if (isUsableNumber(msg.maxGainDb)) {
      const clamped = Math.max(3, Math.min(20, msg.maxGainDb));
      this._maxGainDb = clamped;
      this._maxGainLinear = dbToLinear(clamped);
    }

    if (hasPreset(STRENGTH_PRESETS, msg.strength)) {
      this._strength = STRENGTH_PRESETS[msg.strength];
    }

    if (hasPreset(SPEED_PRESETS, msg.speed)) {
      this._applySpeed(msg.speed);
    }

    if (typeof msg.noiseGate === 'boolean') {
      this._noiseGateEnabled = msg.noiseGate;
    }
  }

  /**
   * Recompute attack/release/silence-decay coefficients for
   * the current sample rate.
   *
   * IMPORTANT: We use frames-per-second (sampleRate / 128), NOT
   * the raw sampleRate, because the smoothing filter is applied
   * once per render quantum — not once per sample.
   *
   * @param {'slow' | 'medium' | 'fast'} preset
   */
  _applySpeed(preset) {
    const { attack, release } = SPEED_PRESETS[preset];
    const fps = sampleRate / RENDER_QUANTUM_FRAMES;

    this._alphaAttack = timeConstantToAlpha(attack, fps);
    this._alphaRelease = timeConstantToAlpha(release, fps);
    this._alphaSilenceDecay = timeConstantToAlpha(SILENCE_DECAY_TC, fps);
  }

  /* ── DSP ────────────────────────────────────────────────────── */

  /**
   * Advance the level envelope and the smoothed gain by one quantum.
   *
   * @param {number} frameMeanSq Finite mean-square level of the quantum.
   */
  _updateGain(frameMeanSq) {
    // ── 1. Level envelope ────────────────────────────────────
    // One-pole smoothing: attack for rising levels, release for falling.
    const envelopeAlpha = frameMeanSq > this._envelopeSq
      ? this._alphaAttack
      : this._alphaRelease;
    this._envelopeSq += envelopeAlpha * (frameMeanSq - this._envelopeSq);

    // Current smoothed RMS (linear); floored so the log below stays finite.
    const rms = Math.sqrt(Math.max(this._envelopeSq, 1e-12));

    // ── 2. Silence gate ──────────────────────────────────────
    // Below the threshold no new target is computed. With the noise
    // gate enabled the gain decays slowly toward unity; otherwise it
    // is held at its current value.
    if (rms < this._silenceThreshold) {
      if (this._noiseGateEnabled) {
        this._currentGain += this._alphaSilenceDecay * (1.0 - this._currentGain);
      }
      return;
    }

    // ── 3. Target gain ───────────────────────────────────────
    // Desired gain = 10^(strength · (targetDbfs − currentDbfs) / 20),
    // so "low" strength applies only 50 % of the correction.
    const errorDb = this._targetDbfs - linearToDb(rms);
    const unclampedGain = dbToLinear(errorDb * this._strength);

    // ── 4. Clamp ─────────────────────────────────────────────
    // Cap the boost at the configured maximum and the attenuation
    // at MIN_GAIN_LINEAR (≈ −6 dB).
    const desiredGain = Math.max(
      MIN_GAIN_LINEAR,
      Math.min(this._maxGainLinear, unclampedGain),
    );

    // ── 5. Gain smoothing ────────────────────────────────────
    // Fast attack when the gain must drop (loud signal arrived),
    // slow release when it may rise (signal got quieter).
    const gainAlpha = desiredGain < this._currentGain
      ? this._alphaAttack
      : this._alphaRelease;
    this._currentGain += gainAlpha * (desiredGain - this._currentGain);
  }

  /**
   * Main audio processing callback.
   *
   * The level is measured on channel 0 of the first input; the
   * resulting gain is applied to every channel of that input.
   *
   * @param {Float32Array[][]} inputs  Input channels.
   * @param {Float32Array[][]} outputs Output channels.
   * @returns {boolean} `true` to keep the processor alive.
   */
  process(inputs, outputs) {
    const input = inputs[0];
    const output = outputs[0];

    // No input → leave the output buffers untouched
    if (!input || input.length === 0 || !input[0]) {
      return true;
    }

    // Nothing to write into
    if (!output || !output[0]) {
      return true;
    }

    // ── Bypass mode ──────────────────────────────────────────
    if (!this._enabled) {
      copyChannels(input, output);
      return true;
    }

    // ── Level measurement ────────────────────────────────────
    // Mean-square of this quantum; the sqrt is deferred to the
    // single per-quantum gain computation.
    const inputChannel = input[0];
    const numSamples = inputChannel.length;
    let sumSq = 0;
    for (let i = 0; i < numSamples; i++) {
      const s = inputChannel[i];
      sumSq += s * s;
    }
    const frameMeanSq = sumSq / numSamples;

    // A NaN/Infinity sample (or an empty channel, 0/0) would latch the
    // envelope and gain at NaN for every later quantum, so such frames
    // leave the state untouched.
    if (Number.isFinite(frameMeanSq)) {
      this._updateGain(frameMeanSq);
    }

    // ── 6. Apply gain & soft-clip ────────────────────────────
    applyGainToChannels(input, output, this._currentGain);

    return true;
  }
}

registerProcessor(PROCESSOR_NAME, VoiceLevelingProcessor);