// ═══ file: public/voice-leveling-worklet.js (new) ══════════════════

/**
 * VoiceLevelingProcessor — AudioWorkletProcessor implementing
 * broadcast-style per-speaker automatic gain control (AGC), modelled
 * on WebRTC's Gain Controller 2 but light enough for the worklet
 * thread.
 *
 * Pipeline per 128-sample render quantum (≈ 2.67 ms @ 48 kHz):
 *   1. RMS level estimation (short-term envelope)
 *   2. Silence gate (freeze gain below the noise floor)
 *   3. Target gain computation (desired dBFS → linear gain)
 *   4. Gain smoothing (exponential attack / release)
 *   5. Max-gain clamp (prevents runaway boost)
 *   6. Soft-clip limiter (prevents digital overs)
 *
 * Key properties:
 *   • No per-frame allocation — all state is pre-allocated.
 *   • Synchronous processing — no message passing in the hot path.
 *   • Float32 throughout — the native AudioWorklet sample format.
 *
 * Configuration arrives via AudioWorkletNode.port messages and takes
 * effect on the next render quantum.
 */

/* ── Constants ─────────────────────────────────────────────────── */

/** Processor name registered via `registerProcessor`. */
const PROCESSOR_NAME = 'VoiceLevelingProcessor';

/**
 * Web Audio render quantum — the number of samples handled by each
 * `process()` call. The AudioWorklet spec mandates 128.
 */
const RENDER_QUANTUM_FRAMES = 128;

/**
 * Minimum linear RMS treated as signal; below this (≈ −60 dBFS) the
 * input counts as silence and the gain is frozen or decayed.
 */
const DEFAULT_SILENCE_THRESHOLD = 0.001;

/**
 * Target RMS loudness in dBFS. −18 dBFS is a comfortable
 * conversational level for headphone listening.
 */
const DEFAULT_TARGET_DBFS = -18;

/** Default cap on gain boost, in dB. */
const DEFAULT_MAX_GAIN_DB = 12;

/** Soft-clip ceiling that prevents digital overs. */
const SOFT_CLIP_THRESHOLD = 0.95;

/**
 * Speed presets — attack/release time constants in seconds.
 *
 * Attack governs how fast gain FALLS when a loud signal arrives;
 * release governs how fast it RISES when the signal gets quieter.
 * The asymmetry (fast attack, slow release) prevents clipping while
 * avoiding audible "pumping".
 */
const SPEED_PRESETS = {
  slow: { attack: 0.015, release: 0.800 },
  medium: { attack: 0.010, release: 0.400 },
  fast: { attack: 0.005, release: 0.150 },
};

/**
 * AGC strength presets — scale factor on the computed gain
 * correction. 1.0 = full correction toward target; lower = gentler.
 */
const STRENGTH_PRESETS = {
  low: 0.5,
  medium: 0.75,
  high: 1.0,
};

/**
 * Time constant (seconds) for decaying the gain toward unity while
 * silence is detected, so speech resuming after a long pause does not
 * blast through a stale high gain.
 */
const SILENCE_DECAY_TC = 2.0;

/* ── Helpers ───────────────────────────────────────────────────── */

/** Decibels → linear gain. */
function dbToLinear(db) {
  return Math.pow(10, db / 20);
}
/** Linear amplitude → dBFS (−Infinity for non-positive input). */
function linearToDb(linear) {
  return linear > 0 ? 20 * Math.log10(linear) : -Infinity;
}

/**
 * One-pole smoothing coefficient α for a given time constant.
 *
 * NOTE: the rate is frames-per-second (sampleRate / 128), NOT the raw
 * sample rate — the envelope/gain update runs once per render
 * quantum, not once per sample. Passing the raw sampleRate would
 * yield absurdly small α values and an AGC that appears frozen.
 *
 *   α = 1 − e^(−1 / (tc · fps));  larger α → faster response.
 *
 * @param {number} tc  Time constant in seconds.
 * @param {number} fps Render quanta per second.
 * @returns {number} Smoothing coefficient in (0, 1].
 */
function timeConstantToAlpha(tc, fps) {
  if (tc <= 0) return 1.0;
  return 1.0 - Math.exp(-1.0 / (tc * fps));
}

/**
 * Allocate a Float32Array, backed by a SharedArrayBuffer when the
 * environment provides one, otherwise by an ordinary ArrayBuffer.
 *
 * @param {number} length Element count.
 * @returns {Float32Array}
 */
function allocateBuffer(length) {
  if (typeof SharedArrayBuffer !== 'undefined') {
    try {
      return new Float32Array(new SharedArrayBuffer(length * 4));
    } catch { /* fall back below */ }
  }
  return new Float32Array(length);
}

/**
 * Symmetric tanh-based soft clipper. Samples at or below the
 * threshold pass through untouched; louder samples are compressed
 * into the (threshold, 1) range so the output never reaches full
 * scale.
 *
 * @param {number} sample Input sample.
 * @returns {number} Soft-clipped sample.
 */
function softClip(sample) {
  const magnitude = Math.abs(sample);
  if (magnitude <= SOFT_CLIP_THRESHOLD) return sample;
  const headroom = 1 - SOFT_CLIP_THRESHOLD;
  const excess = (magnitude - SOFT_CLIP_THRESHOLD) / headroom;
  const shaped = SOFT_CLIP_THRESHOLD + headroom * Math.tanh(excess);
  return sample >= 0 ? shaped : -shaped;
}
1 : -1; + // Map (threshold..∞) → (threshold..1) using tanh + const excess = (abs - SOFT_CLIP_THRESHOLD) / (1 - SOFT_CLIP_THRESHOLD); + return sign * (SOFT_CLIP_THRESHOLD + (1 - SOFT_CLIP_THRESHOLD) * Math.tanh(excess)); +} + +/* ──────────────────────────────────────────────────────────────── */ +/* Processor */ +/* ──────────────────────────────────────────────────────────────── */ + +class VoiceLevelingProcessor extends AudioWorkletProcessor { + + /* ── State ──────────────────────────────────────────────────── */ + + /** Whether processing is enabled (bypass when false). */ + _enabled = true; + + /** Target loudness in dBFS. */ + _targetDbfs = DEFAULT_TARGET_DBFS; + + /** Maximum gain boost in dB. */ + _maxGainDb = DEFAULT_MAX_GAIN_DB; + + /** Linear ceiling for the gain multiplier. */ + _maxGainLinear = dbToLinear(DEFAULT_MAX_GAIN_DB); + + /** AGC strength factor (0–1). Scales the gain correction. */ + _strength = STRENGTH_PRESETS.medium; + + /** Whether the silence/noise gate is active. */ + _noiseGateEnabled = false; + + /** RMS threshold below which input is treated as silence. */ + _silenceThreshold = DEFAULT_SILENCE_THRESHOLD; + + /** Attack smoothing coefficient. */ + _alphaAttack = 0; + + /** Release smoothing coefficient. */ + _alphaRelease = 0; + + /** Silence decay smoothing coefficient. */ + _alphaSilenceDecay = 0; + + /** + * Running RMS envelope (squared, to avoid sqrt every frame). + * Smoothed with a one-pole filter. + */ + _envelopeSq = 0; + + /** Current applied gain (linear). Smoothed toward target. */ + _currentGain = 1.0; + + /** + * Pre-allocated buffer used for RMS computation. + * Sized to the largest possible render quantum (128 samples). 
+ */ + _scratchBuffer = allocateBuffer(128); + + /* ── Constructor ────────────────────────────────────────────── */ + + constructor(options) { + super(options); + + // Compute smoothing coefficients from default speed + this._applySpeed('medium'); + + // Listen for configuration changes from the main thread. + // Messages are consumed before the next render quantum. + this.port.onmessage = (event) => this._handleMessage(event.data); + } + + /* ── Configuration ──────────────────────────────────────────── */ + + /** + * Handle a configuration message from the main thread. + * + * Accepted keys: + * enabled : boolean + * targetDbfs : number (-30 … -12) + * maxGainDb : number (3 … 20) + * strength : 'low' | 'medium' | 'high' + * speed : 'slow' | 'medium' | 'fast' + * noiseGate : boolean + * + * @param {object} msg + */ + _handleMessage(msg) { + if (msg == null || typeof msg !== 'object') return; + + if (typeof msg.enabled === 'boolean') { + this._enabled = msg.enabled; + if (!msg.enabled) { + // Reset gain to unity on disable so re-enabling starts clean + this._currentGain = 1.0; + this._envelopeSq = 0; + } + } + + if (typeof msg.targetDbfs === 'number') { + this._targetDbfs = Math.max(-30, Math.min(-12, msg.targetDbfs)); + } + + if (typeof msg.maxGainDb === 'number') { + const clamped = Math.max(3, Math.min(20, msg.maxGainDb)); + this._maxGainDb = clamped; + this._maxGainLinear = dbToLinear(clamped); + } + + if (typeof msg.strength === 'string' && STRENGTH_PRESETS[msg.strength] != null) { + this._strength = STRENGTH_PRESETS[msg.strength]; + } + + if (typeof msg.speed === 'string' && SPEED_PRESETS[msg.speed] != null) { + this._applySpeed(msg.speed); + } + + if (typeof msg.noiseGate === 'boolean') { + this._noiseGateEnabled = msg.noiseGate; + } + } + + /** + * Recompute attack/release/silence-decay coefficients for + * the current sample rate. 
+ * + * IMPORTANT: We use frames-per-second (sampleRate / 128), NOT + * the raw sampleRate, because the smoothing filter is applied + * once per render quantum — not once per sample. + * + * @param {'slow' | 'medium' | 'fast'} preset + */ + _applySpeed(preset) { + const { attack, release } = SPEED_PRESETS[preset]; + const fps = sampleRate / RENDER_QUANTUM_FRAMES; + this._alphaAttack = timeConstantToAlpha(attack, fps); + this._alphaRelease = timeConstantToAlpha(release, fps); + this._alphaSilenceDecay = timeConstantToAlpha(SILENCE_DECAY_TC, fps); + } + + /* ── DSP ────────────────────────────────────────────────────── */ + + /** + * Main audio processing callback. + * + * @param {Float32Array[][]} inputs Input channels. + * @param {Float32Array[][]} outputs Output channels. + * @returns {boolean} `true` to keep the processor alive. + */ + process(inputs, outputs) { + const input = inputs[0]; + const output = outputs[0]; + + // No input → silence pass-through + if (!input || input.length === 0 || !input[0]) { + return true; + } + + const inputChannel = input[0]; + const outputChannel = output[0]; + const numSamples = inputChannel.length; + + // ── Bypass mode ────────────────────────────────────────── + if (!this._enabled) { + // Copy input → output unchanged + for (let i = 0; i < numSamples; i++) { + outputChannel[i] = inputChannel[i]; + } + // Also copy any additional channels (stereo, etc.) + for (let ch = 1; ch < input.length; ch++) { + if (output[ch] && input[ch]) { + for (let i = 0; i < numSamples; i++) { + output[ch][i] = input[ch][i]; + } + } + } + return true; + } + + // ── 1. RMS level estimation ────────────────────────────── + // + // Compute the RMS of this render quantum and smooth it with + // a one-pole IIR filter (exponential moving average). + // + // We work in the squared domain to avoid a sqrt per sample; + // the sqrt is taken only once per quantum for the gain calc. 
+ + let sumSq = 0; + for (let i = 0; i < numSamples; i++) { + const s = inputChannel[i]; + sumSq += s * s; + } + const frameMeanSq = sumSq / numSamples; + + // Smooth envelope: use attack for rising levels, release for falling + const alpha = frameMeanSq > this._envelopeSq + ? this._alphaAttack + : this._alphaRelease; + this._envelopeSq += alpha * (frameMeanSq - this._envelopeSq); + + // Current smoothed RMS (linear) + const rms = Math.sqrt(Math.max(this._envelopeSq, 1e-12)); + + // ── 2. Silence gate ────────────────────────────────────── + // + // If the RMS is below the silence threshold, do NOT compute + // a new gain target. Instead, decay the current gain slowly + // toward unity (1.0) so we don't slam the listener when + // speech resumes. + + const isSilence = rms < this._silenceThreshold; + + if (isSilence && this._noiseGateEnabled) { + // Decay gain toward 1.0 + this._currentGain += this._alphaSilenceDecay * (1.0 - this._currentGain); + } else if (!isSilence) { + // ── 3. Target gain computation ─────────────────────── + // + // Desired gain = 10^((targetDbfs − currentDbfs) / 20) + // + // We scale the correction by the strength factor so that + // "low" strength applies only 50 % of the correction. + + const currentDbfs = linearToDb(rms); + const errorDb = this._targetDbfs - currentDbfs; + + // Scale the correction by strength. + // A strength of 1.0 means "correct fully to target". + const correctionDb = errorDb * this._strength; + let desiredGain = dbToLinear(correctionDb); + + // Clamp to max gain + if (desiredGain > this._maxGainLinear) { + desiredGain = this._maxGainLinear; + } + // Never attenuate below a certain floor (we're leveling UP, + // but very loud signals still need to be pulled down). + // Allow attenuation down to −6 dB. + if (desiredGain < 0.5) { + desiredGain = 0.5; + } + + // ── 4. Gain smoothing ────────────────────────────── + // + // Exponentially interpolate the current gain toward the + // desired gain. 
Use fast attack (gain DOWN) and slow + // release (gain UP) for natural dynamics. + + const gainAlpha = desiredGain < this._currentGain + ? this._alphaAttack // Gain is decreasing (loud signal arrived) + : this._alphaRelease; // Gain is increasing (signal got quieter) + + this._currentGain += gainAlpha * (desiredGain - this._currentGain); + } + // If isSilence && !noiseGateEnabled → gain stays as-is (frozen) + + // ── 5. Apply gain & soft-clip ───────────────────────────── + const gain = this._currentGain; + for (let i = 0; i < numSamples; i++) { + outputChannel[i] = softClip(inputChannel[i] * gain); + } + + // Copy any additional channels with same gain + for (let ch = 1; ch < input.length; ch++) { + if (output[ch] && input[ch]) { + for (let i = 0; i < numSamples; i++) { + output[ch][i] = softClip(input[ch][i] * gain); + } + } + } + + return true; + } +} + +registerProcessor(PROCESSOR_NAME, VoiceLevelingProcessor); diff --git a/src/app/core/constants.ts b/src/app/core/constants.ts index c44e7ed..4481186 100644 --- a/src/app/core/constants.ts +++ b/src/app/core/constants.ts @@ -34,3 +34,6 @@ export const DEFAULT_VOLUME = 100; /** Default search debounce time in milliseconds. */ export const SEARCH_DEBOUNCE_MS = 300; + +/** Key used to persist voice leveling (AGC) settings. 
*/ +export const STORAGE_KEY_VOICE_LEVELING_SETTINGS = 'metoyou_voice_leveling_settings'; diff --git a/src/app/core/services/index.ts b/src/app/core/services/index.ts index e51e1bb..8e6bc44 100644 --- a/src/app/core/services/index.ts +++ b/src/app/core/services/index.ts @@ -8,3 +8,4 @@ export * from './voice-session.service'; export * from './voice-activity.service'; export * from './external-link.service'; export * from './settings-modal.service'; +export * from './voice-leveling.service'; diff --git a/src/app/core/services/voice-leveling.service.ts b/src/app/core/services/voice-leveling.service.ts new file mode 100644 index 0000000..1c1f49c --- /dev/null +++ b/src/app/core/services/voice-leveling.service.ts @@ -0,0 +1,258 @@ +/** + * VoiceLevelingService — Angular service that manages the + * per-speaker voice leveling (AGC) system. + * + * ═══════════════════════════════════════════════════════════════════ + * RESPONSIBILITIES + * ═══════════════════════════════════════════════════════════════════ + * + * 1. Owns the {@link VoiceLevelingManager} singleton and proxies + * its methods to the rest of the application. + * + * 2. Persists user settings in localStorage and restores them on + * construction so preferences survive across sessions. + * + * 3. Exposes reactive Angular signals for the current settings so + * UI components can bind declaratively. + * + * 4. Provides an `enable` / `disable` / `disableAll` API that + * the voice-controls component uses to insert and remove the + * AGC pipeline from the remote audio playback chain — mirroring + * the {@link NoiseReductionManager} toggle pattern. + * + * 5. Fires a callback when the user toggles the enabled state so + * the voice-controls component can rebuild audio elements live. 
+ * + * ═══════════════════════════════════════════════════════════════════ + */ +import { Injectable, signal, computed, OnDestroy } from '@angular/core'; +import { + VoiceLevelingManager, + VoiceLevelingSettings, + DEFAULT_VOICE_LEVELING_SETTINGS, +} from './webrtc/voice-leveling.manager'; +import { WebRTCLogger } from './webrtc/webrtc-logger'; +import { STORAGE_KEY_VOICE_LEVELING_SETTINGS } from '../constants'; + +@Injectable({ providedIn: 'root' }) +export class VoiceLevelingService implements OnDestroy { + /** The underlying per-speaker pipeline manager. */ + private readonly manager: VoiceLevelingManager; + + /* ── Reactive signals ────────────────────────────────────────── */ + + private readonly _enabled = signal(DEFAULT_VOICE_LEVELING_SETTINGS.enabled); + private readonly _targetDbfs = signal(DEFAULT_VOICE_LEVELING_SETTINGS.targetDbfs); + private readonly _strength = signal<'low' | 'medium' | 'high'>(DEFAULT_VOICE_LEVELING_SETTINGS.strength); + private readonly _maxGainDb = signal(DEFAULT_VOICE_LEVELING_SETTINGS.maxGainDb); + private readonly _speed = signal<'slow' | 'medium' | 'fast'>(DEFAULT_VOICE_LEVELING_SETTINGS.speed); + private readonly _noiseGate = signal(DEFAULT_VOICE_LEVELING_SETTINGS.noiseGate); + + /** Whether voice leveling is enabled. */ + readonly enabled = computed(() => this._enabled()); + + /** Target loudness in dBFS. */ + readonly targetDbfs = computed(() => this._targetDbfs()); + + /** AGC strength preset. */ + readonly strength = computed(() => this._strength()); + + /** Maximum gain boost in dB. */ + readonly maxGainDb = computed(() => this._maxGainDb()); + + /** Gain response speed preset. */ + readonly speed = computed(() => this._speed()); + + /** Whether the noise floor gate is active. */ + readonly noiseGate = computed(() => this._noiseGate()); + + /** Number of speakers currently being processed. 
*/ + readonly activeSpeakerCount = computed(() => this.manager.activePipelineCount); + + /* ── Enabled-change callbacks ────────────────────────────────── */ + + private _enabledChangeCallbacks: Array<(enabled: boolean) => void> = []; + + constructor() { + const logger = new WebRTCLogger(/* debugEnabled */ false); + this.manager = new VoiceLevelingManager(logger); + + // Restore persisted settings + this._loadSettings(); + } + + /* ── Settings API ────────────────────────────────────────────── */ + + /** + * Toggle the enabled state. + * + * Unlike the manager's `enable`/`disable` which operate per-peer, + * this is the user-facing master toggle. It persists the setting + * and notifies all registered callbacks so that the voice-controls + * component can rebuild Audio elements immediately. + */ + setEnabled(enabled: boolean): void { + this._enabled.set(enabled); + this._saveSettings(); + // Notify listeners so the voice-controls component can rebuild + this._enabledChangeCallbacks.forEach((cb) => cb(enabled)); + } + + /** Set the target loudness in dBFS (−30 to −12). */ + setTargetDbfs(value: number): void { + const clamped = Math.max(-30, Math.min(-12, value)); + this._targetDbfs.set(clamped); + this._pushAndPersist({ targetDbfs: clamped }); + } + + /** Set the AGC strength preset. */ + setStrength(strength: 'low' | 'medium' | 'high'): void { + this._strength.set(strength); + this._pushAndPersist({ strength }); + } + + /** Set the maximum gain boost in dB (3 to 20). */ + setMaxGainDb(value: number): void { + const clamped = Math.max(3, Math.min(20, value)); + this._maxGainDb.set(clamped); + this._pushAndPersist({ maxGainDb: clamped }); + } + + /** Set the gain response speed preset. */ + setSpeed(speed: 'slow' | 'medium' | 'fast'): void { + this._speed.set(speed); + this._pushAndPersist({ speed }); + } + + /** Toggle the noise floor gate. 
*/ + setNoiseGate(enabled: boolean): void { + this._noiseGate.set(enabled); + this._pushAndPersist({ noiseGate: enabled }); + } + + /* ── Pipeline API (mirrors NoiseReductionManager pattern) ───── */ + + /** + * Build the AGC pipeline for a remote speaker and return the + * leveled stream. The caller sets this as `audio.srcObject`. + * + * @param peerId The remote peer's unique identifier. + * @param stream The remote peer's raw {@link MediaStream}. + * @returns The leveled {@link MediaStream} for playback. + */ + async enable(peerId: string, stream: MediaStream): Promise { + return this.manager.enable(peerId, stream); + } + + /** + * Tear down the AGC pipeline for a single speaker. + * The caller swaps the Audio element back to the raw stream. + * + * @param peerId The peer to clean up. + */ + disable(peerId: string): void { + this.manager.disable(peerId); + } + + /** Tear down all speaker pipelines at once. */ + disableAll(): void { + this.manager.disableAll(); + } + + /** + * Set the post-AGC volume for a specific speaker. + * + * @param peerId The speaker's peer ID. + * @param volume Normalised volume (0–1). + */ + setSpeakerVolume(peerId: string, volume: number): void { + this.manager.setSpeakerVolume(peerId, volume); + } + + /** + * Set the master volume applied after AGC to all speakers. + * + * @param volume Normalised volume (0–1). + */ + setMasterVolume(volume: number): void { + this.manager.setMasterVolume(volume); + } + + /* ── Live toggle notification ────────────────────────────────── */ + + /** + * Register a callback that fires whenever the user toggles the + * enabled state. Returns an unsubscribe function. 
+ */ + onEnabledChange(callback: (enabled: boolean) => void): () => void { + this._enabledChangeCallbacks.push(callback); + return () => { + this._enabledChangeCallbacks = this._enabledChangeCallbacks.filter( + (cb) => cb !== callback, + ); + }; + } + + /* ── Persistence ─────────────────────────────────────────────── */ + + /** Push a partial config update to the manager and persist. */ + private _pushAndPersist(partial: Partial): void { + this.manager.updateSettings(partial); + this._saveSettings(); + } + + /** Persist all current settings to localStorage. */ + private _saveSettings(): void { + try { + const settings: VoiceLevelingSettings = { + enabled: this._enabled(), + targetDbfs: this._targetDbfs(), + strength: this._strength(), + maxGainDb: this._maxGainDb(), + speed: this._speed(), + noiseGate: this._noiseGate(), + }; + localStorage.setItem( + STORAGE_KEY_VOICE_LEVELING_SETTINGS, + JSON.stringify(settings), + ); + } catch { /* localStorage unavailable — ignore */ } + } + + /** Load settings from localStorage and apply to the manager. 
*/ + private _loadSettings(): void { + try { + const raw = localStorage.getItem(STORAGE_KEY_VOICE_LEVELING_SETTINGS); + if (!raw) return; + const saved = JSON.parse(raw) as Partial; + + if (typeof saved.enabled === 'boolean') this._enabled.set(saved.enabled); + if (typeof saved.targetDbfs === 'number') this._targetDbfs.set(saved.targetDbfs); + if (saved.strength === 'low' || saved.strength === 'medium' || saved.strength === 'high') { + this._strength.set(saved.strength); + } + if (typeof saved.maxGainDb === 'number') this._maxGainDb.set(saved.maxGainDb); + if (saved.speed === 'slow' || saved.speed === 'medium' || saved.speed === 'fast') { + this._speed.set(saved.speed); + } + if (typeof saved.noiseGate === 'boolean') this._noiseGate.set(saved.noiseGate); + + // Push the restored settings to the manager + this.manager.updateSettings({ + enabled: this._enabled(), + targetDbfs: this._targetDbfs(), + strength: this._strength(), + maxGainDb: this._maxGainDb(), + speed: this._speed(), + noiseGate: this._noiseGate(), + }); + } catch { /* corrupted data — use defaults */ } + } + + /* ── Cleanup ─────────────────────────────────────────────────── */ + + ngOnDestroy(): void { + this.manager.destroy(); + this._enabledChangeCallbacks = []; + } +} diff --git a/src/app/core/services/webrtc/index.ts b/src/app/core/services/webrtc/index.ts index ca30142..ab1b400 100644 --- a/src/app/core/services/webrtc/index.ts +++ b/src/app/core/services/webrtc/index.ts @@ -12,3 +12,4 @@ export * from './peer-connection.manager'; export * from './media.manager'; export * from './screen-share.manager'; export * from './noise-reduction.manager'; +export * from './voice-leveling.manager'; diff --git a/src/app/core/services/webrtc/voice-leveling.manager.ts b/src/app/core/services/webrtc/voice-leveling.manager.ts new file mode 100644 index 0000000..45b9831 --- /dev/null +++ b/src/app/core/services/webrtc/voice-leveling.manager.ts @@ -0,0 +1,359 @@ +/** + * VoiceLevelingManager — manages per-speaker 
automatic gain control + * pipelines for remote voice streams. + * + * ═══════════════════════════════════════════════════════════════════ + * ARCHITECTURE + * ═══════════════════════════════════════════════════════════════════ + * + * For every remote MediaStream a dedicated processing chain is built: + * + * Remote MediaStreamTrack + * ↓ + * MediaStreamSource (AudioContext) + * ↓ + * AudioWorkletNode (VoiceLevelingProcessor — per-speaker AGC) + * ↓ + * GainNode (post fine-tuning — master volume knob) + * ↓ + * MediaStreamDestination → leveled MediaStream + * + * Each speaker gets its own AudioWorkletNode instance so that the + * AGC adapts independently to each person's microphone level. + * + * A fallback mode using {@link DynamicsCompressorNode} is provided + * for browsers that don't support AudioWorklet or SharedArrayBuffer. + * + * ═══════════════════════════════════════════════════════════════════ + * DESIGN — mirrors the NoiseReductionManager pattern + * ═══════════════════════════════════════════════════════════════════ + * + * • `enable(peerId, rawStream)` builds the pipeline and returns a + * processed stream. + * • `disable(peerId)` tears down the pipeline. The caller swaps + * the Audio element's srcObject back to the raw stream. + * • `disableAll()` tears down every pipeline at once. + * + * The calling component keeps a reference to the original raw stream + * and swaps the Audio element's `srcObject` between the raw stream + * and the leveled stream when the user toggles the feature — exactly + * like noise reduction does for the local mic. + * + * ═══════════════════════════════════════════════════════════════════ + */ +import { WebRTCLogger } from './webrtc-logger'; + +/* ──────────────────────────────────────────────────────────────── */ +/* Types */ +/* ──────────────────────────────────────────────────────────────── */ + +/** User-configurable voice leveling parameters. */ +export interface VoiceLevelingSettings { + /** Master on/off toggle. 
When false, audio passes through unchanged. */ + enabled: boolean; + /** Target loudness in dBFS (−30 … −12). Default −18. */ + targetDbfs: number; + /** AGC strength preset. Default 'medium'. */ + strength: 'low' | 'medium' | 'high'; + /** Maximum gain boost in dB (3 … 20). Default 12. */ + maxGainDb: number; + /** Gain response speed preset. Default 'medium'. */ + speed: 'slow' | 'medium' | 'fast'; + /** Whether the silence noise gate is active. Default false. */ + noiseGate: boolean; +} + +/** Default settings used when none are explicitly provided. */ +export const DEFAULT_VOICE_LEVELING_SETTINGS: VoiceLevelingSettings = { + enabled: false, + targetDbfs: -18, + strength: 'medium', + maxGainDb: 12, + speed: 'medium', + noiseGate: false, +}; + +/** + * Internal bookkeeping for a single speaker's processing chain. + */ +interface SpeakerPipeline { + ctx: AudioContext; + source: MediaStreamAudioSourceNode; + workletNode: AudioWorkletNode | null; + compressorNode: DynamicsCompressorNode | null; + gainNode: GainNode; + destination: MediaStreamAudioDestinationNode; + originalStream: MediaStream; + isFallback: boolean; +} + +/** AudioWorklet module path (served from public/). */ +const WORKLET_MODULE_PATH = 'voice-leveling-worklet.js'; + +/** Processor name — must match `registerProcessor` in the worklet. */ +const WORKLET_PROCESSOR_NAME = 'VoiceLevelingProcessor'; + +/* ──────────────────────────────────────────────────────────────── */ +/* Manager */ +/* ──────────────────────────────────────────────────────────────── */ + +export class VoiceLevelingManager { + /** Active per-speaker pipelines keyed by peer ID. */ + private readonly pipelines = new Map(); + + /** Cached DSP settings pushed to worklets. */ + private _settings: VoiceLevelingSettings = { ...DEFAULT_VOICE_LEVELING_SETTINGS }; + + /** Whether the AudioWorklet module is available. */ + private _workletAvailable: boolean | null = null; + + /** Shared AudioContext (avoids browser per-page limits). 
*/ + private _sharedCtx: AudioContext | null = null; + + /** Whether the worklet module has been loaded. */ + private _workletLoaded = false; + + constructor(private readonly logger: WebRTCLogger) {} + + /* ── Public API ─────────────────────────────────────────────── */ + + get settings(): Readonly { + return this._settings; + } + + get activePeerIds(): string[] { + return Array.from(this.pipelines.keys()); + } + + get activePipelineCount(): number { + return this.pipelines.size; + } + + /** + * Update DSP settings and propagate to all active worklets. + * Only provided keys are updated; the rest stay unchanged. + */ + updateSettings(partial: Partial): void { + this._settings = { ...this._settings, ...partial }; + this.pipelines.forEach((p) => this._pushSettingsToPipeline(p)); + } + + /** + * Enable voice leveling for a single speaker. + * + * Builds the processing pipeline and returns the leveled + * {@link MediaStream}. The caller sets this as the Audio + * element's `srcObject`. + * + * If a pipeline already exists for this peer with the **same** + * raw stream, the existing leveled stream is returned (no rebuild). + * + * @param peerId Remote peer identifier. + * @param stream The remote peer's raw MediaStream. + * @returns The leveled MediaStream (or raw on failure). 
+ */ + async enable(peerId: string, stream: MediaStream): Promise { + // Reuse existing pipeline if it targets the same stream + const existing = this.pipelines.get(peerId); + if (existing && existing.originalStream === stream) { + return existing.destination.stream; + } + + // Tear down stale pipeline for this peer + if (existing) { + this._disposePipeline(existing); + this.pipelines.delete(peerId); + } + + // No audio tracks → nothing to process + if (stream.getAudioTracks().length === 0) { + this.logger.info('VoiceLeveling: no audio tracks, skipping', { peerId }); + return stream; + } + + try { + const pipeline = await this._buildPipeline(stream); + this.pipelines.set(peerId, pipeline); + this.logger.info('VoiceLeveling: pipeline created', { + peerId, + fallback: pipeline.isFallback, + }); + return pipeline.destination.stream; + } catch (err) { + this.logger.error('VoiceLeveling: pipeline build failed, returning raw stream', err); + return stream; + } + } + + /** + * Disable voice leveling for a single speaker. + * + * Tears down the pipeline. The caller is responsible for swapping + * the Audio element's `srcObject` back to the raw stream. + */ + disable(peerId: string): void { + const pipeline = this.pipelines.get(peerId); + if (!pipeline) return; + this._disposePipeline(pipeline); + this.pipelines.delete(peerId); + this.logger.info('VoiceLeveling: pipeline removed', { peerId }); + } + + /** Tear down ALL speaker pipelines. 
*/ + disableAll(): void { + this.pipelines.forEach((p) => this._disposePipeline(p)); + this.pipelines.clear(); + } + + setSpeakerVolume(peerId: string, volume: number): void { + const pipeline = this.pipelines.get(peerId); + if (!pipeline) return; + pipeline.gainNode.gain.setValueAtTime( + Math.max(0, Math.min(1, volume)), + pipeline.ctx.currentTime, + ); + } + + setMasterVolume(volume: number): void { + const clamped = Math.max(0, Math.min(1, volume)); + this.pipelines.forEach((pipeline) => { + pipeline.gainNode.gain.setValueAtTime(clamped, pipeline.ctx.currentTime); + }); + } + + /** Tear down all pipelines and release all resources. */ + destroy(): void { + this.disableAll(); + if (this._sharedCtx && this._sharedCtx.state !== 'closed') { + this._sharedCtx.close().catch(() => { /* best-effort */ }); + } + this._sharedCtx = null; + this._workletLoaded = false; + this._workletAvailable = null; + } + + /* ── Pipeline construction ──────────────────────────────────── */ + + private async _buildPipeline(stream: MediaStream): Promise { + const ctx = await this._getOrCreateContext(); + + if (ctx.state === 'suspended') { + await ctx.resume(); + } + + const source = ctx.createMediaStreamSource(stream); + const gainNode = ctx.createGain(); + gainNode.gain.value = 1.0; + const destination = ctx.createMediaStreamDestination(); + + const workletOk = await this._ensureWorkletLoaded(ctx); + + if (workletOk) { + const workletNode = new AudioWorkletNode(ctx, WORKLET_PROCESSOR_NAME); + + source.connect(workletNode); + workletNode.connect(gainNode); + gainNode.connect(destination); + + const pipeline: SpeakerPipeline = { + ctx, + source, + workletNode, + compressorNode: null, + gainNode, + destination, + originalStream: stream, + isFallback: false, + }; + + this._pushSettingsToPipeline(pipeline); + return pipeline; + } else { + this.logger.warn('VoiceLeveling: AudioWorklet unavailable, using fallback compressor'); + const compressor = this._createFallbackCompressor(ctx); + + 
source.connect(compressor);
+      compressor.connect(gainNode);
+      gainNode.connect(destination);
+
+      return {
+        ctx,
+        source,
+        workletNode: null,
+        compressorNode: compressor,
+        gainNode,
+        destination,
+        originalStream: stream,
+        isFallback: true,
+      };
+    }
+  }
+
+  /**
+   * Get or create the shared AudioContext.
+   *
+   * Uses the system default sample rate (instead of forcing 48 kHz)
+   * to avoid resampling issues with remote WebRTC streams whose
+   * sample rate is determined by the sender's codec.
+   */
+  private async _getOrCreateContext(): Promise<AudioContext> {
+    if (this._sharedCtx && this._sharedCtx.state !== 'closed') {
+      return this._sharedCtx;
+    }
+    this._sharedCtx = new AudioContext();
+    this._workletLoaded = false;
+    return this._sharedCtx;
+  }
+
+  private async _ensureWorkletLoaded(ctx: AudioContext): Promise<boolean> {
+    if (this._workletAvailable === false) return false;
+    if (this._workletLoaded && this._workletAvailable === true) return true;
+
+    try {
+      await ctx.audioWorklet.addModule(WORKLET_MODULE_PATH);
+      this._workletLoaded = true;
+      this._workletAvailable = true;
+      this.logger.info('VoiceLeveling: worklet module loaded');
+      return true;
+    } catch (err) {
+      this.logger.error('VoiceLeveling: worklet module failed to load', err);
+      this._workletAvailable = false;
+      return false;
+    }
+  }
+
+  private _createFallbackCompressor(ctx: AudioContext): DynamicsCompressorNode {
+    const compressor = ctx.createDynamicsCompressor();
+    compressor.threshold.setValueAtTime(-24, ctx.currentTime);
+    compressor.knee.setValueAtTime(30, ctx.currentTime);
+    compressor.ratio.setValueAtTime(3, ctx.currentTime);
+    compressor.attack.setValueAtTime(0.01, ctx.currentTime);
+    compressor.release.setValueAtTime(0.25, ctx.currentTime);
+    return compressor;
+  }
+
+  /* ── Settings propagation ───────────────────────────────────── */
+
+  private _pushSettingsToPipeline(pipeline: SpeakerPipeline): void {
+    if (pipeline.workletNode) {
+      pipeline.workletNode.port.postMessage({
+        enabled: true, // Pipeline 
only exists when leveling is on; DSP always active + targetDbfs: this._settings.targetDbfs, + maxGainDb: this._settings.maxGainDb, + strength: this._settings.strength, + speed: this._settings.speed, + noiseGate: this._settings.noiseGate, + }); + } + } + + /* ── Cleanup ────────────────────────────────────────────────── */ + + private _disposePipeline(pipeline: SpeakerPipeline): void { + try { pipeline.source.disconnect(); } catch { /* already disconnected */ } + try { pipeline.workletNode?.disconnect(); } catch { /* ok */ } + try { pipeline.compressorNode?.disconnect(); } catch { /* ok */ } + try { pipeline.gainNode.disconnect(); } catch { /* ok */ } + try { pipeline.destination.disconnect(); } catch { /* ok */ } + } +} diff --git a/src/app/features/settings/settings-modal/voice-settings/voice-settings.component.html b/src/app/features/settings/settings-modal/voice-settings/voice-settings.component.html index 0543bf2..b89cd25 100644 --- a/src/app/features/settings/settings-modal/voice-settings/voice-settings.component.html +++ b/src/app/features/settings/settings-modal/voice-settings/voice-settings.component.html @@ -145,4 +145,138 @@ + + +
+
+ +

Voice Leveling

+
+
+ +
+
+

Voice Leveling

+

+ Automatically equalise volume across speakers +

+
+ +
+ + + @if (voiceLeveling.enabled()) { +
+ +
+ + +
+ -30 (quiet) + -12 (loud) +
+
+ + +
+ + +
+ + +
+ + +
+ 3 dB (subtle) + 20 dB (strong) +
+
+ + +
+ + +
+ + +
+
+

Noise Floor Gate

+

Prevents boosting silence

+
+ +
+
+ } +
+
diff --git a/src/app/features/settings/settings-modal/voice-settings/voice-settings.component.ts b/src/app/features/settings/settings-modal/voice-settings/voice-settings.component.ts index dfdf56d..2413c30 100644 --- a/src/app/features/settings/settings-modal/voice-settings/voice-settings.component.ts +++ b/src/app/features/settings/settings-modal/voice-settings/voice-settings.component.ts @@ -2,9 +2,10 @@ import { Component, inject, signal } from '@angular/core'; import { CommonModule } from '@angular/common'; import { FormsModule } from '@angular/forms'; import { NgIcon, provideIcons } from '@ng-icons/core'; -import { lucideMic, lucideHeadphones, lucideAudioLines } from '@ng-icons/lucide'; +import { lucideMic, lucideHeadphones, lucideAudioLines, lucideActivity } from '@ng-icons/lucide'; import { WebRTCService } from '../../../../core/services/webrtc.service'; +import { VoiceLevelingService } from '../../../../core/services/voice-leveling.service'; import { STORAGE_KEY_VOICE_SETTINGS } from '../../../../core/constants'; interface AudioDevice { @@ -21,12 +22,14 @@ interface AudioDevice { lucideMic, lucideHeadphones, lucideAudioLines, + lucideActivity, }), ], templateUrl: './voice-settings.component.html', }) export class VoiceSettingsComponent { private webrtcService = inject(WebRTCService); + readonly voiceLeveling = inject(VoiceLevelingService); inputDevices = signal([]); outputDevices = signal([]); @@ -151,4 +154,34 @@ export class VoiceSettingsComponent { await this.webrtcService.toggleNoiseReduction(this.noiseReduction()); this.saveVoiceSettings(); } + + /* ── Voice Leveling handlers ───────────────────────────────── */ + + onVoiceLevelingToggle(): void { + this.voiceLeveling.setEnabled(!this.voiceLeveling.enabled()); + } + + onTargetDbfsChange(event: Event): void { + const input = event.target as HTMLInputElement; + this.voiceLeveling.setTargetDbfs(parseInt(input.value, 10)); + } + + onStrengthChange(event: Event): void { + const select = event.target as 
HTMLSelectElement; + this.voiceLeveling.setStrength(select.value as 'low' | 'medium' | 'high'); + } + + onMaxGainDbChange(event: Event): void { + const input = event.target as HTMLInputElement; + this.voiceLeveling.setMaxGainDb(parseInt(input.value, 10)); + } + + onSpeedChange(event: Event): void { + const select = event.target as HTMLSelectElement; + this.voiceLeveling.setSpeed(select.value as 'slow' | 'medium' | 'fast'); + } + + onNoiseGateToggle(): void { + this.voiceLeveling.setNoiseGate(!this.voiceLeveling.noiseGate()); + } } diff --git a/src/app/features/voice/voice-controls/voice-controls.component.ts b/src/app/features/voice/voice-controls/voice-controls.component.ts index 049d87e..731c39d 100644 --- a/src/app/features/voice/voice-controls/voice-controls.component.ts +++ b/src/app/features/voice/voice-controls/voice-controls.component.ts @@ -27,6 +27,7 @@ import { import { WebRTCService } from '../../../core/services/webrtc.service'; import { VoiceSessionService } from '../../../core/services/voice-session.service'; import { VoiceActivityService } from '../../../core/services/voice-activity.service'; +import { VoiceLevelingService } from '../../../core/services/voice-leveling.service'; import { UsersActions } from '../../../store/users/users.actions'; import { selectCurrentUser } from '../../../store/users/users.selectors'; import { selectCurrentRoom } from '../../../store/rooms/rooms.selectors'; @@ -62,11 +63,18 @@ export class VoiceControlsComponent implements OnInit, OnDestroy { private webrtcService = inject(WebRTCService); private voiceSessionService = inject(VoiceSessionService); private voiceActivity = inject(VoiceActivityService); + private voiceLeveling = inject(VoiceLevelingService); private store = inject(Store); private settingsModal = inject(SettingsModalService); private remoteStreamSubscription: Subscription | null = null; private remoteAudioElements = new Map(); private pendingRemoteStreams = new Map(); + /** Raw (unprocessed) remote streams 
keyed by peer ID — used to swap + * between raw playback and leveled playback when the user toggles + * the voice leveling setting. */ + private rawRemoteStreams = new Map(); + /** Unsubscribe function for live voice-leveling toggle notifications. */ + private voiceLevelingUnsubscribe: (() => void) | null = null; currentUser = this.store.selectSignal(selectCurrentUser); currentRoom = this.store.selectSignal(selectCurrentRoom); @@ -106,6 +114,12 @@ export class VoiceControlsComponent implements OnInit, OnDestroy { }, ); + // Listen for live voice-leveling toggle changes so we can + // rebuild all remote Audio elements immediately (no reconnect). + this.voiceLevelingUnsubscribe = this.voiceLeveling.onEnabledChange( + (enabled) => this.rebuildAllRemoteAudio(enabled), + ); + // Subscribe to voice connected event to play pending streams and ensure all remote audio is set up this.voiceConnectedSubscription = this.webrtcService.onVoiceConnected.subscribe(() => { this.playPendingStreams(); @@ -132,9 +146,12 @@ export class VoiceControlsComponent implements OnInit, OnDestroy { audio.remove(); }); this.remoteAudioElements.clear(); + this.rawRemoteStreams.clear(); + this.voiceLeveling.disableAll(); this.remoteStreamSubscription?.unsubscribe(); this.voiceConnectedSubscription?.unsubscribe(); + this.voiceLevelingUnsubscribe?.(); } /** @@ -159,9 +176,12 @@ export class VoiceControlsComponent implements OnInit, OnDestroy { for (const peerId of connectedPeers) { const stream = this.webrtcService.getRemoteStream(peerId); if (stream && stream.getAudioTracks().length > 0) { - // Check if we already have an active audio element for this peer + // Check if we already have an active audio element for this peer. + // Compare against the stashed raw stream (not srcObject which may + // be the leveled stream when voice leveling is enabled). 
const existingAudio = this.remoteAudioElements.get(peerId); - if (!existingAudio || existingAudio.srcObject !== stream) { + const trackedRaw = this.rawRemoteStreams.get(peerId); + if (!existingAudio || trackedRaw !== stream) { this.playRemoteAudio(peerId, stream); } } @@ -171,6 +191,10 @@ export class VoiceControlsComponent implements OnInit, OnDestroy { private removeRemoteAudio(peerId: string): void { // Remove from pending streams this.pendingRemoteStreams.delete(peerId); + this.rawRemoteStreams.delete(peerId); + + // Remove voice leveling pipeline for this speaker + this.voiceLeveling.disable(peerId); // Remove audio element const audio = this.remoteAudioElements.get(peerId); @@ -195,12 +219,6 @@ export class VoiceControlsComponent implements OnInit, OnDestroy { return; } - // Check if audio track is live - const audioTrack = audioTracks[0]; - if (audioTrack.readyState !== 'live') { - // Still try to play it - it might become live later - } - // Remove existing audio element for this peer if any const existingAudio = this.remoteAudioElements.get(peerId); if (existingAudio) { @@ -208,24 +226,65 @@ export class VoiceControlsComponent implements OnInit, OnDestroy { existingAudio.remove(); } - // Create a new audio element for this peer + // Always stash the raw stream so we can re-wire on toggle + this.rawRemoteStreams.set(peerId, stream); + + // ── Step 1: Immediately start playback with the raw stream ── + // This guarantees audio is never lost even if the pipeline + // build takes time or fails. const audio = new Audio(); audio.srcObject = stream; audio.autoplay = true; audio.volume = this.outputVolume() / 100; - - // Mute if deafened if (this.isDeafened()) { audio.muted = true; } - - // Play the audio - audio - .play() - .then(() => {}) - .catch((error) => {}); - + audio.play().then(() => {}).catch(() => {}); this.remoteAudioElements.set(peerId, audio); + + // ── Step 2: Asynchronously swap in the leveled stream ── + // Only when voice leveling is enabled. 
If it fails or is + // disabled, playback continues on the raw stream. + if (this.voiceLeveling.enabled()) { + this.voiceLeveling.enable(peerId, stream).then((leveledStream) => { + // Guard: audio element may have been replaced or removed + const currentAudio = this.remoteAudioElements.get(peerId); + if (currentAudio && leveledStream !== stream) { + currentAudio.srcObject = leveledStream; + } + }); + } + } + + /** + * Rebuild all remote Audio elements when the user toggles voice + * leveling on or off. This runs synchronously for each peer, + * swapping `srcObject` between the raw stream and the leveled one. + * + * Mirrors the noise-reduction live-toggle pattern. + */ + private async rebuildAllRemoteAudio(enabled: boolean): Promise { + if (enabled) { + // Enable: build pipelines and swap to leveled streams + for (const [peerId, rawStream] of this.rawRemoteStreams) { + try { + const leveledStream = await this.voiceLeveling.enable(peerId, rawStream); + const audio = this.remoteAudioElements.get(peerId); + if (audio && leveledStream !== rawStream) { + audio.srcObject = leveledStream; + } + } catch { /* already playing raw — fine */ } + } + } else { + // Disable: tear down all pipelines, swap back to raw streams + this.voiceLeveling.disableAll(); + for (const [peerId, rawStream] of this.rawRemoteStreams) { + const audio = this.remoteAudioElements.get(peerId); + if (audio) { + audio.srcObject = rawStream; + } + } + } } async loadAudioDevices(): Promise { @@ -344,12 +403,16 @@ export class VoiceControlsComponent implements OnInit, OnDestroy { // Disable voice (stops audio tracks but keeps peer connections open for chat) this.webrtcService.disableVoice(); + // Tear down all voice leveling pipelines + this.voiceLeveling.disableAll(); + // Clear all remote audio elements this.remoteAudioElements.forEach((audio) => { audio.srcObject = null; audio.remove(); }); this.remoteAudioElements.clear(); + this.rawRemoteStreams.clear(); this.pendingRemoteStreams.clear(); const user = 
this.currentUser();