Voice Leveling (untested)

This commit is contained in:
2026-03-03 03:41:59 +01:00
parent cf91d77502
commit 8315df42fc
9 changed files with 1313 additions and 19 deletions

View File

@@ -12,3 +12,4 @@ export * from './peer-connection.manager';
export * from './media.manager';
export * from './screen-share.manager';
export * from './noise-reduction.manager';
export * from './voice-leveling.manager';

View File

@@ -0,0 +1,359 @@
/**
* VoiceLevelingManager — manages per-speaker automatic gain control
* pipelines for remote voice streams.
*
* ═══════════════════════════════════════════════════════════════════
* ARCHITECTURE
* ═══════════════════════════════════════════════════════════════════
*
* For every remote MediaStream a dedicated processing chain is built:
*
* Remote MediaStreamTrack
* ↓
* MediaStreamSource (AudioContext)
* ↓
* AudioWorkletNode (VoiceLevelingProcessor — per-speaker AGC)
* ↓
* GainNode (post fine-tuning — master volume knob)
* ↓
* MediaStreamDestination → leveled MediaStream
*
* Each speaker gets its own AudioWorkletNode instance so that the
* AGC adapts independently to each person's microphone level.
*
* A fallback mode using {@link DynamicsCompressorNode} is provided
* for browsers that don't support AudioWorklet or SharedArrayBuffer.
*
* ═══════════════════════════════════════════════════════════════════
* DESIGN — mirrors the NoiseReductionManager pattern
* ═══════════════════════════════════════════════════════════════════
*
* • `enable(peerId, rawStream)` builds the pipeline and returns a
* processed stream.
* • `disable(peerId)` tears down the pipeline. The caller swaps
* the Audio element's srcObject back to the raw stream.
* • `disableAll()` tears down every pipeline at once.
*
* The calling component keeps a reference to the original raw stream
* and swaps the Audio element's `srcObject` between the raw stream
* and the leveled stream when the user toggles the feature — exactly
* like noise reduction does for the local mic.
*
* ═══════════════════════════════════════════════════════════════════
*/
import { WebRTCLogger } from './webrtc-logger';
/* ──────────────────────────────────────────────────────────────── */
/* Types */
/* ──────────────────────────────────────────────────────────────── */
/** User-configurable voice leveling parameters. */
export interface VoiceLevelingSettings {
/** Master on/off toggle. When false, audio passes through unchanged. */
enabled: boolean;
/** Target loudness in dBFS (-30 … -12). Default -18. */
targetDbfs: number;
/** AGC strength preset. Default 'medium'. */
strength: 'low' | 'medium' | 'high';
/** Maximum gain boost in dB (3 … 20). Default 12. */
maxGainDb: number;
/** Gain response speed preset. Default 'medium'. */
speed: 'slow' | 'medium' | 'fast';
/** Whether the silence noise gate is active. Default false. */
noiseGate: boolean;
}
/** Default settings used when none are explicitly provided. */
export const DEFAULT_VOICE_LEVELING_SETTINGS: VoiceLevelingSettings = {
enabled: false, // feature is opt-in; user must toggle it on
targetDbfs: -18, // typical speech-loudness target in dBFS
strength: 'medium',
maxGainDb: 12, // caps how much quiet speakers are boosted
speed: 'medium',
noiseGate: false, // gate is off so faint speech is never cut
};
/**
* Internal bookkeeping for a single speaker's processing chain.
*
* Exactly one of `workletNode` / `compressorNode` is non-null,
* depending on whether the worklet or the fallback path was built.
*/
interface SpeakerPipeline {
/** Context that owns every node below (shared across pipelines). */
ctx: AudioContext;
/** Entry node wrapping the remote stream's audio. */
source: MediaStreamAudioSourceNode;
/** Per-speaker AGC processor; null in fallback mode. */
workletNode: AudioWorkletNode | null;
/** Fallback compressor; null when the worklet path is active. */
compressorNode: DynamicsCompressorNode | null;
/** Post-processing volume control (0 … 1). */
gainNode: GainNode;
/** Terminal node whose `.stream` is handed to the caller. */
destination: MediaStreamAudioDestinationNode;
/** Raw remote stream this pipeline was built from (identity-compared on reuse). */
originalStream: MediaStream;
/** True when the DynamicsCompressor fallback chain is in use. */
isFallback: boolean;
}
/** AudioWorklet module path (served from public/; resolved relative to the page). */
const WORKLET_MODULE_PATH = 'voice-leveling-worklet.js';
/** Processor name — must match the `registerProcessor(...)` call inside the worklet file. */
const WORKLET_PROCESSOR_NAME = 'VoiceLevelingProcessor';
/* ──────────────────────────────────────────────────────────────── */
/* Manager */
/* ──────────────────────────────────────────────────────────────── */
export class VoiceLevelingManager {
  /** Active per-speaker pipelines keyed by peer ID. */
  private readonly pipelines = new Map<string, SpeakerPipeline>();
  /** Cached DSP settings pushed to worklet pipelines. */
  private _settings: VoiceLevelingSettings = { ...DEFAULT_VOICE_LEVELING_SETTINGS };
  /** Whether the AudioWorklet path is usable (null = not probed yet). */
  private _workletAvailable: boolean | null = null;
  /** Shared AudioContext (avoids browser per-page context limits). */
  private _sharedCtx: AudioContext | null = null;
  /** Whether the worklet module has been loaded into the current context. */
  private _workletLoaded = false;

  constructor(private readonly logger: WebRTCLogger) {}

  /* ── Public API ─────────────────────────────────────────────── */

  /** Current DSP settings (read-only view; use {@link updateSettings} to change). */
  get settings(): Readonly<VoiceLevelingSettings> {
    return this._settings;
  }

  /** Peer IDs that currently have an active leveling pipeline. */
  get activePeerIds(): string[] {
    return Array.from(this.pipelines.keys());
  }

  /** Number of active pipelines. */
  get activePipelineCount(): number {
    return this.pipelines.size;
  }

  /**
   * Update DSP settings and propagate to all active worklets.
   * Only provided keys are updated; the rest stay unchanged.
   *
   * NOTE: fallback (compressor) pipelines use fixed parameters and are
   * not affected by settings changes — only worklet pipelines are.
   */
  updateSettings(partial: Partial<VoiceLevelingSettings>): void {
    this._settings = { ...this._settings, ...partial };
    this.pipelines.forEach((p) => this._pushSettingsToPipeline(p));
  }

  /**
   * Enable voice leveling for a single speaker.
   *
   * Builds the processing pipeline and returns the leveled
   * {@link MediaStream}. The caller sets this as the Audio
   * element's `srcObject`.
   *
   * If a pipeline already exists for this peer with the **same**
   * raw stream, the existing leveled stream is returned (no rebuild).
   *
   * @param peerId Remote peer identifier.
   * @param stream The remote peer's raw MediaStream.
   * @returns The leveled MediaStream (or the raw stream on failure /
   *          when the stream carries no audio tracks).
   */
  async enable(peerId: string, stream: MediaStream): Promise<MediaStream> {
    // Reuse the existing pipeline if it was built for this exact stream.
    const existing = this.pipelines.get(peerId);
    if (existing && existing.originalStream === stream) {
      return existing.destination.stream;
    }
    // The peer's stream changed — tear down the stale pipeline first.
    if (existing) {
      this._disposePipeline(existing);
      this.pipelines.delete(peerId);
    }
    // No audio tracks → nothing to process.
    if (stream.getAudioTracks().length === 0) {
      this.logger.info('VoiceLeveling: no audio tracks, skipping', { peerId });
      return stream;
    }
    try {
      const pipeline = await this._buildPipeline(stream);
      this.pipelines.set(peerId, pipeline);
      this.logger.info('VoiceLeveling: pipeline created', {
        peerId,
        fallback: pipeline.isFallback,
      });
      return pipeline.destination.stream;
    } catch (err) {
      // Degrade gracefully: the caller keeps playing the unprocessed audio.
      this.logger.error('VoiceLeveling: pipeline build failed, returning raw stream', err);
      return stream;
    }
  }

  /**
   * Disable voice leveling for a single speaker.
   *
   * Tears down the pipeline. The caller is responsible for swapping
   * the Audio element's `srcObject` back to the raw stream.
   */
  disable(peerId: string): void {
    const pipeline = this.pipelines.get(peerId);
    if (!pipeline) return;
    this._disposePipeline(pipeline);
    this.pipelines.delete(peerId);
    this.logger.info('VoiceLeveling: pipeline removed', { peerId });
  }

  /** Tear down ALL speaker pipelines. */
  disableAll(): void {
    this.pipelines.forEach((p) => this._disposePipeline(p));
    this.pipelines.clear();
  }

  /**
   * Set the post-AGC volume for one speaker (clamped to 0 … 1).
   * No-op when the peer has no active pipeline.
   */
  setSpeakerVolume(peerId: string, volume: number): void {
    const pipeline = this.pipelines.get(peerId);
    if (!pipeline) return;
    pipeline.gainNode.gain.setValueAtTime(
      Math.max(0, Math.min(1, volume)),
      pipeline.ctx.currentTime,
    );
  }

  /** Set the post-AGC volume for every active speaker (clamped to 0 … 1). */
  setMasterVolume(volume: number): void {
    const clamped = Math.max(0, Math.min(1, volume));
    this.pipelines.forEach((pipeline) => {
      pipeline.gainNode.gain.setValueAtTime(clamped, pipeline.ctx.currentTime);
    });
  }

  /** Tear down all pipelines and release all resources. */
  destroy(): void {
    this.disableAll();
    if (this._sharedCtx && this._sharedCtx.state !== 'closed') {
      this._sharedCtx.close().catch(() => { /* best-effort */ });
    }
    this._sharedCtx = null;
    this._workletLoaded = false;
    this._workletAvailable = null;
  }

  /* ── Pipeline construction ──────────────────────────────────── */

  /**
   * Build a processing chain for one raw stream.
   *
   * Prefers the AudioWorklet AGC; falls back to a fixed
   * DynamicsCompressor chain when the worklet module cannot be
   * loaded OR when node construction fails (e.g. the registered
   * processor name does not match {@link WORKLET_PROCESSOR_NAME}).
   */
  private async _buildPipeline(stream: MediaStream): Promise<SpeakerPipeline> {
    const ctx = await this._getOrCreateContext();
    if (ctx.state === 'suspended') {
      await ctx.resume();
    }
    const source = ctx.createMediaStreamSource(stream);
    const gainNode = ctx.createGain();
    gainNode.gain.value = 1.0;
    const destination = ctx.createMediaStreamDestination();

    if (await this._ensureWorkletLoaded(ctx)) {
      try {
        const workletNode = new AudioWorkletNode(ctx, WORKLET_PROCESSOR_NAME);
        source.connect(workletNode);
        workletNode.connect(gainNode);
        gainNode.connect(destination);
        const pipeline: SpeakerPipeline = {
          ctx,
          source,
          workletNode,
          compressorNode: null,
          gainNode,
          destination,
          originalStream: stream,
          isFallback: false,
        };
        this._pushSettingsToPipeline(pipeline);
        return pipeline;
      } catch (err) {
        // BUGFIX: `addModule` can succeed while node construction still
        // throws (processor-name mismatch, worklet init error). Previously
        // this aborted the whole pipeline and the caller got the raw
        // stream; now we remember the failure and use the fallback chain.
        this.logger.error('VoiceLeveling: worklet node construction failed, using fallback', err);
        this._workletAvailable = false;
        try { source.disconnect(); } catch { /* may not be connected */ }
      }
    }

    this.logger.warn('VoiceLeveling: AudioWorklet unavailable, using fallback compressor');
    const compressor = this._createFallbackCompressor(ctx);
    source.connect(compressor);
    compressor.connect(gainNode);
    gainNode.connect(destination);
    return {
      ctx,
      source,
      workletNode: null,
      compressorNode: compressor,
      gainNode,
      destination,
      originalStream: stream,
      isFallback: true,
    };
  }

  /**
   * Get or create the shared AudioContext.
   *
   * Uses the system default sample rate (instead of forcing 48 kHz)
   * to avoid resampling issues with remote WebRTC streams whose
   * sample rate is determined by the sender's codec.
   */
  private async _getOrCreateContext(): Promise<AudioContext> {
    if (this._sharedCtx && this._sharedCtx.state !== 'closed') {
      return this._sharedCtx;
    }
    this._sharedCtx = new AudioContext();
    // New context → the worklet module must be loaded into it again.
    this._workletLoaded = false;
    return this._sharedCtx;
  }

  /**
   * Load the worklet module into `ctx` once; cache the outcome.
   * A prior hard failure (`_workletAvailable === false`) is final —
   * we never retry, so every later pipeline uses the fallback.
   */
  private async _ensureWorkletLoaded(ctx: AudioContext): Promise<boolean> {
    if (this._workletAvailable === false) return false;
    if (this._workletLoaded && this._workletAvailable === true) return true;
    try {
      await ctx.audioWorklet.addModule(WORKLET_MODULE_PATH);
      this._workletLoaded = true;
      this._workletAvailable = true;
      this.logger.info('VoiceLeveling: worklet module loaded');
      return true;
    } catch (err) {
      this.logger.error('VoiceLeveling: worklet module failed to load', err);
      this._workletAvailable = false;
      return false;
    }
  }

  /**
   * Fixed, gentle leveling compressor for the non-worklet path.
   * Parameters are static — user settings do not apply in fallback mode.
   */
  private _createFallbackCompressor(ctx: AudioContext): DynamicsCompressorNode {
    const compressor = ctx.createDynamicsCompressor();
    compressor.threshold.setValueAtTime(-24, ctx.currentTime);
    compressor.knee.setValueAtTime(30, ctx.currentTime);
    compressor.ratio.setValueAtTime(3, ctx.currentTime);
    compressor.attack.setValueAtTime(0.01, ctx.currentTime);
    compressor.release.setValueAtTime(0.25, ctx.currentTime);
    return compressor;
  }

  /* ── Settings propagation ───────────────────────────────────── */

  /** Push the cached settings to one pipeline's worklet (no-op for fallback). */
  private _pushSettingsToPipeline(pipeline: SpeakerPipeline): void {
    if (pipeline.workletNode) {
      pipeline.workletNode.port.postMessage({
        enabled: true, // Pipeline only exists when leveling is on; DSP always active
        targetDbfs: this._settings.targetDbfs,
        maxGainDb: this._settings.maxGainDb,
        strength: this._settings.strength,
        speed: this._settings.speed,
        noiseGate: this._settings.noiseGate,
      });
    }
  }

  /* ── Cleanup ────────────────────────────────────────────────── */

  /** Disconnect every node in the chain and stop the synthetic output tracks. */
  private _disposePipeline(pipeline: SpeakerPipeline): void {
    // BUGFIX: stop the tracks the destination node synthesized so the
    // browser releases them; previously they stayed live after teardown.
    // The remote (original) stream's tracks are deliberately untouched.
    try {
      pipeline.destination.stream.getTracks().forEach((t) => t.stop());
    } catch { /* best-effort */ }
    try { pipeline.source.disconnect(); } catch { /* already disconnected */ }
    try { pipeline.workletNode?.disconnect(); } catch { /* ok */ }
    try { pipeline.compressorNode?.disconnect(); } catch { /* ok */ }
    try { pipeline.gainNode.disconnect(); } catch { /* ok */ }
    try { pipeline.destination.disconnect(); } catch { /* ok */ }
  }
}