Voice Leveling (untested)

This commit is contained in:
2026-03-03 03:41:59 +01:00
parent cf91d77502
commit 8315df42fc
9 changed files with 1313 additions and 19 deletions

View File

@@ -12,3 +12,4 @@ export * from './peer-connection.manager';
export * from './media.manager';
export * from './screen-share.manager';
export * from './noise-reduction.manager';
export * from './voice-leveling.manager';

View File

@@ -0,0 +1,359 @@
/**
* VoiceLevelingManager — manages per-speaker automatic gain control
* pipelines for remote voice streams.
*
* ═══════════════════════════════════════════════════════════════════
* ARCHITECTURE
* ═══════════════════════════════════════════════════════════════════
*
* For every remote MediaStream a dedicated processing chain is built:
*
* Remote MediaStreamTrack
* ↓
* MediaStreamSource (AudioContext)
* ↓
* AudioWorkletNode (VoiceLevelingProcessor — per-speaker AGC)
* ↓
* GainNode (post fine-tuning — master volume knob)
* ↓
* MediaStreamDestination → leveled MediaStream
*
* Each speaker gets its own AudioWorkletNode instance so that the
* AGC adapts independently to each person's microphone level.
*
* A fallback mode using {@link DynamicsCompressorNode} is provided
* for browsers that don't support AudioWorklet or SharedArrayBuffer.
*
* ═══════════════════════════════════════════════════════════════════
* DESIGN — mirrors the NoiseReductionManager pattern
* ═══════════════════════════════════════════════════════════════════
*
* • `enable(peerId, rawStream)` builds the pipeline and returns a
* processed stream.
* • `disable(peerId)` tears down the pipeline. The caller swaps
* the Audio element's srcObject back to the raw stream.
* • `disableAll()` tears down every pipeline at once.
*
* The calling component keeps a reference to the original raw stream
* and swaps the Audio element's `srcObject` between the raw stream
* and the leveled stream when the user toggles the feature — exactly
* like noise reduction does for the local mic.
*
* ═══════════════════════════════════════════════════════════════════
*/
import { WebRTCLogger } from './webrtc-logger';
/* ──────────────────────────────────────────────────────────────── */
/* Types */
/* ──────────────────────────────────────────────────────────────── */
/** User-configurable voice leveling parameters. */
export interface VoiceLevelingSettings {
/** Master on/off toggle. When false, audio passes through unchanged. */
enabled: boolean;
/** Target loudness in dBFS (-30 … -12). Default -18. */
targetDbfs: number;
/** AGC strength preset. Default 'medium'. */
strength: 'low' | 'medium' | 'high';
/** Maximum gain boost in dB (3 … 20). Default 12. */
maxGainDb: number;
/** Gain response speed preset. Default 'medium'. */
speed: 'slow' | 'medium' | 'fast';
/** Whether the silence noise gate is active. Default false. */
noiseGate: boolean;
}
/** Default settings used when none are explicitly provided. */
export const DEFAULT_VOICE_LEVELING_SETTINGS: VoiceLevelingSettings = {
enabled: false, // feature is opt-in; user must toggle it on
targetDbfs: -18, // typical speech-loudness target in dBFS
strength: 'medium',
maxGainDb: 12, // caps how much quiet speakers are boosted
speed: 'medium',
noiseGate: false, // gate is off so faint speech is never cut
};
/**
* Internal bookkeeping for a single speaker's processing chain.
*
* Exactly one of `workletNode` / `compressorNode` is non-null,
* depending on whether the worklet or the fallback path was built.
*/
interface SpeakerPipeline {
/** Context that owns every node below (shared across pipelines). */
ctx: AudioContext;
/** Entry node wrapping the remote stream's audio. */
source: MediaStreamAudioSourceNode;
/** Per-speaker AGC processor; null in fallback mode. */
workletNode: AudioWorkletNode | null;
/** Fallback compressor; null when the worklet path is active. */
compressorNode: DynamicsCompressorNode | null;
/** Post-processing volume control (0 … 1). */
gainNode: GainNode;
/** Terminal node whose `.stream` is handed to the caller. */
destination: MediaStreamAudioDestinationNode;
/** Raw remote stream this pipeline was built from (identity-compared on reuse). */
originalStream: MediaStream;
/** True when the DynamicsCompressor fallback chain is in use. */
isFallback: boolean;
}
/** AudioWorklet module path (served from public/; resolved relative to the page). */
const WORKLET_MODULE_PATH = 'voice-leveling-worklet.js';
/** Processor name — must match the `registerProcessor(...)` call inside the worklet file. */
const WORKLET_PROCESSOR_NAME = 'VoiceLevelingProcessor';
/* ──────────────────────────────────────────────────────────────── */
/* Manager */
/* ──────────────────────────────────────────────────────────────── */
export class VoiceLevelingManager {
  /** Active per-speaker pipelines keyed by peer ID. */
  private readonly pipelines = new Map<string, SpeakerPipeline>();
  /** Cached DSP settings pushed to worklet pipelines. */
  private _settings: VoiceLevelingSettings = { ...DEFAULT_VOICE_LEVELING_SETTINGS };
  /** Whether the AudioWorklet path is usable (null = not probed yet). */
  private _workletAvailable: boolean | null = null;
  /** Shared AudioContext (avoids browser per-page context limits). */
  private _sharedCtx: AudioContext | null = null;
  /** Whether the worklet module has been loaded into the current context. */
  private _workletLoaded = false;

  constructor(private readonly logger: WebRTCLogger) {}

  /* ── Public API ─────────────────────────────────────────────── */

  /** Current DSP settings (read-only view; use {@link updateSettings} to change). */
  get settings(): Readonly<VoiceLevelingSettings> {
    return this._settings;
  }

  /** Peer IDs that currently have an active leveling pipeline. */
  get activePeerIds(): string[] {
    return Array.from(this.pipelines.keys());
  }

  /** Number of active pipelines. */
  get activePipelineCount(): number {
    return this.pipelines.size;
  }

  /**
   * Update DSP settings and propagate to all active worklets.
   * Only provided keys are updated; the rest stay unchanged.
   *
   * NOTE: fallback (compressor) pipelines use fixed parameters and are
   * not affected by settings changes — only worklet pipelines are.
   */
  updateSettings(partial: Partial<VoiceLevelingSettings>): void {
    this._settings = { ...this._settings, ...partial };
    this.pipelines.forEach((p) => this._pushSettingsToPipeline(p));
  }

  /**
   * Enable voice leveling for a single speaker.
   *
   * Builds the processing pipeline and returns the leveled
   * {@link MediaStream}. The caller sets this as the Audio
   * element's `srcObject`.
   *
   * If a pipeline already exists for this peer with the **same**
   * raw stream, the existing leveled stream is returned (no rebuild).
   *
   * @param peerId Remote peer identifier.
   * @param stream The remote peer's raw MediaStream.
   * @returns The leveled MediaStream (or the raw stream on failure /
   *          when the stream carries no audio tracks).
   */
  async enable(peerId: string, stream: MediaStream): Promise<MediaStream> {
    // Reuse the existing pipeline if it was built for this exact stream.
    const existing = this.pipelines.get(peerId);
    if (existing && existing.originalStream === stream) {
      return existing.destination.stream;
    }
    // The peer's stream changed — tear down the stale pipeline first.
    if (existing) {
      this._disposePipeline(existing);
      this.pipelines.delete(peerId);
    }
    // No audio tracks → nothing to process.
    if (stream.getAudioTracks().length === 0) {
      this.logger.info('VoiceLeveling: no audio tracks, skipping', { peerId });
      return stream;
    }
    try {
      const pipeline = await this._buildPipeline(stream);
      this.pipelines.set(peerId, pipeline);
      this.logger.info('VoiceLeveling: pipeline created', {
        peerId,
        fallback: pipeline.isFallback,
      });
      return pipeline.destination.stream;
    } catch (err) {
      // Degrade gracefully: the caller keeps playing the unprocessed audio.
      this.logger.error('VoiceLeveling: pipeline build failed, returning raw stream', err);
      return stream;
    }
  }

  /**
   * Disable voice leveling for a single speaker.
   *
   * Tears down the pipeline. The caller is responsible for swapping
   * the Audio element's `srcObject` back to the raw stream.
   */
  disable(peerId: string): void {
    const pipeline = this.pipelines.get(peerId);
    if (!pipeline) return;
    this._disposePipeline(pipeline);
    this.pipelines.delete(peerId);
    this.logger.info('VoiceLeveling: pipeline removed', { peerId });
  }

  /** Tear down ALL speaker pipelines. */
  disableAll(): void {
    this.pipelines.forEach((p) => this._disposePipeline(p));
    this.pipelines.clear();
  }

  /**
   * Set the post-AGC volume for one speaker (clamped to 0 … 1).
   * No-op when the peer has no active pipeline.
   */
  setSpeakerVolume(peerId: string, volume: number): void {
    const pipeline = this.pipelines.get(peerId);
    if (!pipeline) return;
    pipeline.gainNode.gain.setValueAtTime(
      Math.max(0, Math.min(1, volume)),
      pipeline.ctx.currentTime,
    );
  }

  /** Set the post-AGC volume for every active speaker (clamped to 0 … 1). */
  setMasterVolume(volume: number): void {
    const clamped = Math.max(0, Math.min(1, volume));
    this.pipelines.forEach((pipeline) => {
      pipeline.gainNode.gain.setValueAtTime(clamped, pipeline.ctx.currentTime);
    });
  }

  /** Tear down all pipelines and release all resources. */
  destroy(): void {
    this.disableAll();
    if (this._sharedCtx && this._sharedCtx.state !== 'closed') {
      this._sharedCtx.close().catch(() => { /* best-effort */ });
    }
    this._sharedCtx = null;
    this._workletLoaded = false;
    this._workletAvailable = null;
  }

  /* ── Pipeline construction ──────────────────────────────────── */

  /**
   * Build a processing chain for one raw stream.
   *
   * Prefers the AudioWorklet AGC; falls back to a fixed
   * DynamicsCompressor chain when the worklet module cannot be
   * loaded OR when node construction fails (e.g. the registered
   * processor name does not match {@link WORKLET_PROCESSOR_NAME}).
   */
  private async _buildPipeline(stream: MediaStream): Promise<SpeakerPipeline> {
    const ctx = await this._getOrCreateContext();
    if (ctx.state === 'suspended') {
      await ctx.resume();
    }
    const source = ctx.createMediaStreamSource(stream);
    const gainNode = ctx.createGain();
    gainNode.gain.value = 1.0;
    const destination = ctx.createMediaStreamDestination();

    if (await this._ensureWorkletLoaded(ctx)) {
      try {
        const workletNode = new AudioWorkletNode(ctx, WORKLET_PROCESSOR_NAME);
        source.connect(workletNode);
        workletNode.connect(gainNode);
        gainNode.connect(destination);
        const pipeline: SpeakerPipeline = {
          ctx,
          source,
          workletNode,
          compressorNode: null,
          gainNode,
          destination,
          originalStream: stream,
          isFallback: false,
        };
        this._pushSettingsToPipeline(pipeline);
        return pipeline;
      } catch (err) {
        // BUGFIX: `addModule` can succeed while node construction still
        // throws (processor-name mismatch, worklet init error). Previously
        // this aborted the whole pipeline and the caller got the raw
        // stream; now we remember the failure and use the fallback chain.
        this.logger.error('VoiceLeveling: worklet node construction failed, using fallback', err);
        this._workletAvailable = false;
        try { source.disconnect(); } catch { /* may not be connected */ }
      }
    }

    this.logger.warn('VoiceLeveling: AudioWorklet unavailable, using fallback compressor');
    const compressor = this._createFallbackCompressor(ctx);
    source.connect(compressor);
    compressor.connect(gainNode);
    gainNode.connect(destination);
    return {
      ctx,
      source,
      workletNode: null,
      compressorNode: compressor,
      gainNode,
      destination,
      originalStream: stream,
      isFallback: true,
    };
  }

  /**
   * Get or create the shared AudioContext.
   *
   * Uses the system default sample rate (instead of forcing 48 kHz)
   * to avoid resampling issues with remote WebRTC streams whose
   * sample rate is determined by the sender's codec.
   */
  private async _getOrCreateContext(): Promise<AudioContext> {
    if (this._sharedCtx && this._sharedCtx.state !== 'closed') {
      return this._sharedCtx;
    }
    this._sharedCtx = new AudioContext();
    // New context → the worklet module must be loaded into it again.
    this._workletLoaded = false;
    return this._sharedCtx;
  }

  /**
   * Load the worklet module into `ctx` once; cache the outcome.
   * A prior hard failure (`_workletAvailable === false`) is final —
   * we never retry, so every later pipeline uses the fallback.
   */
  private async _ensureWorkletLoaded(ctx: AudioContext): Promise<boolean> {
    if (this._workletAvailable === false) return false;
    if (this._workletLoaded && this._workletAvailable === true) return true;
    try {
      await ctx.audioWorklet.addModule(WORKLET_MODULE_PATH);
      this._workletLoaded = true;
      this._workletAvailable = true;
      this.logger.info('VoiceLeveling: worklet module loaded');
      return true;
    } catch (err) {
      this.logger.error('VoiceLeveling: worklet module failed to load', err);
      this._workletAvailable = false;
      return false;
    }
  }

  /**
   * Fixed, gentle leveling compressor for the non-worklet path.
   * Parameters are static — user settings do not apply in fallback mode.
   */
  private _createFallbackCompressor(ctx: AudioContext): DynamicsCompressorNode {
    const compressor = ctx.createDynamicsCompressor();
    compressor.threshold.setValueAtTime(-24, ctx.currentTime);
    compressor.knee.setValueAtTime(30, ctx.currentTime);
    compressor.ratio.setValueAtTime(3, ctx.currentTime);
    compressor.attack.setValueAtTime(0.01, ctx.currentTime);
    compressor.release.setValueAtTime(0.25, ctx.currentTime);
    return compressor;
  }

  /* ── Settings propagation ───────────────────────────────────── */

  /** Push the cached settings to one pipeline's worklet (no-op for fallback). */
  private _pushSettingsToPipeline(pipeline: SpeakerPipeline): void {
    if (pipeline.workletNode) {
      pipeline.workletNode.port.postMessage({
        enabled: true, // Pipeline only exists when leveling is on; DSP always active
        targetDbfs: this._settings.targetDbfs,
        maxGainDb: this._settings.maxGainDb,
        strength: this._settings.strength,
        speed: this._settings.speed,
        noiseGate: this._settings.noiseGate,
      });
    }
  }

  /* ── Cleanup ────────────────────────────────────────────────── */

  /** Disconnect every node in the chain and stop the synthetic output tracks. */
  private _disposePipeline(pipeline: SpeakerPipeline): void {
    // BUGFIX: stop the tracks the destination node synthesized so the
    // browser releases them; previously they stayed live after teardown.
    // The remote (original) stream's tracks are deliberately untouched.
    try {
      pipeline.destination.stream.getTracks().forEach((t) => t.stop());
    } catch { /* best-effort */ }
    try { pipeline.source.disconnect(); } catch { /* already disconnected */ }
    try { pipeline.workletNode?.disconnect(); } catch { /* ok */ }
    try { pipeline.compressorNode?.disconnect(); } catch { /* ok */ }
    try { pipeline.gainNode.disconnect(); } catch { /* ok */ }
    try { pipeline.destination.disconnect(); } catch { /* ok */ }
  }
}