@@ -67,9 +67,65 @@ const (
6767 deessIntensityMax = 0.8 // Maximum intensity limit
6868 deessIntensityMin = 0.3 // Minimum before disabling
6969
70- // Gate threshold safety bounds (applied after data-driven calculation)
71- gateThresholdMinDB = - 70.0 // dB - professional studio floor
72- gateThresholdMaxDB = - 25.0 // dB - never gate above this (would cut speech)
70+ // Gate tuning constants
71+ // Threshold calculation: sits above noise/bleed peaks, below quiet speech
72+ gateThresholdMinDB = - 70.0 // dB - professional studio floor
73+ gateThresholdMaxDB = - 25.0 // dB - never gate above this (would cut speech)
74+ gateCrestFactorThreshold = 20.0 // dB - above this, use peak reference instead of RMS
75+ gateHeadroomClean = 3.0 // dB - headroom above reference for clean recordings
76+ gateHeadroomModerate = 6.0 // dB - headroom for moderate noise
77+ gateHeadroomNoisy = 10.0 // dB - headroom for noisy recordings
78+
79+ // Ratio: based on LRA (loudness range)
80+ gateLRAWide = 15.0 // LU - above: wide dynamics, gentle ratio
81+ gateLRAModerate = 10.0 // LU - above: moderate dynamics
82+ gateRatioGentle = 1.5 // For wide LRA (preserve expression)
83+ gateRatioMod = 2.0 // For moderate LRA
84+ gateRatioTight = 2.5 // For narrow LRA (tighter control OK)
85+
86+ // Attack: based on MaxDifference (transient indicator)
87+ // Fast transients need fast attack to avoid clipping word onsets
88+ gateMaxDiffHigh = 25.0 // % - sharp transients
89+ gateMaxDiffMod = 10.0 // % - moderate transients
90+ gateAttackFast = 7.0 // ms - for sharp transients
91+ gateAttackMod = 12.0 // ms - standard speech
92+ gateAttackSlow = 17.0 // ms - soft onsets
93+ gateFluxDynamicThres = 0.05 // SpectralFlux threshold for dynamic content
94+
95+ // Release: based on flux, ZCR, and noise character
96+ // No hold parameter exists - release must compensate
97+ gateFluxLow = 0.01 // Low flux threshold
98+ gateZCRLow = 0.08 // Low zero crossings rate
99+ gateFluxHigh = 0.05 // High flux threshold
100+ gateReleaseSustained = 400 // ms - for sustained speech
101+ gateReleaseMod = 300 // ms - standard
102+ gateReleaseDynamic = 200 // ms - for dynamic content
103+ gateReleaseHoldComp = 50 // ms - compensation for lack of hold parameter
104+ gateReleaseTonalComp = 75 // ms - extra for tonal bleed (hide pump)
105+ gateReleaseMin = 150 // ms - minimum release
106+ gateReleaseMax = 500 // ms - maximum release
107+
108+ // Range: based on silence entropy and noise floor
109+ // Tonal noise sounds worse when hard-gated - gentler range hides pumping
110+ gateEntropyTonal = 0.3 // Below: tonal noise (bleed/hum)
111+ gateEntropyMixed = 0.6 // Below: mixed noise
112+ gateRangeTonalDB = - 16 // dB - gentle for tonal noise
113+ gateRangeMixedDB = - 21 // dB - moderate for mixed
114+ gateRangeBroadbandDB = - 27 // dB - aggressive for broadband
115+ gateRangeCleanBoost = - 6 // dB - extra depth for very clean
116+ gateRangeMinDB = - 36 // dB - minimum (deepest)
117+ gateRangeMaxDB = - 12 // dB - maximum (gentlest)
118+
119+ // Knee: based on spectral crest
120+ gateSpectralCrestHigh = 35.0 // High crest threshold
121+ gateSpectralCrestMod = 20.0 // Moderate crest threshold
122+ gateKneeSoft = 5.0 // For dynamic content with prominent peaks
123+ gateKneeMod = 3.0 // Standard
124+ gateKneeSharp = 2.0 // For less dynamic content
125+
126+ // Detection: based on silence entropy and crest factor
127+ gateSilenceCrestThreshold = 25.0 // dB - above: use RMS (noise has spikes)
128+ gateEntropyClean = 0.7 // Above: can use peak detection
73129
74130 // Noise floor quality thresholds
75131 noiseFloorClean = - 60.0 // dBFS - very clean recording
@@ -584,31 +640,231 @@ func tuneDeesserCentroidOnly(config *FilterChainConfig, measurements *AudioMeasu
584640 }
585641}
586642
587- // tuneGateThreshold adapts noise gate based on pre-calculated threshold from Pass 1.
643+ // tuneGate adapts all noise gate parameters based on Pass 1 measurements .
588644//
589- // The SuggestedGateThreshold is calculated during analysis using actual measurements:
590- // - Noise floor (measured from silence regions or RMS trough)
591- // - Quiet speech level (RMS trough - quietest segments with speech)
592- // - The threshold is placed adaptively between noise and quiet speech
593- //
594- // This function applies safety bounds for extreme cases.
595- func tuneGateThreshold (config * FilterChainConfig , measurements * AudioMeasurements ) {
596- // Use the data-driven threshold calculated during Pass 1 analysis
597- // SuggestedGateThreshold is already in linear amplitude
598- if measurements .SuggestedGateThreshold > 0 {
599- config .GateThreshold = measurements .SuggestedGateThreshold
645+ // Parameters are tuned as follows:
646+ // - Threshold: above silence peak (if crest > 20dB) or noise floor, with headroom
647+ // - Ratio: based on LRA (wide dynamics = gentle ratio)
648+ // - Attack: based on MaxDifference (fast transients = fast attack to avoid clipping onsets)
649+ // - Release: based on flux/ZCR + hold compensation (no hold param in agate)
650+ // - Range: based on silence entropy (tonal noise = gentle range to hide pumping)
651+ // - Knee: based on spectral crest (dynamic content = soft knee)
652+ // - Detection: RMS for tonal bleed/noisy silence, peak for clean recordings
653+ // - Makeup: 1.0 (loudness normalisation handles level compensation)
654+ func tuneGate (config * FilterChainConfig , measurements * AudioMeasurements ) {
655+ // Determine if we have tonal noise (likely bleed/hum)
656+ var tonalNoise bool
657+ var silenceEntropy , silenceCrest , silencePeak float64
658+
659+ if measurements .NoiseProfile != nil {
660+ silenceEntropy = measurements .NoiseProfile .Entropy
661+ silenceCrest = measurements .NoiseProfile .CrestFactor
662+ silencePeak = measurements .NoiseProfile .PeakLevel
663+ tonalNoise = silenceEntropy < gateEntropyTonal
664+ }
665+
666+ // 1. Threshold: sits above noise/bleed peaks, below quiet speech
667+ config .GateThreshold = calculateGateThreshold (
668+ measurements .NoiseFloor ,
669+ silencePeak ,
670+ silenceCrest ,
671+ )
672+
673+ // 2. Ratio: based on LRA (loudness range)
674+ config .GateRatio = calculateGateRatio (measurements .InputLRA )
675+
676+ // 3. Attack: based on MaxDifference (transient indicator)
677+ config .GateAttack = calculateGateAttack (
678+ measurements .MaxDifference ,
679+ measurements .SpectralFlux ,
680+ )
681+
682+ // 4. Release: based on flux, ZCR, and noise character
683+ config .GateRelease = calculateGateRelease (
684+ measurements .SpectralFlux ,
685+ measurements .ZeroCrossingsRate ,
686+ tonalNoise ,
687+ )
688+
689+ // 5. Range: based on silence entropy and noise floor
690+ config .GateRange = calculateGateRange (
691+ silenceEntropy ,
692+ measurements .NoiseFloor ,
693+ )
694+
695+ // 6. Knee: based on spectral crest
696+ config .GateKnee = calculateGateKnee (measurements .SpectralCrest )
697+
698+ // 7. Detection: RMS for bleed, peak for clean
699+ config .GateDetection = calculateGateDetection (silenceEntropy , silenceCrest )
700+
701+ // 8. Makeup: 1.0 (loudness normalisation handles it)
702+ config .GateMakeup = 1.0
703+ }
704+
705+ // calculateGateThreshold determines the gate threshold based on noise characteristics.
706+ // When silence has high crest factor (transient spikes), use peak as reference.
707+ // Otherwise use noise floor. Add headroom based on noise severity.
708+ func calculateGateThreshold (noiseFloorDB , silencePeakDB , silenceCrestDB float64 ) float64 {
709+ var referenceDB float64
710+
711+ // Determine reference level based on crest factor
712+ if silenceCrestDB > gateCrestFactorThreshold && silencePeakDB != 0 {
713+ // Noise has transients (e.g., bleed) - use peak as reference
714+ referenceDB = silencePeakDB
600715 } else {
601- // Fallback if SuggestedGateThreshold not available (shouldn't happen)
602- // Use a conservative threshold: noise floor + 6dB
603- gateThresholdDB := measurements .NoiseFloor + 6.0
604- config .GateThreshold = dbToLinear (gateThresholdDB )
716+ // Stable noise - use floor
717+ referenceDB = noiseFloorDB
718+ }
719+
720+ // Determine headroom based on reference level (higher = more noisy = more headroom)
721+ var headroomDB float64
722+ switch {
723+ case referenceDB < - 70 :
724+ // Very clean - tight threshold safe
725+ headroomDB = gateHeadroomClean
726+ case referenceDB < - 50 :
727+ // Moderate - standard headroom
728+ headroomDB = gateHeadroomModerate
729+ default :
730+ // Noisy - generous headroom to avoid cutting quiet speech
731+ headroomDB = gateHeadroomNoisy
732+ }
733+
734+ thresholdDB := referenceDB + headroomDB
735+
736+ // Safety limits
737+ thresholdDB = clamp (thresholdDB , gateThresholdMinDB , gateThresholdMaxDB )
738+
739+ return dbToLinear (thresholdDB )
740+ }
741+
742+ // calculateGateRatio determines ratio based on LRA (loudness range).
743+ // Wide dynamics = gentle ratio to preserve expression.
744+ func calculateGateRatio (lra float64 ) float64 {
745+ switch {
746+ case lra > gateLRAWide :
747+ return gateRatioGentle // Wide dynamics - preserve expression
748+ case lra > gateLRAModerate :
749+ return gateRatioMod // Moderate dynamics
750+ default :
751+ return gateRatioTight // Narrow dynamics - tighter control OK
752+ }
753+ }
754+
755+ // calculateGateAttack determines attack time based on transient characteristics.
756+ // Fast transients need fast attack to avoid clipping word onsets.
757+ // MaxDifference is expressed as a fraction (0.0-1.0), convert to percentage.
758+ func calculateGateAttack (maxDiff , spectralFlux float64 ) float64 {
759+ // MaxDifference is 0.0-1.0 fraction, convert to percentage for comparison
760+ maxDiffPercent := maxDiff * 100.0
761+
762+ var baseAttack float64
763+ switch {
764+ case maxDiffPercent > gateMaxDiffHigh :
765+ baseAttack = gateAttackFast // Sharp transients - fast opening
766+ case maxDiffPercent > gateMaxDiffMod :
767+ baseAttack = gateAttackMod // Standard speech
768+ default :
769+ baseAttack = gateAttackSlow // Soft onsets - gentler OK
770+ }
771+
772+ // Bias faster for dynamic content
773+ if spectralFlux > gateFluxDynamicThres {
774+ baseAttack *= 0.8
775+ }
776+
777+ return clamp (baseAttack , 5.0 , 25.0 )
778+ }
779+
780+ // calculateGateRelease determines release time based on content and noise character.
781+ // Compensates for lack of hold parameter by extending release.
782+ // Tonal bleed needs slower release to hide the pumping artifact.
783+ func calculateGateRelease (spectralFlux , zcr float64 , tonalNoise bool ) float64 {
784+ var baseRelease float64
785+
786+ switch {
787+ case spectralFlux < gateFluxLow && zcr < gateZCRLow :
788+ // Sustained speech with low activity
789+ baseRelease = gateReleaseSustained
790+ case spectralFlux > gateFluxHigh :
791+ // Dynamic content - more responsive
792+ baseRelease = gateReleaseDynamic
793+ default :
794+ baseRelease = gateReleaseMod
795+ }
796+
797+ // Compensate for lack of hold parameter
798+ baseRelease += gateReleaseHoldComp
799+
800+ // Tonal bleed needs slower release to hide pumping
801+ if tonalNoise {
802+ baseRelease += gateReleaseTonalComp
605803 }
606804
607- // Safety limits for extreme cases
608- minThresholdLinear := dbToLinear (gateThresholdMinDB )
609- maxThresholdLinear := dbToLinear (gateThresholdMaxDB )
805+ return clamp (baseRelease , float64 (gateReleaseMin ), float64 (gateReleaseMax ))
806+ }
807+
808+ // calculateGateRange determines maximum attenuation depth based on noise character.
809+ // Tonal noise (bleed, hum) sounds worse when hard-gated - use gentler range.
810+ // Broadband noise can be gated more aggressively.
811+ func calculateGateRange (silenceEntropy , noiseFloorDB float64 ) float64 {
812+ var rangeDB float64
813+
814+ switch {
815+ case silenceEntropy < gateEntropyTonal :
816+ rangeDB = gateRangeTonalDB // Tonal - gentle
817+ case silenceEntropy < gateEntropyMixed :
818+ rangeDB = gateRangeMixedDB // Mixed - moderate
819+ default :
820+ rangeDB = gateRangeBroadbandDB // Broadband - aggressive
821+ }
822+
823+ // Can go deeper if very clean recording
824+ if noiseFloorDB < - 70 {
825+ rangeDB += gateRangeCleanBoost // More negative = deeper
826+ }
610827
611- config .GateThreshold = clamp (config .GateThreshold , minThresholdLinear , maxThresholdLinear )
828+ rangeDB = clamp (rangeDB , float64 (gateRangeMinDB ), float64 (gateRangeMaxDB ))
829+
830+ return dbToLinear (rangeDB )
831+ }
832+
833+ // calculateGateKnee determines knee softness based on spectral crest.
834+ // Dynamic content with prominent peaks benefits from softer knee.
835+ func calculateGateKnee (spectralCrest float64 ) float64 {
836+ switch {
837+ case spectralCrest > gateSpectralCrestHigh :
838+ return gateKneeSoft // Dynamic - soft engagement
839+ case spectralCrest > gateSpectralCrestMod :
840+ return gateKneeMod // Standard
841+ default :
842+ return gateKneeSharp // Less dynamic - sharper OK
843+ }
844+ }
845+
846+ // calculateGateDetection determines whether to use RMS or peak detection.
847+ // RMS is safer for speech and handles tonal bleed better.
848+ // Peak provides tighter tracking for very clean recordings.
849+ func calculateGateDetection (silenceEntropy , silenceCrestDB float64 ) string {
850+ // Tonal noise or high crest in silence - use RMS
851+ if silenceEntropy < gateEntropyTonal || silenceCrestDB > gateSilenceCrestThreshold {
852+ return "rms"
853+ }
854+
855+ // Very clean with low crest - can use peak for tighter tracking
856+ if silenceEntropy > gateEntropyClean && silenceCrestDB < 15 {
857+ return "peak"
858+ }
859+
860+ // Default: RMS is safer for speech
861+ return "rms"
862+ }
863+
864+ // tuneGateThreshold is deprecated - use tuneGate instead.
865+ // Kept for backwards compatibility during transition.
866+ func tuneGateThreshold (config * FilterChainConfig , measurements * AudioMeasurements ) {
867+ tuneGate (config , measurements )
612868}
613869
614870// tuneCompression adapts dynamics processing based on:
0 commit comments