Skip to content

Commit 4c5ef32

Browse files
committed
feat(processor): implement comprehensive adaptive gate tuning
- Add GateDetection field to FilterChainConfig for RMS/peak selection
- Implement tuneGate() covering 8 gate parameters via 7 calculation functions (makeup is fixed at 1.0):
  - calculateGateThreshold: uses silence peak (high crest) or floor + headroom
  - calculateGateRatio: 1.5-2.5 based on LRA (preserves expression)
  - calculateGateAttack: 5-17ms based on MaxDifference (protects transients)
  - calculateGateRelease: 150-500ms with hold/tonal compensation
  - calculateGateRange: -16 to -27dB based on silence entropy
  - calculateGateKnee: 2-5 based on spectral crest
  - calculateGateDetection: RMS for tonal/bleed, peak for clean
- Add ~60 gate tuning constants for data-driven adaptation
- Update buildAgateFilter() with configurable detection and higher precision
- Enhance formatAgateFilter() to show detection mode and measurement rationale
- Replace TestTuneGateThreshold with comprehensive TestTuneGate
- Enable gate by default in DefaultFilterConfig

Achieves 15-26dB noise floor reduction in silence gaps while preserving speech dynamics through measurement-driven parameter selection.
1 parent 2f0fb9b commit 4c5ef32

6 files changed

Lines changed: 598 additions & 157 deletions

File tree

internal/logging/report.go

Lines changed: 37 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -853,13 +853,47 @@ func formatAgateFilter(f *os.File, cfg *processor.FilterChainConfig, m *processo
853853
thresholdDB := linearToDb(cfg.GateThreshold)
854854
rangeDB := linearToDb(cfg.GateRange)
855855

856-
fmt.Fprintf(f, "%sagate: threshold %.1f dB, ratio %.1f:1\n", prefix, thresholdDB, cfg.GateRatio)
856+
detection := cfg.GateDetection
857+
if detection == "" {
858+
detection = "rms"
859+
}
860+
861+
fmt.Fprintf(f, "%sagate: threshold %.1f dB, ratio %.1f:1, detection %s\n", prefix, thresholdDB, cfg.GateRatio, detection)
857862
fmt.Fprintf(f, " Timing: attack %.0fms, release %.0fms\n", cfg.GateAttack, cfg.GateRelease)
858863
fmt.Fprintf(f, " Range: %.1f dB reduction, knee %.1f\n", rangeDB, cfg.GateKnee)
859864

860-
// Show rationale
865+
// Show rationale based on measurements
861866
if m != nil {
862-
fmt.Fprintf(f, " Rationale: noise floor %.1f dB + margin\n", m.NoiseFloor)
867+
var rationale []string
868+
869+
// Threshold rationale
870+
if m.NoiseProfile != nil && m.NoiseProfile.CrestFactor > 20 {
871+
rationale = append(rationale, fmt.Sprintf("peak ref %.1f dB (crest %.1f dB)", m.NoiseProfile.PeakLevel, m.NoiseProfile.CrestFactor))
872+
} else {
873+
rationale = append(rationale, fmt.Sprintf("noise floor %.1f dB", m.NoiseFloor))
874+
}
875+
876+
// Ratio rationale
877+
if m.InputLRA > 0 {
878+
lraType := "moderate"
879+
if m.InputLRA > 15 {
880+
lraType = "wide"
881+
} else if m.InputLRA < 10 {
882+
lraType = "narrow"
883+
}
884+
rationale = append(rationale, fmt.Sprintf("LRA %.1f LU (%s)", m.InputLRA, lraType))
885+
}
886+
887+
// Noise character for range/detection
888+
if m.NoiseProfile != nil {
889+
if m.NoiseProfile.Entropy < 0.3 {
890+
rationale = append(rationale, "tonal noise detected")
891+
}
892+
}
893+
894+
if len(rationale) > 0 {
895+
fmt.Fprintf(f, " Rationale: %s\n", strings.Join(rationale, ", "))
896+
}
863897
}
864898
}
865899

internal/processor/adaptive.go

Lines changed: 279 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -67,9 +67,65 @@ const (
6767
deessIntensityMax = 0.8 // Maximum intensity limit
6868
deessIntensityMin = 0.3 // Minimum before disabling
6969

70-
// Gate threshold safety bounds (applied after data-driven calculation)
71-
gateThresholdMinDB = -70.0 // dB - professional studio floor
72-
gateThresholdMaxDB = -25.0 // dB - never gate above this (would cut speech)
70+
// Gate tuning constants
71+
// Threshold calculation: sits above noise/bleed peaks, below quiet speech
72+
gateThresholdMinDB = -70.0 // dB - professional studio floor
73+
gateThresholdMaxDB = -25.0 // dB - never gate above this (would cut speech)
74+
gateCrestFactorThreshold = 20.0 // dB - above this, use peak reference instead of RMS
75+
gateHeadroomClean = 3.0 // dB - headroom above reference for clean recordings
76+
gateHeadroomModerate = 6.0 // dB - headroom for moderate noise
77+
gateHeadroomNoisy = 10.0 // dB - headroom for noisy recordings
78+
79+
// Ratio: based on LRA (loudness range)
80+
gateLRAWide = 15.0 // LU - above: wide dynamics, gentle ratio
81+
gateLRAModerate = 10.0 // LU - above: moderate dynamics
82+
gateRatioGentle = 1.5 // For wide LRA (preserve expression)
83+
gateRatioMod = 2.0 // For moderate LRA
84+
gateRatioTight = 2.5 // For narrow LRA (tighter control OK)
85+
86+
// Attack: based on MaxDifference (transient indicator)
87+
// Fast transients need fast attack to avoid clipping word onsets
88+
gateMaxDiffHigh = 25.0 // % - sharp transients
89+
gateMaxDiffMod = 10.0 // % - moderate transients
90+
gateAttackFast = 7.0 // ms - for sharp transients
91+
gateAttackMod = 12.0 // ms - standard speech
92+
gateAttackSlow = 17.0 // ms - soft onsets
93+
gateFluxDynamicThres = 0.05 // SpectralFlux threshold for dynamic content
94+
95+
// Release: based on flux, ZCR, and noise character
96+
// No hold parameter exists - release must compensate
97+
gateFluxLow = 0.01 // Low flux threshold
98+
gateZCRLow = 0.08 // Low zero crossings rate
99+
gateFluxHigh = 0.05 // High flux threshold
100+
gateReleaseSustained = 400 // ms - for sustained speech
101+
gateReleaseMod = 300 // ms - standard
102+
gateReleaseDynamic = 200 // ms - for dynamic content
103+
gateReleaseHoldComp = 50 // ms - compensation for lack of hold parameter
104+
gateReleaseTonalComp = 75 // ms - extra for tonal bleed (hide pump)
105+
gateReleaseMin = 150 // ms - minimum release
106+
gateReleaseMax = 500 // ms - maximum release
107+
108+
// Range: based on silence entropy and noise floor
109+
// Tonal noise sounds worse when hard-gated - gentler range hides pumping
110+
gateEntropyTonal = 0.3 // Below: tonal noise (bleed/hum)
111+
gateEntropyMixed = 0.6 // Below: mixed noise
112+
gateRangeTonalDB = -16 // dB - gentle for tonal noise
113+
gateRangeMixedDB = -21 // dB - moderate for mixed
114+
gateRangeBroadbandDB = -27 // dB - aggressive for broadband
115+
gateRangeCleanBoost = -6 // dB - extra depth for very clean
116+
gateRangeMinDB = -36 // dB - minimum (deepest)
117+
gateRangeMaxDB = -12 // dB - maximum (gentlest)
118+
119+
// Knee: based on spectral crest
120+
gateSpectralCrestHigh = 35.0 // High crest threshold
121+
gateSpectralCrestMod = 20.0 // Moderate crest threshold
122+
gateKneeSoft = 5.0 // For dynamic content with prominent peaks
123+
gateKneeMod = 3.0 // Standard
124+
gateKneeSharp = 2.0 // For less dynamic content
125+
126+
// Detection: based on silence entropy and crest factor
127+
gateSilenceCrestThreshold = 25.0 // dB - above: use RMS (noise has spikes)
128+
gateEntropyClean = 0.7 // Above: can use peak detection
73129

74130
// Noise floor quality thresholds
75131
noiseFloorClean = -60.0 // dBFS - very clean recording
@@ -584,31 +640,231 @@ func tuneDeesserCentroidOnly(config *FilterChainConfig, measurements *AudioMeasu
584640
}
585641
}
586642

587-
// tuneGateThreshold adapts noise gate based on pre-calculated threshold from Pass 1.
643+
// tuneGate adapts all noise gate parameters based on Pass 1 measurements.
588644
//
589-
// The SuggestedGateThreshold is calculated during analysis using actual measurements:
590-
// - Noise floor (measured from silence regions or RMS trough)
591-
// - Quiet speech level (RMS trough - quietest segments with speech)
592-
// - The threshold is placed adaptively between noise and quiet speech
593-
//
594-
// This function applies safety bounds for extreme cases.
595-
func tuneGateThreshold(config *FilterChainConfig, measurements *AudioMeasurements) {
596-
// Use the data-driven threshold calculated during Pass 1 analysis
597-
// SuggestedGateThreshold is already in linear amplitude
598-
if measurements.SuggestedGateThreshold > 0 {
599-
config.GateThreshold = measurements.SuggestedGateThreshold
645+
// Parameters are tuned as follows:
646+
// - Threshold: above silence peak (if crest > 20dB) or noise floor, with headroom
647+
// - Ratio: based on LRA (wide dynamics = gentle ratio)
648+
// - Attack: based on MaxDifference (fast transients = fast attack to avoid clipping onsets)
649+
// - Release: based on flux/ZCR + hold compensation (no hold param in agate)
650+
// - Range: based on silence entropy (tonal noise = gentle range to hide pumping)
651+
// - Knee: based on spectral crest (dynamic content = soft knee)
652+
// - Detection: RMS for tonal bleed/noisy silence, peak for clean recordings
653+
// - Makeup: 1.0 (loudness normalisation handles level compensation)
654+
func tuneGate(config *FilterChainConfig, measurements *AudioMeasurements) {
655+
// Determine if we have tonal noise (likely bleed/hum)
656+
var tonalNoise bool
657+
var silenceEntropy, silenceCrest, silencePeak float64
658+
659+
if measurements.NoiseProfile != nil {
660+
silenceEntropy = measurements.NoiseProfile.Entropy
661+
silenceCrest = measurements.NoiseProfile.CrestFactor
662+
silencePeak = measurements.NoiseProfile.PeakLevel
663+
tonalNoise = silenceEntropy < gateEntropyTonal
664+
}
665+
666+
// 1. Threshold: sits above noise/bleed peaks, below quiet speech
667+
config.GateThreshold = calculateGateThreshold(
668+
measurements.NoiseFloor,
669+
silencePeak,
670+
silenceCrest,
671+
)
672+
673+
// 2. Ratio: based on LRA (loudness range)
674+
config.GateRatio = calculateGateRatio(measurements.InputLRA)
675+
676+
// 3. Attack: based on MaxDifference (transient indicator)
677+
config.GateAttack = calculateGateAttack(
678+
measurements.MaxDifference,
679+
measurements.SpectralFlux,
680+
)
681+
682+
// 4. Release: based on flux, ZCR, and noise character
683+
config.GateRelease = calculateGateRelease(
684+
measurements.SpectralFlux,
685+
measurements.ZeroCrossingsRate,
686+
tonalNoise,
687+
)
688+
689+
// 5. Range: based on silence entropy and noise floor
690+
config.GateRange = calculateGateRange(
691+
silenceEntropy,
692+
measurements.NoiseFloor,
693+
)
694+
695+
// 6. Knee: based on spectral crest
696+
config.GateKnee = calculateGateKnee(measurements.SpectralCrest)
697+
698+
// 7. Detection: RMS for bleed, peak for clean
699+
config.GateDetection = calculateGateDetection(silenceEntropy, silenceCrest)
700+
701+
// 8. Makeup: 1.0 (loudness normalisation handles it)
702+
config.GateMakeup = 1.0
703+
}
704+
705+
// calculateGateThreshold determines the gate threshold based on noise characteristics.
706+
// When silence has high crest factor (transient spikes), use peak as reference.
707+
// Otherwise use noise floor. Add headroom based on noise severity.
708+
func calculateGateThreshold(noiseFloorDB, silencePeakDB, silenceCrestDB float64) float64 {
709+
var referenceDB float64
710+
711+
// Determine reference level based on crest factor
712+
if silenceCrestDB > gateCrestFactorThreshold && silencePeakDB != 0 {
713+
// Noise has transients (e.g., bleed) - use peak as reference
714+
referenceDB = silencePeakDB
600715
} else {
601-
// Fallback if SuggestedGateThreshold not available (shouldn't happen)
602-
// Use a conservative threshold: noise floor + 6dB
603-
gateThresholdDB := measurements.NoiseFloor + 6.0
604-
config.GateThreshold = dbToLinear(gateThresholdDB)
716+
// Stable noise - use floor
717+
referenceDB = noiseFloorDB
718+
}
719+
720+
// Determine headroom based on reference level (higher = more noisy = more headroom)
721+
var headroomDB float64
722+
switch {
723+
case referenceDB < -70:
724+
// Very clean - tight threshold safe
725+
headroomDB = gateHeadroomClean
726+
case referenceDB < -50:
727+
// Moderate - standard headroom
728+
headroomDB = gateHeadroomModerate
729+
default:
730+
// Noisy - generous headroom to avoid cutting quiet speech
731+
headroomDB = gateHeadroomNoisy
732+
}
733+
734+
thresholdDB := referenceDB + headroomDB
735+
736+
// Safety limits
737+
thresholdDB = clamp(thresholdDB, gateThresholdMinDB, gateThresholdMaxDB)
738+
739+
return dbToLinear(thresholdDB)
740+
}
741+
742+
// calculateGateRatio determines ratio based on LRA (loudness range).
743+
// Wide dynamics = gentle ratio to preserve expression.
744+
func calculateGateRatio(lra float64) float64 {
745+
switch {
746+
case lra > gateLRAWide:
747+
return gateRatioGentle // Wide dynamics - preserve expression
748+
case lra > gateLRAModerate:
749+
return gateRatioMod // Moderate dynamics
750+
default:
751+
return gateRatioTight // Narrow dynamics - tighter control OK
752+
}
753+
}
754+
755+
// calculateGateAttack determines attack time based on transient characteristics.
756+
// Fast transients need fast attack to avoid clipping word onsets.
757+
// MaxDifference is expressed as a fraction (0.0-1.0), convert to percentage.
758+
func calculateGateAttack(maxDiff, spectralFlux float64) float64 {
759+
// MaxDifference is 0.0-1.0 fraction, convert to percentage for comparison
760+
maxDiffPercent := maxDiff * 100.0
761+
762+
var baseAttack float64
763+
switch {
764+
case maxDiffPercent > gateMaxDiffHigh:
765+
baseAttack = gateAttackFast // Sharp transients - fast opening
766+
case maxDiffPercent > gateMaxDiffMod:
767+
baseAttack = gateAttackMod // Standard speech
768+
default:
769+
baseAttack = gateAttackSlow // Soft onsets - gentler OK
770+
}
771+
772+
// Bias faster for dynamic content
773+
if spectralFlux > gateFluxDynamicThres {
774+
baseAttack *= 0.8
775+
}
776+
777+
return clamp(baseAttack, 5.0, 25.0)
778+
}
779+
780+
// calculateGateRelease determines release time based on content and noise character.
781+
// Compensates for lack of hold parameter by extending release.
782+
// Tonal bleed needs slower release to hide the pumping artifact.
783+
func calculateGateRelease(spectralFlux, zcr float64, tonalNoise bool) float64 {
784+
var baseRelease float64
785+
786+
switch {
787+
case spectralFlux < gateFluxLow && zcr < gateZCRLow:
788+
// Sustained speech with low activity
789+
baseRelease = gateReleaseSustained
790+
case spectralFlux > gateFluxHigh:
791+
// Dynamic content - more responsive
792+
baseRelease = gateReleaseDynamic
793+
default:
794+
baseRelease = gateReleaseMod
795+
}
796+
797+
// Compensate for lack of hold parameter
798+
baseRelease += gateReleaseHoldComp
799+
800+
// Tonal bleed needs slower release to hide pumping
801+
if tonalNoise {
802+
baseRelease += gateReleaseTonalComp
605803
}
606804

607-
// Safety limits for extreme cases
608-
minThresholdLinear := dbToLinear(gateThresholdMinDB)
609-
maxThresholdLinear := dbToLinear(gateThresholdMaxDB)
805+
return clamp(baseRelease, float64(gateReleaseMin), float64(gateReleaseMax))
806+
}
807+
808+
// calculateGateRange determines maximum attenuation depth based on noise character.
809+
// Tonal noise (bleed, hum) sounds worse when hard-gated - use gentler range.
810+
// Broadband noise can be gated more aggressively.
811+
func calculateGateRange(silenceEntropy, noiseFloorDB float64) float64 {
812+
var rangeDB float64
813+
814+
switch {
815+
case silenceEntropy < gateEntropyTonal:
816+
rangeDB = gateRangeTonalDB // Tonal - gentle
817+
case silenceEntropy < gateEntropyMixed:
818+
rangeDB = gateRangeMixedDB // Mixed - moderate
819+
default:
820+
rangeDB = gateRangeBroadbandDB // Broadband - aggressive
821+
}
822+
823+
// Can go deeper if very clean recording
824+
if noiseFloorDB < -70 {
825+
rangeDB += gateRangeCleanBoost // More negative = deeper
826+
}
610827

611-
config.GateThreshold = clamp(config.GateThreshold, minThresholdLinear, maxThresholdLinear)
828+
rangeDB = clamp(rangeDB, float64(gateRangeMinDB), float64(gateRangeMaxDB))
829+
830+
return dbToLinear(rangeDB)
831+
}
832+
833+
// calculateGateKnee determines knee softness based on spectral crest.
834+
// Dynamic content with prominent peaks benefits from softer knee.
835+
func calculateGateKnee(spectralCrest float64) float64 {
836+
switch {
837+
case spectralCrest > gateSpectralCrestHigh:
838+
return gateKneeSoft // Dynamic - soft engagement
839+
case spectralCrest > gateSpectralCrestMod:
840+
return gateKneeMod // Standard
841+
default:
842+
return gateKneeSharp // Less dynamic - sharper OK
843+
}
844+
}
845+
846+
// calculateGateDetection determines whether to use RMS or peak detection.
847+
// RMS is safer for speech and handles tonal bleed better.
848+
// Peak provides tighter tracking for very clean recordings.
849+
func calculateGateDetection(silenceEntropy, silenceCrestDB float64) string {
850+
// Tonal noise or high crest in silence - use RMS
851+
if silenceEntropy < gateEntropyTonal || silenceCrestDB > gateSilenceCrestThreshold {
852+
return "rms"
853+
}
854+
855+
// Very clean with low crest - can use peak for tighter tracking
856+
if silenceEntropy > gateEntropyClean && silenceCrestDB < 15 {
857+
return "peak"
858+
}
859+
860+
// Default: RMS is safer for speech
861+
return "rms"
862+
}
863+
864+
// tuneGateThreshold forwards to tuneGate, which sets every gate parameter
// rather than only the threshold.
//
// Deprecated: use tuneGate instead.
865+
// Kept for backwards compatibility during transition.
866+
func tuneGateThreshold(config *FilterChainConfig, measurements *AudioMeasurements) {
867+
// Pure delegation: both arguments are passed through unchanged.
tuneGate(config, measurements)
612868
}
613869

614870
// tuneCompression adapts dynamics processing based on:

0 commit comments

Comments
 (0)