@@ -716,57 +716,89 @@ func tuneDC1Declick(config *FilterChainConfig, measurements *AudioMeasurements)
716716 config .DC1DeclickReason += "; +threshold (compressed)"
717717 }
718718
719+ // Prefer speech-specific centroid for window sizing
720+ centroid := measurements .SpectralCentroid
721+ if measurements .SpeechProfile != nil {
722+ centroid = preferSpeechMetric (centroid , measurements .SpeechProfile .SpectralCentroid )
723+ }
724+
719725 // Window adaptation based on content type
720726 switch {
721- case measurements . SpectralCentroid > dc1CentroidFast :
727+ case centroid > dc1CentroidFast :
722728 // Fast speech/plosives - shorter window preserves transients
723729 config .DC1DeclickWindow = dc1WindowShort
724- case measurements . SpectralCentroid < dc1CentroidSlow :
730+ case centroid < dc1CentroidSlow :
725731 // Bass-heavy content - longer window for better LF reconstruction
726732 config .DC1DeclickWindow = dc1WindowLong
727733 default :
728734 config .DC1DeclickWindow = dc1WindowDefault
729735 }
730736}
731737
732- // tuneNoiseRemove configures NoiseRemove compand parameters for residual suppression.
733- // The anlmdn parameters (strength, patch, research, smooth) are kept constant from spike validation.
734- //
735- // POST-ANLMDN COMPAND STRATEGY (2024-12-24):
736- // Since anlmdn now handles the heavy noise reduction (achieving "digital black" for clean
737- // sources and 37+ dB reduction for noisy sources), compand's role is now:
738- // - Residual noise suppression in silence regions
739- // - Breath noise attenuation between speech
740- // - NOT primary noise reduction
738+ // tuneNoiseRemove adjusts compand parameters based on measured noise floor.
739+ // Uses silence region measurements for accurate noise characterisation.
741740//
742- // Spike testing (mcompand-spike.sh at 1500s) validated these settings:
743- // - Fixed 6 dB expansion: provides ~5-6 dB breath/residual attenuation
744- // - Fixed -55 dB threshold: catches breaths without affecting speech
745- // - Single-band compand: more transparent than mcompand (0% spectral change )
741+ // The anlmdn parameters (strength, patch, research, smooth) are kept constant from spike validation.
742+ // Compand parameters adapt to the measured noise floor:
743+ // - Threshold: 5dB above noise floor ( catches breaths but not speech)
744+ // - Expansion: scales with noise severity (gentle for clean, aggressive for noisy )
746745//
747746// anlmdn remains constant because spike testing validated these parameters:
748747// - strength: 0.00001 (minimum)
749748// - patch: 6ms (context window)
750749// - research: 5.8ms (search window)
751750// - smooth: 11 (weight smoothing)
752- func tuneNoiseRemove (config * FilterChainConfig , measurements * AudioMeasurements ) {
751+ func tuneNoiseRemove (config * FilterChainConfig , m * AudioMeasurements ) {
753752 if ! config .NoiseRemoveEnabled {
754753 return
755754 }
756755
757- // Fixed compand parameters validated in spike testing (mcompand-spike.sh )
758- // These are intentionally NOT adaptive — anlmdn handles the adaptive part
759- const (
760- compandThreshold = - 55.0 // dB - catches breaths without affecting speech
761- compandExpansion = 6.0 // dB - gentle push for residual/breath attenuation
762- )
756+ // Default values (fallback if no noise profile )
757+ threshold := - 55.0
758+ expansion := 6.0
759+
760+ if m . NoiseProfile != nil && m . NoiseProfile . MeasuredNoiseFloor < 0 {
761+ noiseFloor := m . NoiseProfile . MeasuredNoiseFloor
763762
764- config .NoiseRemoveCompandThreshold = compandThreshold
765- config .NoiseRemoveCompandExpansion = compandExpansion
763+ // Threshold: 5dB above noise floor (catches breaths but not speech)
764+ threshold = noiseFloor + 5.0
765+ // Clamp to reasonable range
766+ threshold = clamp (threshold , - 70.0 , - 40.0 )
767+
768+ // Expansion: scale with noise severity
769+ expansion = scaleExpansion (noiseFloor )
770+ }
771+
772+ config .NoiseRemoveCompandThreshold = threshold
773+ config .NoiseRemoveCompandExpansion = expansion
766774
767775 // attack, decay, knee stay constant (validated in spike testing)
768776}
769777
// preferSpeechMetric picks between a full-file measurement and its
// speech-specific counterpart.
//
// The speech value wins whenever it is strictly positive. A zero, negative or
// NaN speech value is treated as "not available" and the full-file value is
// returned unchanged.
func preferSpeechMetric(fullFile, speechProfile float64) float64 {
	// Negated form keeps NaN on the fallback path, since NaN > 0 is false.
	if !(speechProfile > 0) {
		return fullFile
	}
	return speechProfile
}
786+
// scaleExpansion maps a measured noise floor to a compand expansion depth.
// Noisier recordings get deeper expansion to suppress residuals; cleaner
// recordings get a gentler setting.
//
// Tiers are checked from noisiest to cleanest and each bound is exclusive:
//   - above -45: 12 (very noisy, aggressive)
//   - above -55: 8 (moderate noise)
//   - above -65: 6 (typical)
//   - otherwise: 4 (very clean, gentle)
//
// A NaN input matches no tier and therefore yields 4.
func scaleExpansion(noiseFloor float64) float64 {
	tiers := [...]struct {
		above float64 // exclusive lower bound of the tier
		depth float64 // expansion returned for the tier
	}{
		{above: -45.0, depth: 12.0},
		{above: -55.0, depth: 8.0},
		{above: -65.0, depth: 6.0},
	}

	for _, tier := range tiers {
		if noiseFloor > tier.above {
			return tier.depth
		}
	}
	return 4.0
}
801+
770802// tuneDeesser adapts de-esser intensity based on spectral analysis.
771803// Uses both spectral centroid (energy concentration) and rolloff (HF extension)
772804// to detect likelihood of harsh sibilance.
@@ -793,31 +825,39 @@ func tuneDeesser(config *FilterChainConfig, measurements *AudioMeasurements) {
793825
794826// tuneDeesserFull uses both centroid and rolloff for precise de-esser tuning
795827func tuneDeesserFull (config * FilterChainConfig , measurements * AudioMeasurements ) {
828+ // Prefer speech-specific measurements for sibilance detection
829+ centroid := measurements .SpectralCentroid
830+ rolloff := measurements .SpectralRolloff
831+ if measurements .SpeechProfile != nil {
832+ centroid = preferSpeechMetric (centroid , measurements .SpeechProfile .SpectralCentroid )
833+ rolloff = preferSpeechMetric (rolloff , measurements .SpeechProfile .SpectralRolloff )
834+ }
835+
796836 // Determine baseline intensity from centroid
797837 var baseIntensity float64
798838 switch {
799- case measurements . SpectralCentroid > centroidVeryBright :
839+ case centroid > centroidVeryBright :
800840 baseIntensity = deessIntensityBright // Bright voice
801- case measurements . SpectralCentroid > centroidBright :
841+ case centroid > centroidBright :
802842 baseIntensity = deessIntensityNormal // Normal voice
803843 default :
804844 baseIntensity = deessIntensityDark // Dark voice
805845 }
806846
807847 // Refine based on spectral rolloff (HF extension)
808848 switch {
809- case measurements . SpectralRolloff < rolloffNoSibilance :
849+ case rolloff < rolloffNoSibilance :
810850 // Very limited HF content - no sibilance expected
811851 config .DeessIntensity = 0.0
812852
813- case measurements . SpectralRolloff < rolloffLimited :
853+ case rolloff < rolloffLimited :
814854 // Limited HF extension - reduce intensity
815855 config .DeessIntensity = baseIntensity * 0.7
816856 if config .DeessIntensity < deessIntensityMin {
817857 config .DeessIntensity = 0.0 // Skip if too low
818858 }
819859
820- case measurements . SpectralRolloff > rolloffExtensive :
860+ case rolloff > rolloffExtensive :
821861 // Extensive HF content - likely sibilance
822862 config .DeessIntensity = math .Min (baseIntensity * 1.2 , deessIntensityMax )
823863
@@ -1216,6 +1256,12 @@ func tuneLA2AAttack(config *FilterChainConfig, measurements *AudioMeasurements)
12161256// - Narrow LRA + low flux = compressed/monotone, faster release OK
12171257// - Warm voices (high skewness) get extra release to preserve body
12181258func tuneLA2ARelease (config * FilterChainConfig , measurements * AudioMeasurements ) {
1259+ // Prefer speech-specific flux for timing decisions
1260+ flux := measurements .SpectralFlux
1261+ if measurements .SpeechProfile != nil {
1262+ flux = preferSpeechMetric (flux , measurements .SpeechProfile .SpectralFlux )
1263+ }
1264+
12191265 // Start with standard LA-2A-style release
12201266 release := la2aReleaseStandard
12211267
@@ -1230,12 +1276,12 @@ func tuneLA2ARelease(config *FilterChainConfig, measurements *AudioMeasurements)
12301276 }
12311277
12321278 // Adjust based on spectral flux (frame-to-frame variation)
1233- if measurements . SpectralFlux > 0 {
1279+ if flux > 0 {
12341280 switch {
1235- case measurements . SpectralFlux > la2aFluxDynamic :
1281+ case flux > la2aFluxDynamic :
12361282 // Dynamic/expressive content - add release time
12371283 release = math .Max (release , la2aReleaseExpressive )
1238- case measurements . SpectralFlux < la2aFluxStatic :
1284+ case flux < la2aFluxStatic :
12391285 // Static/monotone content - can use shorter release
12401286 release = math .Min (release , la2aReleaseCompact )
12411287 }
@@ -1265,16 +1311,22 @@ func tuneLA2ARelease(config *FilterChainConfig, measurements *AudioMeasurements)
12651311// - Peaked/tonal content (high kurtosis) = gentler ratio, preserve character
12661312// - Flat/noise-like content (low kurtosis) = firmer ratio, more levelling
12671313func tuneLA2ARatio (config * FilterChainConfig , measurements * AudioMeasurements ) {
1314+ // Prefer speech-specific kurtosis for harmonic structure
1315+ kurtosis := measurements .SpectralKurtosis
1316+ if measurements .SpeechProfile != nil {
1317+ kurtosis = preferSpeechMetric (kurtosis , measurements .SpeechProfile .SpectralKurtosis )
1318+ }
1319+
12681320 // Start with LA-2A baseline ratio
12691321 ratio := la2aRatioBase
12701322
12711323 // Adjust based on spectral kurtosis (peakedness)
1272- if measurements . SpectralKurtosis > 0 {
1324+ if kurtosis > 0 {
12731325 switch {
1274- case measurements . SpectralKurtosis > la2aKurtosisHighPeak :
1326+ case kurtosis > la2aKurtosisHighPeak :
12751327 // Highly peaked harmonics - gentler ratio preserves character
12761328 ratio = la2aRatioPeaked
1277- case measurements . SpectralKurtosis < la2aKurtosisLowPeak :
1329+ case kurtosis < la2aKurtosisLowPeak :
12781330 // Flat spectrum - firmer ratio for consistent levelling
12791331 ratio = la2aRatioFlat
12801332 }
0 commit comments