Skip to content

Commit afb4385

Browse files
committed
feat(report): enhance report formatting to show input-output comparisons
- Add helper functions to format measurement comparisons with tolerance
- Replace separate "Changes from Input" section with inline comparisons
- Improve noise character reporting to show meaningful changes
- Ensure consistent analysis by downmixing to mono in both passes
- Make silence sample output show unchanged values without deltas
- Update true peak test threshold to allow for inter-sample peaks
1 parent f4d7360 commit afb4385

4 files changed

Lines changed: 110 additions & 38 deletions

File tree

internal/logging/report.go

Lines changed: 92 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,27 @@ func linearToDb(linear float64) float64 {
2121
return 20.0 * math.Log10(linear)
2222
}
2323

24+
// formatComparison describes how an output measurement relates to its input
// counterpart, for appending to a report line.
//
// It returns "(unchanged)" when output and input differ by less than half a
// unit in the last displayed decimal place, and "(was X unit)" otherwise,
// where X is input rendered with the given number of decimals.
//
// unit is emitted verbatim: it is passed to Sprintf as an argument rather than
// spliced into the format string, so units containing a percent sign (for
// example "% FS") are printed literally instead of being parsed as a verb.
func formatComparison(output, input float64, unit string, decimals int) string {
	// Use tolerance based on decimal places shown: half of the last digit.
	tolerance := math.Pow(10, -float64(decimals)) * 0.5
	if math.Abs(output-input) < tolerance {
		return "(unchanged)"
	}
	// %.*f takes the precision from the argument list, so no format string
	// has to be assembled at runtime.
	return fmt.Sprintf("(was %.*f %s)", decimals, input, unit)
}
34+
35+
// formatComparisonNoUnit is the unitless variant of the comparison note shown
// next to an output measurement.
//
// When output and input are closer than half a step of the last displayed
// decimal place the result is "(unchanged)"; otherwise it is "(was X)" with
// input printed using the requested number of decimals.
func formatComparisonNoUnit(output, input float64, decimals int) string {
	// Half of the smallest increment visible at this precision.
	halfStep := 0.5 * math.Pow(10, -float64(decimals))
	delta := math.Abs(output - input)
	if delta < halfStep {
		return "(unchanged)"
	}
	// Assemble the verb with the requested precision, e.g. "(was %.4f)".
	layout := "(was %." + fmt.Sprint(decimals) + "f)"
	return fmt.Sprintf(layout, input)
}
44+
2445
// ReportData contains all the information needed to generate an analysis report
2546
type ReportData struct {
2647
InputPath string
@@ -104,7 +125,7 @@ func GenerateReport(data ReportData) error {
104125
} else if m.NoiseProfile.Entropy < 0.9 {
105126
noiseType = "mixed"
106127
}
107-
fmt.Fprintf(f, " Entropy: %.3f (%s)\n", m.NoiseProfile.Entropy, noiseType)
128+
fmt.Fprintf(f, " Noise Character: %s (entropy %.3f)\n", noiseType, m.NoiseProfile.Entropy)
108129
}
109130
if m.NoiseProfile.ExtractionWarning != "" {
110131
fmt.Fprintf(f, " Warning: %s\n", m.NoiseProfile.ExtractionWarning)
@@ -134,33 +155,73 @@ func GenerateReport(data ReportData) error {
134155

135156
if data.Result.OutputMeasurements != nil {
136157
om := data.Result.OutputMeasurements
137-
fmt.Fprintf(f, "Integrated Loudness: %.1f LUFS\n", om.OutputI)
138-
fmt.Fprintf(f, "True Peak: %.1f dBTP\n", om.OutputTP)
139-
fmt.Fprintf(f, "Loudness Range: %.1f LU\n", om.OutputLRA)
140-
fmt.Fprintf(f, "Dynamic Range: %.1f dB\n", om.DynamicRange)
141-
fmt.Fprintf(f, "RMS Level: %.1f dBFS\n", om.RMSLevel)
142-
fmt.Fprintf(f, "Peak Level: %.1f dBFS\n", om.PeakLevel)
143-
fmt.Fprintf(f, "Spectral Centroid: %.0f Hz\n", om.SpectralCentroid)
144-
fmt.Fprintf(f, "Spectral Rolloff: %.0f Hz\n", om.SpectralRolloff)
158+
m := data.Result.Measurements // Input measurements for comparison
159+
160+
if m != nil {
161+
fmt.Fprintf(f, "Integrated Loudness: %.1f LUFS %s\n", om.OutputI, formatComparison(om.OutputI, m.InputI, "LUFS", 1))
162+
fmt.Fprintf(f, "True Peak: %.1f dBTP %s\n", om.OutputTP, formatComparison(om.OutputTP, m.InputTP, "dBTP", 1))
163+
fmt.Fprintf(f, "Loudness Range: %.1f LU %s\n", om.OutputLRA, formatComparison(om.OutputLRA, m.InputLRA, "LU", 1))
164+
fmt.Fprintf(f, "Dynamic Range: %.1f dB %s\n", om.DynamicRange, formatComparison(om.DynamicRange, m.DynamicRange, "dB", 1))
165+
fmt.Fprintf(f, "RMS Level: %.1f dBFS %s\n", om.RMSLevel, formatComparison(om.RMSLevel, m.RMSLevel, "dBFS", 1))
166+
fmt.Fprintf(f, "Peak Level: %.1f dBFS %s\n", om.PeakLevel, formatComparison(om.PeakLevel, m.PeakLevel, "dBFS", 1))
167+
fmt.Fprintf(f, "Spectral Centroid: %.0f Hz %s\n", om.SpectralCentroid, formatComparison(om.SpectralCentroid, m.SpectralCentroid, "Hz", 0))
168+
fmt.Fprintf(f, "Spectral Rolloff: %.0f Hz %s\n", om.SpectralRolloff, formatComparison(om.SpectralRolloff, m.SpectralRolloff, "Hz", 0))
169+
} else {
170+
fmt.Fprintf(f, "Integrated Loudness: %.1f LUFS\n", om.OutputI)
171+
fmt.Fprintf(f, "True Peak: %.1f dBTP\n", om.OutputTP)
172+
fmt.Fprintf(f, "Loudness Range: %.1f LU\n", om.OutputLRA)
173+
fmt.Fprintf(f, "Dynamic Range: %.1f dB\n", om.DynamicRange)
174+
fmt.Fprintf(f, "RMS Level: %.1f dBFS\n", om.RMSLevel)
175+
fmt.Fprintf(f, "Peak Level: %.1f dBFS\n", om.PeakLevel)
176+
fmt.Fprintf(f, "Spectral Centroid: %.0f Hz\n", om.SpectralCentroid)
177+
fmt.Fprintf(f, "Spectral Rolloff: %.0f Hz\n", om.SpectralRolloff)
178+
}
145179
if om.ZeroCrossingsRate > 0 {
146-
fmt.Fprintf(f, "Zero Crossings Rate: %.4f\n", om.ZeroCrossingsRate)
180+
if m != nil && m.ZeroCrossingsRate > 0 {
181+
fmt.Fprintf(f, "Zero Crossings Rate: %.4f %s\n", om.ZeroCrossingsRate, formatComparisonNoUnit(om.ZeroCrossingsRate, m.ZeroCrossingsRate, 4))
182+
} else {
183+
fmt.Fprintf(f, "Zero Crossings Rate: %.4f\n", om.ZeroCrossingsRate)
184+
}
147185
}
148186
if om.MaxDifference > 0 {
149187
maxDiffPercent := (om.MaxDifference / 32768.0) * 100.0
150-
fmt.Fprintf(f, "Max Difference: %.1f%% FS (transient indicator)\n", maxDiffPercent)
188+
if m != nil && m.MaxDifference > 0 {
189+
inputMaxDiffPercent := (m.MaxDifference / 32768.0) * 100.0
190+
fmt.Fprintf(f, "Max Difference: %.1f%% FS %s\n", maxDiffPercent, formatComparison(maxDiffPercent, inputMaxDiffPercent, "% FS", 1))
191+
} else {
192+
fmt.Fprintf(f, "Max Difference: %.1f%% FS (transient indicator)\n", maxDiffPercent)
193+
}
151194
}
152195

153196
// Show silence sample comparison (same region as Pass 1)
154197
if om.SilenceSample != nil && data.Result.Measurements != nil && data.Result.Measurements.NoiseProfile != nil {
155198
ss := om.SilenceSample
156199
np := data.Result.Measurements.NoiseProfile
157200
fmt.Fprintf(f, "Silence Sample: %.1fs at %.1fs\n", ss.Duration.Seconds(), ss.Start.Seconds())
158-
fmt.Fprintf(f, " Noise Floor: %.1f dBFS (was %.1f dBFS, %+.1f dB)\n",
159-
ss.NoiseFloor, np.MeasuredNoiseFloor, ss.NoiseFloor-np.MeasuredNoiseFloor)
160-
fmt.Fprintf(f, " Peak Level: %.1f dBFS (was %.1f dBFS, %+.1f dB)\n",
161-
ss.PeakLevel, np.PeakLevel, ss.PeakLevel-np.PeakLevel)
162-
fmt.Fprintf(f, " Crest Factor: %.1f dB (was %.1f dB)\n",
163-
ss.CrestFactor, np.CrestFactor)
201+
202+
// Noise Floor with delta if changed
203+
if math.Abs(ss.NoiseFloor-np.MeasuredNoiseFloor) < 0.05 {
204+
fmt.Fprintf(f, " Noise Floor: %.1f dBFS (unchanged)\n", ss.NoiseFloor)
205+
} else {
206+
fmt.Fprintf(f, " Noise Floor: %.1f dBFS (was %.1f dBFS, %+.1f dB)\n",
207+
ss.NoiseFloor, np.MeasuredNoiseFloor, ss.NoiseFloor-np.MeasuredNoiseFloor)
208+
}
209+
210+
// Peak Level with delta if changed
211+
if math.Abs(ss.PeakLevel-np.PeakLevel) < 0.05 {
212+
fmt.Fprintf(f, " Peak Level: %.1f dBFS (unchanged)\n", ss.PeakLevel)
213+
} else {
214+
fmt.Fprintf(f, " Peak Level: %.1f dBFS (was %.1f dBFS, %+.1f dB)\n",
215+
ss.PeakLevel, np.PeakLevel, ss.PeakLevel-np.PeakLevel)
216+
}
217+
218+
// Crest Factor
219+
if math.Abs(ss.CrestFactor-np.CrestFactor) < 0.05 {
220+
fmt.Fprintf(f, " Crest Factor: %.1f dB (unchanged)\n", ss.CrestFactor)
221+
} else {
222+
fmt.Fprintf(f, " Crest Factor: %.1f dB %s\n", ss.CrestFactor, formatComparison(ss.CrestFactor, np.CrestFactor, "dB", 1))
223+
}
224+
164225
if ss.Entropy > 0 {
165226
// Classify noise type based on entropy
166227
noiseType := "broadband (hiss)"
@@ -169,22 +230,22 @@ func GenerateReport(data ReportData) error {
169230
} else if ss.Entropy < 0.9 {
170231
noiseType = "mixed"
171232
}
172-
fmt.Fprintf(f, " Entropy: %.3f (%s)\n", ss.Entropy, noiseType)
233+
// Show with comparison to input
234+
inputNoiseType := "broadband (hiss)"
235+
if np.Entropy < 0.7 {
236+
inputNoiseType = "tonal (hum/buzz)"
237+
} else if np.Entropy < 0.9 {
238+
inputNoiseType = "mixed"
239+
}
240+
if noiseType == inputNoiseType && math.Abs(ss.Entropy-np.Entropy) < 0.0005 {
241+
fmt.Fprintf(f, " Noise Character: %s (unchanged)\n", noiseType)
242+
} else if noiseType == inputNoiseType {
243+
fmt.Fprintf(f, " Noise Character: %s (entropy %.3f, was %.3f)\n", noiseType, ss.Entropy, np.Entropy)
244+
} else {
245+
fmt.Fprintf(f, " Noise Character: %s (was %s)\n", noiseType, inputNoiseType)
246+
}
173247
}
174248
}
175-
176-
// Show deltas vs input for easy comparison
177-
if data.Result.Measurements != nil {
178-
m := data.Result.Measurements
179-
fmt.Fprintln(f, "")
180-
fmt.Fprintln(f, "Changes from Input:")
181-
fmt.Fprintf(f, " LUFS: %+.1f dB\n", om.OutputI-m.InputI)
182-
fmt.Fprintf(f, " True Peak: %+.1f dB\n", om.OutputTP-m.InputTP)
183-
fmt.Fprintf(f, " Loudness Range: %+.1f LU\n", om.OutputLRA-m.InputLRA)
184-
fmt.Fprintf(f, " Dynamic Range: %+.1f dB\n", om.DynamicRange-m.DynamicRange)
185-
fmt.Fprintf(f, " Spectral Centroid: %+.0f Hz\n", om.SpectralCentroid-m.SpectralCentroid)
186-
}
187-
188249
} else {
189250
fmt.Fprintln(f, "Note: Output measurements not available")
190251
}

internal/processor/analyzer.go

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -765,21 +765,24 @@ func createAnalysisFilterGraph(
765765
) (*ffmpeg.AVFilterGraph, *ffmpeg.AVFilterContext, *ffmpeg.AVFilterContext, error) {
766766
// Build filter string for analysis pass
767767
// Filter chain order:
768-
// 1. silencedetect - detect silence regions for noise profile extraction
768+
// 1. aformat - downmix to mono for consistent analysis with Pass 2
769+
// This ensures spectral measurements (centroid/rolloff) are identical between passes
770+
// since Pass 2 also measures mono audio after processing
771+
// 2. silencedetect - detect silence regions for noise profile extraction
769772
// - noise=-50dB: threshold for silence detection (fairly sensitive)
770773
// - duration=0.5: minimum silence duration to detect (0.5s catches most pauses)
771-
// 2. astats - provides noise floor, dynamic range, and additional measurements for adaptive processing:
774+
// 3. astats - provides noise floor, dynamic range, and additional measurements for adaptive processing:
772775
// - Noise_floor, Dynamic_range, RMS_level, Peak_level: core measurements
773776
// - DC_offset: detects DC bias needing removal
774777
// - Flat_factor: detects pre-existing clipping/limiting
775778
// - Zero_crossings_rate: helps classify noise type
776779
// - Max_difference: detects impulsive sounds (clicks/pops)
777-
// 3. aspectralstats - measures spectral centroid and rolloff for adaptive de-esser targeting
778-
// 4. ebur128 - provides integrated loudness (LUFS), true peak, and LRA via metadata
780+
// 4. aspectralstats - measures spectral centroid and rolloff for adaptive de-esser targeting
781+
// 5. ebur128 - provides integrated loudness (LUFS), true peak, and LRA via metadata
779782
// Note: reset=0 (default) allows astats to accumulate statistics across all frames for Overall measurements
780783
// ebur128 metadata=1 writes per-frame loudness data to frame metadata (lavfi.r128.* keys)
781784
// peak=true enables true peak measurement (required for lavfi.r128.true_peak metadata)
782-
filterSpec := fmt.Sprintf("silencedetect=noise=-50dB:duration=0.5,astats=metadata=1:measure_perchannel=Noise_floor+Dynamic_range+RMS_level+Peak_level+DC_offset+Flat_factor+Zero_crossings_rate+Max_difference,aspectralstats=win_size=2048:win_func=hann:measure=centroid+rolloff,ebur128=metadata=1:peak=true:target=%.0f",
785+
filterSpec := fmt.Sprintf("aformat=channel_layouts=mono,silencedetect=noise=-50dB:duration=0.5,astats=metadata=1:measure_perchannel=Noise_floor+Dynamic_range+RMS_level+Peak_level+DC_offset+Flat_factor+Zero_crossings_rate+Max_difference,aspectralstats=win_size=2048:win_func=hann:measure=centroid+rolloff,ebur128=metadata=1:peak=true:target=%.0f",
783786
targetI)
784787

785788
return setupFilterGraph(decCtx, filterSpec)

internal/processor/analyzer_test.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,9 @@ func TestAnalyzeAudio(t *testing.T) {
8888
t.Errorf("InputI out of reasonable range: %.2f", measurements.InputI)
8989
}
9090

91-
if measurements.InputTP > 0 || measurements.InputTP < -100 {
91+
// True peak can exceed 0 dBFS due to inter-sample peaks in hot recordings
92+
// Allow up to +3 dBTP which is typical for unprocessed podcast audio
93+
if measurements.InputTP > 3 || measurements.InputTP < -100 {
9294
t.Errorf("InputTP out of reasonable range: %.2f", measurements.InputTP)
9395
}
9496

internal/processor/filters.go

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -669,11 +669,13 @@ func (cfg *FilterChainConfig) buildOutputAnalysisFilters() string {
669669
return ""
670670
}
671671
// Same filter chain as Pass 1 analysis, minus silencedetect (not needed for output)
672+
// aformat: downmix to mono first for consistent analysis with Pass 1
673+
// This ensures spectral measurements (centroid/rolloff) are identical between passes
672674
// astats: provides noise floor, dynamic range, RMS, peak, DC offset, flat factor, zero crossings, max difference
673675
// aspectralstats: provides spectral centroid and rolloff
674676
// ebur128: provides integrated loudness (LUFS), true peak, and LRA
675677
// peak=true enables true peak measurement (required for lavfi.r128.true_peak metadata)
676-
return "astats=metadata=1:measure_perchannel=Noise_floor+Dynamic_range+RMS_level+Peak_level+DC_offset+Flat_factor+Zero_crossings_rate+Max_difference,aspectralstats=win_size=2048:win_func=hann:measure=centroid+rolloff,ebur128=metadata=1:peak=true:target=-16"
678+
return "aformat=channel_layouts=mono,astats=metadata=1:measure_perchannel=Noise_floor+Dynamic_range+RMS_level+Peak_level+DC_offset+Flat_factor+Zero_crossings_rate+Max_difference,aspectralstats=win_size=2048:win_func=hann:measure=centroid+rolloff,ebur128=metadata=1:peak=true:target=-16"
677679
}
678680

679681
// BuildFilterSpec builds the FFmpeg filter specification string for Pass 2 processing.
@@ -715,12 +717,16 @@ func (cfg *FilterChainConfig) BuildFilterSpec() string {
715717
// Add output analysis filters if enabled (for Pass 2 measurement comparison)
716718
// These MUST come BEFORE aformat/asetnsamples because ebur128 can change frame sizes
717719
// The filters write to frame metadata which is preserved through the filter chain
720+
// Note: Analysis filters include aformat=channel_layouts=mono to ensure consistent
721+
// spectral measurements with Pass 1 (which also downmixes to mono before analysis)
718722
if analysisFilters := cfg.buildOutputAnalysisFilters(); analysisFilters != "" {
719723
filters = append(filters, analysisFilters)
720724
}
721725

722726
// Add output format filter (always enabled, must be after ebur128 which outputs f64)
723727
// aformat: podcast-standard output (44.1kHz, mono, s16)
728+
// Note: channel_layouts=mono is redundant when OutputAnalysisEnabled (already mono from
729+
// analysis filters), but included for robustness when analysis is disabled
724730
filters = append(filters, "aformat=sample_rates=44100:channel_layouts=mono:sample_fmts=s16")
725731

726732
// asetnsamples: fixed frame size for FLAC encoder (must be last to ensure consistent frame sizes)

0 commit comments

Comments
 (0)