Skip to content

Commit 25ba905

Browse files
committed
refactor(processor): extract silence detection magic numbers to constants
- Add roomToneAmplitudeDecayDB, roomToneAmplitudeWeight, roomToneFluxWeight
- Add silenceThresholdMinIntervals, silenceSearchPercent
- Add roomToneCandidatePercent, roomToneCandidateMinCount
- Add silenceThresholdHeadroomDB, interruptionToleranceIntervals
- Add roomToneScoreThreshold
- Update roomToneScore, calculateSilenceThresholdFromIntervals, findSilenceCandidatesFromIntervals to use named constants
- Remove redundant local const declarations
1 parent 4fc8701 commit 25ba905

1 file changed

Lines changed: 50 additions & 27 deletions

File tree

internal/processor/analyzer.go

Lines changed: 50 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -332,6 +332,39 @@ const (
332332
// excludeFirstSeconds: ignore candidates starting in this initial period
333333
// (typically contains preamble before intentional room tone recording)
334334
excludeFirstSeconds = 15.0
335+
336+
// roomToneAmplitudeDecayDB is the dB range above median where amplitude score decays from 1.0 to 0.0.
337+
// 6dB above median = score of 0.0.
338+
roomToneAmplitudeDecayDB = 6.0
339+
340+
// roomToneAmplitudeWeight is the weighting factor for amplitude in room tone scoring.
341+
// Amplitude is weighted more heavily (0.6) since it's the primary discriminator.
342+
roomToneAmplitudeWeight = 0.6
343+
344+
// roomToneFluxWeight is the weighting factor for spectral flux in room tone scoring.
345+
roomToneFluxWeight = 0.4
346+
347+
// silenceThresholdMinIntervals is the minimum number of intervals required for threshold calculation.
348+
silenceThresholdMinIntervals = 10
349+
350+
// silenceSearchPercent is the percentage of intervals, counted from the start of the recording, that is used for threshold calculation and searched for silence candidates (15%).
351+
silenceSearchPercent = 15
352+
353+
// roomToneCandidatePercent is, despite its name, a divisor rather than a percentage: the top len/roomToneCandidatePercent scored intervals (len/5 = 20%) are used as room tone candidates.
354+
roomToneCandidatePercent = 5 // divisor: len/5 = 20%
355+
356+
// roomToneCandidateMinCount is the minimum number of room tone candidate intervals.
357+
roomToneCandidateMinCount = 8
358+
359+
// silenceThresholdHeadroomDB is additional dB added to the detected room tone level for headroom.
360+
silenceThresholdHeadroomDB = 1.0
361+
362+
// interruptionToleranceIntervals is the number of consecutive non-silent intervals allowed
363+
// within a silence region without breaking it. 3 intervals = 750ms tolerance.
364+
interruptionToleranceIntervals = 3
365+
366+
// roomToneScoreThreshold is the minimum score (0-1) for an interval to be considered room tone.
367+
roomToneScoreThreshold = 0.5
335368
)
336369

337370
// roomToneScore calculates a 0-1 score indicating how likely an interval is room tone.
@@ -344,11 +377,10 @@ const (
344377
func roomToneScore(interval IntervalSample, rmsP50, fluxP50 float64) float64 {
345378
// Amplitude component: quieter = more likely room tone
346379
// Score 1.0 if at or below median, decreasing above
347-
// Use a soft threshold: 6dB above median still gets partial credit
348380
amplitudeScore := 1.0
349381
if interval.RMSLevel > rmsP50 {
350-
// Linear decay: 0dB above = 1.0, 6dB above = 0.0
351-
amplitudeScore = 1.0 - (interval.RMSLevel-rmsP50)/6.0
382+
// Linear decay: 0dB above = 1.0, roomToneAmplitudeDecayDB above = 0.0
383+
amplitudeScore = 1.0 - (interval.RMSLevel-rmsP50)/roomToneAmplitudeDecayDB
352384
if amplitudeScore < 0 {
353385
amplitudeScore = 0
354386
}
@@ -367,8 +399,7 @@ func roomToneScore(interval IntervalSample, rmsP50, fluxP50 float64) float64 {
367399
}
368400

369401
// Combine scores: both must be reasonable for a good room tone score
370-
// Weight amplitude more heavily since it's the primary discriminator
371-
return 0.6*amplitudeScore + 0.4*fluxScore
402+
return roomToneAmplitudeWeight*amplitudeScore + roomToneFluxWeight*fluxScore
372403
}
373404

374405
// calculateSilenceThresholdFromIntervals derives the silence threshold from interval data.
@@ -381,14 +412,14 @@ func roomToneScore(interval IntervalSample, rmsP50, fluxP50 float64) float64 {
381412
//
382413
// We compute a "room tone score" for each interval and use that to find the threshold.
383414
func calculateSilenceThresholdFromIntervals(intervals []IntervalSample, fallbackThreshold float64) float64 {
384-
if len(intervals) < 10 {
415+
if len(intervals) < silenceThresholdMinIntervals {
385416
return fallbackThreshold
386417
}
387418

388-
// Only use the first 15% of intervals for threshold calculation
389-
searchLimit := len(intervals) * 15 / 100
390-
if searchLimit < 10 {
391-
searchLimit = 10
419+
// Only use the first silenceSearchPercent% of intervals for threshold calculation
420+
searchLimit := len(intervals) * silenceSearchPercent / 100
421+
if searchLimit < silenceThresholdMinIntervals {
422+
searchLimit = silenceThresholdMinIntervals
392423
}
393424
searchIntervals := intervals[:searchLimit]
394425

@@ -426,25 +457,25 @@ func calculateSilenceThresholdFromIntervals(intervals []IntervalSample, fallback
426457
})
427458

428459
// Take the top 20% of scored intervals as room tone candidates
429-
// (or at least 8 intervals for statistical relevance)
430-
candidateCount := len(scored) / 5
431-
if candidateCount < 8 {
432-
candidateCount = 8
460+
// (or at least roomToneCandidateMinCount intervals for statistical relevance)
461+
candidateCount := len(scored) / roomToneCandidatePercent
462+
if candidateCount < roomToneCandidateMinCount {
463+
candidateCount = roomToneCandidateMinCount
433464
}
434465
if candidateCount > len(scored) {
435466
candidateCount = len(scored)
436467
}
437468

438469
// Threshold is the maximum RMS among high-confidence room tone intervals
439-
// Add small headroom (1dB) to catch edge cases
470+
// Add small headroom to catch edge cases
440471
maxRoomToneRMS := -120.0
441472
for i := 0; i < candidateCount; i++ {
442473
if scored[i].rms > maxRoomToneRMS {
443474
maxRoomToneRMS = scored[i].rms
444475
}
445476
}
446477

447-
return maxRoomToneRMS + 1.0
478+
return maxRoomToneRMS + silenceThresholdHeadroomDB
448479
}
449480

450481
// findSilenceCandidatesFromIntervals identifies silence regions from interval samples.
@@ -464,8 +495,8 @@ func findSilenceCandidatesFromIntervals(intervals []IntervalSample, threshold fl
464495
return nil
465496
}
466497

467-
// Only search the first 15% of the recording
468-
searchLimit := len(intervals) * 15 / 100
498+
// Only search the first silenceSearchPercent% of the recording
499+
searchLimit := len(intervals) * silenceSearchPercent / 100
469500
if searchLimit < minimumSilenceIntervals {
470501
searchLimit = minimumSilenceIntervals
471502
}
@@ -484,14 +515,6 @@ func findSilenceCandidatesFromIntervals(intervals []IntervalSample, threshold fl
484515
rmsP50 := rmsLevels[len(rmsLevels)/2]
485516
fluxP50 := fluxValues[len(fluxValues)/2]
486517

487-
// Tolerance: allow up to N consecutive intervals below score threshold without breaking
488-
// This handles brief transitional moments in otherwise quiet regions
489-
// 3 intervals = 750ms of tolerance
490-
const interruptionTolerance = 3
491-
492-
// Room tone score threshold - intervals scoring above this are considered room tone
493-
const roomToneScoreThreshold = 0.5
494-
495518
var candidates []SilenceRegion
496519
var silenceStart time.Duration
497520
var silentIntervalCount int
@@ -522,7 +545,7 @@ func findSilenceCandidatesFromIntervals(intervals []IntervalSample, threshold fl
522545
// Not room tone - count as interruption
523546
interruptionCount++
524547

525-
if interruptionCount > interruptionTolerance {
548+
if interruptionCount > interruptionToleranceIntervals {
526549
// Too many consecutive interruptions - end silence region
527550
// Calculate end time from last silent interval (before interruptions started)
528551
lastSilentIdx := i - interruptionCount

0 commit comments

Comments
 (0)