@@ -1506,6 +1506,78 @@ func (b *baseMetadataAccumulators) accumulateSpectral(spectral spectralMetrics)
15061506 b .spectralFrameCount ++
15071507}
15081508
1509+ // extractAstatsMetadata extracts all astats measurements from FFmpeg metadata.
1510+ // These are cumulative values, so we keep the latest from each frame.
1511+ // Includes conversions: linearRatioToDB for CrestFactor, linearSampleToDBFS for MinLevel/MaxLevel.
1512+ func (b * baseMetadataAccumulators ) extractAstatsMetadata (metadata * ffmpeg.AVDictionary ) {
1513+ if value , ok := getFloatMetadata (metadata , metaKeyDynamicRange ); ok {
1514+ b .astatsDynamicRange = value
1515+ b .astatsFound = true
1516+ }
1517+ if value , ok := getFloatMetadata (metadata , metaKeyRMSLevel ); ok {
1518+ b .astatsRMSLevel = value
1519+ }
1520+ if value , ok := getFloatMetadata (metadata , metaKeyPeakLevel ); ok {
1521+ b .astatsPeakLevel = value
1522+ }
1523+ if value , ok := getFloatMetadata (metadata , metaKeyRMSTrough ); ok {
1524+ b .astatsRMSTrough = value
1525+ }
1526+ if value , ok := getFloatMetadata (metadata , metaKeyRMSPeak ); ok {
1527+ b .astatsRMSPeak = value
1528+ }
1529+ if value , ok := getFloatMetadata (metadata , metaKeyDCOffset ); ok {
1530+ b .astatsDCOffset = value
1531+ }
1532+ if value , ok := getFloatMetadata (metadata , metaKeyFlatFactor ); ok {
1533+ b .astatsFlatFactor = value
1534+ }
1535+ // CrestFactor: FFmpeg reports as linear ratio (peak/RMS), convert to dB
1536+ if value , ok := getFloatMetadata (metadata , metaKeyCrestFactor ); ok {
1537+ b .astatsCrestFactor = linearRatioToDB (value )
1538+ }
1539+ if value , ok := getFloatMetadata (metadata , metaKeyZeroCrossingsRate ); ok {
1540+ b .astatsZeroCrossingsRate = value
1541+ }
1542+ if value , ok := getFloatMetadata (metadata , metaKeyZeroCrossings ); ok {
1543+ b .astatsZeroCrossings = value
1544+ }
1545+ if value , ok := getFloatMetadata (metadata , metaKeyMaxDifference ); ok {
1546+ b .astatsMaxDifference = value
1547+ }
1548+ if value , ok := getFloatMetadata (metadata , metaKeyMinDifference ); ok {
1549+ b .astatsMinDifference = value
1550+ }
1551+ if value , ok := getFloatMetadata (metadata , metaKeyMeanDifference ); ok {
1552+ b .astatsMeanDifference = value
1553+ }
1554+ if value , ok := getFloatMetadata (metadata , metaKeyRMSDifference ); ok {
1555+ b .astatsRMSDifference = value
1556+ }
1557+ if value , ok := getFloatMetadata (metadata , metaKeyEntropy ); ok {
1558+ b .astatsEntropy = value
1559+ }
1560+ // MinLevel/MaxLevel: FFmpeg reports as linear sample values, convert to dBFS
1561+ if value , ok := getFloatMetadata (metadata , metaKeyMinLevel ); ok {
1562+ b .astatsMinLevel = linearSampleToDBFS (value )
1563+ }
1564+ if value , ok := getFloatMetadata (metadata , metaKeyMaxLevel ); ok {
1565+ b .astatsMaxLevel = linearSampleToDBFS (value )
1566+ }
1567+ if value , ok := getFloatMetadata (metadata , metaKeyNoiseFloor ); ok {
1568+ b .astatsNoiseFloor = value
1569+ }
1570+ if value , ok := getFloatMetadata (metadata , metaKeyNoiseFloorCount ); ok {
1571+ b .astatsNoiseFloorCount = value
1572+ }
1573+ if value , ok := getFloatMetadata (metadata , metaKeyBitDepth ); ok {
1574+ b .astatsBitDepth = value
1575+ }
1576+ if value , ok := getFloatMetadata (metadata , metaKeyNumberOfSamples ); ok {
1577+ b .astatsNumberOfSamples = value
1578+ }
1579+ }
1580+
15091581// metadataAccumulators holds accumulator variables for Pass 1 frame metadata extraction.
15101582// Uses baseMetadataAccumulators for spectral and astats fields shared with output analysis.
15111583type metadataAccumulators struct {
@@ -1704,111 +1776,7 @@ func extractFrameMetadata(metadata *ffmpeg.AVDictionary, acc *metadataAccumulato
17041776
17051777 // Extract astats measurements (cumulative, so we keep the latest)
17061778 // For mono audio, stats are under channel .1
1707- if value , ok := getFloatMetadata (metadata , metaKeyDynamicRange ); ok {
1708- acc .astatsDynamicRange = value
1709- acc .astatsFound = true
1710- }
1711-
1712- if value , ok := getFloatMetadata (metadata , metaKeyRMSLevel ); ok {
1713- acc .astatsRMSLevel = value
1714- }
1715-
1716- if value , ok := getFloatMetadata (metadata , metaKeyPeakLevel ); ok {
1717- acc .astatsPeakLevel = value
1718- }
1719-
1720- // Extract RMS_trough - RMS level of quietest segments (best noise floor indicator for speech)
1721- // In speech audio, quiet inter-word periods contain primarily ambient/electronic noise
1722- if value , ok := getFloatMetadata (metadata , metaKeyRMSTrough ); ok {
1723- acc .astatsRMSTrough = value
1724- }
1725-
1726- // Extract RMS_peak - RMS level of loudest segments
1727- if value , ok := getFloatMetadata (metadata , metaKeyRMSPeak ); ok {
1728- acc .astatsRMSPeak = value
1729- }
1730-
1731- // Extract DC_offset - mean amplitude displacement from zero
1732- // High values indicate DC bias that should be removed before processing
1733- if value , ok := getFloatMetadata (metadata , metaKeyDCOffset ); ok {
1734- acc .astatsDCOffset = value
1735- }
1736-
1737- // Extract Flat_factor - consecutive samples at peak levels (indicates clipping)
1738- // High values suggest pre-existing limiting or clipping damage
1739- if value , ok := getFloatMetadata (metadata , metaKeyFlatFactor ); ok {
1740- acc .astatsFlatFactor = value
1741- }
1742-
1743- // Extract Crest_factor - FFmpeg reports as linear ratio (peak/RMS), convert to dB
1744- // High values indicate impulsive/dynamic content, low values indicate compressed/limited audio
1745- if value , ok := getFloatMetadata (metadata , metaKeyCrestFactor ); ok {
1746- acc .astatsCrestFactor = linearRatioToDB (value )
1747- }
1748-
1749- // Extract Zero_crossings_rate - rate of zero crossings per sample
1750- // Low ZCR = bass-heavy/sustained tones, High ZCR = noise/sibilance
1751- if value , ok := getFloatMetadata (metadata , metaKeyZeroCrossingsRate ); ok {
1752- acc .astatsZeroCrossingsRate = value
1753- }
1754-
1755- // Extract Zero_crossings - total number of zero crossings
1756- if value , ok := getFloatMetadata (metadata , metaKeyZeroCrossings ); ok {
1757- acc .astatsZeroCrossings = value
1758- }
1759-
1760- // Extract Max_difference - largest sample-to-sample change
1761- // High values indicate impulsive sounds (clicks, pops)
1762- if value , ok := getFloatMetadata (metadata , metaKeyMaxDifference ); ok {
1763- acc .astatsMaxDifference = value
1764- }
1765-
1766- // Extract Min_difference - smallest sample-to-sample change
1767- if value , ok := getFloatMetadata (metadata , metaKeyMinDifference ); ok {
1768- acc .astatsMinDifference = value
1769- }
1770-
1771- // Extract Mean_difference - average sample-to-sample change
1772- if value , ok := getFloatMetadata (metadata , metaKeyMeanDifference ); ok {
1773- acc .astatsMeanDifference = value
1774- }
1775-
1776- // Extract RMS_difference - RMS of sample-to-sample changes
1777- if value , ok := getFloatMetadata (metadata , metaKeyRMSDifference ); ok {
1778- acc .astatsRMSDifference = value
1779- }
1780-
1781- // Extract Entropy - signal randomness (1.0 = white noise, lower = more structured)
1782- if value , ok := getFloatMetadata (metadata , metaKeyEntropy ); ok {
1783- acc .astatsEntropy = value
1784- }
1785-
1786- // Extract Min_level and Max_level - FFmpeg reports as linear sample values, convert to dBFS
1787- if value , ok := getFloatMetadata (metadata , metaKeyMinLevel ); ok {
1788- acc .astatsMinLevel = linearSampleToDBFS (value )
1789- }
1790- if value , ok := getFloatMetadata (metadata , metaKeyMaxLevel ); ok {
1791- acc .astatsMaxLevel = linearSampleToDBFS (value )
1792- }
1793-
1794- // Extract Noise_floor - FFmpeg's own noise floor estimate (dBFS)
1795- // Very useful for adaptive gate/noise reduction thresholds
1796- if value , ok := getFloatMetadata (metadata , metaKeyNoiseFloor ); ok {
1797- acc .astatsNoiseFloor = value
1798- }
1799- if value , ok := getFloatMetadata (metadata , metaKeyNoiseFloorCount ); ok {
1800- acc .astatsNoiseFloorCount = value
1801- }
1802-
1803- // Extract Bit_depth - effective bit depth of audio
1804- if value , ok := getFloatMetadata (metadata , metaKeyBitDepth ); ok {
1805- acc .astatsBitDepth = value
1806- }
1807-
1808- // Extract Number_of_samples - total samples processed
1809- if value , ok := getFloatMetadata (metadata , metaKeyNumberOfSamples ); ok {
1810- acc .astatsNumberOfSamples = value
1811- }
1779+ acc .extractAstatsMetadata (metadata )
18121780
18131781 // Extract ebur128 measurements (cumulative loudness analysis)
18141782 // ebur128 provides: M (momentary 400ms), S (short-term 3s), I (integrated), LRA, sample_peak, true_peak
@@ -2048,72 +2016,7 @@ func extractOutputFrameMetadata(metadata *ffmpeg.AVDictionary, acc *outputMetada
20482016 acc .accumulateSpectral (extractSpectralMetrics (metadata ))
20492017
20502018 // Extract astats measurements (cumulative, so we keep the latest)
2051- if value , ok := getFloatMetadata (metadata , metaKeyDynamicRange ); ok {
2052- acc .astatsDynamicRange = value
2053- acc .astatsFound = true
2054- }
2055- if value , ok := getFloatMetadata (metadata , metaKeyRMSLevel ); ok {
2056- acc .astatsRMSLevel = value
2057- }
2058- if value , ok := getFloatMetadata (metadata , metaKeyPeakLevel ); ok {
2059- acc .astatsPeakLevel = value
2060- }
2061- if value , ok := getFloatMetadata (metadata , metaKeyRMSTrough ); ok {
2062- acc .astatsRMSTrough = value
2063- }
2064- if value , ok := getFloatMetadata (metadata , metaKeyRMSPeak ); ok {
2065- acc .astatsRMSPeak = value
2066- }
2067- if value , ok := getFloatMetadata (metadata , metaKeyDCOffset ); ok {
2068- acc .astatsDCOffset = value
2069- }
2070- if value , ok := getFloatMetadata (metadata , metaKeyFlatFactor ); ok {
2071- acc .astatsFlatFactor = value
2072- }
2073- // CrestFactor: FFmpeg reports as linear ratio (peak/RMS), convert to dB
2074- if value , ok := getFloatMetadata (metadata , metaKeyCrestFactor ); ok {
2075- acc .astatsCrestFactor = linearRatioToDB (value )
2076- }
2077- if value , ok := getFloatMetadata (metadata , metaKeyZeroCrossingsRate ); ok {
2078- acc .astatsZeroCrossingsRate = value
2079- }
2080- if value , ok := getFloatMetadata (metadata , metaKeyZeroCrossings ); ok {
2081- acc .astatsZeroCrossings = value
2082- }
2083- if value , ok := getFloatMetadata (metadata , metaKeyMaxDifference ); ok {
2084- acc .astatsMaxDifference = value
2085- }
2086- if value , ok := getFloatMetadata (metadata , metaKeyMinDifference ); ok {
2087- acc .astatsMinDifference = value
2088- }
2089- if value , ok := getFloatMetadata (metadata , metaKeyMeanDifference ); ok {
2090- acc .astatsMeanDifference = value
2091- }
2092- if value , ok := getFloatMetadata (metadata , metaKeyRMSDifference ); ok {
2093- acc .astatsRMSDifference = value
2094- }
2095- if value , ok := getFloatMetadata (metadata , metaKeyEntropy ); ok {
2096- acc .astatsEntropy = value
2097- }
2098- // MinLevel/MaxLevel: FFmpeg reports as linear sample values, convert to dBFS
2099- if value , ok := getFloatMetadata (metadata , metaKeyMinLevel ); ok {
2100- acc .astatsMinLevel = linearSampleToDBFS (value )
2101- }
2102- if value , ok := getFloatMetadata (metadata , metaKeyMaxLevel ); ok {
2103- acc .astatsMaxLevel = linearSampleToDBFS (value )
2104- }
2105- if value , ok := getFloatMetadata (metadata , metaKeyNoiseFloor ); ok {
2106- acc .astatsNoiseFloor = value
2107- }
2108- if value , ok := getFloatMetadata (metadata , metaKeyNoiseFloorCount ); ok {
2109- acc .astatsNoiseFloorCount = value
2110- }
2111- if value , ok := getFloatMetadata (metadata , metaKeyBitDepth ); ok {
2112- acc .astatsBitDepth = value
2113- }
2114- if value , ok := getFloatMetadata (metadata , metaKeyNumberOfSamples ); ok {
2115- acc .astatsNumberOfSamples = value
2116- }
2019+ acc .extractAstatsMetadata (metadata )
21172020
21182021 // Extract ebur128 measurements
21192022 if value , ok := getFloatMetadata (metadata , metaKeyEbur128I ); ok {
0 commit comments