Skip to content

Commit 8375104

Browse files
committed
refactor(processor): extract Encoder type to separate file
Move Encoder struct and related functions to encoder.go for better code organisation and navigation. Extracted to encoder.go: - Encoder struct - createOutputEncoder function - WriteFrame method - Flush method - receivePackets method - Close method - calculateFrameLevel function
1 parent c06e386 commit 8375104

2 files changed

Lines changed: 317 additions & 307 deletions

File tree

internal/processor/encoder.go

Lines changed: 317 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,317 @@
1+
// Package processor handles audio analysis and processing
2+
package processor
3+
4+
import (
5+
"errors"
6+
"fmt"
7+
"math"
8+
"unsafe"
9+
10+
ffmpeg "github.com/linuxmatters/ffmpeg-statigo"
11+
"github.com/linuxmatters/jivetalking/internal/audio"
12+
)
13+
14+
// Encoder wraps the audio encoding and muxing functionality
15+
type Encoder struct {
16+
fmtCtx *ffmpeg.AVFormatContext
17+
encCtx *ffmpeg.AVCodecContext
18+
stream *ffmpeg.AVStream
19+
packet *ffmpeg.AVPacket
20+
streamIdx int
21+
}
22+
23+
// createOutputEncoder creates an encoder for FLAC output
24+
// TODO: Add WAV fallback if FLAC encoder is not available
25+
func createOutputEncoder(outputPath string, metadata *audio.Metadata, bufferSinkCtx *ffmpeg.AVFilterContext) (*Encoder, error) {
26+
// Allocate output format context
27+
outputPathC := ffmpeg.ToCStr(outputPath)
28+
defer outputPathC.Free()
29+
30+
var fmtCtx *ffmpeg.AVFormatContext
31+
if _, err := ffmpeg.AVFormatAllocOutputContext2(&fmtCtx, nil, nil, outputPathC); err != nil {
32+
return nil, fmt.Errorf("failed to allocate output context: %w", err)
33+
}
34+
35+
// Find FLAC encoder
36+
codec := ffmpeg.AVCodecFindEncoder(ffmpeg.AVCodecIdFlac)
37+
if codec == nil {
38+
ffmpeg.AVFormatFreeContext(fmtCtx)
39+
return nil, fmt.Errorf("FLAC encoder not found for output: %s", outputPath)
40+
}
41+
42+
// Create stream
43+
stream := ffmpeg.AVFormatNewStream(fmtCtx, nil)
44+
if stream == nil {
45+
ffmpeg.AVFormatFreeContext(fmtCtx)
46+
return nil, fmt.Errorf("failed to create stream for output: %s", outputPath)
47+
}
48+
49+
// Allocate encoder context
50+
encCtx := ffmpeg.AVCodecAllocContext3(codec)
51+
if encCtx == nil {
52+
ffmpeg.AVFormatFreeContext(fmtCtx)
53+
return nil, fmt.Errorf("failed to allocate encoder context for output: %s", outputPath)
54+
}
55+
56+
// Get audio parameters from filter output (we only need sample rate, format is set to S16 via aformat filter)
57+
if _, err := ffmpeg.AVBuffersinkGetFormat(bufferSinkCtx); err != nil { // Verify filter output is configured
58+
ffmpeg.AVCodecFreeContext(&encCtx)
59+
ffmpeg.AVFormatFreeContext(fmtCtx)
60+
return nil, fmt.Errorf("failed to get sample format: %w", err)
61+
}
62+
63+
sampleRate, err := ffmpeg.AVBuffersinkGetSampleRate(bufferSinkCtx)
64+
if err != nil {
65+
ffmpeg.AVCodecFreeContext(&encCtx)
66+
ffmpeg.AVFormatFreeContext(fmtCtx)
67+
return nil, fmt.Errorf("failed to get sample rate: %w", err)
68+
}
69+
70+
timeBase := ffmpeg.AVBuffersinkGetTimeBase(bufferSinkCtx)
71+
72+
// Configure encoder - FLAC supports S16 and S32, we use S16 which matches our aformat filter
73+
encCtx.SetSampleFmt(ffmpeg.AVSampleFmtS16)
74+
encCtx.SetSampleRate(sampleRate)
75+
76+
// Get channel count from filter output and set default channel layout
77+
channels, err := ffmpeg.AVBuffersinkGetChannels(bufferSinkCtx)
78+
if err != nil {
79+
ffmpeg.AVCodecFreeContext(&encCtx)
80+
ffmpeg.AVFormatFreeContext(fmtCtx)
81+
return nil, fmt.Errorf("failed to get channels: %w", err)
82+
}
83+
84+
// Set default channel layout for the encoder
85+
ffmpeg.AVChannelLayoutDefault(encCtx.ChLayout(), channels)
86+
87+
// Set compression level for FLAC
88+
if codec.Id() == ffmpeg.AVCodecIdFlac {
89+
ffmpeg.AVOptSetInt(encCtx.RawPtr(), ffmpeg.GlobalCStr("compression_level"), 5, 0)
90+
// FLAC encoder requires fixed frame size - must match asetnsamples filter (4096)
91+
encCtx.SetFrameSize(4096)
92+
}
93+
94+
// Set global header flag if needed by the format
95+
if fmtCtx.Oformat().Flags()&ffmpeg.AVFmtGlobalheader != 0 {
96+
encCtx.SetFlags(encCtx.Flags() | ffmpeg.AVCodecFlagGlobalHeader)
97+
}
98+
99+
encCtx.SetTimeBase(timeBase)
100+
101+
// Open encoder
102+
if _, err := ffmpeg.AVCodecOpen2(encCtx, codec, nil); err != nil {
103+
ffmpeg.AVCodecFreeContext(&encCtx)
104+
ffmpeg.AVFormatFreeContext(fmtCtx)
105+
return nil, fmt.Errorf("failed to open encoder: %w", err)
106+
}
107+
108+
// Copy encoder parameters to stream
109+
if _, err := ffmpeg.AVCodecParametersFromContext(stream.Codecpar(), encCtx); err != nil {
110+
ffmpeg.AVCodecFreeContext(&encCtx)
111+
ffmpeg.AVFormatFreeContext(fmtCtx)
112+
return nil, fmt.Errorf("failed to copy encoder parameters: %w", err)
113+
}
114+
115+
stream.SetTimeBase(encCtx.TimeBase())
116+
117+
// Open output file
118+
if fmtCtx.Oformat().Flags()&ffmpeg.AVFmtNofile == 0 {
119+
var pb *ffmpeg.AVIOContext
120+
if _, err := ffmpeg.AVIOOpen(&pb, outputPathC, ffmpeg.AVIOFlagWrite); err != nil {
121+
ffmpeg.AVCodecFreeContext(&encCtx)
122+
ffmpeg.AVFormatFreeContext(fmtCtx)
123+
return nil, fmt.Errorf("failed to open output file: %w", err)
124+
}
125+
fmtCtx.SetPb(pb)
126+
}
127+
128+
// Write header
129+
if _, err := ffmpeg.AVFormatWriteHeader(fmtCtx, nil); err != nil {
130+
if fmtCtx.Pb() != nil {
131+
ffmpeg.AVIOClose(fmtCtx.Pb())
132+
}
133+
ffmpeg.AVCodecFreeContext(&encCtx)
134+
ffmpeg.AVFormatFreeContext(fmtCtx)
135+
return nil, fmt.Errorf("failed to write header: %w", err)
136+
}
137+
138+
packet := ffmpeg.AVPacketAlloc()
139+
if packet == nil {
140+
if fmtCtx.Pb() != nil {
141+
ffmpeg.AVIOClose(fmtCtx.Pb())
142+
}
143+
ffmpeg.AVCodecFreeContext(&encCtx)
144+
ffmpeg.AVFormatFreeContext(fmtCtx)
145+
return nil, fmt.Errorf("failed to allocate packet for output: %s", outputPath)
146+
}
147+
148+
return &Encoder{
149+
fmtCtx: fmtCtx,
150+
encCtx: encCtx,
151+
stream: stream,
152+
packet: packet,
153+
streamIdx: 0,
154+
}, nil
155+
}
156+
157+
// WriteFrame encodes and writes a single audio frame
158+
func (e *Encoder) WriteFrame(frame *ffmpeg.AVFrame) error {
159+
// Rescale PTS to encoder timebase if needed
160+
if frame.Pts() != ffmpeg.AVNoptsValue {
161+
frame.SetPts(
162+
ffmpeg.AVRescaleQ(frame.Pts(), frame.TimeBase(), e.encCtx.TimeBase()),
163+
)
164+
}
165+
166+
// Send frame to encoder
167+
if _, err := ffmpeg.AVCodecSendFrame(e.encCtx, frame); err != nil {
168+
return fmt.Errorf("failed to send frame to encoder: %w", err)
169+
}
170+
171+
// Receive encoded packets
172+
return e.receivePackets()
173+
}
174+
175+
// Flush flushes the encoder
176+
func (e *Encoder) Flush() error {
177+
// Send NULL frame to signal flush
178+
if _, err := ffmpeg.AVCodecSendFrame(e.encCtx, nil); err != nil {
179+
return fmt.Errorf("failed to flush encoder: %w", err)
180+
}
181+
182+
return e.receivePackets()
183+
}
184+
185+
// receivePackets receives and writes packets from the encoder
186+
func (e *Encoder) receivePackets() error {
187+
for {
188+
ffmpeg.AVPacketUnref(e.packet)
189+
190+
if _, err := ffmpeg.AVCodecReceivePacket(e.encCtx, e.packet); err != nil {
191+
if errors.Is(err, ffmpeg.EAgain) || errors.Is(err, ffmpeg.AVErrorEOF) {
192+
break
193+
}
194+
return fmt.Errorf("failed to receive packet: %w", err)
195+
}
196+
197+
// Set stream index
198+
e.packet.SetStreamIndex(e.streamIdx)
199+
200+
// Rescale timestamps
201+
ffmpeg.AVPacketRescaleTs(e.packet, e.encCtx.TimeBase(), e.stream.TimeBase())
202+
203+
// Write packet
204+
if _, err := ffmpeg.AVInterleavedWriteFrame(e.fmtCtx, e.packet); err != nil {
205+
return fmt.Errorf("failed to write packet: %w", err)
206+
}
207+
}
208+
209+
return nil
210+
}
211+
212+
// Close closes the encoder and output file
213+
func (e *Encoder) Close() error {
214+
// Write trailer
215+
if _, err := ffmpeg.AVWriteTrailer(e.fmtCtx); err != nil {
216+
return fmt.Errorf("failed to write trailer: %w", err)
217+
}
218+
219+
// Free resources
220+
ffmpeg.AVPacketFree(&e.packet)
221+
ffmpeg.AVCodecFreeContext(&e.encCtx)
222+
223+
// Close output file
224+
if e.fmtCtx.Oformat().Flags()&ffmpeg.AVFmtNofile == 0 {
225+
if e.fmtCtx.Pb() != nil {
226+
if _, err := ffmpeg.AVIOClose(e.fmtCtx.Pb()); err != nil {
227+
return fmt.Errorf("failed to close output file: %w", err)
228+
}
229+
e.fmtCtx.SetPb(nil)
230+
}
231+
}
232+
233+
ffmpeg.AVFormatFreeContext(e.fmtCtx)
234+
235+
return nil
236+
}
237+
238+
// calculateFrameLevel calculates the RMS (Root Mean Square) level of an audio frame in dB
239+
// This provides accurate audio level measurement for VU meter display
240+
func calculateFrameLevel(frame *ffmpeg.AVFrame) float64 {
241+
if frame == nil || frame.NbSamples() == 0 {
242+
return -60.0 // Silence threshold
243+
}
244+
245+
// Get sample format to know how to interpret the data
246+
sampleFmt := frame.Format()
247+
nbSamples := frame.NbSamples()
248+
nbChannels := frame.ChLayout().NbChannels()
249+
250+
// Get pointer to audio data (first plane for packed formats, or first channel for planar)
251+
dataPtr := frame.Data().Get(0)
252+
if dataPtr == nil {
253+
return -60.0
254+
}
255+
256+
// Calculate RMS based on sample format
257+
// Most common formats: S16 (signed 16-bit) and FLT (32-bit float)
258+
var sumSquares float64
259+
var sampleCount int64
260+
261+
switch ffmpeg.AVSampleFormat(sampleFmt) {
262+
case ffmpeg.AVSampleFmtS16, ffmpeg.AVSampleFmtS16P:
263+
// 16-bit signed integer samples
264+
samples := unsafe.Slice((*int16)(dataPtr), int(nbSamples)*int(nbChannels))
265+
for _, sample := range samples {
266+
normalized := float64(sample) / 32768.0 // Normalize to -1.0 to 1.0
267+
sumSquares += normalized * normalized
268+
sampleCount++
269+
}
270+
271+
case ffmpeg.AVSampleFmtFlt, ffmpeg.AVSampleFmtFltp:
272+
// 32-bit float samples (already normalized to -1.0 to 1.0)
273+
samples := unsafe.Slice((*float32)(dataPtr), int(nbSamples)*int(nbChannels))
274+
for _, sample := range samples {
275+
normalized := float64(sample)
276+
sumSquares += normalized * normalized
277+
sampleCount++
278+
}
279+
280+
case ffmpeg.AVSampleFmtS32, ffmpeg.AVSampleFmtS32P:
281+
// 32-bit signed integer samples
282+
samples := unsafe.Slice((*int32)(dataPtr), int(nbSamples)*int(nbChannels))
283+
for _, sample := range samples {
284+
normalized := float64(sample) / 2147483648.0 // Normalize to -1.0 to 1.0
285+
sumSquares += normalized * normalized
286+
sampleCount++
287+
}
288+
289+
default:
290+
// Unsupported format, return neutral value
291+
return -30.0
292+
}
293+
294+
if sampleCount == 0 {
295+
return -60.0
296+
}
297+
298+
// Calculate RMS (Root Mean Square)
299+
rms := math.Sqrt(sumSquares / float64(sampleCount))
300+
301+
// Convert to dB: 20 * log10(rms)
302+
// Add small epsilon to avoid log(0)
303+
if rms < 0.00001 { // Equivalent to -100 dB
304+
return -60.0 // Floor at -60 dB for silence
305+
}
306+
307+
levelDB := 20.0 * math.Log10(rms)
308+
309+
// Clamp to reasonable range for display (-60 dB to 0 dB)
310+
if levelDB < -60.0 {
311+
levelDB = -60.0
312+
} else if levelDB > 0.0 {
313+
levelDB = 0.0
314+
}
315+
316+
return levelDB
317+
}

0 commit comments

Comments
 (0)