I'm trying to stream audio data from Telnyx to AssemblyAI, but I'm running into issues. I'm confident there are no connection problems; the issue appears to be with the audio itself (perhaps in how I decode it from base64?). Once the call hits the webhook, I start streaming to the WebSocket, and that part works fine.
// Handles each inbound media-stream WebSocket connection: buffers the base64
// audio payloads carried by `media` events and forwards them, decoded and
// concatenated, to the AssemblyAI socket (`assembly`, defined outside this block).
//
// NOTE(review): the bytes are forwarded exactly as received. Telephony media
// streams are often G.711 mu-law (PCMU) rather than linear PCM16 -- confirm the
// codec of the incoming stream and make sure the AssemblyAI session is opened
// with the matching encoding and sample rate; a mismatch would explain empty
// transcripts.
wss.on('connection', (ws) => {
  console.log('Connected');

  // Number of media payloads to buffer before forwarding one binary frame.
  const CHUNKS_PER_SEND = 5;
  let chunks = [];
  // Latest full transcript assembled from the AssemblyAI messages.
  let text = '';

  ws.on('message', (message) => {
    if (!assembly) {
      console.log('No AssemblyAI WebSocket connection');
      return;
    }

    // A malformed frame must not throw out of the message handler.
    let parsedMessage;
    try {
      parsedMessage = JSON.parse(message);
    } catch (err) {
      console.error('Ignoring non-JSON WebSocket message:', err);
      return;
    }
    if (parsedMessage === null || typeof parsedMessage !== 'object') {
      return;
    }

    if (parsedMessage.event === 'start') {
      console.log('Received start event');
      // Transcript text keyed by the segment's audio_start offset, so a newer
      // message for the same segment overwrites the older one.
      const texts = {};
      assembly.onmessage = (assemblyMsg) => {
        console.log('Received assembly message:', assemblyMsg.data);
        const res = JSON.parse(assemblyMsg.data);
        // Only record messages that carry a numeric audio_start; others would
        // be stored under the key "undefined" and feed NaN into the sort below.
        if (typeof res.audio_start === 'number') {
          texts[res.audio_start] = res.text;
        }
        const msg = Object.keys(texts)
          .sort((a, b) => Number(a) - Number(b))
          .filter((key) => texts[key])
          .map((key) => ` ${texts[key]}`)
          .join('');
        console.log(msg);
        // `msg` is rebuilt from every known segment on each message, so it
        // replaces the stored transcript; appending it would duplicate text.
        text = msg;
      };
    } else if (parsedMessage.event === 'media') {
      // Payloads are buffered in arrival order.
      // NOTE(review): reordering by `media.chunk` was apparently intended but
      // is not implemented -- confirm whether out-of-order delivery can occur.
      chunks.push(parsedMessage.media.payload);
      if (chunks.length >= CHUNKS_PER_SEND && assembly.readyState === WebSocket.OPEN) {
        // Send the raw decoded audio bytes to AssemblyAI as one binary frame.
        assembly.send(Buffer.concat(chunks.map(decodeBase64ToBuffer)));
        chunks = [];
      }
    }
  });
});
/**
 * Decodes a base64-encoded string into a Buffer holding the raw bytes.
 *
 * @param {string} base64String - Base64 text to decode.
 * @returns {Buffer} The decoded bytes.
 */
function decodeBase64ToBuffer(base64String) {
  return Buffer.from(base64String, 'base64');
}
I am currently getting these logs:
Received assembly message: {"message_type":"PartialTranscript","created":"2024-05-02T17:07:32.072113","audio_start":30,"audio_end":500,"confidence":0,"text":"","words":[]}
Received assembly message: {"message_type":"PartialTranscript","created":"2024-05-02T17:07:33.126404","audio_start":30,"audio_end":940,"confidence":0,"text":"","words":[]}
Received assembly message: {"message_type":"PartialTranscript","created":"2024-05-02T17:07:33.920097","audio_start":30,"audio_end":1380,"confidence":0,"text":"","words":[]}
The empty transcripts suggest that the problem is with the audio. Both audio sources are PCM16 with a sample rate of 8000 Hz, so I don't think the encoding or the sample rate is the issue. Any ideas? Maybe I'm doing something wrong when concatenating the chunks? Is there a header or something that needs to be removed?