Extract deepgram-config (#8793)

This commit is contained in:
Kristina 2025-05-01 07:04:46 +04:00 committed by GitHub
parent c2defd9638
commit 111c3c6dd1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 64 additions and 26 deletions

View File

@ -28,6 +28,14 @@ interface Config {
VadSilenceDurationMs: number VadSilenceDurationMs: number
VadPrefixPaddingMs: number VadPrefixPaddingMs: number
VadThreshold: number VadThreshold: number
DgEndpointing: number
DgUtteranceEndMs: number
DgInterimResults: boolean
DgVadEvents: boolean
DgPunctuate: boolean
DgSmartFormat: boolean
DgNoDelay: boolean
} }
const config: Config = (() => { const config: Config = (() => {
@ -43,7 +51,15 @@ const config: Config = (() => {
SttProvider: (process.env.STT_PROVIDER as SttProvider) ?? 'deepgram', SttProvider: (process.env.STT_PROVIDER as SttProvider) ?? 'deepgram',
VadSilenceDurationMs: parseInt(process.env.SILENCE_DURATION_MS ?? '1000'), VadSilenceDurationMs: parseInt(process.env.SILENCE_DURATION_MS ?? '1000'),
VadPrefixPaddingMs: parseInt(process.env.PREFIX_PADDING_MS ?? '1000'), VadPrefixPaddingMs: parseInt(process.env.PREFIX_PADDING_MS ?? '1000'),
VadThreshold: parseFloat(process.env.VAD_THRESHOLD ?? '0.5') VadThreshold: parseFloat(process.env.VAD_THRESHOLD ?? '0.5'),
DgEndpointing: parseInt(process.env.DG_ENDPOINTING ?? '100'),
DgInterimResults: process.env.DG_INTERIM_RESULTS === 'true',
DgVadEvents: process.env.DG_VAD_EVENTS === 'true',
DgPunctuate: process.env.DG_PUNCTUATE === 'true',
DgSmartFormat: process.env.DG_SMART_FORMAT === 'true',
DgUtteranceEndMs: parseInt(process.env.DG_UTTERANCE_END_MS ?? '0'),
DgNoDelay: process.env.DG_NO_DELAY === 'true'
} }
const missingEnv = (Object.keys(params) as Array<keyof Config>).filter((key) => params[key] === undefined) const missingEnv = (Object.keys(params) as Array<keyof Config>).filter((key) => params[key] === undefined)

View File

@ -29,19 +29,6 @@ import config from '../config.js'
const KEEP_ALIVE_INTERVAL = 10 * 1000 const KEEP_ALIVE_INTERVAL = 10 * 1000
// Static option set for a Deepgram live-transcription connection.
// Only `model` is read from the shared config; every other value here is
// hard-coded and therefore cannot be tuned per deployment.
const dgSchema: LiveSchema = {
model: config.DeepgramModel,
encoding: 'linear16',
smart_format: true,
endpointing: 500,
interim_results: true,
vad_events: true,
utterance_end_ms: 1000,
punctuate: true,
language: 'en'
}
export class STT implements Stt { export class STT implements Stt {
private readonly deepgram: DeepgramClient private readonly deepgram: DeepgramClient
@ -134,6 +121,47 @@ export class STT implements Stt {
} }
} }
/**
 * Builds the Deepgram live-transcription options for a single audio stream.
 *
 * Tunable options are taken from the `Dg*` config values. A boolean option is
 * only included when its flag is enabled, and a numeric option only when its
 * value is a finite, non-zero number; everything else is left out of the
 * request entirely. Encoding, language and model are always set, and the
 * channel count and sample rate are taken from the stream itself.
 *
 * @param stream - audio stream whose `numChannels` and `sampleRate` describe the audio being sent
 * @returns the option object to pass when opening the live connection
 */
getOptions (stream: AudioStream): LiveSchema {
  // The numeric settings are parsed with parseInt, which yields NaN for a
  // malformed value. NaN !== 0 is true, so a bare zero check would forward NaN
  // to Deepgram; treat any non-finite value the same as 0 ("not configured").
  const isConfigured = (value: number): boolean => Number.isFinite(value) && value !== 0

  const options: Partial<LiveSchema> = {}

  if (isConfigured(config.DgEndpointing)) {
    options.endpointing = config.DgEndpointing
  }
  if (config.DgInterimResults) {
    options.interim_results = true
  }
  if (config.DgVadEvents) {
    options.vad_events = true
  }
  if (isConfigured(config.DgUtteranceEndMs)) {
    options.utterance_end_ms = config.DgUtteranceEndMs
  }
  if (config.DgPunctuate) {
    options.punctuate = true
  }
  if (config.DgSmartFormat) {
    options.smart_format = true
  }
  if (config.DgNoDelay) {
    options.no_delay = true
  }

  // Fixed fields are spread last so they always take precedence over the
  // optional tunables collected above.
  return {
    ...options,
    encoding: 'linear16',
    channels: stream.numChannels,
    sample_rate: stream.sampleRate,
    language: 'multi',
    model: config.DeepgramModel
  }
}
processTrack (sid: string): void { processTrack (sid: string): void {
const track = this.trackBySid.get(sid) const track = this.trackBySid.get(sid)
if (track === undefined) return if (track === undefined) return
@ -141,14 +169,9 @@ export class STT implements Stt {
const stream = new AudioStream(track) const stream = new AudioStream(track)
// const language = this.language ?? 'en' // const language = this.language ?? 'en'
const dgConnection = this.deepgram.listen.live({ const options = this.getOptions(stream)
...dgSchema, const dgConnection = this.deepgram.listen.live(options)
channels: stream.numChannels, console.log('Starting deepgram for track', this.room.name, sid, options)
sample_rate: stream.sampleRate,
language: 'multi',
model: config.DeepgramModel
})
console.log('Starting deepgram for track', this.room.name, sid)
const interval = setInterval(() => { const interval = setInterval(() => {
dgConnection.keepAlive() dgConnection.keepAlive()
@ -167,14 +190,13 @@ export class STT implements Stt {
return return
} }
if (data.speech_final === true) { if (data.speech_final === true || data.is_final === true) {
void this.sendToPlatform(transcript, sid)
} else if (data.is_final === true) {
void this.sendToPlatform(transcript, sid) void this.sendToPlatform(transcript, sid)
} }
}) })
dgConnection.on(LiveTranscriptionEvents.Close, (d) => { dgConnection.on(LiveTranscriptionEvents.Close, (data) => {
console.log('Deepgram closed', data)
this.stopDeepgram(sid) this.stopDeepgram(sid)
}) })