mirror of
https://github.com/hcengineering/platform.git
synced 2025-05-10 17:30:51 +00:00
Extract deepgram-config (#8793)
This commit is contained in:
parent
c2defd9638
commit
111c3c6dd1
@ -28,6 +28,14 @@ interface Config {
|
|||||||
VadSilenceDurationMs: number
|
VadSilenceDurationMs: number
|
||||||
VadPrefixPaddingMs: number
|
VadPrefixPaddingMs: number
|
||||||
VadThreshold: number
|
VadThreshold: number
|
||||||
|
|
||||||
|
DgEndpointing: number
|
||||||
|
DgUtteranceEndMs: number
|
||||||
|
DgInterimResults: boolean
|
||||||
|
DgVadEvents: boolean
|
||||||
|
DgPunctuate: boolean
|
||||||
|
DgSmartFormat: boolean
|
||||||
|
DgNoDelay: boolean
|
||||||
}
|
}
|
||||||
|
|
||||||
const config: Config = (() => {
|
const config: Config = (() => {
|
||||||
@ -43,7 +51,15 @@ const config: Config = (() => {
|
|||||||
SttProvider: (process.env.STT_PROVIDER as SttProvider) ?? 'deepgram',
|
SttProvider: (process.env.STT_PROVIDER as SttProvider) ?? 'deepgram',
|
||||||
VadSilenceDurationMs: parseInt(process.env.SILENCE_DURATION_MS ?? '1000'),
|
VadSilenceDurationMs: parseInt(process.env.SILENCE_DURATION_MS ?? '1000'),
|
||||||
VadPrefixPaddingMs: parseInt(process.env.PREFIX_PADDING_MS ?? '1000'),
|
VadPrefixPaddingMs: parseInt(process.env.PREFIX_PADDING_MS ?? '1000'),
|
||||||
VadThreshold: parseFloat(process.env.VAD_THRESHOLD ?? '0.5')
|
VadThreshold: parseFloat(process.env.VAD_THRESHOLD ?? '0.5'),
|
||||||
|
|
||||||
|
DgEndpointing: parseInt(process.env.DG_ENDPOINTING ?? '100'),
|
||||||
|
DgInterimResults: process.env.DG_INTERIM_RESULTS === 'true',
|
||||||
|
DgVadEvents: process.env.DG_VAD_EVENTS === 'true',
|
||||||
|
DgPunctuate: process.env.DG_PUNCTUATE === 'true',
|
||||||
|
DgSmartFormat: process.env.DG_SMART_FORMAT === 'true',
|
||||||
|
DgUtteranceEndMs: parseInt(process.env.DG_UTTERANCE_END_MS ?? '0'),
|
||||||
|
DgNoDelay: process.env.DG_NO_DELAY === 'true'
|
||||||
}
|
}
|
||||||
|
|
||||||
const missingEnv = (Object.keys(params) as Array<keyof Config>).filter((key) => params[key] === undefined)
|
const missingEnv = (Object.keys(params) as Array<keyof Config>).filter((key) => params[key] === undefined)
|
||||||
|
@ -29,19 +29,6 @@ import config from '../config.js'
|
|||||||
|
|
||||||
const KEEP_ALIVE_INTERVAL = 10 * 1000
|
const KEEP_ALIVE_INTERVAL = 10 * 1000
|
||||||
|
|
||||||
// Static base options for Deepgram live transcription: model taken from config,
// 16-bit linear PCM encoding, and fixed values for formatting, endpointing,
// interim results, VAD events, utterance-end timeout, punctuation and language.
// NOTE(review): the only use visible here is the spread into the
// `deepgram.listen.live({ ...dgSchema, ... })` call in processTrack, which sets
// `language` and `model` again after the spread — so `language: 'en'` below
// appears to be overridden by 'multi' there. Confirm against the full file.
const dgSchema: LiveSchema = {
|
|
||||||
model: config.DeepgramModel,
|
|
||||||
encoding: 'linear16',
|
|
||||||
smart_format: true,
|
|
||||||
endpointing: 500,
|
|
||||||
interim_results: true,
|
|
||||||
vad_events: true,
|
|
||||||
utterance_end_ms: 1000,
|
|
||||||
|
|
||||||
punctuate: true,
|
|
||||||
language: 'en'
|
|
||||||
}
|
|
||||||
|
|
||||||
export class STT implements Stt {
|
export class STT implements Stt {
|
||||||
private readonly deepgram: DeepgramClient
|
private readonly deepgram: DeepgramClient
|
||||||
|
|
||||||
@ -134,6 +121,47 @@ export class STT implements Stt {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Builds the Deepgram live-transcription options for one audio stream.
 *
 * The tunable settings come from the `Dg*` fields of `config` and are only
 * included when enabled, so anything left out falls back to whatever Deepgram
 * applies on its side. Encoding, channel count, sample rate, language and model
 * are always set; they are listed after the spread so the tunable part can
 * never override them.
 *
 * @param stream - audio stream whose `numChannels` and `sampleRate` are reported to Deepgram
 * @returns the options object handed to `deepgram.listen.live`
 */
getOptions (stream: AudioStream): LiveSchema {
  const options: Partial<LiveSchema> = {}

  // The numeric settings are produced by parseInt in the config module. An
  // unparsable environment value yields NaN, and `NaN !== 0` is true, so a plain
  // zero check would forward NaN to Deepgram. Treat non-finite values as "not set".
  const isSet = (value: number): boolean => Number.isFinite(value) && value !== 0

  if (isSet(config.DgEndpointing)) {
    options.endpointing = config.DgEndpointing
  }

  if (config.DgInterimResults) {
    options.interim_results = true
  }

  if (config.DgVadEvents) {
    options.vad_events = true
  }

  // NOTE(review): Deepgram's documentation describes utterance_end_ms as relying
  // on interim_results being enabled — confirm both are switched on together.
  if (isSet(config.DgUtteranceEndMs)) {
    options.utterance_end_ms = config.DgUtteranceEndMs
  }

  if (config.DgPunctuate) {
    options.punctuate = true
  }

  if (config.DgSmartFormat) {
    options.smart_format = true
  }

  if (config.DgNoDelay) {
    options.no_delay = true
  }

  return {
    ...options,
    encoding: 'linear16',
    channels: stream.numChannels,
    sample_rate: stream.sampleRate,
    language: 'multi',
    model: config.DeepgramModel
  }
}
|
||||||
|
|
||||||
processTrack (sid: string): void {
|
processTrack (sid: string): void {
|
||||||
const track = this.trackBySid.get(sid)
|
const track = this.trackBySid.get(sid)
|
||||||
if (track === undefined) return
|
if (track === undefined) return
|
||||||
@ -141,14 +169,9 @@ export class STT implements Stt {
|
|||||||
|
|
||||||
const stream = new AudioStream(track)
|
const stream = new AudioStream(track)
|
||||||
// const language = this.language ?? 'en'
|
// const language = this.language ?? 'en'
|
||||||
const dgConnection = this.deepgram.listen.live({
|
const options = this.getOptions(stream)
|
||||||
...dgSchema,
|
const dgConnection = this.deepgram.listen.live(options)
|
||||||
channels: stream.numChannels,
|
console.log('Starting deepgram for track', this.room.name, sid, options)
|
||||||
sample_rate: stream.sampleRate,
|
|
||||||
language: 'multi',
|
|
||||||
model: config.DeepgramModel
|
|
||||||
})
|
|
||||||
console.log('Starting deepgram for track', this.room.name, sid)
|
|
||||||
|
|
||||||
const interval = setInterval(() => {
|
const interval = setInterval(() => {
|
||||||
dgConnection.keepAlive()
|
dgConnection.keepAlive()
|
||||||
@ -167,14 +190,13 @@ export class STT implements Stt {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
if (data.speech_final === true) {
|
if (data.speech_final === true || data.is_final === true) {
|
||||||
void this.sendToPlatform(transcript, sid)
|
|
||||||
} else if (data.is_final === true) {
|
|
||||||
void this.sendToPlatform(transcript, sid)
|
void this.sendToPlatform(transcript, sid)
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
dgConnection.on(LiveTranscriptionEvents.Close, (d) => {
|
dgConnection.on(LiveTranscriptionEvents.Close, (data) => {
|
||||||
|
console.log('Deepgram closed', data)
|
||||||
this.stopDeepgram(sid)
|
this.stopDeepgram(sid)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user