Fix transcript score (#8434)

Signed-off-by: Kristina Fefelova <kristin.fefelova@gmail.com>
This commit is contained in:
Kristina 2025-04-02 15:14:54 +04:00 committed by GitHub
parent fd10ab5f7d
commit 55c9dccdd3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 24 additions and 9 deletions

View File

@ -26,6 +26,9 @@ interface Config {
PlatformToken: string
PlatformUrl: string
SttProvider: SttProvider
VadSilenceDurationMs: number
VadPrefixPaddingMs: number
VadThreshold: number
}
const config: Config = (() => {
@ -39,7 +42,10 @@ const config: Config = (() => {
OpenaiProvideLanguage: (process.env.OPENAI_PROVIDE_LANGUAGE ?? 'true') === 'true',
PlatformToken: process.env.PLATFORM_TOKEN,
PlatformUrl: process.env.PLATFORM_URL,
SttProvider: (process.env.STT_PROVIDER as SttProvider) ?? 'deepgram'
SttProvider: (process.env.STT_PROVIDER as SttProvider) ?? 'deepgram',
VadSilenceDurationMs: parseInt(process.env.SILENCE_DURATION_MS ?? '1000'),
VadPrefixPaddingMs: parseInt(process.env.PREFIX_PADDING_MS ?? '1000'),
VadThreshold: parseFloat(process.env.VAD_THRESHOLD ?? '0.5')
}
const missingEnv = (Object.keys(params) as Array<keyof Config>).filter((key) => params[key] === undefined)

View File

@ -156,9 +156,9 @@ export class STT implements Stt {
},
turn_detection: {
type: 'server_vad',
threshold: 0.7,
prefix_padding_ms: 1000,
silence_duration_ms: 2000
threshold: config.VadThreshold,
prefix_padding_ms: config.VadPrefixPaddingMs,
silence_duration_ms: config.VadSilenceDurationMs
},
include: ['item.input_audio_transcription.logprobs']
}
@ -211,8 +211,12 @@ export class STT implements Stt {
/**
 * Handles a completed transcription event for the stream identified by `sid`.
 *
 * Skips events whose transcript is missing or blank, optionally annotates the
 * transcript text with confidence scores derived from `data.logprobs`, and
 * hands the resulting string to `sendToPlatform`.
 *
 * @param sid - identifier passed through unchanged to `sendToPlatform`
 * @param data - event payload; only `transcript` and `logprobs` are read here
 */
private onTranscriptCompleted (sid: string, data: any): void {
// Nothing to forward when the transcript is missing or whitespace-only.
if (data.transcript == null || data.transcript.trim() === '') return
// NOTE(review): the next two statements and the block after them both declare
// `result`; this looks like the pre-change and post-change versions of the same
// logic shown together — confirm which one is live before editing.
const score = data.logprobs != null && Array.isArray(data.logprobs) ? getTranscriptProbability(data.logprobs.map((lp: any) => lp.logprob)) : undefined
const result = score !== undefined ? `${data.transcript} (${score.toFixed(2)})` : data.transcript
// Collect the `logprob` field of every entry; fall back to an empty list when
// `data.logprobs` is absent or not an array.
const logprobs: number[] =
data.logprobs != null && Array.isArray(data.logprobs) ? data.logprobs.map((lp: any) => lp.logprob) : []
const probability = getAvgProbability(logprobs)
const perplexity = getPerplexity(logprobs)
// The "(probability, perplexity)" suffix is appended only when `probability`
// is not `undefined`; otherwise the bare transcript is sent.
const result = probability !== undefined ? `${data.transcript} (${probability}, ${perplexity})` : data.transcript
// The return value of sendToPlatform is deliberately discarded (`void`);
// presumably it is a promise that is not awaited — confirm.
void this.sendToPlatform(result, sid)
}
@ -282,7 +286,12 @@ export class STT implements Stt {
}
}
function getTranscriptProbability (logprobs: number[]): number {
const sum = logprobs.reduce((acc, lp) => acc + lp, 0)
return Math.exp(sum)
/**
 * Computes the average per-token probability of a transcript as
 * `exp(mean(logprobs))`, i.e. the geometric mean of the token probabilities
 * (the values are passed to `Math.exp`, so they are treated as natural logs).
 *
 * @param logprobs - log-probabilities, one entry per token
 * @returns the probability formatted with two decimals, or `undefined` when no
 *   meaningful value exists (empty input, or a non-finite result caused by
 *   non-numeric entries)
 */
function getAvgProbability (logprobs: number[]): string | undefined {
  // An empty list would give 0 / 0 = NaN and, after toFixed, the string "NaN",
  // which callers cannot distinguish from a real score.
  if (logprobs.length === 0) return undefined
  const avgLogProb = logprobs.reduce((acc, lp) => acc + lp, 0) / logprobs.length
  const probability = Math.exp(avgLogProb)
  // NaN/undefined entries poison the sum; report "no score" instead of "NaN".
  return Number.isFinite(probability) ? probability.toFixed(2) : undefined
}
/**
 * Computes the perplexity of a transcript: the exponential of the negated
 * mean of its token log-probabilities, formatted with two decimals.
 *
 * @param logprobs - log-probabilities, one entry per token
 * @returns `exp(-mean(logprobs))` rendered via `toFixed(2)`
 */
function getPerplexity (logprobs: number[]): string {
  let total = 0
  for (const value of logprobs) {
    total += value
  }
  const negatedMean = -(total / logprobs.length)
  const perplexity = Math.exp(negatedMean)
  return perplexity.toFixed(2)
}