From 55c9dccdd3a9d04c71e8f2fc0ab25366aec3c09a Mon Sep 17 00:00:00 2001 From: Kristina Date: Wed, 2 Apr 2025 15:14:54 +0400 Subject: [PATCH] Fix transcript score (#8434) Signed-off-by: Kristina Fefelova --- services/ai-bot/love-agent/src/config.ts | 8 ++++++- services/ai-bot/love-agent/src/openai/stt.ts | 25 +++++++++++++------- 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/services/ai-bot/love-agent/src/config.ts b/services/ai-bot/love-agent/src/config.ts index 3572e640c2..fec9e33149 100644 --- a/services/ai-bot/love-agent/src/config.ts +++ b/services/ai-bot/love-agent/src/config.ts @@ -26,6 +26,9 @@ interface Config { PlatformToken: string PlatformUrl: string SttProvider: SttProvider + VadSilenceDurationMs: number + VadPrefixPaddingMs: number + VadThreshold: number } const config: Config = (() => { @@ -39,7 +42,10 @@ const config: Config = (() => { OpenaiProvideLanguage: (process.env.OPENAI_PROVIDE_LANGUAGE ?? 'true') === 'true', PlatformToken: process.env.PLATFORM_TOKEN, PlatformUrl: process.env.PLATFORM_URL, - SttProvider: (process.env.STT_PROVIDER as SttProvider) ?? 'deepgram' + SttProvider: (process.env.STT_PROVIDER as SttProvider) ?? 'deepgram', + VadSilenceDurationMs: parseInt(process.env.SILENCE_DURATION_MS ?? '1000'), + VadPrefixPaddingMs: parseInt(process.env.PREFIX_PADDING_MS ?? '1000'), + VadThreshold: parseFloat(process.env.VAD_THRESHOLD ?? '0.5') } const missingEnv = (Object.keys(params) as Array).filter((key) => params[key] === undefined) diff --git a/services/ai-bot/love-agent/src/openai/stt.ts b/services/ai-bot/love-agent/src/openai/stt.ts index 297c6e031e..0f42588c93 100644 --- a/services/ai-bot/love-agent/src/openai/stt.ts +++ b/services/ai-bot/love-agent/src/openai/stt.ts @@ -156,9 +156,9 @@ export class STT implements Stt { }, turn_detection: { type: 'server_vad', - threshold: 0.7, - prefix_padding_ms: 1000, - silence_duration_ms: 2000 + threshold: config.VadThreshold, + prefix_padding_ms: config.VadPrefixPaddingMs, + silence_duration_ms: config.VadSilenceDurationMs }, include: ['item.input_audio_transcription.logprobs'] } @@ -211,8 +211,12 @@ export class STT implements Stt { private onTranscriptCompleted (sid: string, data: any): void { if (data.transcript == null || data.transcript.trim() === '') return - const score = data.logprobs != null && Array.isArray(data.logprobs) ? getTranscriptProbability(data.logprobs.map((lp: any) => lp.logprob)) : undefined - const result = score !== undefined ? `${data.transcript} (${score.toFixed(2)})` : data.transcript + const logprobs: number[] = + data.logprobs != null && Array.isArray(data.logprobs) ? data.logprobs.map((lp: any) => lp.logprob) : [] + const probability = getAvgProbability(logprobs) + const perplexity = getPerplexity(logprobs) + + const result = probability !== undefined ? `${data.transcript} (${probability}, ${perplexity})` : data.transcript void this.sendToPlatform(result, sid) } @@ -282,7 +286,12 @@ export class STT implements Stt { } } -function getTranscriptProbability (logprobs: number[]): number { - const sum = logprobs.reduce((acc, lp) => acc + lp, 0) - return Math.exp(sum) +function getAvgProbability (logprobs: number[]): string { + const avgLogProb = logprobs.reduce((acc, lp) => acc + lp, 0) / logprobs.length + return Math.exp(avgLogProb).toFixed(2) +} + +function getPerplexity (logprobs: number[]): string { + const avgLogProb = logprobs.reduce((acc, lp) => acc + lp, 0) / logprobs.length + return Math.exp(-avgLogProb).toFixed(2) }