Love agent updates (#7153)

Signed-off-by: Kristina Fefelova <kristin.fefelova@gmail.com>
This commit is contained in:
Kristina 2024-11-11 17:22:38 +04:00 committed by GitHub
parent 9dcb6f23fe
commit 94f8b9f846
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 225 additions and 116 deletions

View File

@ -74,4 +74,10 @@ export interface PostTranscriptRequest {
transcript: string
participant: Ref<Person>
roomName: string
final: boolean
}
export interface IdentityResponse {
identity: Ref<Person>
name: string
}

View File

@ -58,6 +58,7 @@
connecting: boolean
muted: boolean
mirror: boolean
isAgent: boolean
}
let participants: ParticipantData[] = []
@ -80,7 +81,7 @@
const element = track.attach()
attachTrack(element, participant)
}
updateStyle(participants.length, $screenSharing)
updateStyle(getActiveParticipants(participants).length, $screenSharing)
} else {
const part = participants.find((p) => p._id === participant.identity)
if (part !== undefined) {
@ -105,7 +106,7 @@
} else {
track.detach(screen)
}
updateStyle(participants.length, $screenSharing)
updateStyle(getActiveParticipants(participants).length, $screenSharing)
}
}
@ -117,7 +118,7 @@
const element = publication.track.attach()
void attachTrack(element, participant)
}
updateStyle(participants.length, $screenSharing)
updateStyle(getActiveParticipants(participants).length, $screenSharing)
} else {
const part = participants.find((p) => p._id === participant.identity)
if (part !== undefined) {
@ -135,7 +136,8 @@
name: participant.name ?? '',
muted: !participant.isMicrophoneEnabled,
mirror: participant.isLocal,
connecting: false
connecting: false,
isAgent: participant.isAgent
})
}
participants = participants
@ -166,11 +168,12 @@
name: participant.name ?? '',
muted: !participant.isMicrophoneEnabled,
mirror: participant.isLocal,
connecting: false
connecting: false,
isAgent: participant.isAgent
}
participants.push(value)
participants = participants
updateStyle(participants.length, $screenSharing)
updateStyle(getActiveParticipants(participants).length, $screenSharing)
}
function handleParticipantDisconnected (participant: RemoteParticipant): void {
@ -179,7 +182,7 @@
participants.splice(index, 1)
participants = participants
}
updateStyle(participants.length, $screenSharing)
updateStyle(getActiveParticipants(participants).length, $screenSharing)
}
function muteHandler (publication: TrackPublication, participant: Participant): void {
@ -207,7 +210,7 @@
if (publication?.track?.kind === Track.Kind.Video) {
if (publication.track.source === Track.Source.ScreenShare) {
publication.track.detach(screen)
updateStyle(participants.length, $screenSharing)
updateStyle(getActiveParticipants(participants).length, $screenSharing)
} else {
const index = participants.findIndex((p) => p._id === participant.identity)
if (index !== -1) {
@ -285,10 +288,6 @@
onDestroy(
infos.subscribe((data) => {
const aiParticipant = aiPersonId !== undefined ? participants.find(({ _id }) => _id === aiPersonId) : undefined
if (aiParticipant && !data.some((it) => it.room === room._id && it.person === aiParticipant._id)) {
participants = participants.filter(({ _id }) => _id !== aiPersonId)
}
for (const info of data) {
if (info.room !== room._id) continue
const current = participants.find((p) => p._id === info.person)
@ -298,12 +297,13 @@
name: info.name,
muted: true,
mirror: false,
connecting: true
connecting: true,
isAgent: aiPersonId === info.person
}
participants.push(value)
}
participants = participants
updateStyle(participants.length, $screenSharing)
updateStyle(getActiveParticipants(participants).length, $screenSharing)
})
)
@ -342,6 +342,12 @@
}
}
$: if (((document.fullscreenElement && !$isFullScreen) || $isFullScreen) && roomEl) toggleFullscreen()
function getActiveParticipants (participants: ParticipantData[]): ParticipantData[] {
return participants.filter((p) => !p.isAgent || $infos.some(({ person }) => person === p._id))
}
$: activeParticipants = getActiveParticipants(participants)
</script>
<div bind:this={roomEl} class="flex-col-center w-full h-full right-navpanel-border" class:theme-dark={$isFullScreen}>
@ -376,7 +382,7 @@
style={$screenSharing ? '' : gridStyle}
class:scroll-m-0={$screenSharing}
>
{#each participants as participant, i (participant._id)}
{#each activeParticipants as participant, i (participant._id)}
<ParticipantView
bind:this={participantElements[i]}
{...participant}

View File

@ -19,11 +19,6 @@
"lint:fix": "eslint --fix src/**/*.ts",
"format": "prettier --write src/**/*.ts && pnpm lint:fix"
},
"pnpm": {
"overrides": {
"livekit-server-sdk": "2.7.3"
}
},
"devDependencies": {
"@types/node": "~20.11.16",
"@typescript-eslint/eslint-plugin": "^6.11.0",
@ -40,7 +35,7 @@
},
"dependencies": {
"@deepgram/sdk": "^3.9.0",
"@livekit/agents": "^0.3.5",
"@livekit/agents": "^0.4.1",
"@livekit/rtc-node": "^0.11.1",
"dotenv": "^16.4.5"
}

View File

@ -4,9 +4,6 @@ settings:
autoInstallPeers: true
excludeLinksFromLockfile: false
overrides:
livekit-server-sdk: 2.7.3
importers:
.:
@ -15,8 +12,8 @@ importers:
specifier: ^3.9.0
version: 3.9.0
'@livekit/agents':
specifier: ^0.3.5
version: 0.3.5
specifier: ^0.4.1
version: 0.4.1
'@livekit/rtc-node':
specifier: ^0.11.1
version: 0.11.1
@ -246,8 +243,8 @@ packages:
resolution: {integrity: sha512-93zYdMES/c1D69yZiKDBj0V24vqNzB/koF26KPaagAfd3P/4gUlh3Dys5ogAK+Exi9QyzlD8x/08Zt7wIKcDcA==}
deprecated: Use @eslint/object-schema instead
'@livekit/agents@0.3.5':
resolution: {integrity: sha512-qjEIRkr/HdOvEOnvLKZMfnQ472bShQ1Ai2KKng8a1caSDHiLhQBeSb1tzA1Evsmm1k8hilXqh+81/SDj4cfiDA==}
'@livekit/agents@0.4.1':
resolution: {integrity: sha512-zZnd19CWvm1i6PKzAgUw6gLdZOJ/QKzbIVSLNAZPQphCEEg3cPoe3fEf9XE7o9+n6e5sZ6FfmSUh/c7jxWmujw==}
'@livekit/mutex@1.1.0':
resolution: {integrity: sha512-XRLG+z/0uoyDioupjUiskjI06Y51U/IXVPJn7qJ+R3J75XX01irYVBM9MpxeJahpVoe9QhU4moIEolX+HO9U9g==}
@ -1021,8 +1018,8 @@ packages:
resolution: {integrity: sha512-+bT2uH4E5LGE7h/n3evcS/sQlJXCpIp6ym8OWJ5eV6+67Dsql/LaaT7qJBAt2rzfoa/5QBGBhxDix1dMt2kQKQ==}
engines: {node: '>= 0.8.0'}
livekit-server-sdk@2.7.3:
resolution: {integrity: sha512-dBiyMJ2o3Adw7aBVuFxVOlYHmiZtGGS9zVksMuv/wiEVHY+6XSDzo0X67pZVkyGlq1moF4YZAReVY2Dbxve8NQ==}
livekit-server-sdk@2.8.1:
resolution: {integrity: sha512-l8egXU10jPuRJM2Df9Gk/KPEk6tBV0JEGG19cD5QeQtyIMgqULCCd/5yyG2FRvcWRf7pEyZZMXi63zDn7uaKHQ==}
engines: {node: '>=19'}
locate-path@6.0.0:
@ -1069,8 +1066,8 @@ packages:
encoding:
optional: true
object-inspect@1.13.2:
resolution: {integrity: sha512-IRZSRuzJiynemAXPYtPe5BoI/RESNYR7TYm50MC5Mqbd3Jmw5y790sErYw3V6SryFJD64b74qQQs9wn5Bg/k3g==}
object-inspect@1.13.3:
resolution: {integrity: sha512-kDCGIbxkDSXE3euJZZXzc6to7fCrKHNI/hSRQnRuQ+BWjFNzZwiFF8fj/6o2t2G9/jTj8PSIYTfCLelLZEeRpA==}
engines: {node: '>= 0.4'}
object-keys@1.1.1:
@ -1559,13 +1556,14 @@ snapshots:
'@humanwhocodes/object-schema@2.0.3': {}
'@livekit/agents@0.3.5':
'@livekit/agents@0.4.1':
dependencies:
'@livekit/mutex': 1.1.0
'@livekit/protocol': 1.27.1
'@livekit/rtc-node': 0.11.1
'@livekit/typed-emitter': 3.0.0
commander: 12.1.0
livekit-server-sdk: 2.7.3
livekit-server-sdk: 2.8.1
pino: 8.21.0
pino-pretty: 11.3.0
ws: 8.18.0
@ -1977,7 +1975,7 @@ snapshots:
is-string: 1.0.7
is-typed-array: 1.1.13
is-weakref: 1.0.2
object-inspect: 1.13.2
object-inspect: 1.13.3
object-keys: 1.1.1
object.assign: 4.1.5
regexp.prototype.flags: 1.5.3
@ -2488,7 +2486,7 @@ snapshots:
prelude-ls: 1.2.1
type-check: 0.4.0
livekit-server-sdk@2.7.3:
livekit-server-sdk@2.8.1:
dependencies:
'@livekit/protocol': 1.27.1
camelcase-keys: 9.1.3
@ -2527,7 +2525,7 @@ snapshots:
dependencies:
whatwg-url: 5.0.0
object-inspect@1.13.2: {}
object-inspect@1.13.3: {}
object-keys@1.1.1: {}
@ -2748,7 +2746,7 @@ snapshots:
call-bind: 1.0.7
es-errors: 1.3.0
get-intrinsic: 1.2.4
object-inspect: 1.13.2
object-inspect: 1.13.3
slash@3.0.0: {}

View File

@ -13,12 +13,13 @@
// limitations under the License.
//
import { cli, defineAgent, type JobContext, WorkerOptions, WorkerPermissions } from '@livekit/agents'
import { cli, defineAgent, type JobContext, JobRequest, WorkerOptions } from '@livekit/agents'
import { fileURLToPath } from 'node:url'
import { RemoteParticipant, RemoteTrack, RemoteTrackPublication, RoomEvent, TrackKind } from '@livekit/rtc-node'
import { STT } from './stt.js'
import { Metadata, TranscriptionStatus } from './type.js'
import config from './config.js'
function parseMetadata (metadata: string): Metadata {
try {
@ -30,6 +31,45 @@ function parseMetadata (metadata: string): Metadata {
return {}
}
async function requestIdentity (roomName: string): Promise<{ identity: string, name: string } | undefined> {
try {
const res = await fetch(`${config.PlatformUrl}/love/${roomName}/identity`, {
method: 'GET',
headers: {
'Content-Type': 'application/json',
Authorization: 'Bearer ' + config.PlatformToken
}
})
if (!res.ok) {
return undefined
}
return await res.json()
} catch (e) {
console.error('Error during request identity', e)
}
}
const requestFunc = async (req: JobRequest): Promise<void> => {
const roomName = req.room?.name
if (roomName == null) {
console.error('Room name is undefined', { room: req.room })
await req.reject()
return
}
const identity = await requestIdentity(roomName)
if (identity?.identity == null) {
console.error('No ai identity', { roomName })
await req.reject()
return
}
await req.accept(identity.name, identity.identity)
}
function applyMetadata (data: string | undefined, stt: STT): void {
if (data == null || data === '') return
const metadata = parseMetadata(data)
@ -39,11 +79,13 @@ function applyMetadata (data: string | undefined, stt: STT): void {
}
if (metadata.transcription === TranscriptionStatus.InProgress) {
console.log('Starting transcription', stt.name)
stt.start()
} else if (
metadata.transcription === TranscriptionStatus.Completed ||
metadata.transcription === TranscriptionStatus.Idle
) {
console.log('Stopping transcription', stt.name)
stt.stop()
}
}
@ -109,7 +151,7 @@ export function runAgent (): void {
cli.runApp(
new WorkerOptions({
agent: fileURLToPath(import.meta.url),
permissions: new WorkerPermissions(true, true, true, true, [], true)
requestFunc
})
)
}

View File

@ -55,25 +55,8 @@ export class STT {
private readonly dgConnectionBySid = new Map<string, ListenLiveClient>()
private readonly intervalBySid = new Map<string, NodeJS.Timeout>()
private readonly transcriptsBySid = new Map<string, { value: string, startedOn: number }>()
private readonly interval: NodeJS.Timeout
constructor (private readonly name: string) {
constructor (readonly name: string) {
this.deepgram = createClient(config.DeepgramApiKey)
this.interval = this.interval = setInterval(() => {
this.sendTranscriptToPlatform()
}, config.TranscriptDelay)
}
sendTranscriptToPlatform (): void {
const now = Date.now()
for (const [sid, transcript] of this.transcriptsBySid.entries()) {
if (now - transcript.startedOn > config.TranscriptDelay) {
void this.sendToPlatform(transcript.value, sid)
this.transcriptsBySid.delete(sid)
}
}
}
updateLanguage (language: string): void {
@ -175,32 +158,16 @@ export class STT {
dgConnection.on(LiveTranscriptionEvents.Open, () => {
dgConnection.on(LiveTranscriptionEvents.Transcript, (data: LiveTranscriptionEvent) => {
const transcript = data?.channel?.alternatives[0].transcript
if (transcript != null && transcript !== '') {
const prevData = this.transcriptsBySid.get(sid)
const prevValue = prevData?.value ?? ''
if (data.is_final === true) {
// TODO: how to join the final transcript ?
this.transcriptsBySid.set(sid, {
value: prevValue + ' ' + transcript,
startedOn: prevData?.startedOn ?? Date.now()
})
}
if (data.speech_final === true) {
const result = this.transcriptsBySid.get(sid)?.value
if (result != null) {
void this.sendToPlatform(result, sid)
}
this.transcriptsBySid.delete(sid)
}
const hasTranscript = transcript != null && transcript !== ''
if (!hasTranscript) {
return
}
})
dgConnection.addListener(LiveTranscriptionEvents.UtteranceEnd, () => {
const result = this.transcriptsBySid.get(sid)?.value ?? ''
if (result.length > 0) {
void this.sendToPlatform(result, sid)
this.transcriptsBySid.delete(sid)
if (data.speech_final === true) {
void this.sendToPlatform(transcript, sid, true)
} else if (data.is_final === true) {
void this.sendToPlatform(transcript, sid, false)
}
})
@ -232,11 +199,12 @@ export class STT {
}
}
async sendToPlatform (transcript: string, sid: string): Promise<void> {
async sendToPlatform (transcript: string, sid: string, isFinal = false): Promise<void> {
const request = {
transcript,
participant: this.participantBySid.get(sid)?.identity,
roomName: this.name
roomName: this.name,
final: isFinal
}
try {
@ -254,10 +222,9 @@ export class STT {
}
close (): void {
clearInterval(this.interval)
for (const sid of this.transcriptsBySid.keys()) {
this.trackBySid.delete(sid)
this.participantBySid.delete(sid)
this.trackBySid.clear()
this.participantBySid.clear()
for (const sid of this.dgConnectionBySid.keys()) {
this.stopDeepgram(sid)
}
}

View File

@ -21,6 +21,7 @@ import {
AITransferEventRequest,
ConnectMeetingRequest,
DisconnectMeetingRequest,
IdentityResponse,
OnboardingEvent,
OnboardingEventRequest,
OpenChatInSidebarData,
@ -282,17 +283,28 @@ export class AIControl {
await wsClient.loveDisconnect(request)
}
async processLoveTranscript (request: PostTranscriptRequest): Promise<void> {
const parsed = request.roomName.split('_')
if (parsed.length < 3) return
async getLoveIdentity (roomName: string): Promise<IdentityResponse | undefined> {
const parsed = roomName.split('_')
const workspace = parsed[0]
const roomId = parsed[parsed.length - 1]
if (workspace === null) return
const wsClient = await this.getWorkspaceClient(workspace)
if (wsClient === undefined) return
await wsClient.processLoveTranscript(request.transcript, request.participant, roomId as Ref<Room>)
return await wsClient.getLoveIdentity()
}
async processLoveTranscript (request: PostTranscriptRequest): Promise<void> {
const parsed = request.roomName.split('_')
const workspace = parsed[0]
const roomId = parsed[parsed.length - 1]
if (workspace === null || roomId === null) return
const wsClient = await this.getWorkspaceClient(workspace)
if (wsClient === undefined) return
await wsClient.processLoveTranscript(request.transcript, request.participant, roomId as Ref<Room>, request.final)
}
}

View File

@ -23,7 +23,8 @@ import {
AIEventRequest,
ConnectMeetingRequest,
DisconnectMeetingRequest,
PostTranscriptRequest
PostTranscriptRequest,
aiBotAccountEmail
} from '@hcengineering/ai-bot'
import { extractToken } from '@hcengineering/server-client'
@ -96,13 +97,18 @@ export function createServer (controller: AIControl): Express {
await controller.processEvent(token.workspace.name, events as AIEventRequest[])
})
)
app.post(
'/love/transcript',
wrapRequest(async (req, res) => {
wrapRequest(async (req, res, token) => {
if (req.body == null || Array.isArray(req.body) || typeof req.body !== 'object') {
throw new ApiError(400)
}
if (token.email !== aiBotAccountEmail) {
throw new ApiError(401)
}
await controller.processLoveTranscript(req.body as PostTranscriptRequest)
res.status(200)
@ -140,6 +146,25 @@ export function createServer (controller: AIControl): Express {
})
)
app.get(
'/love/:roomName/identity',
wrapRequest(async (req, res, token) => {
if (token.email !== aiBotAccountEmail) {
throw new ApiError(401)
}
const roomName = req.params.roomName
const resp = await controller.getLoveIdentity(roomName)
if (resp === undefined) {
throw new ApiError(404)
}
res.status(200)
res.json(resp)
})
)
app.post(
'/onboarding',
wrapRequest(async (req, res) => {

View File

@ -9,7 +9,8 @@ import core, {
TxCreateDoc,
TxUpdateDoc,
MeasureContext,
Markup
Markup,
generateId
} from '@hcengineering/core'
import { Person } from '@hcengineering/contact'
import love, {
@ -21,11 +22,27 @@ import love, {
TranscriptionStatus
} from '@hcengineering/love'
import { ConnectMeetingRequest } from '@hcengineering/ai-bot'
import chunter from '@hcengineering/chunter'
import chunter, { ChatMessage } from '@hcengineering/chunter'
import { jsonToMarkup, MarkupNodeType } from '@hcengineering/text'
import config from '../config'
class Transcriptions {
private readonly transcriptionByPerson = new Map<Ref<Person>, { _id: Ref<ChatMessage>, text: string }>()
get (person: Ref<Person>): { _id: Ref<ChatMessage>, text: string } | undefined {
return this.transcriptionByPerson.get(person)
}
set (person: Ref<Person>, value: { _id: Ref<ChatMessage>, text: string }): void {
this.transcriptionByPerson.set(person, value)
}
delete (person: Ref<Person>): void {
this.transcriptionByPerson.delete(person)
}
}
export class LoveController {
private readonly roomSidById = new Map<Ref<Room>, string>()
private readonly connectedRooms = new Set<Ref<Room>>()
@ -33,6 +50,7 @@ export class LoveController {
private participantsInfo: ParticipantInfo[] = []
private rooms: Room[] = []
private readonly meetingMinutes: MeetingMinutes[] = []
private readonly activeTranscriptions = new Map<Ref<Room>, Transcriptions>()
constructor (
private readonly workspace: string,
@ -47,6 +65,13 @@ export class LoveController {
}, 5000)
}
getIdentity (): { identity: Ref<Person>, name: string } {
return {
identity: this.currentPerson._id,
name: this.currentPerson.name
}
}
txHandler (txes: Tx[]): void {
const hierarchy = this.client.getHierarchy()
for (const tx of txes) {
@ -141,6 +166,8 @@ export class LoveController {
async disconnect (roomId: Ref<Room>): Promise<void> {
this.ctx.info('Disconnecting', { roomId })
this.activeTranscriptions.delete(roomId)
const participant = await this.getRoomParticipant(roomId, this.currentPerson._id)
if (participant !== undefined) {
await this.client.remove(participant)
@ -156,7 +183,7 @@ export class LoveController {
this.connectedRooms.delete(roomId)
}
async processTranscript (text: string, person: Ref<Person>, roomId: Ref<Room>): Promise<void> {
async processTranscript (text: string, person: Ref<Person>, roomId: Ref<Room>, final: boolean): Promise<void> {
const room = await this.getRoom(roomId)
const participant = await this.getRoomParticipant(roomId, person)
@ -168,19 +195,40 @@ export class LoveController {
const personAccount = this.client.getModel().getAccountByPersonId(participant.person)[0]
if (doc === undefined) return
await this.client.addCollection(
chunter.class.ChatMessage,
core.space.Workspace,
doc._id,
doc._class,
'transcription',
{
message: this.transcriptToMarkup(text)
},
undefined,
undefined,
personAccount._id
)
const transcriptions = this.activeTranscriptions.get(roomId) ?? new Transcriptions()
const activeTranscription = transcriptions.get(participant.person)
if (activeTranscription === undefined) {
const _id = generateId<ChatMessage>()
if (!final) {
transcriptions.set(participant.person, { _id, text })
this.activeTranscriptions.set(roomId, transcriptions)
}
await this.client.addCollection(
chunter.class.ChatMessage,
core.space.Workspace,
doc._id,
doc._class,
'transcription',
{
message: this.transcriptToMarkup(text)
},
_id,
undefined,
personAccount._id
)
} else {
const mergedText = activeTranscription.text + ' ' + text
if (!final) {
transcriptions.set(participant.person, { _id: activeTranscription._id, text: mergedText })
} else {
transcriptions.delete(participant.person)
}
await this.client.updateDoc(chunter.class.ChatMessage, core.space.Workspace, activeTranscription._id, {
message: this.transcriptToMarkup(mergedText)
})
}
}
hasActiveConnections (): boolean {

View File

@ -18,7 +18,8 @@ import aiBot, {
AIMessageEventRequest,
AITransferEventRequest,
ConnectMeetingRequest,
DisconnectMeetingRequest
DisconnectMeetingRequest,
IdentityResponse
} from '@hcengineering/ai-bot'
import chunter, {
ChatMessage,
@ -710,7 +711,7 @@ export class WorkspaceClient {
await this.love.disconnect(request.roomId)
}
async processLoveTranscript (text: string, participant: Ref<Person>, room: Ref<Room>): Promise<void> {
async processLoveTranscript (text: string, participant: Ref<Person>, room: Ref<Room>, final: boolean): Promise<void> {
// Just wait initialization
await this.opClient
@ -719,7 +720,16 @@ export class WorkspaceClient {
return
}
await this.love.processTranscript(text, participant, room)
await this.love.processTranscript(text, participant, room, final)
}
async getLoveIdentity (): Promise<IdentityResponse | undefined> {
// Just wait initialization
await this.opClient
if (this.love === undefined) return
return this.love.getIdentity()
}
canClose (): boolean {