mirror of
https://github.com/hcengineering/platform.git
synced 2025-05-31 20:57:31 +00:00
PDF importer (#7610)
Signed-off-by: Denis Bykhov <bykhov.denis@gmail.com>
This commit is contained in:
parent
52e44bfb9f
commit
d63db0fca6
3
.vscode/launch.json
vendored
3
.vscode/launch.json
vendored
@ -584,7 +584,8 @@
|
|||||||
"PASSWORD": "password",
|
"PASSWORD": "password",
|
||||||
"AVATAR_PATH": "./assets/avatar.png",
|
"AVATAR_PATH": "./assets/avatar.png",
|
||||||
"AVATAR_CONTENT_TYPE": ".png",
|
"AVATAR_CONTENT_TYPE": ".png",
|
||||||
"LOVE_ENDPOINT": "http://localhost:8096"
|
"STORAGE_CONFIG": "minio|localhost?accessKey=minioadmin&secretKey=minioadmin",
|
||||||
|
"LOVE_ENDPOINT": "http://localhost:8096",
|
||||||
},
|
},
|
||||||
"runtimeArgs": ["--nolazy", "-r", "ts-node/register"],
|
"runtimeArgs": ["--nolazy", "-r", "ts-node/register"],
|
||||||
"sourceMaps": true,
|
"sourceMaps": true,
|
||||||
|
@ -27298,7 +27298,7 @@ packages:
|
|||||||
dev: false
|
dev: false
|
||||||
|
|
||||||
file:projects/pod-ai-bot.tgz(bufferutil@4.0.8)(utf-8-validate@6.0.4)(zod@3.23.8):
|
file:projects/pod-ai-bot.tgz(bufferutil@4.0.8)(utf-8-validate@6.0.4)(zod@3.23.8):
|
||||||
resolution: {integrity: sha512-zCUFPyseaS/JuRQ5JMtR1n8ybdRgxQzRn2aP528X1x+jOBVEH2L7VjDGBx1jsJGuUKvCJBCoOAQlCHDgfRambQ==, tarball: file:projects/pod-ai-bot.tgz}
|
resolution: {integrity: sha512-OPo+KhRKsPQhO1eqOIgL30ef9s/TAnEf21bAwzxLLcB70AIwMyjfPRzzJHDOIbVjCzA8t4YFQ5MWWoLUveylFg==, tarball: file:projects/pod-ai-bot.tgz}
|
||||||
id: file:projects/pod-ai-bot.tgz
|
id: file:projects/pod-ai-bot.tgz
|
||||||
name: '@rush-temp/pod-ai-bot'
|
name: '@rush-temp/pod-ai-bot'
|
||||||
version: 0.0.0
|
version: 0.0.0
|
||||||
@ -27309,6 +27309,7 @@ packages:
|
|||||||
'@types/jest': 29.5.12
|
'@types/jest': 29.5.12
|
||||||
'@types/node': 20.11.19
|
'@types/node': 20.11.19
|
||||||
'@types/node-fetch': 2.6.11
|
'@types/node-fetch': 2.6.11
|
||||||
|
'@types/uuid': 8.3.4
|
||||||
'@types/ws': 8.5.11
|
'@types/ws': 8.5.11
|
||||||
'@typescript-eslint/eslint-plugin': 6.21.0(@typescript-eslint/parser@6.21.0)(eslint@8.56.0)(typescript@5.3.3)
|
'@typescript-eslint/eslint-plugin': 6.21.0(@typescript-eslint/parser@6.21.0)(eslint@8.56.0)(typescript@5.3.3)
|
||||||
'@typescript-eslint/parser': 6.21.0(eslint@8.56.0)(typescript@5.3.3)
|
'@typescript-eslint/parser': 6.21.0(eslint@8.56.0)(typescript@5.3.3)
|
||||||
@ -27333,6 +27334,7 @@ packages:
|
|||||||
ts-jest: 29.1.2(esbuild@0.24.2)(jest@29.7.0)(typescript@5.3.3)
|
ts-jest: 29.1.2(esbuild@0.24.2)(jest@29.7.0)(typescript@5.3.3)
|
||||||
ts-node: 10.9.2(@types/node@20.11.19)(typescript@5.3.3)
|
ts-node: 10.9.2(@types/node@20.11.19)(typescript@5.3.3)
|
||||||
typescript: 5.3.3
|
typescript: 5.3.3
|
||||||
|
uuid: 8.3.2
|
||||||
ws: 8.18.0(bufferutil@4.0.8)(utf-8-validate@6.0.4)
|
ws: 8.18.0(bufferutil@4.0.8)(utf-8-validate@6.0.4)
|
||||||
transitivePeerDependencies:
|
transitivePeerDependencies:
|
||||||
- '@aws-sdk/credential-providers'
|
- '@aws-sdk/credential-providers'
|
||||||
|
@ -46,6 +46,7 @@
|
|||||||
"eslint-plugin-n": "^15.4.0",
|
"eslint-plugin-n": "^15.4.0",
|
||||||
"eslint-plugin-node": "^11.1.0",
|
"eslint-plugin-node": "^11.1.0",
|
||||||
"eslint-plugin-promise": "^6.1.1",
|
"eslint-plugin-promise": "^6.1.1",
|
||||||
|
"@types/uuid": "^8.3.1",
|
||||||
"jest": "^29.7.0",
|
"jest": "^29.7.0",
|
||||||
"prettier": "^3.1.0",
|
"prettier": "^3.1.0",
|
||||||
"ts-jest": "^29.1.1",
|
"ts-jest": "^29.1.1",
|
||||||
@ -56,6 +57,8 @@
|
|||||||
"@hcengineering/account": "^0.6.0",
|
"@hcengineering/account": "^0.6.0",
|
||||||
"@hcengineering/ai-bot": "^0.6.0",
|
"@hcengineering/ai-bot": "^0.6.0",
|
||||||
"@hcengineering/analytics-collector": "^0.6.0",
|
"@hcengineering/analytics-collector": "^0.6.0",
|
||||||
|
"@hcengineering/document": "^0.6.0",
|
||||||
|
"@hcengineering/attachment": "^0.6.14",
|
||||||
"@hcengineering/chunter": "^0.6.20",
|
"@hcengineering/chunter": "^0.6.20",
|
||||||
"@hcengineering/client": "^0.6.18",
|
"@hcengineering/client": "^0.6.18",
|
||||||
"@hcengineering/client-resources": "^0.6.27",
|
"@hcengineering/client-resources": "^0.6.27",
|
||||||
@ -72,6 +75,8 @@
|
|||||||
"@hcengineering/server-token": "^0.6.11",
|
"@hcengineering/server-token": "^0.6.11",
|
||||||
"@hcengineering/setting": "^0.6.17",
|
"@hcengineering/setting": "^0.6.17",
|
||||||
"@hcengineering/text": "^0.6.5",
|
"@hcengineering/text": "^0.6.5",
|
||||||
|
"@hcengineering/rank": "^0.6.4",
|
||||||
|
"@hcengineering/server-storage": "^0.6.0",
|
||||||
"@hcengineering/workbench": "^0.6.16",
|
"@hcengineering/workbench": "^0.6.16",
|
||||||
"@hcengineering/love": "^0.6.0",
|
"@hcengineering/love": "^0.6.0",
|
||||||
"cors": "^2.8.5",
|
"cors": "^2.8.5",
|
||||||
@ -80,6 +85,7 @@
|
|||||||
"fast-equals": "^5.0.1",
|
"fast-equals": "^5.0.1",
|
||||||
"form-data": "^4.0.0",
|
"form-data": "^4.0.0",
|
||||||
"js-tiktoken": "^1.0.14",
|
"js-tiktoken": "^1.0.14",
|
||||||
|
"uuid": "^8.3.2",
|
||||||
"mongodb": "^6.12.0",
|
"mongodb": "^6.12.0",
|
||||||
"openai": "^4.56.0",
|
"openai": "^4.56.0",
|
||||||
"ws": "^8.18.0"
|
"ws": "^8.18.0"
|
||||||
|
@ -36,6 +36,7 @@ interface Config {
|
|||||||
MaxHistoryRecords: number
|
MaxHistoryRecords: number
|
||||||
Port: number
|
Port: number
|
||||||
LoveEndpoint: string
|
LoveEndpoint: string
|
||||||
|
DataLabApiKey: string
|
||||||
}
|
}
|
||||||
|
|
||||||
const parseNumber = (str: string | undefined): number | undefined => (str !== undefined ? Number(str) : undefined)
|
const parseNumber = (str: string | undefined): number | undefined => (str !== undefined ? Number(str) : undefined)
|
||||||
@ -61,7 +62,8 @@ const config: Config = (() => {
|
|||||||
MaxContentTokens: parseNumber(process.env.MAX_CONTENT_TOKENS) ?? 128 * 100,
|
MaxContentTokens: parseNumber(process.env.MAX_CONTENT_TOKENS) ?? 128 * 100,
|
||||||
MaxHistoryRecords: parseNumber(process.env.MAX_HISTORY_RECORDS) ?? 500,
|
MaxHistoryRecords: parseNumber(process.env.MAX_HISTORY_RECORDS) ?? 500,
|
||||||
Port: parseNumber(process.env.PORT) ?? 4010,
|
Port: parseNumber(process.env.PORT) ?? 4010,
|
||||||
LoveEndpoint: process.env.LOVE_ENDPOINT ?? ''
|
LoveEndpoint: process.env.LOVE_ENDPOINT ?? '',
|
||||||
|
DataLabApiKey: process.env.DATALAB_API_KEY ?? ''
|
||||||
}
|
}
|
||||||
|
|
||||||
const missingEnv = (Object.keys(params) as Array<keyof Config>).filter((key) => params[key] === undefined)
|
const missingEnv = (Object.keys(params) as Array<keyof Config>).filter((key) => params[key] === undefined)
|
||||||
|
@ -29,22 +29,24 @@ import {
|
|||||||
TranslateRequest,
|
TranslateRequest,
|
||||||
TranslateResponse
|
TranslateResponse
|
||||||
} from '@hcengineering/ai-bot'
|
} from '@hcengineering/ai-bot'
|
||||||
|
import { Markup, MeasureContext, Ref, WorkspaceId } from '@hcengineering/core'
|
||||||
|
import { Room } from '@hcengineering/love'
|
||||||
import { WorkspaceInfoRecord } from '@hcengineering/server-ai-bot'
|
import { WorkspaceInfoRecord } from '@hcengineering/server-ai-bot'
|
||||||
import { getTransactorEndpoint } from '@hcengineering/server-client'
|
import { getTransactorEndpoint } from '@hcengineering/server-client'
|
||||||
import { generateToken } from '@hcengineering/server-token'
|
import { generateToken } from '@hcengineering/server-token'
|
||||||
import OpenAI from 'openai'
|
|
||||||
import { encodingForModel } from 'js-tiktoken'
|
|
||||||
import { htmlToMarkup, markupToHTML } from '@hcengineering/text'
|
import { htmlToMarkup, markupToHTML } from '@hcengineering/text'
|
||||||
import { Markup, MeasureContext, Ref, WorkspaceId } from '@hcengineering/core'
|
import { encodingForModel } from 'js-tiktoken'
|
||||||
import { Room } from '@hcengineering/love'
|
import OpenAI from 'openai'
|
||||||
|
|
||||||
import { WorkspaceClient } from './workspace/workspaceClient'
|
import { StorageAdapter } from '@hcengineering/server-core'
|
||||||
|
import { buildStorageFromConfig, storageConfigFromEnv } from '@hcengineering/server-storage'
|
||||||
import config from './config'
|
import config from './config'
|
||||||
import { DbStorage } from './storage'
|
import { DbStorage } from './storage'
|
||||||
import { SupportWsClient } from './workspace/supportWsClient'
|
|
||||||
import { AIReplyTransferData } from './types'
|
import { AIReplyTransferData } from './types'
|
||||||
import { tryAssignToWorkspace } from './utils/account'
|
import { tryAssignToWorkspace } from './utils/account'
|
||||||
import { translateHtml } from './utils/openai'
|
import { translateHtml } from './utils/openai'
|
||||||
|
import { SupportWsClient } from './workspace/supportWsClient'
|
||||||
|
import { WorkspaceClient } from './workspace/workspaceClient'
|
||||||
|
|
||||||
const CLOSE_INTERVAL_MS = 10 * 60 * 1000 // 10 minutes
|
const CLOSE_INTERVAL_MS = 10 * 60 * 1000 // 10 minutes
|
||||||
|
|
||||||
@ -54,6 +56,7 @@ export class AIControl {
|
|||||||
private readonly connectingWorkspaces = new Map<string, Promise<void>>()
|
private readonly connectingWorkspaces = new Map<string, Promise<void>>()
|
||||||
|
|
||||||
readonly aiClient?: OpenAI
|
readonly aiClient?: OpenAI
|
||||||
|
readonly storageAdapter: StorageAdapter
|
||||||
readonly encoding = encodingForModel(config.OpenAIModel)
|
readonly encoding = encodingForModel(config.OpenAIModel)
|
||||||
|
|
||||||
supportClient: SupportWsClient | undefined = undefined
|
supportClient: SupportWsClient | undefined = undefined
|
||||||
@ -70,6 +73,7 @@ export class AIControl {
|
|||||||
})
|
})
|
||||||
: undefined
|
: undefined
|
||||||
void this.connectSupportWorkspace()
|
void this.connectSupportWorkspace()
|
||||||
|
this.storageAdapter = buildStorageFromConfig(storageConfigFromEnv())
|
||||||
}
|
}
|
||||||
|
|
||||||
async getWorkspaceRecord (workspace: string): Promise<WorkspaceInfoRecord> {
|
async getWorkspaceRecord (workspace: string): Promise<WorkspaceInfoRecord> {
|
||||||
@ -125,10 +129,26 @@ export class AIControl {
|
|||||||
this.ctx.info('Listen workspace: ', { workspace })
|
this.ctx.info('Listen workspace: ', { workspace })
|
||||||
|
|
||||||
if (workspace === config.SupportWorkspace) {
|
if (workspace === config.SupportWorkspace) {
|
||||||
return new SupportWsClient(endpoint, token, workspace, this, this.ctx.newChild(workspace, {}), info)
|
return new SupportWsClient(
|
||||||
|
this.storageAdapter,
|
||||||
|
endpoint,
|
||||||
|
token,
|
||||||
|
workspace,
|
||||||
|
this,
|
||||||
|
this.ctx.newChild(workspace, {}),
|
||||||
|
info
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
return new WorkspaceClient(endpoint, token, workspace, this, this.ctx.newChild(workspace, {}), info)
|
return new WorkspaceClient(
|
||||||
|
this.storageAdapter,
|
||||||
|
endpoint,
|
||||||
|
token,
|
||||||
|
workspace,
|
||||||
|
this,
|
||||||
|
this.ctx.newChild(workspace, {}),
|
||||||
|
info
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
async initWorkspaceClient (workspace: string): Promise<void> {
|
async initWorkspaceClient (workspace: string): Promise<void> {
|
||||||
|
@ -13,12 +13,13 @@
|
|||||||
// limitations under the License.
|
// limitations under the License.
|
||||||
//
|
//
|
||||||
|
|
||||||
import OpenAI from 'openai'
|
|
||||||
import { countTokens } from '@hcengineering/openai'
|
import { countTokens } from '@hcengineering/openai'
|
||||||
import { Tiktoken } from 'js-tiktoken'
|
import { Tiktoken } from 'js-tiktoken'
|
||||||
|
import OpenAI from 'openai'
|
||||||
import config from '../config'
|
import config from '../config'
|
||||||
import { HistoryRecord } from '../types'
|
import { HistoryRecord } from '../types'
|
||||||
|
import { WorkspaceClient } from '../workspace/workspaceClient'
|
||||||
|
import { getTools } from './tools'
|
||||||
|
|
||||||
export async function translateHtml (client: OpenAI, html: string, lang: string): Promise<string | undefined> {
|
export async function translateHtml (client: OpenAI, html: string, lang: string): Promise<string | undefined> {
|
||||||
const response = await client.chat.completions.create({
|
const response = await client.chat.completions.create({
|
||||||
@ -66,6 +67,58 @@ export async function createChatCompletion (
|
|||||||
return undefined
|
return undefined
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export async function createChatCompletionWithTools (
|
||||||
|
workspaceClient: WorkspaceClient,
|
||||||
|
client: OpenAI,
|
||||||
|
message: OpenAI.ChatCompletionMessageParam,
|
||||||
|
user?: string,
|
||||||
|
history: OpenAI.ChatCompletionMessageParam[] = [],
|
||||||
|
skipCache = true
|
||||||
|
): Promise<
|
||||||
|
| {
|
||||||
|
completion: string | undefined
|
||||||
|
usage: number
|
||||||
|
}
|
||||||
|
| undefined
|
||||||
|
> {
|
||||||
|
const opt: OpenAI.RequestOptions = {}
|
||||||
|
if (skipCache) {
|
||||||
|
opt.headers = { 'cf-skip-cache': 'true' }
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
const res = client.beta.chat.completions
|
||||||
|
.runTools(
|
||||||
|
{
|
||||||
|
messages: [
|
||||||
|
{
|
||||||
|
role: 'system',
|
||||||
|
content: 'Use tools if possible, don`t use previous information after success using tool for user request'
|
||||||
|
},
|
||||||
|
...history,
|
||||||
|
message
|
||||||
|
],
|
||||||
|
model: config.OpenAIModel,
|
||||||
|
user,
|
||||||
|
tools: getTools(workspaceClient, user)
|
||||||
|
},
|
||||||
|
opt
|
||||||
|
)
|
||||||
|
.on('message', (message) => {
|
||||||
|
console.log(message)
|
||||||
|
})
|
||||||
|
const str = await res.finalContent()
|
||||||
|
const usage = (await res.totalUsage()).completion_tokens
|
||||||
|
return {
|
||||||
|
completion: str ?? undefined,
|
||||||
|
usage
|
||||||
|
}
|
||||||
|
} catch (e) {
|
||||||
|
console.error(e)
|
||||||
|
}
|
||||||
|
|
||||||
|
return undefined
|
||||||
|
}
|
||||||
|
|
||||||
export async function requestSummary (
|
export async function requestSummary (
|
||||||
aiClient: OpenAI,
|
aiClient: OpenAI,
|
||||||
encoding: Tiktoken,
|
encoding: Tiktoken,
|
||||||
|
243
services/ai-bot/pod-ai-bot/src/utils/tools.ts
Normal file
243
services/ai-bot/pod-ai-bot/src/utils/tools.ts
Normal file
@ -0,0 +1,243 @@
|
|||||||
|
import { Account, MarkupBlobRef, Ref } from '@hcengineering/core'
|
||||||
|
import document, { Document, getFirstRank, Teamspace } from '@hcengineering/document'
|
||||||
|
import { makeRank } from '@hcengineering/rank'
|
||||||
|
import { parseMessageMarkdown } from '@hcengineering/text'
|
||||||
|
import {
|
||||||
|
BaseFunctionsArgs,
|
||||||
|
RunnableFunctionWithoutParse,
|
||||||
|
RunnableFunctionWithParse,
|
||||||
|
RunnableToolFunction,
|
||||||
|
RunnableToolFunctionWithoutParse,
|
||||||
|
RunnableToolFunctionWithParse,
|
||||||
|
RunnableTools
|
||||||
|
} from 'openai/lib/RunnableFunction'
|
||||||
|
import { Stream } from 'stream'
|
||||||
|
import { v4 as uuid } from 'uuid'
|
||||||
|
import config from '../config'
|
||||||
|
import { WorkspaceClient } from '../workspace/workspaceClient'
|
||||||
|
|
||||||
|
async function stream2buffer (stream: Stream): Promise<Buffer> {
|
||||||
|
return await new Promise<Buffer>((resolve, reject) => {
|
||||||
|
const _buf = Array<any>()
|
||||||
|
stream.on('data', (chunk) => {
|
||||||
|
_buf.push(chunk)
|
||||||
|
})
|
||||||
|
stream.on('end', () => {
|
||||||
|
resolve(Buffer.concat(_buf))
|
||||||
|
})
|
||||||
|
stream.on('error', (err) => {
|
||||||
|
reject(new Error(`error converting stream - ${err}`))
|
||||||
|
})
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
async function pdfToMarkdown (
|
||||||
|
workspaceClient: WorkspaceClient,
|
||||||
|
fileId: string,
|
||||||
|
name: string | undefined
|
||||||
|
): Promise<string | undefined> {
|
||||||
|
if (config.DataLabApiKey !== '') {
|
||||||
|
try {
|
||||||
|
const stat = await workspaceClient.storage.stat(workspaceClient.ctx, { name: workspaceClient.workspace }, fileId)
|
||||||
|
if (stat?.contentType !== 'application/pdf') {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
const file = await workspaceClient.storage.get(workspaceClient.ctx, { name: workspaceClient.workspace }, fileId)
|
||||||
|
const buffer = await stream2buffer(file)
|
||||||
|
|
||||||
|
const url = 'https://www.datalab.to/api/v1/marker'
|
||||||
|
const formData = new FormData()
|
||||||
|
formData.append('file', new Blob([buffer], { type: 'application/pdf' }), name ?? 'test.pdf')
|
||||||
|
formData.append('force_ocr', 'false')
|
||||||
|
formData.append('paginate', 'false')
|
||||||
|
formData.append('output_format', 'markdown')
|
||||||
|
formData.append('use_llm', 'false')
|
||||||
|
formData.append('strip_existing_ocr', 'false')
|
||||||
|
formData.append('disable_image_extraction', 'false')
|
||||||
|
|
||||||
|
const headers = { 'X-Api-Key': config.DataLabApiKey }
|
||||||
|
|
||||||
|
const response = await fetch(url, {
|
||||||
|
method: 'POST',
|
||||||
|
body: formData,
|
||||||
|
headers
|
||||||
|
})
|
||||||
|
|
||||||
|
const data = await response.json()
|
||||||
|
console.log('data', data)
|
||||||
|
if (data.request_check_url !== undefined) {
|
||||||
|
for (let attempt = 0; attempt < 10; attempt++) {
|
||||||
|
const resp = await fetch(data.request_check_url, { headers })
|
||||||
|
const result = await resp.json()
|
||||||
|
if (result.status === 'complete' && result.markdown !== undefined) {
|
||||||
|
return result.markdown
|
||||||
|
}
|
||||||
|
await new Promise((resolve) => setTimeout(resolve, 2000))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (e) {
|
||||||
|
console.error(e)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async function saveFile (
|
||||||
|
workspaceClient: WorkspaceClient,
|
||||||
|
user: string | undefined,
|
||||||
|
args: { fileId: string, folder: string | undefined, parent: string | undefined, name: string }
|
||||||
|
): Promise<string> {
|
||||||
|
console.log('Save file', args)
|
||||||
|
const content = await pdfToMarkdown(workspaceClient, args.fileId, args.name)
|
||||||
|
if (content === undefined) {
|
||||||
|
return 'Error while converting pdf to markdown'
|
||||||
|
}
|
||||||
|
const converted = JSON.stringify(parseMessageMarkdown(content, 'image://'))
|
||||||
|
|
||||||
|
const client = await workspaceClient.opClient
|
||||||
|
const fileId = uuid()
|
||||||
|
await workspaceClient.storage.put(
|
||||||
|
workspaceClient.ctx,
|
||||||
|
{ name: workspaceClient.workspace },
|
||||||
|
fileId,
|
||||||
|
converted,
|
||||||
|
'application/json'
|
||||||
|
)
|
||||||
|
|
||||||
|
const teamspaces = await client.findAll(document.class.Teamspace, {})
|
||||||
|
const parent = await client.findOne(document.class.Document, { _id: args.parent as Ref<Document> })
|
||||||
|
const teamspaceId = getTeamspace(args.folder, parent, teamspaces)
|
||||||
|
const parentId = parent?._id ?? document.ids.NoParent
|
||||||
|
const lastRank = await getFirstRank(client, teamspaceId, parentId)
|
||||||
|
const rank = makeRank(lastRank, undefined)
|
||||||
|
const _id = await client.createDoc(document.class.Document, teamspaceId, {
|
||||||
|
title: args.name,
|
||||||
|
parent: parentId,
|
||||||
|
content: fileId as MarkupBlobRef,
|
||||||
|
rank
|
||||||
|
})
|
||||||
|
|
||||||
|
return `File saved as ${args.name} with id ${_id}, always provide mention link as: [](ref://?_class=document%3Aclass%3ADocument&_id=${_id}&label=${args.name})`
|
||||||
|
}
|
||||||
|
|
||||||
|
function getTeamspace (
|
||||||
|
folder: string | undefined,
|
||||||
|
parent: Document | undefined,
|
||||||
|
teamspaces: Teamspace[]
|
||||||
|
): Ref<Teamspace> {
|
||||||
|
if (parent !== undefined) return parent.space
|
||||||
|
if (folder !== undefined) {
|
||||||
|
const teamspace = teamspaces.find(
|
||||||
|
(p) => p.name.trim().toLowerCase() === folder.trim().toLowerCase() || p._id === folder
|
||||||
|
)
|
||||||
|
if (teamspace !== undefined) return teamspace._id
|
||||||
|
}
|
||||||
|
return teamspaces[0]._id
|
||||||
|
}
|
||||||
|
|
||||||
|
async function getFoldersForDocuments (
|
||||||
|
workspaceClient: WorkspaceClient,
|
||||||
|
user: string | undefined,
|
||||||
|
args: Record<string, any>
|
||||||
|
): Promise<string> {
|
||||||
|
const client = await workspaceClient.opClient
|
||||||
|
const spaces = await client.findAll(
|
||||||
|
document.class.Teamspace,
|
||||||
|
user !== undefined ? { members: user as Ref<Account>, archived: false } : { archived: false }
|
||||||
|
)
|
||||||
|
let res = 'Folders:\n'
|
||||||
|
for (const space of spaces) {
|
||||||
|
res += `Id: ${space._id} Name: ${space.name}\n`
|
||||||
|
}
|
||||||
|
res += 'Parents:\n'
|
||||||
|
const parents = await client.findAll(document.class.Document, { space: { $in: spaces.map((p) => p._id) } })
|
||||||
|
for (const parent of parents) {
|
||||||
|
res += `Id: ${parent._id} Name: ${parent.title}\n`
|
||||||
|
}
|
||||||
|
return res
|
||||||
|
}
|
||||||
|
|
||||||
|
type ChangeFields<T, R> = Omit<T, keyof R> & R
|
||||||
|
type PredefinedTool<T extends object | string> = ChangeFields<
|
||||||
|
RunnableToolFunction<T>,
|
||||||
|
{
|
||||||
|
function: PredefinedToolFunction<T>
|
||||||
|
}
|
||||||
|
>
|
||||||
|
type PredefinedToolFunction<T extends object | string> = Omit<
|
||||||
|
T extends string ? RunnableFunctionWithoutParse : RunnableFunctionWithParse<any>,
|
||||||
|
'function'
|
||||||
|
>
|
||||||
|
type ToolFunc = (workspaceClient: WorkspaceClient, user: string | undefined, args: any) => Promise<string> | string
|
||||||
|
|
||||||
|
const tools: [PredefinedTool<any>, ToolFunc][] = []
|
||||||
|
|
||||||
|
export function registerTool<T extends object | string> (tool: PredefinedTool<T>, func: ToolFunc): void {
|
||||||
|
tools.push([tool, func])
|
||||||
|
}
|
||||||
|
|
||||||
|
registerTool(
|
||||||
|
{
|
||||||
|
type: 'function',
|
||||||
|
function: {
|
||||||
|
name: 'getDataBeforeImport',
|
||||||
|
parameters: {
|
||||||
|
type: 'object',
|
||||||
|
properties: {}
|
||||||
|
},
|
||||||
|
description:
|
||||||
|
'Get folders and parents for documents. This step necessery before saveFile tool. YOU MUST USE IT BEFORE import file.'
|
||||||
|
}
|
||||||
|
},
|
||||||
|
getFoldersForDocuments
|
||||||
|
)
|
||||||
|
|
||||||
|
registerTool<object>(
|
||||||
|
{
|
||||||
|
type: 'function',
|
||||||
|
function: {
|
||||||
|
name: 'saveFile',
|
||||||
|
parse: JSON.parse,
|
||||||
|
parameters: {
|
||||||
|
type: 'object',
|
||||||
|
required: ['fileId, folder, name'],
|
||||||
|
properties: {
|
||||||
|
fileId: { type: 'string', description: 'File id to parse' },
|
||||||
|
folder: {
|
||||||
|
type: 'string',
|
||||||
|
default: '',
|
||||||
|
description:
|
||||||
|
'Folder, id from getDataBeforeImport. If not provided you can guess by file name and folder name, or by another file names, if you can`t, just ask user. Don`t provide empty, this field is required. If no folders at all, you should stop pipeline execution and ask user to create teamspace'
|
||||||
|
},
|
||||||
|
parent: {
|
||||||
|
type: 'string',
|
||||||
|
default: '',
|
||||||
|
description:
|
||||||
|
'Parent document, use id from getDataBeforeImport, leave empty string if not provided, it is not necessery, please feel free to pass empty string'
|
||||||
|
},
|
||||||
|
name: {
|
||||||
|
type: 'string',
|
||||||
|
description: 'Name for file, try to recognize from user input, if not provided use attached file name'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
description:
|
||||||
|
'Parse pdf to markdown and save it, using for import files. Use only if provide file in current message and user require to import/save, if file not provided ask user to attach it. You MUST call getDataBeforeImport tool before for get ids. Use file name as name if user not provide it, don`t use old parameters. You can ask user about folder if you have not enough data to get folder id'
|
||||||
|
}
|
||||||
|
},
|
||||||
|
saveFile
|
||||||
|
)
|
||||||
|
|
||||||
|
export function getTools (workspaceClient: WorkspaceClient, user: string | undefined): RunnableTools<BaseFunctionsArgs> {
|
||||||
|
const result: (RunnableToolFunctionWithoutParse | RunnableToolFunctionWithParse<any>)[] = []
|
||||||
|
for (const tool of tools) {
|
||||||
|
const res: RunnableToolFunctionWithoutParse | RunnableToolFunctionWithParse<any> = {
|
||||||
|
...tool[0],
|
||||||
|
function: {
|
||||||
|
...tool[0].function,
|
||||||
|
function: (args: any) => tool[1](workspaceClient, user, args)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
result.push(res)
|
||||||
|
}
|
||||||
|
return result
|
||||||
|
}
|
@ -22,6 +22,7 @@ import aiBot, {
|
|||||||
IdentityResponse
|
IdentityResponse
|
||||||
} from '@hcengineering/ai-bot'
|
} from '@hcengineering/ai-bot'
|
||||||
import analyticsCollector, { OnboardingChannel } from '@hcengineering/analytics-collector'
|
import analyticsCollector, { OnboardingChannel } from '@hcengineering/analytics-collector'
|
||||||
|
import attachment, { Attachment } from '@hcengineering/attachment'
|
||||||
import chunter, {
|
import chunter, {
|
||||||
ChatMessage,
|
ChatMessage,
|
||||||
type ChatWidgetTab,
|
type ChatWidgetTab,
|
||||||
@ -59,7 +60,7 @@ import { Room } from '@hcengineering/love'
|
|||||||
import { countTokens } from '@hcengineering/openai'
|
import { countTokens } from '@hcengineering/openai'
|
||||||
import { WorkspaceInfoRecord } from '@hcengineering/server-ai-bot'
|
import { WorkspaceInfoRecord } from '@hcengineering/server-ai-bot'
|
||||||
import { getOrCreateOnboardingChannel } from '@hcengineering/server-analytics-collector-resources'
|
import { getOrCreateOnboardingChannel } from '@hcengineering/server-analytics-collector-resources'
|
||||||
import { BlobClient, login } from '@hcengineering/server-client'
|
import { login } from '@hcengineering/server-client'
|
||||||
import { generateToken } from '@hcengineering/server-token'
|
import { generateToken } from '@hcengineering/server-token'
|
||||||
import { jsonToMarkup, MarkdownParser, markupToText } from '@hcengineering/text'
|
import { jsonToMarkup, MarkdownParser, markupToText } from '@hcengineering/text'
|
||||||
import workbench, { SidebarEvent, TxSidebarEvent } from '@hcengineering/workbench'
|
import workbench, { SidebarEvent, TxSidebarEvent } from '@hcengineering/workbench'
|
||||||
@ -67,10 +68,11 @@ import fs from 'fs'
|
|||||||
import { WithId } from 'mongodb'
|
import { WithId } from 'mongodb'
|
||||||
import OpenAI from 'openai'
|
import OpenAI from 'openai'
|
||||||
|
|
||||||
|
import { StorageAdapter } from '@hcengineering/server-core'
|
||||||
import config from '../config'
|
import config from '../config'
|
||||||
import { AIControl } from '../controller'
|
import { AIControl } from '../controller'
|
||||||
import { HistoryRecord } from '../types'
|
import { HistoryRecord } from '../types'
|
||||||
import { createChatCompletion, requestSummary } from '../utils/openai'
|
import { createChatCompletionWithTools, requestSummary } from '../utils/openai'
|
||||||
import { connectPlatform, getDirect } from '../utils/platform'
|
import { connectPlatform, getDirect } from '../utils/platform'
|
||||||
import { LoveController } from './love'
|
import { LoveController } from './love'
|
||||||
|
|
||||||
@ -81,8 +83,6 @@ export class WorkspaceClient {
|
|||||||
client: Client | undefined
|
client: Client | undefined
|
||||||
opClient: Promise<TxOperations> | TxOperations
|
opClient: Promise<TxOperations> | TxOperations
|
||||||
|
|
||||||
blobClient: BlobClient
|
|
||||||
|
|
||||||
loginTimeout: NodeJS.Timeout | undefined
|
loginTimeout: NodeJS.Timeout | undefined
|
||||||
loginDelayMs = 2 * 1000
|
loginDelayMs = 2 * 1000
|
||||||
|
|
||||||
@ -103,6 +103,7 @@ export class WorkspaceClient {
|
|||||||
love: LoveController | undefined
|
love: LoveController | undefined
|
||||||
|
|
||||||
constructor (
|
constructor (
|
||||||
|
readonly storage: StorageAdapter,
|
||||||
readonly transactorUrl: string,
|
readonly transactorUrl: string,
|
||||||
readonly token: string,
|
readonly token: string,
|
||||||
readonly workspace: string,
|
readonly workspace: string,
|
||||||
@ -110,7 +111,6 @@ export class WorkspaceClient {
|
|||||||
readonly ctx: MeasureContext,
|
readonly ctx: MeasureContext,
|
||||||
readonly info: WorkspaceInfoRecord | undefined
|
readonly info: WorkspaceInfoRecord | undefined
|
||||||
) {
|
) {
|
||||||
this.blobClient = new BlobClient(transactorUrl, token, { name: this.workspace })
|
|
||||||
this.opClient = this.initClient()
|
this.opClient = this.initClient()
|
||||||
void this.opClient.then((opClient) => {
|
void this.opClient.then((opClient) => {
|
||||||
this.opClient = opClient
|
this.opClient = opClient
|
||||||
@ -154,7 +154,14 @@ export class WorkspaceClient {
|
|||||||
if (!isAlreadyUploaded) {
|
if (!isAlreadyUploaded) {
|
||||||
const data = fs.readFileSync(config.AvatarPath)
|
const data = fs.readFileSync(config.AvatarPath)
|
||||||
|
|
||||||
await this.blobClient.upload(this.ctx, config.AvatarName, data.length, config.AvatarContentType, data)
|
await this.storage.put(
|
||||||
|
this.ctx,
|
||||||
|
{ name: this.workspace },
|
||||||
|
config.AvatarName,
|
||||||
|
data,
|
||||||
|
config.AvatarContentType,
|
||||||
|
data.length
|
||||||
|
)
|
||||||
await this.controller.updateAvatarInfo(this.workspace, config.AvatarPath, lastModified)
|
await this.controller.updateAvatarInfo(this.workspace, config.AvatarPath, lastModified)
|
||||||
this.ctx.info('Avatar file uploaded successfully', { workspace: this.workspace, path: config.AvatarPath })
|
this.ctx.info('Avatar file uploaded successfully', { workspace: this.workspace, path: config.AvatarPath })
|
||||||
}
|
}
|
||||||
@ -209,9 +216,9 @@ export class WorkspaceClient {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
const exist = await this.blobClient.checkFile(this.ctx, config.AvatarName)
|
const exist = await this.storage.stat(this.ctx, { name: this.workspace }, config.AvatarName)
|
||||||
|
|
||||||
if (!exist) {
|
if (exist === undefined) {
|
||||||
this.ctx.error('Cannot find file', { file: config.AvatarName, workspace: this.workspace })
|
this.ctx.error('Cannot find file', { file: config.AvatarName, workspace: this.workspace })
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
@ -449,11 +456,23 @@ export class WorkspaceClient {
|
|||||||
this.historyMap.set(objectId, currentHistory)
|
this.historyMap.set(objectId, currentHistory)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async getAttachments (client: TxOperations, objectId: Ref<Doc>): Promise<Attachment[]> {
|
||||||
|
return await client.findAll(attachment.class.Attachment, { attachedTo: objectId })
|
||||||
|
}
|
||||||
|
|
||||||
async processMessageEvent (event: AIMessageEventRequest): Promise<void> {
|
async processMessageEvent (event: AIMessageEventRequest): Promise<void> {
|
||||||
if (this.controller.aiClient === undefined) return
|
if (this.controller.aiClient === undefined) return
|
||||||
|
|
||||||
const { user, objectId, objectClass, messageClass } = event
|
const { user, objectId, objectClass, messageClass } = event
|
||||||
const promptText = markupToText(event.message)
|
const client = await this.opClient
|
||||||
|
let promptText = markupToText(event.message)
|
||||||
|
const files = await this.getAttachments(client, event.messageId)
|
||||||
|
if (files.length > 0) {
|
||||||
|
promptText += '\n\nAttachments:'
|
||||||
|
for (const file of files) {
|
||||||
|
promptText += `\nName:${file.name} FileId:${file.file} Type:${file.type}`
|
||||||
|
}
|
||||||
|
}
|
||||||
const prompt: OpenAI.ChatCompletionMessageParam = { content: promptText, role: 'user' }
|
const prompt: OpenAI.ChatCompletionMessageParam = { content: promptText, role: 'user' }
|
||||||
const promptTokens = countTokens([prompt], this.controller.encoding)
|
const promptTokens = countTokens([prompt], this.controller.encoding)
|
||||||
|
|
||||||
@ -462,7 +481,6 @@ export class WorkspaceClient {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
const client = await this.opClient
|
|
||||||
const op = client.apply(undefined, 'AIMessageRequestEvent')
|
const op = client.apply(undefined, 'AIMessageRequestEvent')
|
||||||
const hierarchy = client.getHierarchy()
|
const hierarchy = client.getHierarchy()
|
||||||
|
|
||||||
@ -479,16 +497,15 @@ export class WorkspaceClient {
|
|||||||
|
|
||||||
void this.pushHistory(promptText, prompt.role, promptTokens, user, objectId, objectClass)
|
void this.pushHistory(promptText, prompt.role, promptTokens, user, objectId, objectClass)
|
||||||
|
|
||||||
const chatCompletion = await createChatCompletion(this.controller.aiClient, prompt, user, history)
|
const chatCompletion = await createChatCompletionWithTools(this, this.controller.aiClient, prompt, user, history)
|
||||||
const response = chatCompletion?.choices[0].message.content
|
const response = chatCompletion?.completion
|
||||||
|
|
||||||
if (response == null) {
|
if (response == null) {
|
||||||
await this.finishTyping(client, objectId)
|
await this.finishTyping(client, objectId)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
const responseTokens =
|
const responseTokens =
|
||||||
chatCompletion?.usage?.completion_tokens ??
|
chatCompletion?.usage ?? countTokens([{ content: response, role: 'assistant' }], this.controller.encoding)
|
||||||
countTokens([{ content: response, role: 'assistant' }], this.controller.encoding)
|
|
||||||
|
|
||||||
void this.pushHistory(response, 'assistant', responseTokens, user, objectId, objectClass)
|
void this.pushHistory(response, 'assistant', responseTokens, user, objectId, objectClass)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user