From 8b9c90b388e8d879445f8f6dbdc8897a455bb346 Mon Sep 17 00:00:00 2001 From: Alexander Onnikov Date: Wed, 11 Oct 2023 10:41:37 +0700 Subject: [PATCH] UBER-853 Add ydoc indexation (#3817) Signed-off-by: Alexander Onnikov --- common/config/rush/pnpm-lock.yaml | 2 +- dev/client-resources/src/connection.ts | 12 +++-- dev/server/src/server.ts | 12 +++-- packages/text/src/index.ts | 1 + packages/text/src/text.ts | 23 +++++++++ pods/server/src/server.ts | 16 ++++-- server/core/src/content.ts | 59 ++++++++++++++++++++++ server/core/src/indexer/content.ts | 4 +- server/core/src/storage.ts | 15 +++--- server/core/src/types.ts | 29 +++++++---- server/mongo/src/__tests__/storage.test.ts | 12 +++-- server/server/package.json | 1 + server/server/src/index.ts | 1 + server/server/src/rekoni.ts | 2 +- server/server/src/ydoc.ts | 51 +++++++++++++++++++ 15 files changed, 203 insertions(+), 37 deletions(-) create mode 100644 packages/text/src/text.ts create mode 100644 server/core/src/content.ts create mode 100644 server/server/src/ydoc.ts diff --git a/common/config/rush/pnpm-lock.yaml b/common/config/rush/pnpm-lock.yaml index bbb9ff403c..79c407401d 100644 --- a/common/config/rush/pnpm-lock.yaml +++ b/common/config/rush/pnpm-lock.yaml @@ -22087,7 +22087,7 @@ packages: dev: false file:projects/server.tgz(esbuild@0.16.17): - resolution: {integrity: sha512-F5vPayuIg/XetPchRjf7nZfQx94qPAVqlSDxF4n05hfitEqNYIAReEwCYweF4vn1ffNnyFKG/K2KYtdilte+QQ==, tarball: file:projects/server.tgz} + resolution: {integrity: sha512-JcHQRXWF7KNRinn91dPMFPhF2ZyfxgASxU1DSKhR5Lzh4NnJhKNkNNaKNYGiJrG4+tBypfwIuqwzgkbxiab86A==, tarball: file:projects/server.tgz} id: file:projects/server.tgz name: '@rush-temp/server' version: 0.0.0 diff --git a/dev/client-resources/src/connection.ts b/dev/client-resources/src/connection.ts index 06393ef186..de550f7a30 100644 --- a/dev/client-resources/src/connection.ts +++ b/dev/client-resources/src/connection.ts @@ -101,7 +101,7 @@ async function createNullFullTextAdapter (): Promise { } async function createNullContentTextAdapter (): Promise { return { - async fetch (name: string, type: string, doc) { + async content (name: string, type: string, doc) { return '' }, metrics () { @@ -132,10 +132,14 @@ export async function connect (handler: (tx: Tx) => void): Promise [] }, - contentAdapter: { - url: '', - factory: createNullContentTextAdapter + contentAdapters: { + default: { + factory: createNullContentTextAdapter, + contentType: '', + url: '' + } }, + defaultContentAdapter: 'default', workspace: getWorkspaceId('') } const serverStorage = await createServerStorage(conf, { diff --git a/dev/server/src/server.ts b/dev/server/src/server.ts index f33093a416..d2e62ad0b1 100644 --- a/dev/server/src/server.ts +++ b/dev/server/src/server.ts @@ -31,7 +31,7 @@ async function createNullFullTextAdapter (): Promise { } async function createNullContentTextAdapter (): Promise { return { - async fetch (name: string, type: string, doc) { + async content (name: string, type: string, doc) { return '' }, metrics: () => new MeasureMetricsContext('', {}) @@ -66,10 +66,14 @@ export async function start (port: number, host?: string): Promise { stages: () => [] }, metrics: new MeasureMetricsContext('', {}), - contentAdapter: { - url: '', - factory: createNullContentTextAdapter + contentAdapters: { + default: { + factory: createNullContentTextAdapter, + contentType: '', + url: '' + } }, + defaultContentAdapter: 'default', workspace: getWorkspaceId('') } return createPipeline(ctx, conf, [], false, () => {}) diff --git a/packages/text/src/index.ts b/packages/text/src/index.ts index 33a56300db..22326df824 100644 --- a/packages/text/src/index.ts +++ b/packages/text/src/index.ts @@ -17,3 +17,4 @@ export * from './extensions' export * from './html' export * from './node' export * from './nodes' +export * from './text' diff --git a/packages/text/src/text.ts b/packages/text/src/text.ts new file mode 100644 index 0000000000..5af13d9e74 --- /dev/null +++ b/packages/text/src/text.ts @@ -0,0 +1,23 @@ +// +// Copyright © 2023 Hardcore Engineering Inc. +// +// Licensed under the Eclipse Public License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. You may +// obtain a copy of the License at https://www.eclipse.org/legal/epl-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// +// See the License for the specific language governing permissions and +// limitations under the License. +// + +import { Node as ProseMirrorNode } from '@tiptap/pm/model' + +/** + * @public + */ +export function getText (node: ProseMirrorNode): string { + return node.textBetween(0, node.content.size, '\n', '') +} diff --git a/pods/server/src/server.ts b/pods/server/src/server.ts index 3567d589aa..295806b1f6 100644 --- a/pods/server/src/server.ts +++ b/pods/server/src/server.ts @@ -41,6 +41,7 @@ import { createMinioDataAdapter, createNullAdapter, createRekoniAdapter, + createYDocAdapter, getMetricsContext, MinioConfig } from '@hcengineering/server' @@ -311,10 +312,19 @@ export function start ( stages: (adapter, storage, storageAdapter, contentAdapter) => createIndexStages(metrics.newChild('stages', {}), workspace, adapter, storage, storageAdapter, contentAdapter) }, - contentAdapter: { - factory: createRekoniAdapter, - url: opt.rekoniUrl + contentAdapters: { + Rekoni: { + factory: createRekoniAdapter, + contentType: '*', + url: opt.rekoniUrl + }, + YDoc: { + factory: createYDocAdapter, + contentType: 'application/ydoc', + url: '' + } }, + defaultContentAdapter: 'Rekoni', storageFactory: () => new MinioService({ ...opt.minioConf, diff --git a/server/core/src/content.ts b/server/core/src/content.ts new file mode 100644 index 0000000000..a6f01c9757 --- /dev/null +++ b/server/core/src/content.ts @@ -0,0 +1,59 @@ +// +// Copyright © 2023 Hardcore Engineering Inc. +// +// Licensed under the Eclipse Public License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. You may +// obtain a copy of the License at https://www.eclipse.org/legal/epl-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// +// See the License for the specific language governing permissions and +// limitations under the License. +// + +import { MeasureContext, WorkspaceId } from '@hcengineering/core' +import { ContentTextAdapter, ContentTextAdapterConfiguration } from './types' +import { Readable } from 'stream' + +class ContentAdapter implements ContentTextAdapter { + constructor ( + private readonly adapters: Map, + private readonly defaultAdapter: ContentTextAdapter, + private readonly context: MeasureContext + ) {} + + async content (name: string, type: string, doc: string | Readable | Buffer): Promise { + const adapter = this.adapters.get(type) ?? this.defaultAdapter + return await adapter.content(name, type, doc) + } + + metrics (): MeasureContext { + return this.context + } +} + +export async function createContentAdapter ( + contentAdapters: Record, + defaultContentAdapter: string, + workspace: WorkspaceId, + context: MeasureContext +): Promise { + const adapters = new Map() + let defaultAdapter: ContentTextAdapter | undefined + + for (const key in contentAdapters) { + const adapterConf = contentAdapters[key] + const adapter = await adapterConf.factory(adapterConf.url, workspace, context.newChild(key, {})) + + adapters.set(adapterConf.contentType, adapter) + if (key === defaultContentAdapter) { + defaultAdapter = adapter + } + } + if (defaultAdapter === undefined) { + throw new Error('No default content adapter') + } + return new ContentAdapter(adapters, defaultAdapter, context) +} diff --git a/server/core/src/indexer/content.ts b/server/core/src/indexer/content.ts index 1e6a044e55..7485cc002b 100644 --- a/server/core/src/indexer/content.ts +++ b/server/core/src/indexer/content.ts @@ -1,5 +1,5 @@ // -// Copyright © 2022 Hardcore Engineering Inc. +// Copyright © 2022, 2023 Hardcore Engineering Inc. // // Licensed under the Eclipse Public License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. You may @@ -116,7 +116,7 @@ export class ContentRetrievalStage implements FullTextPipelineStage { let textContent = await this.metrics.with( 'fetch', {}, - async () => await this.contentAdapter.fetch(ref, contentType, readable) + async () => await this.contentAdapter.content(ref, contentType, readable) ) textContent = textContent diff --git a/server/core/src/storage.ts b/server/core/src/storage.ts index aea8348088..c8627b6c90 100644 --- a/server/core/src/storage.ts +++ b/server/core/src/storage.ts @@ -55,14 +55,15 @@ import core, { import { MinioService } from '@hcengineering/minio' import { getResource } from '@hcengineering/platform' import { DbAdapter, DbAdapterConfiguration, TxAdapter } from './adapter' +import { createContentAdapter } from './content' import { FullTextIndex } from './fulltext' import { FullTextIndexPipeline } from './indexer' import { FullTextPipelineStage } from './indexer/types' import serverCore from './plugin' import { Triggers } from './triggers' import type { - ContentAdapterFactory, ContentTextAdapter, + ContentTextAdapterConfiguration, FullTextAdapter, FullTextAdapterFactory, ObjectDDParticipant, @@ -94,10 +95,8 @@ export interface DbConfiguration { url: string stages: FullTextPipelineStageFactory } - contentAdapter: { - factory: ContentAdapterFactory - url: string - } + contentAdapters: Record + defaultContentAdapter: string storageFactory?: () => MinioService } @@ -809,12 +808,12 @@ export async function createServerStorage ( const metrics = conf.metrics.newChild('server-storage', {}) - const contentAdapter = await conf.contentAdapter.factory( - conf.contentAdapter.url, + const contentAdapter = await createContentAdapter( + conf.contentAdapters, + conf.defaultContentAdapter, conf.workspace, metrics.newChild('content', {}) ) - console.timeLog(conf.workspace.name, 'finish content adapter') const defaultAdapter = adapters.get(conf.defaultAdapter) diff --git a/server/core/src/types.ts b/server/core/src/types.ts index 69403ee500..a10e64acac 100644 --- a/server/core/src/types.ts +++ b/server/core/src/types.ts @@ -1,5 +1,5 @@ // -// Copyright © 2022 Hardcore Engineering Inc. +// Copyright © 2022, 2023 Hardcore Engineering Inc. // // Licensed under the Eclipse Public License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. You may @@ -242,14 +242,6 @@ export class DummyFullTextAdapter implements FullTextAdapter { } } -/** - * @public - */ -export interface ContentTextAdapter { - fetch: (name: string, type: string, doc: Readable | Buffer | string) => Promise - metrics: () => MeasureContext -} - /** * @public */ @@ -262,7 +254,24 @@ export type FullTextAdapterFactory = ( /** * @public */ -export type ContentAdapterFactory = ( +export interface ContentTextAdapterConfiguration { + factory: ContentTextAdapterFactory + contentType: string + url: string +} + +/** + * @public + */ +export interface ContentTextAdapter { + content: (name: string, type: string, doc: Readable | Buffer | string) => Promise + metrics: () => MeasureContext +} + +/** + * @public + */ +export type ContentTextAdapterFactory = ( url: string, workspace: WorkspaceId, context: MeasureContext diff --git a/server/mongo/src/__tests__/storage.test.ts b/server/mongo/src/__tests__/storage.test.ts index 1993636699..36f70eaafd 100644 --- a/server/mongo/src/__tests__/storage.test.ts +++ b/server/mongo/src/__tests__/storage.test.ts @@ -69,7 +69,7 @@ async function createNullFullTextAdapter (): Promise { async function createNullContentTextAdapter (): Promise { return { - async fetch (name: string, type: string, doc) { + async content (name: string, type: string, doc) { return '' }, metrics (): MeasureContext { @@ -149,10 +149,14 @@ describe('mongo operations', () => { url: '', stages: () => [] }, - contentAdapter: { - factory: createNullContentTextAdapter, - url: '' + contentAdapters: { + default: { + factory: createNullContentTextAdapter, + contentType: '', + url: '' + } }, + defaultContentAdapter: 'default', workspace: getWorkspaceId(dbId, ''), storageFactory: () => createNullStorageFactory() } diff --git a/server/server/package.json b/server/server/package.json index c15b9f2863..cac8932418 100644 --- a/server/server/package.json +++ b/server/server/package.json @@ -42,6 +42,7 @@ "@hcengineering/server-token": "^0.6.6", "@hcengineering/middleware": "^0.6.0", "@hcengineering/minio": "^0.6.0", + "@hcengineering/text": "^0.6.0", "got": "^11.8.3" } } diff --git a/server/server/src/index.ts b/server/server/src/index.ts index a84eb06745..b7937e4fd8 100644 --- a/server/server/src/index.ts +++ b/server/server/src/index.ts @@ -20,3 +20,4 @@ export * from './minio' export * from './backup' export * from './metrics' export * from './rekoni' +export * from './ydoc' diff --git a/server/server/src/rekoni.ts b/server/server/src/rekoni.ts index 879cb6a0b1..c3daff69b2 100644 --- a/server/server/src/rekoni.ts +++ b/server/server/src/rekoni.ts @@ -13,7 +13,7 @@ export async function createRekoniAdapter ( ): Promise { const token = generateToken('anticrm-hcenginnering', workspace) return { - fetch: async (name: string, type: string, doc): Promise => { + content: async (name: string, type: string, doc): Promise => { try { const resContent = await got.post( `${url}/toText?name=${encodeURIComponent(name)}&type=${encodeURIComponent(type)}`, diff --git a/server/server/src/ydoc.ts b/server/server/src/ydoc.ts new file mode 100644 index 0000000000..2de1c822e3 --- /dev/null +++ b/server/server/src/ydoc.ts @@ -0,0 +1,51 @@ +import { MeasureContext, WorkspaceId } from '@hcengineering/core' +import { ContentTextAdapter } from '@hcengineering/server-core' +import { ReferenceNode, defaultExtensions, getText, yDocContentToNodes } from '@hcengineering/text' +import { Readable } from 'stream' + +const extensions = [...defaultExtensions, ReferenceNode] + +/** + * @public + */ +export async function createYDocAdapter ( + _url: string, + _workspace: WorkspaceId, + _metrics: MeasureContext +): Promise { + return { + content: async (_name: string, _type: string, data: Readable | Buffer | string): Promise => { + const chunks: Buffer[] = [] + + if (data instanceof Readable) { + await new Promise((resolve) => { + data.on('readable', () => { + let chunk + while ((chunk = data.read()) !== null) { + const b = chunk as Buffer + chunks.push(b) + } + }) + + data.on('end', () => { + resolve(null) + }) + }) + } else if (data instanceof Buffer) { + chunks.push(data) + } else { + console.warn('ydoc content adapter does not support string content') + } + + if (chunks.length > 0) { + const nodes = yDocContentToNodes(extensions, Buffer.concat(chunks)) + return nodes.map(getText).join('\n') + } + + return '' + }, + metrics (): MeasureContext { + return _metrics + } + } +}