UBER-853 Add ydoc indexation (#3817)

Signed-off-by: Alexander Onnikov <alexander.onnikov@xored.com>
This commit is contained in:
Alexander Onnikov 2023-10-11 10:41:37 +07:00 committed by GitHub
parent 929d4e2feb
commit 8b9c90b388
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 203 additions and 37 deletions

View File

@ -22087,7 +22087,7 @@ packages:
dev: false dev: false
file:projects/server.tgz(esbuild@0.16.17): file:projects/server.tgz(esbuild@0.16.17):
resolution: {integrity: sha512-F5vPayuIg/XetPchRjf7nZfQx94qPAVqlSDxF4n05hfitEqNYIAReEwCYweF4vn1ffNnyFKG/K2KYtdilte+QQ==, tarball: file:projects/server.tgz} resolution: {integrity: sha512-JcHQRXWF7KNRinn91dPMFPhF2ZyfxgASxU1DSKhR5Lzh4NnJhKNkNNaKNYGiJrG4+tBypfwIuqwzgkbxiab86A==, tarball: file:projects/server.tgz}
id: file:projects/server.tgz id: file:projects/server.tgz
name: '@rush-temp/server' name: '@rush-temp/server'
version: 0.0.0 version: 0.0.0

View File

@ -101,7 +101,7 @@ async function createNullFullTextAdapter (): Promise<FullTextAdapter> {
} }
async function createNullContentTextAdapter (): Promise<ContentTextAdapter> { async function createNullContentTextAdapter (): Promise<ContentTextAdapter> {
return { return {
async fetch (name: string, type: string, doc) { async content (name: string, type: string, doc) {
return '' return ''
}, },
metrics () { metrics () {
@ -132,10 +132,14 @@ export async function connect (handler: (tx: Tx) => void): Promise<ClientConnect
url: '', url: '',
stages: () => [] stages: () => []
}, },
contentAdapter: { contentAdapters: {
url: '', default: {
factory: createNullContentTextAdapter factory: createNullContentTextAdapter,
contentType: '',
url: ''
}
}, },
defaultContentAdapter: 'default',
workspace: getWorkspaceId('') workspace: getWorkspaceId('')
} }
const serverStorage = await createServerStorage(conf, { const serverStorage = await createServerStorage(conf, {

View File

@ -31,7 +31,7 @@ async function createNullFullTextAdapter (): Promise<FullTextAdapter> {
} }
async function createNullContentTextAdapter (): Promise<ContentTextAdapter> { async function createNullContentTextAdapter (): Promise<ContentTextAdapter> {
return { return {
async fetch (name: string, type: string, doc) { async content (name: string, type: string, doc) {
return '' return ''
}, },
metrics: () => new MeasureMetricsContext('', {}) metrics: () => new MeasureMetricsContext('', {})
@ -66,10 +66,14 @@ export async function start (port: number, host?: string): Promise<void> {
stages: () => [] stages: () => []
}, },
metrics: new MeasureMetricsContext('', {}), metrics: new MeasureMetricsContext('', {}),
contentAdapter: { contentAdapters: {
url: '', default: {
factory: createNullContentTextAdapter factory: createNullContentTextAdapter,
contentType: '',
url: ''
}
}, },
defaultContentAdapter: 'default',
workspace: getWorkspaceId('') workspace: getWorkspaceId('')
} }
return createPipeline(ctx, conf, [], false, () => {}) return createPipeline(ctx, conf, [], false, () => {})

View File

@ -17,3 +17,4 @@ export * from './extensions'
export * from './html' export * from './html'
export * from './node' export * from './node'
export * from './nodes' export * from './nodes'
export * from './text'

23
packages/text/src/text.ts Normal file
View File

@ -0,0 +1,23 @@
//
// Copyright © 2023 Hardcore Engineering Inc.
//
// Licensed under the Eclipse Public License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. You may
// obtain a copy of the License at https://www.eclipse.org/legal/epl-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//
// See the License for the specific language governing permissions and
// limitations under the License.
//
import { Node as ProseMirrorNode } from '@tiptap/pm/model'
/**
 * Extracts the text of a ProseMirror node.
 *
 * The whole content range of the node (offset 0 up to `node.content.size`)
 * is read with `textBetween`, using a newline as the block separator and an
 * empty string as the leaf text.
 *
 * @param node - node whose text should be extracted
 * @returns the text returned by `textBetween` for the full content range
 * @public
 */
export function getText (node: ProseMirrorNode): string {
  const from = 0
  const to = node.content.size
  const blockSeparator = '\n'
  const leafText = ''
  return node.textBetween(from, to, blockSeparator, leafText)
}

View File

@ -41,6 +41,7 @@ import {
createMinioDataAdapter, createMinioDataAdapter,
createNullAdapter, createNullAdapter,
createRekoniAdapter, createRekoniAdapter,
createYDocAdapter,
getMetricsContext, getMetricsContext,
MinioConfig MinioConfig
} from '@hcengineering/server' } from '@hcengineering/server'
@ -311,10 +312,19 @@ export function start (
stages: (adapter, storage, storageAdapter, contentAdapter) => stages: (adapter, storage, storageAdapter, contentAdapter) =>
createIndexStages(metrics.newChild('stages', {}), workspace, adapter, storage, storageAdapter, contentAdapter) createIndexStages(metrics.newChild('stages', {}), workspace, adapter, storage, storageAdapter, contentAdapter)
}, },
contentAdapter: { contentAdapters: {
factory: createRekoniAdapter, Rekoni: {
url: opt.rekoniUrl factory: createRekoniAdapter,
contentType: '*',
url: opt.rekoniUrl
},
YDoc: {
factory: createYDocAdapter,
contentType: 'application/ydoc',
url: ''
}
}, },
defaultContentAdapter: 'Rekoni',
storageFactory: () => storageFactory: () =>
new MinioService({ new MinioService({
...opt.minioConf, ...opt.minioConf,

View File

@ -0,0 +1,59 @@
//
// Copyright © 2023 Hardcore Engineering Inc.
//
// Licensed under the Eclipse Public License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. You may
// obtain a copy of the License at https://www.eclipse.org/legal/epl-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//
// See the License for the specific language governing permissions and
// limitations under the License.
//
import { MeasureContext, WorkspaceId } from '@hcengineering/core'
import { ContentTextAdapter, ContentTextAdapterConfiguration } from './types'
import { Readable } from 'stream'
/**
 * Content adapter that dispatches to other adapters by content type.
 *
 * Adapters are looked up in a map keyed by content type; when nothing is
 * registered for the requested type, the default adapter is used instead.
 */
class ContentAdapter implements ContentTextAdapter {
  private readonly adapters: Map<string, ContentTextAdapter>
  private readonly defaultAdapter: ContentTextAdapter
  private readonly context: MeasureContext

  constructor (
    adapters: Map<string, ContentTextAdapter>,
    defaultAdapter: ContentTextAdapter,
    context: MeasureContext
  ) {
    this.adapters = adapters
    this.defaultAdapter = defaultAdapter
    this.context = context
  }

  /**
   * Forwards the request to the adapter registered for `type`, or to the
   * default adapter when there is no exact match.
   */
  async content (name: string, type: string, doc: string | Readable | Buffer): Promise<string> {
    const registered = this.adapters.get(type)
    let target: ContentTextAdapter
    if (registered === undefined || registered === null) {
      target = this.defaultAdapter
    } else {
      target = registered
    }
    const text = await target.content(name, type, doc)
    return text
  }

  /** Returns the measure context this adapter was constructed with. */
  metrics (): MeasureContext {
    return this.context
  }
}
/**
 * Creates a single content adapter that dispatches by content type.
 *
 * Every entry of `contentAdapters` is instantiated through its factory (in
 * key order, one after another) and registered under its `contentType`. The
 * entry whose key equals `defaultContentAdapter` additionally becomes the
 * fallback for content types without a registered adapter.
 *
 * @param contentAdapters - adapter configurations keyed by adapter name
 * @param defaultContentAdapter - key in `contentAdapters` of the fallback adapter
 * @param workspace - workspace passed through to every adapter factory
 * @param context - measure context; each adapter gets a child named after its key
 * @returns the dispatching adapter
 * @throws Error when `defaultContentAdapter` does not name a configured adapter
 */
export async function createContentAdapter (
  contentAdapters: Record<string, ContentTextAdapterConfiguration>,
  defaultContentAdapter: string,
  workspace: WorkspaceId,
  context: MeasureContext
): Promise<ContentTextAdapter> {
  // Validate the configuration before running any factory, so that a wrong
  // default name does not leave freshly created adapters behind.
  if (!Object.keys(contentAdapters).includes(defaultContentAdapter)) {
    throw new Error('No default content adapter')
  }

  const adapters = new Map<string, ContentTextAdapter>()
  let defaultAdapter: ContentTextAdapter | undefined

  // Object.entries only visits own enumerable keys, unlike for...in.
  for (const [key, adapterConf] of Object.entries(contentAdapters)) {
    const adapter = await adapterConf.factory(adapterConf.url, workspace, context.newChild(key, {}))
    adapters.set(adapterConf.contentType, adapter)
    if (key === defaultContentAdapter) {
      defaultAdapter = adapter
    }
  }

  // Still needed: a factory may have resolved to undefined.
  if (defaultAdapter === undefined) {
    throw new Error('No default content adapter')
  }

  return new ContentAdapter(adapters, defaultAdapter, context)
}

View File

@ -1,5 +1,5 @@
// //
// Copyright © 2022 Hardcore Engineering Inc. // Copyright © 2022, 2023 Hardcore Engineering Inc.
// //
// Licensed under the Eclipse Public License, Version 2.0 (the "License"); // Licensed under the Eclipse Public License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. You may // you may not use this file except in compliance with the License. You may
@ -116,7 +116,7 @@ export class ContentRetrievalStage implements FullTextPipelineStage {
let textContent = await this.metrics.with( let textContent = await this.metrics.with(
'fetch', 'fetch',
{}, {},
async () => await this.contentAdapter.fetch(ref, contentType, readable) async () => await this.contentAdapter.content(ref, contentType, readable)
) )
textContent = textContent textContent = textContent

View File

@ -55,14 +55,15 @@ import core, {
import { MinioService } from '@hcengineering/minio' import { MinioService } from '@hcengineering/minio'
import { getResource } from '@hcengineering/platform' import { getResource } from '@hcengineering/platform'
import { DbAdapter, DbAdapterConfiguration, TxAdapter } from './adapter' import { DbAdapter, DbAdapterConfiguration, TxAdapter } from './adapter'
import { createContentAdapter } from './content'
import { FullTextIndex } from './fulltext' import { FullTextIndex } from './fulltext'
import { FullTextIndexPipeline } from './indexer' import { FullTextIndexPipeline } from './indexer'
import { FullTextPipelineStage } from './indexer/types' import { FullTextPipelineStage } from './indexer/types'
import serverCore from './plugin' import serverCore from './plugin'
import { Triggers } from './triggers' import { Triggers } from './triggers'
import type { import type {
ContentAdapterFactory,
ContentTextAdapter, ContentTextAdapter,
ContentTextAdapterConfiguration,
FullTextAdapter, FullTextAdapter,
FullTextAdapterFactory, FullTextAdapterFactory,
ObjectDDParticipant, ObjectDDParticipant,
@ -94,10 +95,8 @@ export interface DbConfiguration {
url: string url: string
stages: FullTextPipelineStageFactory stages: FullTextPipelineStageFactory
} }
contentAdapter: { contentAdapters: Record<string, ContentTextAdapterConfiguration>
factory: ContentAdapterFactory defaultContentAdapter: string
url: string
}
storageFactory?: () => MinioService storageFactory?: () => MinioService
} }
@ -809,12 +808,12 @@ export async function createServerStorage (
const metrics = conf.metrics.newChild('server-storage', {}) const metrics = conf.metrics.newChild('server-storage', {})
const contentAdapter = await conf.contentAdapter.factory( const contentAdapter = await createContentAdapter(
conf.contentAdapter.url, conf.contentAdapters,
conf.defaultContentAdapter,
conf.workspace, conf.workspace,
metrics.newChild('content', {}) metrics.newChild('content', {})
) )
console.timeLog(conf.workspace.name, 'finish content adapter') console.timeLog(conf.workspace.name, 'finish content adapter')
const defaultAdapter = adapters.get(conf.defaultAdapter) const defaultAdapter = adapters.get(conf.defaultAdapter)

View File

@ -1,5 +1,5 @@
// //
// Copyright © 2022 Hardcore Engineering Inc. // Copyright © 2022, 2023 Hardcore Engineering Inc.
// //
// Licensed under the Eclipse Public License, Version 2.0 (the "License"); // Licensed under the Eclipse Public License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. You may // you may not use this file except in compliance with the License. You may
@ -242,14 +242,6 @@ export class DummyFullTextAdapter implements FullTextAdapter {
} }
} }
/**
* @public
*/
export interface ContentTextAdapter {
fetch: (name: string, type: string, doc: Readable | Buffer | string) => Promise<string>
metrics: () => MeasureContext
}
/** /**
* @public * @public
*/ */
@ -262,7 +254,24 @@ export type FullTextAdapterFactory = (
/** /**
* @public * @public
*/ */
export type ContentAdapterFactory = ( export interface ContentTextAdapterConfiguration {
factory: ContentTextAdapterFactory
contentType: string
url: string
}
/**
 * Adapter that produces text for a document.
 *
 * @public
 */
export interface ContentTextAdapter {
  // Resolves to the text obtained for `doc`. `name` and `type` identify the
  // document; the content retrieval stage passes a reference and a content type here.
  content: (name: string, type: string, doc: Readable | Buffer | string) => Promise<string>
  // Returns the measure context associated with this adapter.
  metrics: () => MeasureContext
}
/**
* @public
*/
export type ContentTextAdapterFactory = (
url: string, url: string,
workspace: WorkspaceId, workspace: WorkspaceId,
context: MeasureContext context: MeasureContext

View File

@ -69,7 +69,7 @@ async function createNullFullTextAdapter (): Promise<FullTextAdapter> {
async function createNullContentTextAdapter (): Promise<ContentTextAdapter> { async function createNullContentTextAdapter (): Promise<ContentTextAdapter> {
return { return {
async fetch (name: string, type: string, doc) { async content (name: string, type: string, doc) {
return '' return ''
}, },
metrics (): MeasureContext { metrics (): MeasureContext {
@ -149,10 +149,14 @@ describe('mongo operations', () => {
url: '', url: '',
stages: () => [] stages: () => []
}, },
contentAdapter: { contentAdapters: {
factory: createNullContentTextAdapter, default: {
url: '' factory: createNullContentTextAdapter,
contentType: '',
url: ''
}
}, },
defaultContentAdapter: 'default',
workspace: getWorkspaceId(dbId, ''), workspace: getWorkspaceId(dbId, ''),
storageFactory: () => createNullStorageFactory() storageFactory: () => createNullStorageFactory()
} }

View File

@ -42,6 +42,7 @@
"@hcengineering/server-token": "^0.6.6", "@hcengineering/server-token": "^0.6.6",
"@hcengineering/middleware": "^0.6.0", "@hcengineering/middleware": "^0.6.0",
"@hcengineering/minio": "^0.6.0", "@hcengineering/minio": "^0.6.0",
"@hcengineering/text": "^0.6.0",
"got": "^11.8.3" "got": "^11.8.3"
} }
} }

View File

@ -20,3 +20,4 @@ export * from './minio'
export * from './backup' export * from './backup'
export * from './metrics' export * from './metrics'
export * from './rekoni' export * from './rekoni'
export * from './ydoc'

View File

@ -13,7 +13,7 @@ export async function createRekoniAdapter (
): Promise<ContentTextAdapter> { ): Promise<ContentTextAdapter> {
const token = generateToken('anticrm-hcenginnering', workspace) const token = generateToken('anticrm-hcenginnering', workspace)
return { return {
fetch: async (name: string, type: string, doc): Promise<string> => { content: async (name: string, type: string, doc): Promise<string> => {
try { try {
const resContent = await got.post( const resContent = await got.post(
`${url}/toText?name=${encodeURIComponent(name)}&type=${encodeURIComponent(type)}`, `${url}/toText?name=${encodeURIComponent(name)}&type=${encodeURIComponent(type)}`,

51
server/server/src/ydoc.ts Normal file
View File

@ -0,0 +1,51 @@
import { MeasureContext, WorkspaceId } from '@hcengineering/core'
import { ContentTextAdapter } from '@hcengineering/server-core'
import { ReferenceNode, defaultExtensions, getText, yDocContentToNodes } from '@hcengineering/text'
import { Readable } from 'stream'
// Extension set handed to yDocContentToNodes when decoding a document:
// the default text extensions plus ReferenceNode.
const extensions = [...defaultExtensions, ReferenceNode]
/**
 * Reads a stream to its end and returns everything it produced as buffers.
 *
 * Async iteration is used instead of manual 'readable'/'end' listeners so
 * that a stream error rejects the returned promise rather than leaving it
 * pending, and so that an already finished stream completes instead of hanging.
 * Non-buffer chunks (e.g. strings from a stream with an encoding set) are
 * converted so that the result can always be passed to Buffer.concat.
 */
async function readStreamChunks (stream: Readable): Promise<Buffer[]> {
  const chunks: Buffer[] = []
  for await (const chunk of stream) {
    chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk))
  }
  return chunks
}

/**
 * Creates a content adapter that extracts plain text from YDoc content.
 *
 * The `_url` and `_workspace` arguments are accepted only to match the
 * content adapter factory signature and are not used.
 *
 * The returned adapter's `content` accepts a `Readable` or a `Buffer`; the
 * bytes are decoded with `yDocContentToNodes` and the text of the resulting
 * nodes is joined with newlines. String input is not supported: a warning is
 * logged and an empty string is returned. A failing input stream makes
 * `content` reject with the stream's error.
 *
 * @param _url - unused
 * @param _workspace - unused
 * @param _metrics - measure context returned by the adapter's `metrics()`
 * @returns the YDoc content adapter
 * @public
 */
export async function createYDocAdapter (
  _url: string,
  _workspace: WorkspaceId,
  _metrics: MeasureContext
): Promise<ContentTextAdapter> {
  return {
    content: async (_name: string, _type: string, data: Readable | Buffer | string): Promise<string> => {
      let chunks: Buffer[] = []

      if (data instanceof Readable) {
        chunks = await readStreamChunks(data)
      } else if (data instanceof Buffer) {
        chunks = [data]
      } else {
        console.warn('ydoc content adapter does not support string content')
      }

      // Nothing was read (empty stream or unsupported input): no text.
      if (chunks.length === 0) {
        return ''
      }

      const nodes = yDocContentToNodes(extensions, Buffer.concat(chunks))
      return nodes.map((node) => getText(node)).join('\n')
    },
    metrics (): MeasureContext {
      return _metrics
    }
  }
}