UBER-853 Add ydoc indexation (#3817)

Signed-off-by: Alexander Onnikov <alexander.onnikov@xored.com>
This commit is contained in:
Alexander Onnikov 2023-10-11 10:41:37 +07:00 committed by GitHub
parent 929d4e2feb
commit 8b9c90b388
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 203 additions and 37 deletions

View File

@ -22087,7 +22087,7 @@ packages:
dev: false
file:projects/server.tgz(esbuild@0.16.17):
resolution: {integrity: sha512-F5vPayuIg/XetPchRjf7nZfQx94qPAVqlSDxF4n05hfitEqNYIAReEwCYweF4vn1ffNnyFKG/K2KYtdilte+QQ==, tarball: file:projects/server.tgz}
resolution: {integrity: sha512-JcHQRXWF7KNRinn91dPMFPhF2ZyfxgASxU1DSKhR5Lzh4NnJhKNkNNaKNYGiJrG4+tBypfwIuqwzgkbxiab86A==, tarball: file:projects/server.tgz}
id: file:projects/server.tgz
name: '@rush-temp/server'
version: 0.0.0

View File

@ -101,7 +101,7 @@ async function createNullFullTextAdapter (): Promise<FullTextAdapter> {
}
async function createNullContentTextAdapter (): Promise<ContentTextAdapter> {
return {
async fetch (name: string, type: string, doc) {
async content (name: string, type: string, doc) {
return ''
},
metrics () {
@ -132,10 +132,14 @@ export async function connect (handler: (tx: Tx) => void): Promise<ClientConnect
url: '',
stages: () => []
},
contentAdapter: {
url: '',
factory: createNullContentTextAdapter
contentAdapters: {
default: {
factory: createNullContentTextAdapter,
contentType: '',
url: ''
}
},
defaultContentAdapter: 'default',
workspace: getWorkspaceId('')
}
const serverStorage = await createServerStorage(conf, {

View File

@ -31,7 +31,7 @@ async function createNullFullTextAdapter (): Promise<FullTextAdapter> {
}
async function createNullContentTextAdapter (): Promise<ContentTextAdapter> {
return {
async fetch (name: string, type: string, doc) {
async content (name: string, type: string, doc) {
return ''
},
metrics: () => new MeasureMetricsContext('', {})
@ -66,10 +66,14 @@ export async function start (port: number, host?: string): Promise<void> {
stages: () => []
},
metrics: new MeasureMetricsContext('', {}),
contentAdapter: {
url: '',
factory: createNullContentTextAdapter
contentAdapters: {
default: {
factory: createNullContentTextAdapter,
contentType: '',
url: ''
}
},
defaultContentAdapter: 'default',
workspace: getWorkspaceId('')
}
return createPipeline(ctx, conf, [], false, () => {})

View File

@ -17,3 +17,4 @@ export * from './extensions'
export * from './html'
export * from './node'
export * from './nodes'
export * from './text'

23
packages/text/src/text.ts Normal file
View File

@ -0,0 +1,23 @@
//
// Copyright © 2023 Hardcore Engineering Inc.
//
// Licensed under the Eclipse Public License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. You may
// obtain a copy of the License at https://www.eclipse.org/legal/epl-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//
// See the License for the specific language governing permissions and
// limitations under the License.
//
import { Node as ProseMirrorNode } from '@tiptap/pm/model'
/**
 * Extracts the plain text of a ProseMirror node.
 *
 * Reads the node's whole content range through `textBetween`, passing a
 * newline as the block separator and an empty string as the leaf text.
 *
 * @param node - node whose text is collected
 * @returns whatever `textBetween` yields for the full content range
 * @public
 */
export function getText (node: ProseMirrorNode): string {
  const rangeStart = 0
  const rangeEnd = node.content.size
  const blockSeparator = '\n'
  const leafText = ''
  return node.textBetween(rangeStart, rangeEnd, blockSeparator, leafText)
}

View File

@ -41,6 +41,7 @@ import {
createMinioDataAdapter,
createNullAdapter,
createRekoniAdapter,
createYDocAdapter,
getMetricsContext,
MinioConfig
} from '@hcengineering/server'
@ -311,10 +312,19 @@ export function start (
stages: (adapter, storage, storageAdapter, contentAdapter) =>
createIndexStages(metrics.newChild('stages', {}), workspace, adapter, storage, storageAdapter, contentAdapter)
},
contentAdapter: {
contentAdapters: {
Rekoni: {
factory: createRekoniAdapter,
contentType: '*',
url: opt.rekoniUrl
},
YDoc: {
factory: createYDocAdapter,
contentType: 'application/ydoc',
url: ''
}
},
defaultContentAdapter: 'Rekoni',
storageFactory: () =>
new MinioService({
...opt.minioConf,

View File

@ -0,0 +1,59 @@
//
// Copyright © 2023 Hardcore Engineering Inc.
//
// Licensed under the Eclipse Public License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. You may
// obtain a copy of the License at https://www.eclipse.org/legal/epl-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//
// See the License for the specific language governing permissions and
// limitations under the License.
//
import { MeasureContext, WorkspaceId } from '@hcengineering/core'
import { ContentTextAdapter, ContentTextAdapterConfiguration } from './types'
import { Readable } from 'stream'
/**
 * Content adapter that delegates to one of several adapters keyed by content type.
 *
 * A lookup by the exact `type` string picks the delegate; when the map has no
 * entry for that type the default adapter handles the request.
 */
class ContentAdapter implements ContentTextAdapter {
  private readonly adapters: Map<string, ContentTextAdapter>
  private readonly defaultAdapter: ContentTextAdapter
  private readonly context: MeasureContext

  constructor (
    adapters: Map<string, ContentTextAdapter>,
    defaultAdapter: ContentTextAdapter,
    context: MeasureContext
  ) {
    this.adapters = adapters
    this.defaultAdapter = defaultAdapter
    this.context = context
  }

  /**
   * Forwards the request, with unchanged arguments, to the adapter chosen for `type`.
   */
  async content (name: string, type: string, doc: string | Readable | Buffer): Promise<string> {
    const registered = this.adapters.get(type)
    const delegate = registered ?? this.defaultAdapter
    const text = await delegate.content(name, type, doc)
    return text
  }

  /**
   * Returns the measure context this adapter was constructed with.
   */
  metrics (): MeasureContext {
    const { context } = this
    return context
  }
}
/**
 * Builds one content adapter that dispatches to the configured adapters by content type.
 *
 * Each configuration's factory is awaited in turn with its `url`, the workspace and a
 * child measure context named after the configuration key. The resulting adapter is
 * registered under the configuration's `contentType`; the adapter whose key equals
 * `defaultContentAdapter` also becomes the fallback for unregistered types.
 *
 * @param contentAdapters - adapter configurations keyed by adapter name
 * @param defaultContentAdapter - key in `contentAdapters` of the fallback adapter
 * @param workspace - workspace handed to every factory
 * @param context - measure context returned by the resulting adapter's `metrics()`
 * @returns the dispatching adapter
 * @throws Error('No default content adapter') when `defaultContentAdapter` does not
 * name a configured adapter, or when its factory yields `undefined`
 */
export async function createContentAdapter (
  contentAdapters: Record<string, ContentTextAdapterConfiguration>,
  defaultContentAdapter: string,
  workspace: WorkspaceId,
  context: MeasureContext
): Promise<ContentTextAdapter> {
  // Validate before running any factory: a misconfigured default name should not
  // leave already-constructed adapters behind.
  if (!Object.prototype.hasOwnProperty.call(contentAdapters, defaultContentAdapter)) {
    throw new Error('No default content adapter')
  }

  const adapters = new Map<string, ContentTextAdapter>()
  let defaultAdapter: ContentTextAdapter | undefined

  // Object.entries visits own enumerable keys only, unlike for..in.
  for (const [key, adapterConf] of Object.entries(contentAdapters)) {
    const adapter = await adapterConf.factory(adapterConf.url, workspace, context.newChild(key, {}))
    adapters.set(adapterConf.contentType, adapter)
    if (key === defaultContentAdapter) {
      defaultAdapter = adapter
    }
  }

  // Still required for the type narrowing and for a factory that resolved to undefined.
  if (defaultAdapter === undefined) {
    throw new Error('No default content adapter')
  }

  return new ContentAdapter(adapters, defaultAdapter, context)
}

View File

@ -1,5 +1,5 @@
//
// Copyright © 2022 Hardcore Engineering Inc.
// Copyright © 2022, 2023 Hardcore Engineering Inc.
//
// Licensed under the Eclipse Public License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. You may
@ -116,7 +116,7 @@ export class ContentRetrievalStage implements FullTextPipelineStage {
let textContent = await this.metrics.with(
'fetch',
{},
async () => await this.contentAdapter.fetch(ref, contentType, readable)
async () => await this.contentAdapter.content(ref, contentType, readable)
)
textContent = textContent

View File

@ -55,14 +55,15 @@ import core, {
import { MinioService } from '@hcengineering/minio'
import { getResource } from '@hcengineering/platform'
import { DbAdapter, DbAdapterConfiguration, TxAdapter } from './adapter'
import { createContentAdapter } from './content'
import { FullTextIndex } from './fulltext'
import { FullTextIndexPipeline } from './indexer'
import { FullTextPipelineStage } from './indexer/types'
import serverCore from './plugin'
import { Triggers } from './triggers'
import type {
ContentAdapterFactory,
ContentTextAdapter,
ContentTextAdapterConfiguration,
FullTextAdapter,
FullTextAdapterFactory,
ObjectDDParticipant,
@ -94,10 +95,8 @@ export interface DbConfiguration {
url: string
stages: FullTextPipelineStageFactory
}
contentAdapter: {
factory: ContentAdapterFactory
url: string
}
contentAdapters: Record<string, ContentTextAdapterConfiguration>
defaultContentAdapter: string
storageFactory?: () => MinioService
}
@ -809,12 +808,12 @@ export async function createServerStorage (
const metrics = conf.metrics.newChild('server-storage', {})
const contentAdapter = await conf.contentAdapter.factory(
conf.contentAdapter.url,
const contentAdapter = await createContentAdapter(
conf.contentAdapters,
conf.defaultContentAdapter,
conf.workspace,
metrics.newChild('content', {})
)
console.timeLog(conf.workspace.name, 'finish content adapter')
const defaultAdapter = adapters.get(conf.defaultAdapter)

View File

@ -1,5 +1,5 @@
//
// Copyright © 2022 Hardcore Engineering Inc.
// Copyright © 2022, 2023 Hardcore Engineering Inc.
//
// Licensed under the Eclipse Public License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. You may
@ -242,14 +242,6 @@ export class DummyFullTextAdapter implements FullTextAdapter {
}
}
/**
* @public
*/
export interface ContentTextAdapter {
fetch: (name: string, type: string, doc: Readable | Buffer | string) => Promise<string>
metrics: () => MeasureContext
}
/**
* @public
*/
@ -262,7 +254,24 @@ export type FullTextAdapterFactory = (
/**
* @public
*/
export type ContentAdapterFactory = (
export interface ContentTextAdapterConfiguration {
// Describes one content text adapter: how to construct it and which content type it is registered under.
// Factory that constructs the adapter; createContentAdapter calls it with `url`, the workspace and a child measure context.
factory: ContentTextAdapterFactory
// Content type string the constructed adapter is registered under; lookup is by the exact type string.
contentType: string
// Passed to `factory` as its first argument.
url: string
}
/**
* Adapter that produces text for a piece of document content.
* @public
*/
export interface ContentTextAdapter {
// Resolves to the text for `doc`; `name` and `type` (the content type) accompany the content so an implementation can use them.
content: (name: string, type: string, doc: Readable | Buffer | string) => Promise<string>
// Returns the measure context associated with this adapter.
metrics: () => MeasureContext
}
/**
* @public
*/
export type ContentTextAdapterFactory = (
url: string,
workspace: WorkspaceId,
context: MeasureContext

View File

@ -69,7 +69,7 @@ async function createNullFullTextAdapter (): Promise<FullTextAdapter> {
async function createNullContentTextAdapter (): Promise<ContentTextAdapter> {
return {
async fetch (name: string, type: string, doc) {
async content (name: string, type: string, doc) {
return ''
},
metrics (): MeasureContext {
@ -149,10 +149,14 @@ describe('mongo operations', () => {
url: '',
stages: () => []
},
contentAdapter: {
contentAdapters: {
default: {
factory: createNullContentTextAdapter,
contentType: '',
url: ''
}
},
defaultContentAdapter: 'default',
workspace: getWorkspaceId(dbId, ''),
storageFactory: () => createNullStorageFactory()
}

View File

@ -42,6 +42,7 @@
"@hcengineering/server-token": "^0.6.6",
"@hcengineering/middleware": "^0.6.0",
"@hcengineering/minio": "^0.6.0",
"@hcengineering/text": "^0.6.0",
"got": "^11.8.3"
}
}

View File

@ -20,3 +20,4 @@ export * from './minio'
export * from './backup'
export * from './metrics'
export * from './rekoni'
export * from './ydoc'

View File

@ -13,7 +13,7 @@ export async function createRekoniAdapter (
): Promise<ContentTextAdapter> {
const token = generateToken('anticrm-hcenginnering', workspace)
return {
fetch: async (name: string, type: string, doc): Promise<string> => {
content: async (name: string, type: string, doc): Promise<string> => {
try {
const resContent = await got.post(
`${url}/toText?name=${encodeURIComponent(name)}&type=${encodeURIComponent(type)}`,

51
server/server/src/ydoc.ts Normal file
View File

@ -0,0 +1,51 @@
import { MeasureContext, WorkspaceId } from '@hcengineering/core'
import { ContentTextAdapter } from '@hcengineering/server-core'
import { ReferenceNode, defaultExtensions, getText, yDocContentToNodes } from '@hcengineering/text'
import { Readable } from 'stream'
const extensions = [...defaultExtensions, ReferenceNode]
/**
 * Creates a content adapter that extracts plain text from YDoc content.
 *
 * `_url` and `_workspace` are accepted only to match the content adapter factory
 * signature; this adapter does not use them.
 *
 * The returned adapter's `content` accepts a `Readable` or a `Buffer`. The bytes are
 * decoded into nodes with `yDocContentToNodes`, and the text of each node is joined
 * with newlines. String input is not supported: a warning is logged and `''` is
 * returned. An empty payload also yields `''`.
 *
 * @param _url - unused
 * @param _workspace - unused
 * @param _metrics - measure context returned from the adapter's `metrics()`
 * @returns the YDoc content adapter
 * @public
 */
export async function createYDocAdapter (
  _url: string,
  _workspace: WorkspaceId,
  _metrics: MeasureContext
): Promise<ContentTextAdapter> {
  return {
    content: async (_name: string, _type: string, data: Readable | Buffer | string): Promise<string> => {
      let payload: Buffer
      if (data instanceof Readable) {
        // Rejects if the stream fails instead of waiting forever for an 'end' event.
        payload = await readStreamFully(data)
      } else if (Buffer.isBuffer(data)) {
        payload = data
      } else {
        console.warn('ydoc content adapter does not support string content')
        return ''
      }

      // Nothing to decode: skip the decoder rather than hand it an empty buffer.
      if (payload.length === 0) {
        return ''
      }

      const nodes = yDocContentToNodes(extensions, payload)
      return nodes.map(getText).join('\n')
    },
    metrics (): MeasureContext {
      return _metrics
    }
  }
}

/**
 * Reads a stream to its end and returns everything it produced as a single Buffer.
 * Stream errors propagate as a rejection of the returned promise.
 */
async function readStreamFully (stream: Readable): Promise<Buffer> {
  const chunks: Buffer[] = []
  for await (const chunk of stream) {
    // A stream with an encoding set yields strings; normalise so Buffer.concat accepts every chunk.
    chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk))
  }
  return Buffer.concat(chunks)
}