From 87ff5e9134d0305659d4dc7ed328c4a4eaa5e3f7 Mon Sep 17 00:00:00 2001 From: Vyacheslav Tumanov Date: Tue, 2 Apr 2024 18:02:37 +0500 Subject: [PATCH] improve reindexing speed (#5141) --- packages/core/src/utils.ts | 2 - server/core/src/indexer/fulltextPush.ts | 68 +------------------------ server/core/src/indexer/indexer.ts | 2 + server/core/src/mapper.ts | 2 +- 4 files changed, 5 insertions(+), 69 deletions(-) diff --git a/packages/core/src/utils.ts b/packages/core/src/utils.ts index c73267e3a2..a2f2fcb425 100644 --- a/packages/core/src/utils.ts +++ b/packages/core/src/utils.ts @@ -145,8 +145,6 @@ export interface IndexKeyOptions { _class?: Ref> docId?: Ref extra?: string[] - relative?: boolean - refAttribute?: string } /** * @public diff --git a/server/core/src/indexer/fulltextPush.ts b/server/core/src/indexer/fulltextPush.ts index a8513787f2..90889a0e3a 100644 --- a/server/core/src/indexer/fulltextPush.ts +++ b/server/core/src/indexer/fulltextPush.ts @@ -14,7 +14,6 @@ // import core, { - type AnyAttribute, type ArrOf, type Class, type Doc, @@ -24,7 +23,6 @@ import core, { extractDocKey, type Hierarchy, isFullTextAttribute, - isIndexedAttribute, type MeasureContext, type Ref, type ServerStorage, @@ -42,14 +40,7 @@ import { type FullTextPipelineStage, fullTextPushStageId } from './types' -import { - collectPropagate, - collectPropagateClasses, - docKey, - getFullTextContext, - type IndexKeyOptions, - isCustomAttr -} from './utils' +import { collectPropagate, collectPropagateClasses, docKey, getFullTextContext, isCustomAttr } from './utils' /** * @public @@ -108,52 +99,6 @@ export class FullTextPushStage implements FullTextPipelineStage { return { docs: [], pass: true } } - async indexRefAttributes ( - attributes: Map, - doc: DocIndexState, - elasticDoc: IndexedDoc, - metrics: MeasureContext - ): Promise { - for (const attribute in doc.attributes) { - const { attr } = extractDocKey(attribute) - const attrObj = attributes.get(attr) - if ( - attrObj !== null && - attrObj !== undefined && - isIndexedAttribute(attrObj) && - (attrObj.type._class === core.class.RefTo || - (attrObj.type._class === core.class.ArrOf && (attrObj.type as ArrOf).of._class === core.class.RefTo)) - ) { - const attrStringValue = doc.attributes[attribute] - if (attrStringValue !== undefined && attrStringValue !== null && attrStringValue !== '') { - const refs: Ref[] = attrStringValue.split(',') - if (refs.length > 0) { - const refDocs = await metrics.with( - 'ref-docs', - {}, - async (ctx) => - await this.dbStorage.findAll( - ctx, - core.class.DocIndexState, - { - _id: refs.length === 1 ? refs[0] : { $in: refs } - }, - { limit: refs.length } - ) - ) - if (refDocs.length > 0) { - for (const ref of refDocs) { - await metrics.with('updateDoc2Elastic', {}, async (ctx) => { - updateDoc2Elastic(ref.attributes, elasticDoc, ref._id, attribute) - }) - } - } - } - } - } - } - } - async collect (toIndex: DocIndexState[], pipeline: FullTextPipeline, ctx: MeasureContext): Promise { const bulk: IndexedDoc[] = [] @@ -243,11 +188,6 @@ export class FullTextPushStage implements FullTextPipelineStage { }) ) - const allAttributes = pipeline.hierarchy.getAllAttributes(doc.objectClass) - - // Include child ref attributes - await this.indexRefAttributes(allAttributes, doc, elasticDoc, ctx) - await updateDocWithPresenter(pipeline.hierarchy, doc, elasticDoc, { parentDoc, spaceDoc }) this.checkIntegrity(elasticDoc) @@ -349,11 +289,7 @@ function updateDoc2Elastic ( } continue } - const docKeyOpts: IndexKeyOptions = { _class, relative: true, extra: extra.filter((it) => it !== 'base64') } - if (refAttribute !== undefined) { - docKeyOpts.refAttribute = refAttribute - } - const docIdAttr = docKey(attr, docKeyOpts) + const docIdAttr = docKey(attr, { _class, extra: extra.filter((it) => it !== 'base64') }) if (vv !== null) { // Since we replace array of values, we could ignore null doc[docIdAttr] = diff --git a/server/core/src/indexer/indexer.ts b/server/core/src/indexer/indexer.ts index 5f6bac3284..fb49a15c32 100644 --- a/server/core/src/indexer/indexer.ts +++ b/server/core/src/indexer/indexer.ts @@ -27,6 +27,7 @@ import core, { type ModelDb, type Ref, type ServerStorage, + SortingOrder, TxFactory, type WorkspaceId, _getOperator, @@ -443,6 +444,7 @@ export class FullTextIndexPipeline implements FullTextPipeline { removed: false }, { + sort: { modifiedOn: SortingOrder.Descending }, limit: globalIndexer.processingSize } ) diff --git a/server/core/src/mapper.ts b/server/core/src/mapper.ts index 20580298cc..9c5537acc8 100644 --- a/server/core/src/mapper.ts +++ b/server/core/src/mapper.ts @@ -36,7 +36,7 @@ function createIndexedReader ( get: (attr: string) => { const realAttr = hierarchy.findAttribute(_class, attr) if (realAttr !== undefined) { - return doc.attributes[docKey(attr, { refAttribute, _class: realAttr.attributeOf })] + return doc.attributes[docKey(attr, { _class: realAttr.attributeOf })] } return undefined },