improve reindexing speed (#5141)

This commit is contained in:
Vyacheslav Tumanov 2024-04-02 18:02:37 +05:00 committed by GitHub
parent 7a7ea1e92b
commit 87ff5e9134
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 5 additions and 69 deletions

View File

@ -145,8 +145,6 @@ export interface IndexKeyOptions {
_class?: Ref<Class<Obj>>
docId?: Ref<DocIndexState>
extra?: string[]
relative?: boolean
refAttribute?: string
}
/**
* @public

View File

@ -14,7 +14,6 @@
//
import core, {
type AnyAttribute,
type ArrOf,
type Class,
type Doc,
@ -24,7 +23,6 @@ import core, {
extractDocKey,
type Hierarchy,
isFullTextAttribute,
isIndexedAttribute,
type MeasureContext,
type Ref,
type ServerStorage,
@ -42,14 +40,7 @@ import {
type FullTextPipelineStage,
fullTextPushStageId
} from './types'
import {
collectPropagate,
collectPropagateClasses,
docKey,
getFullTextContext,
type IndexKeyOptions,
isCustomAttr
} from './utils'
import { collectPropagate, collectPropagateClasses, docKey, getFullTextContext, isCustomAttr } from './utils'
/**
* @public
@ -108,52 +99,6 @@ export class FullTextPushStage implements FullTextPipelineStage {
return { docs: [], pass: true }
}
/**
 * Copies the indexed attributes of referenced documents into `elasticDoc`.
 *
 * Walks every key of `doc.attributes`. A key is processed only when the
 * attribute it resolves to (via `extractDocKey` + lookup in `attributes`) is
 * an indexed attribute typed either as `RefTo` or as `ArrOf` of `RefTo`, and
 * its stored value is a non-empty string. The value is split on `,` into ids,
 * the matching `DocIndexState` records are loaded through
 * `dbStorage.findAll`, and each record's attributes are handed to
 * `updateDoc2Elastic` together with the originating attribute key.
 *
 * @param attributes - attribute definitions keyed by attribute name
 * @param doc - index state whose attribute keys are scanned
 * @param elasticDoc - target document passed to `updateDoc2Elastic`
 * @param metrics - measurement context wrapping the lookup and the merge
 */
async indexRefAttributes (
  attributes: Map<string, AnyAttribute>,
  doc: DocIndexState,
  elasticDoc: IndexedDoc,
  metrics: MeasureContext
): Promise<void> {
  for (const key in doc.attributes) {
    const { attr } = extractDocKey(key)
    const definition = attributes.get(attr)
    if (definition === null || definition === undefined || !isIndexedAttribute(definition)) {
      continue
    }

    // Only single references and arrays of references are followed.
    const attrType = definition.type
    const pointsToDocs =
      attrType._class === core.class.RefTo ||
      (attrType._class === core.class.ArrOf && (attrType as ArrOf<any>).of._class === core.class.RefTo)
    if (!pointsToDocs) {
      continue
    }

    const rawValue = doc.attributes[key]
    if (rawValue === undefined || rawValue === null || rawValue === '') {
      continue
    }

    // The stored value is a comma-separated list of referenced ids.
    const refs: Ref<DocIndexState>[] = rawValue.split(',')
    if (refs.length === 0) {
      continue
    }

    const refDocs = await metrics.with(
      'ref-docs',
      {},
      async (ctx) =>
        await this.dbStorage.findAll(
          ctx,
          core.class.DocIndexState,
          {
            // A lone id is matched directly; several ids use `$in`.
            _id: refs.length === 1 ? refs[0] : { $in: refs }
          },
          { limit: refs.length }
        )
    )

    for (const ref of refDocs) {
      await metrics.with('updateDoc2Elastic', {}, async (ctx) => {
        updateDoc2Elastic(ref.attributes, elasticDoc, ref._id, key)
      })
    }
  }
}
async collect (toIndex: DocIndexState[], pipeline: FullTextPipeline, ctx: MeasureContext): Promise<void> {
const bulk: IndexedDoc[] = []
@ -243,11 +188,6 @@ export class FullTextPushStage implements FullTextPipelineStage {
})
)
const allAttributes = pipeline.hierarchy.getAllAttributes(doc.objectClass)
// Include child ref attributes
await this.indexRefAttributes(allAttributes, doc, elasticDoc, ctx)
await updateDocWithPresenter(pipeline.hierarchy, doc, elasticDoc, { parentDoc, spaceDoc })
this.checkIntegrity(elasticDoc)
@ -349,11 +289,7 @@ function updateDoc2Elastic (
}
continue
}
const docKeyOpts: IndexKeyOptions = { _class, relative: true, extra: extra.filter((it) => it !== 'base64') }
if (refAttribute !== undefined) {
docKeyOpts.refAttribute = refAttribute
}
const docIdAttr = docKey(attr, docKeyOpts)
const docIdAttr = docKey(attr, { _class, extra: extra.filter((it) => it !== 'base64') })
if (vv !== null) {
// Since we replace array of values, we could ignore null
doc[docIdAttr] =

View File

@ -27,6 +27,7 @@ import core, {
type ModelDb,
type Ref,
type ServerStorage,
SortingOrder,
TxFactory,
type WorkspaceId,
_getOperator,
@ -443,6 +444,7 @@ export class FullTextIndexPipeline implements FullTextPipeline {
removed: false
},
{
sort: { modifiedOn: SortingOrder.Descending },
limit: globalIndexer.processingSize
}
)

View File

@ -36,7 +36,7 @@ function createIndexedReader (
get: (attr: string) => {
const realAttr = hierarchy.findAttribute(_class, attr)
if (realAttr !== undefined) {
return doc.attributes[docKey(attr, { refAttribute, _class: realAttr.attributeOf })]
return doc.attributes[docKey(attr, { _class: realAttr.attributeOf })]
}
return undefined
},