Optimize elastic rebuild & minor fix (#1451)

Signed-off-by: Denis Bykhov <80476319+BykhovDenis@users.noreply.github.com>
This commit is contained in:
Denis Bykhov 2022-04-19 15:50:01 +06:00 committed by GitHub
parent 8e43fcec51
commit 3b981931f2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 95 additions and 99 deletions

View File

@ -1,6 +1,5 @@
// //
// Copyright © 2020, 2021 Anticrm Platform Contributors. // Copyright © 2022 Hardcore Engineering Inc.
// Copyright © 2021 Hardcore Engineering Inc.
// //
// Licensed under the Eclipse Public License, Version 2.0 (the "License"); // Licensed under the Eclipse Public License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. You may // you may not use this file except in compliance with the License. You may
@ -16,7 +15,6 @@
import { Attachment } from '@anticrm/attachment' import { Attachment } from '@anticrm/attachment'
import core, { import core, {
Account,
AttachedDoc, AttachedDoc,
Class, Class,
Doc, DocumentQuery, Doc, DocumentQuery,
@ -39,32 +37,33 @@ import core, {
import { createElasticAdapter } from '@anticrm/elastic' import { createElasticAdapter } from '@anticrm/elastic'
import { DOMAIN_ATTACHMENT } from '@anticrm/model-attachment' import { DOMAIN_ATTACHMENT } from '@anticrm/model-attachment'
import { createMongoAdapter, createMongoTxAdapter } from '@anticrm/mongo' import { createMongoAdapter, createMongoTxAdapter } from '@anticrm/mongo'
import { addLocation } from '@anticrm/platform'
import { serverAttachmentId } from '@anticrm/server-attachment'
import { serverCalendarId } from '@anticrm/server-calendar'
import { serverChunterId } from '@anticrm/server-chunter'
import { serverContactId } from '@anticrm/server-contact'
import { import {
createServerStorage, createServerStorage,
DbAdapter, DbAdapter,
DbConfiguration, DbConfiguration,
FullTextAdapter, FullTextAdapter,
FullTextIndex,
IndexedDoc, IndexedDoc,
TxAdapter TxAdapter
} from '@anticrm/server-core' } from '@anticrm/server-core'
import { serverAttachmentId } from '@anticrm/server-attachment' import { serverGmailId } from '@anticrm/server-gmail'
import { serverContactId } from '@anticrm/server-contact'
import { serverNotificationId } from '@anticrm/server-notification'
import { serverSettingId } from '@anticrm/server-setting'
import { serverChunterId } from '@anticrm/server-chunter'
import { serverInventoryId } from '@anticrm/server-inventory' import { serverInventoryId } from '@anticrm/server-inventory'
import { serverLeadId } from '@anticrm/server-lead' import { serverLeadId } from '@anticrm/server-lead'
import { serverNotificationId } from '@anticrm/server-notification'
import { serverRecruitId } from '@anticrm/server-recruit' import { serverRecruitId } from '@anticrm/server-recruit'
import { serverTaskId } from '@anticrm/server-task' import { serverSettingId } from '@anticrm/server-setting'
import { serverTagsId } from '@anticrm/server-tags' import { serverTagsId } from '@anticrm/server-tags'
import { serverCalendarId } from '@anticrm/server-calendar' import { serverTaskId } from '@anticrm/server-task'
import { serverGmailId } from '@anticrm/server-gmail'
import { serverTelegramId } from '@anticrm/server-telegram' import { serverTelegramId } from '@anticrm/server-telegram'
import { Client as ElasticClient } from '@elastic/elasticsearch' import { Client as ElasticClient } from '@elastic/elasticsearch'
import { Client } from 'minio' import { Client } from 'minio'
import { Db, MongoClient } from 'mongodb' import { Db, MongoClient } from 'mongodb'
import { listMinioObjects } from './minio' import { listMinioObjects } from './minio'
import { addLocation } from '@anticrm/platform'
export async function rebuildElastic ( export async function rebuildElastic (
mongoUrl: string, mongoUrl: string,
@ -112,20 +111,22 @@ export class ElasticTool {
elastic!: FullTextAdapter & {close: () => Promise<void>} elastic!: FullTextAdapter & {close: () => Promise<void>}
storage!: ServerStorage storage!: ServerStorage
db!: Db db!: Db
fulltext!: FullTextIndex
constructor (readonly mongoUrl: string, readonly dbName: string, readonly minio: Client, readonly elasticUrl: string) { constructor (readonly mongoUrl: string, readonly dbName: string, readonly minio: Client, readonly elasticUrl: string) {
addLocation(serverAttachmentId, () => import('@anticrm/server-attachment-resources')) addLocation(serverAttachmentId, () => import('@anticrm/server-attachment-resources'))
addLocation(serverContactId, () => import('@anticrm/server-contact-resources')) addLocation(serverContactId, () => import('@anticrm/server-contact-resources'))
addLocation(serverNotificationId, () => import('@anticrm/server-notification-resources')) addLocation(serverNotificationId, () => import('@anticrm/server-notification-resources'))
addLocation(serverChunterId, () => import(/* webpackChunkName: "server-chunter" */ '@anticrm/server-chunter-resources')) addLocation(serverChunterId, () => import('@anticrm/server-chunter-resources'))
addLocation(serverInventoryId, () => import(/* webpackChunkName: "server-inventory" */ '@anticrm/server-inventory-resources')) addLocation(serverInventoryId, () => import('@anticrm/server-inventory-resources'))
addLocation(serverLeadId, () => import(/* webpackChunkName: "server-lead" */ '@anticrm/server-lead-resources')) addLocation(serverLeadId, () => import('@anticrm/server-lead-resources'))
addLocation(serverRecruitId, () => import(/* webpackChunkName: "server-recruit" */ '@anticrm/server-recruit-resources')) addLocation(serverRecruitId, () => import('@anticrm/server-recruit-resources'))
addLocation(serverSettingId, () => import(/* webpackChunkName: "server-recruit" */ '@anticrm/server-setting-resources')) addLocation(serverSettingId, () => import('@anticrm/server-setting-resources'))
addLocation(serverTaskId, () => import/* webpackChunkName: "server-task" */ ('@anticrm/server-task-resources')) addLocation(serverTaskId, () => import('@anticrm/server-task-resources'))
addLocation(serverTagsId, () => import/* webpackChunkName: "server-tags" */ ('@anticrm/server-tags-resources')) addLocation(serverTagsId, () => import('@anticrm/server-tags-resources'))
addLocation(serverCalendarId, () => import/* webpackChunkName: "server-calendar" */ ('@anticrm/server-calendar-resources')) addLocation(serverCalendarId, () => import('@anticrm/server-calendar-resources'))
addLocation(serverGmailId, () => import/* webpackChunkName: "server-gmail" */ ('@anticrm/server-gmail-resources')) addLocation(serverGmailId, () => import('@anticrm/server-gmail-resources'))
addLocation(serverTelegramId, () => import/* webpackChunkName: "server-telegram" */ ('@anticrm/server-telegram-resources')) addLocation(serverTelegramId, () => import('@anticrm/server-telegram-resources'))
this.mongoClient = new MongoClient(mongoUrl) this.mongoClient = new MongoClient(mongoUrl)
} }
@ -135,6 +136,7 @@ export class ElasticTool {
this.db = this.mongoClient.db(this.dbName) this.db = this.mongoClient.db(this.dbName)
this.elastic = await createElasticAdapter(this.elasticUrl, this.dbName) this.elastic = await createElasticAdapter(this.elasticUrl, this.dbName)
this.storage = await createStorage(this.mongoUrl, this.elasticUrl, this.dbName) this.storage = await createStorage(this.mongoUrl, this.elasticUrl, this.dbName)
this.fulltext = new FullTextIndex(this.storage.hierarchy, this.elastic, this.storage, true)
return async () => { return async () => {
await this.mongoClient.close() await this.mongoClient.close()
@ -161,7 +163,7 @@ export class ElasticTool {
_class: doc._class, _class: doc._class,
space: doc.space, space: doc.space,
modifiedOn: doc.modifiedOn, modifiedOn: doc.modifiedOn,
modifiedBy: 'core:account:System' as Ref<Account>, modifiedBy: core.account.System,
attachedTo: doc.attachedTo, attachedTo: doc.attachedTo,
data: buffer.toString('base64') data: buffer.toString('base64')
} }
@ -196,90 +198,69 @@ async function restoreElastic (mongoUrl: string, dbName: string, minio: Client,
const data = (await tool.db.collection<Tx>(DOMAIN_TX).find().toArray()) const data = (await tool.db.collection<Tx>(DOMAIN_TX).find().toArray())
const m = newMetrics() const m = newMetrics()
const metricsCtx = new MeasureMetricsContext('elastic', {}, m) const metricsCtx = new MeasureMetricsContext('elastic', {}, m)
console.log('replay elastic transactions', data.length)
let pos = 0
let p = Date.now()
const isCreateTx = (tx: Tx): boolean => tx._class === core.class.TxCreateDoc const isCreateTx = (tx: Tx): boolean => tx._class === core.class.TxCreateDoc
const isMixinTx = (tx: Tx): boolean => tx._class === core.class.TxMixin || (tx._class === core.class.TxCollectionCUD && (tx as TxCollectionCUD<Doc, AttachedDoc>).tx._class === core.class.TxMixin)
const isCollectionCreateTx = (tx: Tx): boolean => tx._class === core.class.TxCollectionCUD && (tx as TxCollectionCUD<Doc, AttachedDoc>).tx._class === core.class.TxCreateDoc const isCollectionCreateTx = (tx: Tx): boolean => tx._class === core.class.TxCollectionCUD && (tx as TxCollectionCUD<Doc, AttachedDoc>).tx._class === core.class.TxCreateDoc
const tdata = data.filter(tx => isCreateTx(tx) || isMixinTx(tx) || isCollectionCreateTx(tx)) const createTxes = data.filter((tx) => isCreateTx(tx))
const collectionTxes = data.filter((tx) => isCollectionCreateTx(tx))
const removedDocument = new Set<Ref<Doc>>() const removedDocument = new Set<Ref<Doc>>()
for (const tx of tdata) {
pos++
if (pos % 5000 === 0) {
console.log('replay elastic transactions', pos, tdata.length, Date.now() - p)
p = Date.now()
}
if (isCreateTx(tx)) {
const createTx = tx as TxCreateDoc<Doc>
const docSnapshot = (await tool.storage.findAll(metricsCtx, createTx.objectClass, { _id: createTx.objectId }, { limit: 1 })).shift()
if (docSnapshot !== undefined) {
// If there is no doc, then it is removed, not need to do something with elastic.
const { _class, _id, modifiedBy, modifiedOn, space, ...docData } = docSnapshot
try {
const newTx: TxCreateDoc<Doc> = {
...createTx,
attributes: docData,
modifiedBy,
modifiedOn,
objectSpace: space // <- it could be moved, let's take actual one.
}
await tool.storage.tx(metricsCtx, newTx)
} catch (err: any) {
console.error('failed to replay tx', tx, err.message)
}
} else {
removedDocument.add(createTx.objectId)
}
}
// We need process mixins. const startCreate = Date.now()
if (isMixinTx(tx)) { console.log('replay elastic create transactions', createTxes.length)
await Promise.all(createTxes.map(async (tx) => {
const createTx = tx as TxCreateDoc<Doc>
const docSnapshot = (await tool.storage.findAll(metricsCtx, createTx.objectClass, { _id: createTx.objectId }, { limit: 1 })).shift()
if (docSnapshot !== undefined) {
// If there is no doc, then it is removed, not need to do something with elastic.
const { _class, _id, modifiedBy, modifiedOn, space, ...docData } = docSnapshot
try { try {
let deleted = false const newTx: TxCreateDoc<Doc> = {
if (tx._class === core.class.TxMixin) { ...createTx,
deleted = removedDocument.has((tx as TxMixin<Doc, Doc>).objectId) attributes: docData,
modifiedBy,
modifiedOn,
objectSpace: space // <- it could be moved, let's take actual one.
} }
if (tx._class === core.class.TxCollectionCUD && (tx as TxCollectionCUD<Doc, AttachedDoc>).tx._class === core.class.TxMixin) { await tool.fulltext.tx(metricsCtx, newTx)
deleted = removedDocument.has((tx as TxCollectionCUD<Doc, AttachedDoc>).tx.objectId) } catch (err: any) {
} console.error('failed to replay tx', tx, err.message)
if (!deleted) { }
await tool.storage.tx(metricsCtx, tx) } else {
removedDocument.add(createTx.objectId)
}
}))
console.log('replay elastic create transactions done', Date.now() - startCreate)
const startCollection = Date.now()
console.log('replay elastic collection transactions', collectionTxes.length)
await Promise.all(collectionTxes.map(async (tx) => {
const collTx = tx as TxCollectionCUD<Doc, AttachedDoc>
const createTx = collTx.tx as unknown as TxCreateDoc<AttachedDoc>
const docSnapshot = (await tool.storage.findAll(metricsCtx, createTx.objectClass, { _id: createTx.objectId }, { limit: 1 })).shift() as AttachedDoc
if (docSnapshot !== undefined) {
// If there is no doc, then it is removed, not need to do something with elastic.
const { _class, _id, modifiedBy, modifiedOn, space, ...data } = docSnapshot
try {
const newTx: TxCreateDoc<AttachedDoc> = {
...createTx,
attributes: data,
modifiedBy,
modifiedOn,
objectSpace: space // <- it could be moved, let's take actual one.
} }
collTx.tx = newTx
collTx.modifiedBy = modifiedBy
collTx.modifiedOn = modifiedOn
collTx.objectSpace = space
await tool.fulltext.tx(metricsCtx, collTx)
} catch (err: any) { } catch (err: any) {
console.error('failed to replay tx', tx, err.message) console.error('failed to replay tx', tx, err.message)
} }
} }
}))
console.log('replay elastic collection transactions done', Date.now() - startCollection)
// We need process collection creations.
if (isCollectionCreateTx(tx)) {
const collTx = tx as TxCollectionCUD<Doc, AttachedDoc>
const createTx = collTx.tx as unknown as TxCreateDoc<AttachedDoc>
const docSnapshot = (await tool.storage.findAll(metricsCtx, createTx.objectClass, { _id: createTx.objectId }, { limit: 1 })).shift() as AttachedDoc
if (docSnapshot !== undefined) {
// If there is no doc, then it is removed, not need to do something with elastic.
const { _class, _id, modifiedBy, modifiedOn, space, ...data } = docSnapshot
try {
const newTx: TxCreateDoc<AttachedDoc> = {
...createTx,
attributes: data,
modifiedBy,
modifiedOn,
objectSpace: space // <- it could be moved, let's take actual one.
}
collTx.tx = newTx
collTx.modifiedBy = modifiedBy
collTx.modifiedOn = modifiedOn
collTx.objectSpace = space
await tool.storage.tx(metricsCtx, collTx)
} catch (err: any) {
console.error('failed to replay tx', tx, err.message)
}
}
}
}
let apos = 0 let apos = 0
if (await minio.bucketExists(dbName)) { if (await minio.bucketExists(dbName)) {
const minioObjects = await listMinioObjects(minio, dbName) const minioObjects = await listMinioObjects(minio, dbName)

View File

@ -170,13 +170,15 @@ export class FullTextIndex implements WithFind {
} else { } else {
const result: T[] = [] const result: T[] = []
const size = options.limit const size = options.limit
let start = 0
while (true) { while (true) {
const ids = resultIds.splice(0, size) const ids = resultIds.slice(start, start + size)
const res = await this.getResult(ctx, _class, ids, mainQuery as DocumentQuery<T>, options) const res = await this.getResult(ctx, _class, ids, mainQuery as DocumentQuery<T>, options)
result.push(...res) result.push(...res)
if (result.length >= size || res.length < size) { if (result.length >= size || res.length < size) {
break break
} }
start += size
} }
if (result.length >= size) { if (result.length >= size) {
const total = await this.getResult(ctx, _class, resultIds, mainQuery as DocumentQuery<T>, { limit: 1 }) const total = await this.getResult(ctx, _class, resultIds, mainQuery as DocumentQuery<T>, { limit: 1 })

View File

@ -31,8 +31,9 @@ class ElasticAdapter implements FullTextAdapter {
bool: { bool: {
must: [ must: [
{ {
query_string: { simple_query_string: {
query: query.$search, query: query.$search,
flags: 'OR|PREFIX|PHRASE',
default_operator: 'and' default_operator: 'and'
} }
} }
@ -43,11 +44,23 @@ class ElasticAdapter implements FullTextAdapter {
_class: _classes.map((c) => c.toLowerCase()), _class: _classes.map((c) => c.toLowerCase()),
boost: 10.0 boost: 10.0
} }
}, }
],
filter: [
{ {
terms: { bool: {
attachedToClass: _classes.map((c) => c.toLowerCase()), should: [
boost: 5.0 {
terms: {
_class: _classes.map((c) => c.toLowerCase())
}
},
{
terms: {
attachedToClass: _classes.map((c) => c.toLowerCase())
}
}
]
} }
} }
] ]