Optimize elastic rebuild & minor fix (#1451)

Signed-off-by: Denis Bykhov <80476319+BykhovDenis@users.noreply.github.com>
This commit is contained in:
Denis Bykhov 2022-04-19 15:50:01 +06:00 committed by GitHub
parent 8e43fcec51
commit 3b981931f2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 95 additions and 99 deletions

View File

@ -1,6 +1,5 @@
//
// Copyright © 2020, 2021 Anticrm Platform Contributors.
// Copyright © 2021 Hardcore Engineering Inc.
// Copyright © 2022 Hardcore Engineering Inc.
//
// Licensed under the Eclipse Public License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. You may
@ -16,7 +15,6 @@
import { Attachment } from '@anticrm/attachment'
import core, {
Account,
AttachedDoc,
Class,
Doc, DocumentQuery,
@ -39,32 +37,33 @@ import core, {
import { createElasticAdapter } from '@anticrm/elastic'
import { DOMAIN_ATTACHMENT } from '@anticrm/model-attachment'
import { createMongoAdapter, createMongoTxAdapter } from '@anticrm/mongo'
import { addLocation } from '@anticrm/platform'
import { serverAttachmentId } from '@anticrm/server-attachment'
import { serverCalendarId } from '@anticrm/server-calendar'
import { serverChunterId } from '@anticrm/server-chunter'
import { serverContactId } from '@anticrm/server-contact'
import {
createServerStorage,
DbAdapter,
DbConfiguration,
FullTextAdapter,
FullTextIndex,
IndexedDoc,
TxAdapter
} from '@anticrm/server-core'
import { serverAttachmentId } from '@anticrm/server-attachment'
import { serverContactId } from '@anticrm/server-contact'
import { serverNotificationId } from '@anticrm/server-notification'
import { serverSettingId } from '@anticrm/server-setting'
import { serverChunterId } from '@anticrm/server-chunter'
import { serverGmailId } from '@anticrm/server-gmail'
import { serverInventoryId } from '@anticrm/server-inventory'
import { serverLeadId } from '@anticrm/server-lead'
import { serverNotificationId } from '@anticrm/server-notification'
import { serverRecruitId } from '@anticrm/server-recruit'
import { serverTaskId } from '@anticrm/server-task'
import { serverSettingId } from '@anticrm/server-setting'
import { serverTagsId } from '@anticrm/server-tags'
import { serverCalendarId } from '@anticrm/server-calendar'
import { serverGmailId } from '@anticrm/server-gmail'
import { serverTaskId } from '@anticrm/server-task'
import { serverTelegramId } from '@anticrm/server-telegram'
import { Client as ElasticClient } from '@elastic/elasticsearch'
import { Client } from 'minio'
import { Db, MongoClient } from 'mongodb'
import { listMinioObjects } from './minio'
import { addLocation } from '@anticrm/platform'
export async function rebuildElastic (
mongoUrl: string,
@ -112,20 +111,22 @@ export class ElasticTool {
elastic!: FullTextAdapter & {close: () => Promise<void>}
storage!: ServerStorage
db!: Db
fulltext!: FullTextIndex
constructor (readonly mongoUrl: string, readonly dbName: string, readonly minio: Client, readonly elasticUrl: string) {
addLocation(serverAttachmentId, () => import('@anticrm/server-attachment-resources'))
addLocation(serverContactId, () => import('@anticrm/server-contact-resources'))
addLocation(serverNotificationId, () => import('@anticrm/server-notification-resources'))
addLocation(serverChunterId, () => import(/* webpackChunkName: "server-chunter" */ '@anticrm/server-chunter-resources'))
addLocation(serverInventoryId, () => import(/* webpackChunkName: "server-inventory" */ '@anticrm/server-inventory-resources'))
addLocation(serverLeadId, () => import(/* webpackChunkName: "server-lead" */ '@anticrm/server-lead-resources'))
addLocation(serverRecruitId, () => import(/* webpackChunkName: "server-recruit" */ '@anticrm/server-recruit-resources'))
addLocation(serverSettingId, () => import(/* webpackChunkName: "server-recruit" */ '@anticrm/server-setting-resources'))
addLocation(serverTaskId, () => import/* webpackChunkName: "server-task" */ ('@anticrm/server-task-resources'))
addLocation(serverTagsId, () => import/* webpackChunkName: "server-tags" */ ('@anticrm/server-tags-resources'))
addLocation(serverCalendarId, () => import/* webpackChunkName: "server-calendar" */ ('@anticrm/server-calendar-resources'))
addLocation(serverGmailId, () => import/* webpackChunkName: "server-gmail" */ ('@anticrm/server-gmail-resources'))
addLocation(serverTelegramId, () => import/* webpackChunkName: "server-telegram" */ ('@anticrm/server-telegram-resources'))
addLocation(serverChunterId, () => import('@anticrm/server-chunter-resources'))
addLocation(serverInventoryId, () => import('@anticrm/server-inventory-resources'))
addLocation(serverLeadId, () => import('@anticrm/server-lead-resources'))
addLocation(serverRecruitId, () => import('@anticrm/server-recruit-resources'))
addLocation(serverSettingId, () => import('@anticrm/server-setting-resources'))
addLocation(serverTaskId, () => import('@anticrm/server-task-resources'))
addLocation(serverTagsId, () => import('@anticrm/server-tags-resources'))
addLocation(serverCalendarId, () => import('@anticrm/server-calendar-resources'))
addLocation(serverGmailId, () => import('@anticrm/server-gmail-resources'))
addLocation(serverTelegramId, () => import('@anticrm/server-telegram-resources'))
this.mongoClient = new MongoClient(mongoUrl)
}
@ -135,6 +136,7 @@ export class ElasticTool {
this.db = this.mongoClient.db(this.dbName)
this.elastic = await createElasticAdapter(this.elasticUrl, this.dbName)
this.storage = await createStorage(this.mongoUrl, this.elasticUrl, this.dbName)
this.fulltext = new FullTextIndex(this.storage.hierarchy, this.elastic, this.storage, true)
return async () => {
await this.mongoClient.close()
@ -161,7 +163,7 @@ export class ElasticTool {
_class: doc._class,
space: doc.space,
modifiedOn: doc.modifiedOn,
modifiedBy: 'core:account:System' as Ref<Account>,
modifiedBy: core.account.System,
attachedTo: doc.attachedTo,
data: buffer.toString('base64')
}
@ -196,90 +198,69 @@ async function restoreElastic (mongoUrl: string, dbName: string, minio: Client,
const data = (await tool.db.collection<Tx>(DOMAIN_TX).find().toArray())
const m = newMetrics()
const metricsCtx = new MeasureMetricsContext('elastic', {}, m)
console.log('replay elastic transactions', data.length)
let pos = 0
let p = Date.now()
const isCreateTx = (tx: Tx): boolean => tx._class === core.class.TxCreateDoc
const isMixinTx = (tx: Tx): boolean => tx._class === core.class.TxMixin || (tx._class === core.class.TxCollectionCUD && (tx as TxCollectionCUD<Doc, AttachedDoc>).tx._class === core.class.TxMixin)
const isCollectionCreateTx = (tx: Tx): boolean => tx._class === core.class.TxCollectionCUD && (tx as TxCollectionCUD<Doc, AttachedDoc>).tx._class === core.class.TxCreateDoc
const tdata = data.filter(tx => isCreateTx(tx) || isMixinTx(tx) || isCollectionCreateTx(tx))
const createTxes = data.filter((tx) => isCreateTx(tx))
const collectionTxes = data.filter((tx) => isCollectionCreateTx(tx))
const removedDocument = new Set<Ref<Doc>>()
for (const tx of tdata) {
pos++
if (pos % 5000 === 0) {
console.log('replay elastic transactions', pos, tdata.length, Date.now() - p)
p = Date.now()
}
if (isCreateTx(tx)) {
const createTx = tx as TxCreateDoc<Doc>
const docSnapshot = (await tool.storage.findAll(metricsCtx, createTx.objectClass, { _id: createTx.objectId }, { limit: 1 })).shift()
if (docSnapshot !== undefined) {
// If there is no doc, then it is removed, not need to do something with elastic.
const { _class, _id, modifiedBy, modifiedOn, space, ...docData } = docSnapshot
try {
const newTx: TxCreateDoc<Doc> = {
...createTx,
attributes: docData,
modifiedBy,
modifiedOn,
objectSpace: space // <- it could be moved, let's take actual one.
}
await tool.storage.tx(metricsCtx, newTx)
} catch (err: any) {
console.error('failed to replay tx', tx, err.message)
}
} else {
removedDocument.add(createTx.objectId)
}
}
// We need process mixins.
if (isMixinTx(tx)) {
const startCreate = Date.now()
console.log('replay elastic create transactions', createTxes.length)
await Promise.all(createTxes.map(async (tx) => {
const createTx = tx as TxCreateDoc<Doc>
const docSnapshot = (await tool.storage.findAll(metricsCtx, createTx.objectClass, { _id: createTx.objectId }, { limit: 1 })).shift()
if (docSnapshot !== undefined) {
// If there is no doc, then it is removed, not need to do something with elastic.
const { _class, _id, modifiedBy, modifiedOn, space, ...docData } = docSnapshot
try {
let deleted = false
if (tx._class === core.class.TxMixin) {
deleted = removedDocument.has((tx as TxMixin<Doc, Doc>).objectId)
const newTx: TxCreateDoc<Doc> = {
...createTx,
attributes: docData,
modifiedBy,
modifiedOn,
objectSpace: space // <- it could be moved, let's take actual one.
}
if (tx._class === core.class.TxCollectionCUD && (tx as TxCollectionCUD<Doc, AttachedDoc>).tx._class === core.class.TxMixin) {
deleted = removedDocument.has((tx as TxCollectionCUD<Doc, AttachedDoc>).tx.objectId)
}
if (!deleted) {
await tool.storage.tx(metricsCtx, tx)
await tool.fulltext.tx(metricsCtx, newTx)
} catch (err: any) {
console.error('failed to replay tx', tx, err.message)
}
} else {
removedDocument.add(createTx.objectId)
}
}))
console.log('replay elastic create transactions done', Date.now() - startCreate)
const startCollection = Date.now()
console.log('replay elastic collection transactions', collectionTxes.length)
await Promise.all(collectionTxes.map(async (tx) => {
const collTx = tx as TxCollectionCUD<Doc, AttachedDoc>
const createTx = collTx.tx as unknown as TxCreateDoc<AttachedDoc>
const docSnapshot = (await tool.storage.findAll(metricsCtx, createTx.objectClass, { _id: createTx.objectId }, { limit: 1 })).shift() as AttachedDoc
if (docSnapshot !== undefined) {
// If there is no doc, then it is removed, not need to do something with elastic.
const { _class, _id, modifiedBy, modifiedOn, space, ...data } = docSnapshot
try {
const newTx: TxCreateDoc<AttachedDoc> = {
...createTx,
attributes: data,
modifiedBy,
modifiedOn,
objectSpace: space // <- it could be moved, let's take actual one.
}
collTx.tx = newTx
collTx.modifiedBy = modifiedBy
collTx.modifiedOn = modifiedOn
collTx.objectSpace = space
await tool.fulltext.tx(metricsCtx, collTx)
} catch (err: any) {
console.error('failed to replay tx', tx, err.message)
}
}
}))
console.log('replay elastic collection transactions done', Date.now() - startCollection)
// We need process collection creations.
if (isCollectionCreateTx(tx)) {
const collTx = tx as TxCollectionCUD<Doc, AttachedDoc>
const createTx = collTx.tx as unknown as TxCreateDoc<AttachedDoc>
const docSnapshot = (await tool.storage.findAll(metricsCtx, createTx.objectClass, { _id: createTx.objectId }, { limit: 1 })).shift() as AttachedDoc
if (docSnapshot !== undefined) {
// If there is no doc, then it is removed, not need to do something with elastic.
const { _class, _id, modifiedBy, modifiedOn, space, ...data } = docSnapshot
try {
const newTx: TxCreateDoc<AttachedDoc> = {
...createTx,
attributes: data,
modifiedBy,
modifiedOn,
objectSpace: space // <- it could be moved, let's take actual one.
}
collTx.tx = newTx
collTx.modifiedBy = modifiedBy
collTx.modifiedOn = modifiedOn
collTx.objectSpace = space
await tool.storage.tx(metricsCtx, collTx)
} catch (err: any) {
console.error('failed to replay tx', tx, err.message)
}
}
}
}
let apos = 0
if (await minio.bucketExists(dbName)) {
const minioObjects = await listMinioObjects(minio, dbName)

View File

@ -170,13 +170,15 @@ export class FullTextIndex implements WithFind {
} else {
const result: T[] = []
const size = options.limit
let start = 0
while (true) {
const ids = resultIds.splice(0, size)
const ids = resultIds.slice(start, start + size)
const res = await this.getResult(ctx, _class, ids, mainQuery as DocumentQuery<T>, options)
result.push(...res)
if (result.length >= size || res.length < size) {
break
}
start += size
}
if (result.length >= size) {
const total = await this.getResult(ctx, _class, resultIds, mainQuery as DocumentQuery<T>, { limit: 1 })

View File

@ -31,8 +31,9 @@ class ElasticAdapter implements FullTextAdapter {
bool: {
must: [
{
query_string: {
simple_query_string: {
query: query.$search,
flags: 'OR|PREFIX|PHRASE',
default_operator: 'and'
}
}
@ -43,11 +44,23 @@ class ElasticAdapter implements FullTextAdapter {
_class: _classes.map((c) => c.toLowerCase()),
boost: 10.0
}
},
}
],
filter: [
{
terms: {
attachedToClass: _classes.map((c) => c.toLowerCase()),
boost: 5.0
bool: {
should: [
{
terms: {
_class: _classes.map((c) => c.toLowerCase())
}
},
{
terms: {
attachedToClass: _classes.map((c) => c.toLowerCase())
}
}
]
}
}
]