UBERF-9226 Workspace reindex tool

Signed-off-by: Alexander Onnikov <Alexander.Onnikov@xored.com>
This commit is contained in:
Alexander Onnikov 2025-01-22 16:46:16 +07:00
parent 0534c2b90e
commit c9a2d0c5d0
No known key found for this signature in database
GPG Key ID: 3320C3B3324E934C
5 changed files with 106 additions and 24 deletions

33
dev/tool/src/fulltext.ts Normal file
View File

@ -0,0 +1,33 @@
//
// Copyright © 2025 Hardcore Engineering Inc.
//
// Licensed under the Eclipse Public License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. You may
// obtain a copy of the License at https://www.eclipse.org/legal/epl-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//
// See the License for the specific language governing permissions and
// limitations under the License.
//
import { type MeasureContext } from '@hcengineering/core'
/**
 * Asks the fulltext service to reindex a workspace.
 *
 * Sends `PUT <fulltextUrl>/api/v1/reindex` with the JSON body `{ token }`.
 * Network failures and non-2xx responses are reported through `ctx.error`
 * and are NOT rethrown, so the returned promise always resolves.
 *
 * @param ctx - context used to report a failed request
 * @param fulltextUrl - base URL of the fulltext service; trailing slashes are ignored
 * @param token - token sent in the request body
 */
export async function reindexWorkspace (ctx: MeasureContext, fulltextUrl: string, token: string): Promise<void> {
  // Strip trailing slashes so a base URL such as "http://host:4700/" does not
  // produce "http://host:4700//api/v1/reindex".
  const endpoint = fulltextUrl.replace(/\/+$/, '') + '/api/v1/reindex'
  try {
    const res = await fetch(endpoint, {
      method: 'PUT',
      headers: {
        'Content-Type': 'application/json'
      },
      body: JSON.stringify({ token })
    })
    if (!res.ok) {
      // Routed through the catch below so HTTP errors are logged like network errors.
      throw new Error(`HTTP Error ${res.status} ${res.statusText}`)
    }
  } catch (err: any) {
    // Best effort: log and continue so that callers looping over workspaces are not aborted.
    ctx.error('failed to reset index', { err })
  }
}

View File

@ -77,7 +77,7 @@ import { buildStorageFromConfig, createStorageFromConfig, storageConfigFromEnv }
import { program, type Command } from 'commander'
import { addControlledDocumentRank } from './qms'
import { clearTelegramHistory } from './telegram'
import { diffWorkspace, recreateElastic, updateField } from './workspace'
import { diffWorkspace, updateField } from './workspace'
import core, {
AccountRole,
@ -149,6 +149,7 @@ import { fixMixinForeignAttributes, showMixinForeignAttributes } from './mixin'
import { fixAccountEmails, renameAccount } from './renameAccount'
import { copyToDatalake, moveFiles, showLostFiles } from './storage'
import { createPostgresTxAdapter, createPostgresAdapter, createPostgreeDestroyAdapter } from '@hcengineering/postgres'
import { reindexWorkspace } from './fulltext'
const colorConstants = {
colorRed: '\u001b[31m',
@ -1925,27 +1926,43 @@ export function devTool (
)
// CLI command that takes a <workspace> argument and asks the fulltext service
// (FULLTEXT_URL) to reindex that workspace via reindexWorkspace.
// NOTE(review): this span carries two .command()/.description() pairs and calls
// both recreateElastic and reindexWorkspace — it looks like old and new lines of
// a change shown together; confirm which lines are current before editing.
program
.command('recreate-elastic-indexes-mongo <workspace>')
.description('reindex workspace to elastic')
.command('fulltext-reindex <workspace>')
.description('reindex workspace')
.action(async (workspace: string) => {
const mongodbUri = getMongoDBUrl()
// The fulltext service URL is read from the environment; without it the
// process prints a hint and exits with code 1.
const fulltextUrl = process.env.FULLTEXT_URL
if (fulltextUrl === undefined) {
console.error('please provide FULLTEXT_URL')
process.exit(1)
}
const wsid = getWorkspaceId(workspace)
await recreateElastic(mongodbUri, wsid)
// Token generated for systemAccountEmail and this workspace id; it is the
// only payload passed on to reindexWorkspace.
const token = generateToken(systemAccountEmail, wsid)
console.log('reindex workspace', workspace)
await reindexWorkspace(toolCtx, fulltextUrl, token)
// reindexWorkspace logs failures instead of throwing, so 'done' is printed
// whether or not the request succeeded.
console.log('done', workspace)
})
// CLI command without arguments: lists all workspaces from the account database
// and asks the fulltext service (FULLTEXT_URL) to reindex them one after another.
// NOTE(review): this span carries two .command()/.description() pairs and calls
// both recreateElastic and reindexWorkspace — it looks like old and new lines of
// a change shown together; confirm which lines are current before editing.
program
.command('recreate-all-elastic-indexes-mongo')
.description('reindex elastic')
.command('fulltext-reindex-all')
.description('reindex workspaces')
.action(async () => {
const { dbUrl } = prepareTools()
const mongodbUri = getMongoDBUrl()
// The fulltext service URL is read from the environment; without it the
// process prints a hint and exits with code 1.
const fulltextUrl = process.env.FULLTEXT_URL
if (fulltextUrl === undefined) {
console.error('please provide FULLTEXT_URL')
process.exit(1)
}
await withAccountDatabase(async (db) => {
const workspaces = await listWorkspacesRaw(db)
// Descending by lastVisit: workspaces with the larger lastVisit are processed first.
workspaces.sort((a, b) => b.lastVisit - a.lastVisit)
// Sequential on purpose of the loop shape: each request is awaited before the next starts.
for (const workspace of workspaces) {
const wsid = getWorkspaceId(workspace.workspace)
await recreateElastic(mongodbUri ?? dbUrl, wsid)
// Token generated for systemAccountEmail and this workspace id.
const token = generateToken(systemAccountEmail, wsid)
// NOTE(review): 'workspace' is the listed record (its .workspace field is used
// for the id above), so the whole record is logged here, not just its name.
console.log('reindex workspace', workspace)
await reindexWorkspace(toolCtx, fulltextUrl, token)
// reindexWorkspace logs failures instead of throwing, so the loop continues and
// 'done' is printed whether or not the request succeeded.
console.log('done', workspace)
}
})
})

View File

@ -20,7 +20,6 @@ import core, {
type Class,
type Client as CoreClient,
type Doc,
DOMAIN_DOC_INDEX_STATE,
DOMAIN_TX,
type Ref,
type Tx,
@ -96,16 +95,3 @@ export async function updateField (
await connection.close()
}
}
/**
 * Marks every DocIndexState document of a workspace as needing indexing.
 *
 * Sets `needIndex: true` on all documents of class `core.class.DocIndexState`
 * in the DOMAIN_DOC_INDEX_STATE collection of the workspace database.
 *
 * @param mongoUrl - MongoDB connection URL passed to getMongoClient
 * @param workspaceId - workspace whose database is updated
 */
export async function recreateElastic (mongoUrl: string, workspaceId: WorkspaceId): Promise<void> {
  const clientRef = getMongoClient(mongoUrl)
  const connection = await clientRef.getClient()
  try {
    const indexStates = getWorkspaceMongoDB(connection, workspaceId).collection(DOMAIN_DOC_INDEX_STATE)
    await indexStates.updateMany({ _class: core.class.DocIndexState }, { $set: { needIndex: true } })
  } finally {
    // Release the client reference whether or not the update succeeded.
    clientRef.close()
  }
}

View File

@ -161,6 +161,14 @@ class WorkspaceIndexer {
return result
}
// Restarts indexing from scratch: cancels the fulltext pipeline, clears its
// index state via clearIndex(), then starts indexing again. The callback handed
// to startIndexing refreshes lastUpdate each time it is invoked.
async reindex (): Promise<void> {
  const markUpdated = (): void => {
    this.lastUpdate = Date.now()
  }
  const pipeline = this.fulltext
  await pipeline.cancel()
  await pipeline.clearIndex()
  await pipeline.startIndexing(markUpdated)
}
async close (): Promise<void> {
await this.fulltext.cancel()
await this.pipeline.close()
@ -188,6 +196,10 @@ interface Search {
fullTextLimit: number
}
// Request body accepted by PUT /api/v1/reindex.
interface Reindex {
// Token that the handler decodes to find the workspace to reindex.
token: string
}
export async function startIndexer (
ctx: MeasureContext,
opt: {
@ -391,6 +403,26 @@ export async function startIndexer (
}
})
// PUT /api/v1/reindex: decodes the token from the request body, looks up the
// indexer of the token's workspace and, when one is returned, triggers a full
// reindex. Any error is reported to Analytics, logged, and answered with an
// empty 404 response.
router.put('/api/v1/reindex', async (req, res) => {
  try {
    const payload = req.request.body as Reindex
    const claims = decodeToken(payload.token) // Just to be safe
    req.body = {}

    const workspace = claims.workspace
    ctx.info('reindex', { workspace })

    const workspaceIndexer = await getIndexer(ctx, workspace, payload.token, true)
    if (workspaceIndexer === undefined) {
      return
    }
    // Refresh the activity timestamp before the reindex starts.
    workspaceIndexer.lastUpdate = Date.now()
    await workspaceIndexer.reindex()
  } catch (err: any) {
    Analytics.handleError(err)
    console.error(err)
    req.res.writeHead(404, {})
    req.res.end()
  }
})
app.use(router.routes()).use(router.allowedMethods())
const server = app.listen(opt.port, () => {

View File

@ -165,6 +165,7 @@ export class FullTextIndexPipeline implements FullTextPipeline {
triggerIndexing = (): void => {}
async startIndexing (indexing: () => void): Promise<void> {
this.cancelling = false
this.verify = this.verifyWorkspace(this.metrics, indexing)
void this.verify.then(() => {
this.indexing = this.doIndexing(indexing)
@ -282,6 +283,19 @@ export class FullTextIndexPipeline implements FullTextPipeline {
}
}
// Removes the recorded MigrationState documents of the core plugin for the
// three states listed below from DOMAIN_MIGRATION.
// NOTE(review): presumably this makes the next startIndexing() redo those
// steps — confirm against the code that writes these states.
async clearIndex (): Promise<void> {
  const metrics = this.metrics
  const statesToForget = ['verify-indexes-v2', 'full-text-indexer-v4', 'full-text-structure-v4']
  const recorded = await this.storage.findAll<MigrationState>(metrics, core.class.MigrationState, {
    plugin: coreId,
    state: { $in: statesToForget }
  })
  await this.storage.clean(
    metrics,
    DOMAIN_MIGRATION,
    recorded.map((migration) => migration._id)
  )
}
broadcastClasses = new Set<Ref<Class<Doc>>>()
broadcasts: number = 0