UBERF-9226 Workspace reindex tool

Signed-off-by: Alexander Onnikov <Alexander.Onnikov@xored.com>
This commit is contained in:
Alexander Onnikov 2025-01-22 16:46:16 +07:00
parent 0534c2b90e
commit c9a2d0c5d0
No known key found for this signature in database
GPG Key ID: 3320C3B3324E934C
5 changed files with 106 additions and 24 deletions

33
dev/tool/src/fulltext.ts Normal file
View File

@ -0,0 +1,33 @@
//
// Copyright © 2025 Hardcore Engineering Inc.
//
// Licensed under the Eclipse Public License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. You may
// obtain a copy of the License at https://www.eclipse.org/legal/epl-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//
// See the License for the specific language governing permissions and
// limitations under the License.
//
import { type MeasureContext } from '@hcengineering/core'
/**
 * Asks the fulltext service to reindex the workspace identified by `token`.
 *
 * Sends `PUT {fulltextUrl}/api/v1/reindex` with a JSON body of `{ token }`.
 * Any failure (a rejected request or a non-2xx response) is reported through
 * `ctx.error` and is NOT rethrown, so the returned promise always resolves.
 *
 * @param ctx - Measure context used to report failures.
 * @param fulltextUrl - Base URL of the fulltext service; trailing slashes are ignored.
 * @param token - Workspace token, sent to the service in the request body.
 */
export async function reindexWorkspace (ctx: MeasureContext, fulltextUrl: string, token: string): Promise<void> {
  // Drop trailing slashes so a base such as "http://host:4700/" does not
  // turn into "http://host:4700//api/v1/reindex".
  const endpoint = fulltextUrl.replace(/\/+$/, '') + '/api/v1/reindex'
  try {
    const res = await fetch(endpoint, {
      method: 'PUT',
      headers: {
        'Content-Type': 'application/json'
      },
      body: JSON.stringify({ token })
    })
    if (!res.ok) {
      // Thrown only to funnel HTTP failures into the same reporting path as
      // network errors; it is caught immediately below.
      throw new Error(`HTTP Error ${res.status} ${res.statusText}`)
    }
  } catch (err: any) {
    ctx.error('failed to reset index', { err })
  }
}

View File

@ -77,7 +77,7 @@ import { buildStorageFromConfig, createStorageFromConfig, storageConfigFromEnv }
import { program, type Command } from 'commander' import { program, type Command } from 'commander'
import { addControlledDocumentRank } from './qms' import { addControlledDocumentRank } from './qms'
import { clearTelegramHistory } from './telegram' import { clearTelegramHistory } from './telegram'
import { diffWorkspace, recreateElastic, updateField } from './workspace' import { diffWorkspace, updateField } from './workspace'
import core, { import core, {
AccountRole, AccountRole,
@ -149,6 +149,7 @@ import { fixMixinForeignAttributes, showMixinForeignAttributes } from './mixin'
import { fixAccountEmails, renameAccount } from './renameAccount' import { fixAccountEmails, renameAccount } from './renameAccount'
import { copyToDatalake, moveFiles, showLostFiles } from './storage' import { copyToDatalake, moveFiles, showLostFiles } from './storage'
import { createPostgresTxAdapter, createPostgresAdapter, createPostgreeDestroyAdapter } from '@hcengineering/postgres' import { createPostgresTxAdapter, createPostgresAdapter, createPostgreeDestroyAdapter } from '@hcengineering/postgres'
import { reindexWorkspace } from './fulltext'
const colorConstants = { const colorConstants = {
colorRed: '\u001b[31m', colorRed: '\u001b[31m',
@ -1925,27 +1926,43 @@ export function devTool (
) )
program program
.command('recreate-elastic-indexes-mongo <workspace>') .command('fulltext-reindex <workspace>')
.description('reindex workspace to elastic') .description('reindex workspace')
.action(async (workspace: string) => { .action(async (workspace: string) => {
const mongodbUri = getMongoDBUrl() const fulltextUrl = process.env.FULLTEXT_URL
if (fulltextUrl === undefined) {
console.error('please provide FULLTEXT_URL')
process.exit(1)
}
const wsid = getWorkspaceId(workspace) const wsid = getWorkspaceId(workspace)
await recreateElastic(mongodbUri, wsid) const token = generateToken(systemAccountEmail, wsid)
console.log('reindex workspace', workspace)
await reindexWorkspace(toolCtx, fulltextUrl, token)
console.log('done', workspace)
}) })
program program
.command('recreate-all-elastic-indexes-mongo') .command('fulltext-reindex-all')
.description('reindex elastic') .description('reindex workspaces')
.action(async () => { .action(async () => {
const { dbUrl } = prepareTools() const fulltextUrl = process.env.FULLTEXT_URL
const mongodbUri = getMongoDBUrl() if (fulltextUrl === undefined) {
console.error('please provide FULLTEXT_URL')
process.exit(1)
}
await withAccountDatabase(async (db) => { await withAccountDatabase(async (db) => {
const workspaces = await listWorkspacesRaw(db) const workspaces = await listWorkspacesRaw(db)
workspaces.sort((a, b) => b.lastVisit - a.lastVisit) workspaces.sort((a, b) => b.lastVisit - a.lastVisit)
for (const workspace of workspaces) { for (const workspace of workspaces) {
const wsid = getWorkspaceId(workspace.workspace) const wsid = getWorkspaceId(workspace.workspace)
await recreateElastic(mongodbUri ?? dbUrl, wsid) const token = generateToken(systemAccountEmail, wsid)
console.log('reindex workspace', workspace)
await reindexWorkspace(toolCtx, fulltextUrl, token)
console.log('done', workspace)
} }
}) })
}) })

View File

@ -20,7 +20,6 @@ import core, {
type Class, type Class,
type Client as CoreClient, type Client as CoreClient,
type Doc, type Doc,
DOMAIN_DOC_INDEX_STATE,
DOMAIN_TX, DOMAIN_TX,
type Ref, type Ref,
type Tx, type Tx,
@ -96,16 +95,3 @@ export async function updateField (
await connection.close() await connection.close()
} }
} }
/**
 * Flags every DocIndexState document of a workspace as needing indexing.
 *
 * Sets `needIndex: true` on all documents whose `_class` is
 * `core.class.DocIndexState` in the `DOMAIN_DOC_INDEX_STATE` collection of
 * the workspace's MongoDB database. The client reference obtained from
 * `getMongoClient` is closed afterwards whether or not the update succeeded.
 *
 * @param mongoUrl - MongoDB connection URL.
 * @param workspaceId - Workspace whose database is updated.
 */
export async function recreateElastic (mongoUrl: string, workspaceId: WorkspaceId): Promise<void> {
  const clientRef = getMongoClient(mongoUrl)
  const connection = await clientRef.getClient()
  try {
    const indexStates = getWorkspaceMongoDB(connection, workspaceId).collection(DOMAIN_DOC_INDEX_STATE)
    await indexStates.updateMany({ _class: core.class.DocIndexState }, { $set: { needIndex: true } })
  } finally {
    clientRef.close()
  }
}

View File

@ -161,6 +161,14 @@ class WorkspaceIndexer {
return result return result
} }
/**
 * Restarts full-text indexing for this workspace.
 *
 * Cancels the pipeline, calls its `clearIndex()`, and then starts indexing
 * again. The callback handed to `startIndexing` refreshes `lastUpdate` with
 * the current time whenever the pipeline invokes it.
 */
async reindex (): Promise<void> {
  const touch = (): void => {
    this.lastUpdate = Date.now()
  }

  await this.fulltext.cancel()
  await this.fulltext.clearIndex()
  await this.fulltext.startIndexing(touch)
}
async close (): Promise<void> { async close (): Promise<void> {
await this.fulltext.cancel() await this.fulltext.cancel()
await this.pipeline.close() await this.pipeline.close()
@ -188,6 +196,10 @@ interface Search {
fullTextLimit: number fullTextLimit: number
} }
/** Request body accepted by the `PUT /api/v1/reindex` route. */
interface Reindex {
// Token string; the route handler passes it to decodeToken to obtain the workspace.
token: string
}
export async function startIndexer ( export async function startIndexer (
ctx: MeasureContext, ctx: MeasureContext,
opt: { opt: {
@ -391,6 +403,26 @@ export async function startIndexer (
} }
}) })
// PUT /api/v1/reindex: decodes the token from the request body, looks up the
// indexer for the token's workspace and asks it to reindex. Any error is
// reported to Analytics and the console, and answered with an empty 404.
router.put('/api/v1/reindex', async (req, res) => {
  try {
    const { token } = req.request.body as Reindex
    // Decoding happens before anything else so a bad token fails the request early.
    const claims = decodeToken(token) // Just to be safe
    req.body = {}
    const workspace = claims.workspace

    ctx.info('reindex', { workspace })
    const indexer = await getIndexer(ctx, workspace, token, true)
    if (indexer === undefined) {
      return
    }

    // Refresh the activity timestamp before the reindex starts.
    indexer.lastUpdate = Date.now()
    await indexer.reindex()
  } catch (err: any) {
    Analytics.handleError(err)
    console.error(err)
    req.res.writeHead(404, {})
    req.res.end()
  }
})
app.use(router.routes()).use(router.allowedMethods()) app.use(router.routes()).use(router.allowedMethods())
const server = app.listen(opt.port, () => { const server = app.listen(opt.port, () => {

View File

@ -165,6 +165,7 @@ export class FullTextIndexPipeline implements FullTextPipeline {
triggerIndexing = (): void => {} triggerIndexing = (): void => {}
async startIndexing (indexing: () => void): Promise<void> { async startIndexing (indexing: () => void): Promise<void> {
this.cancelling = false
this.verify = this.verifyWorkspace(this.metrics, indexing) this.verify = this.verifyWorkspace(this.metrics, indexing)
void this.verify.then(() => { void this.verify.then(() => {
this.indexing = this.doIndexing(indexing) this.indexing = this.doIndexing(indexing)
@ -282,6 +283,19 @@ export class FullTextIndexPipeline implements FullTextPipeline {
} }
} }
/**
 * Deletes the stored migration-state records listed below.
 *
 * Looks up `MigrationState` documents of the core plugin whose `state` is
 * one of the listed values and removes them from `DOMAIN_MIGRATION`.
 * NOTE(review): presumably this makes the corresponding verification and
 * indexing steps run again on the next start — confirm against the code
 * that reads these states.
 */
async clearIndex (): Promise<void> {
  const metrics = this.metrics
  const statesToReset = ['verify-indexes-v2', 'full-text-indexer-v4', 'full-text-structure-v4']

  const found = await this.storage.findAll<MigrationState>(metrics, core.class.MigrationState, {
    plugin: coreId,
    state: { $in: statesToReset }
  })

  const ids = found.map((state) => state._id)
  await this.storage.clean(metrics, DOMAIN_MIGRATION, ids)
}
broadcastClasses = new Set<Ref<Class<Doc>>>() broadcastClasses = new Set<Ref<Class<Doc>>>()
broadcasts: number = 0 broadcasts: number = 0