UBERF-9739: Try to fix backup hang (#8494)

Signed-off-by: Andrey Sobolev <haiodo@gmail.com>
Author: Andrey Sobolev <haiodo@gmail.com>, 2025-04-08 16:40:48 +07:00 (committed by GitHub)
Parent: 96d39ad728
Commit: 7f117a46e5
GPG Key ID: B5690EEEBB952194
3 changed files with 23 additions and 8 deletions

File 1 of 3:

@@ -188,7 +188,7 @@ async function loadDigest (
           result.delete(k as Ref<Doc>)
         }
       } catch (err: any) {
-        ctx.error('digest is broken, will do full backup for', { domain, err: err.message, snapshot })
+        ctx.warn('digest is broken, will do full backup for', { domain, err: err.message, snapshot })
       }
     }
     // Stop if stop date is matched and provided
@@ -377,7 +377,7 @@ async function updateDigest (
     } catch (err: any) {
       digestToRemove.add(snapshot)
       modifiedFiles.push(snapshot)
-      ctx.error('digest is broken, will do full backup for', { domain, err: err.message, snapshot })
+      ctx.warn('digest is broken, will do full backup for', { domain, err: err.message, snapshot })
       modified = true
     }
   }
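
Context for the two hunks above: a corrupt digest snapshot is recoverable, because the worker simply falls back to a full backup of the affected domain, so the log level is downgraded from error to warn. A minimal, self-contained sketch of that pattern (the LogContext type and the load callback are simplified stand-ins, not the repository's actual API):

interface LogContext {
  warn: (msg: string, data?: Record<string, unknown>) => void
}

// Sketch: try to read a snapshot digest; on failure, warn and return undefined
// so the caller performs a full backup instead of an incremental one.
async function loadDigestSafe (
  ctx: LogContext,
  domain: string,
  snapshot: string,
  load: (snapshot: string) => Promise<Map<string, string>>
): Promise<Map<string, string> | undefined> {
  try {
    return await load(snapshot)
  } catch (err: any) {
    ctx.warn('digest is broken, will do full backup for', { domain, err: err.message, snapshot })
    return undefined
  }
}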

File 2 of 3:

@@ -40,9 +40,9 @@ import {
   type StorageAdapter
 } from '@hcengineering/server-core'
 import { generateToken } from '@hcengineering/server-token'
+import { clearInterval } from 'node:timers'
 import { backup, restore } from '.'
 import { createStorageBackupStorage } from './storage'
-import { clearInterval } from 'node:timers'
 export interface BackupConfig {
   AccountsURL: string
   Token: string
@@ -208,6 +208,7 @@ class BackupWorker {
     const rateLimiter = new RateLimiter(this.config.Parallel)
     const times: number[] = []
+    const activeWorkspaces = new Set<string>()
     const infoTo = setInterval(() => {
       const avgTime = times.length > 0 ? Math.round(times.reduce((p, c) => p + c, 0) / times.length) / 1000 : 0
@@ -218,7 +219,8 @@
         index,
         Elapsed: (Date.now() - startTime) / 1000,
         ETA: Math.round((workspaces.length - processed) * avgTime),
-        active: rateLimiter.processingQueue.size
+        activeLen: activeWorkspaces.size,
+        active: Array.from(activeWorkspaces).join(',')
       })
     }, 10000)
@@ -226,6 +228,7 @@
     for (const ws of workspaces) {
       await rateLimiter.add(async () => {
         try {
+          activeWorkspaces.add(ws.workspace)
           index++
           if (this.canceled || Date.now() - startTime > recheckTimeout) {
             return // If canceled, we should stop
@@ -241,6 +244,9 @@
           processed++
         } catch (err: any) {
           ctx.error('Backup failed', { err })
+          failedWorkspaces.push(ws)
+        } finally {
+          activeWorkspaces.delete(ws.workspace)
         }
       })
     }
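
The two hunks above are the core of the hang diagnosis: each workspace is added to the activeWorkspaces set before its backup runs and removed in a finally block, so the periodic status log can name exactly which workspaces are still in flight when progress stalls, instead of only reporting a queue size. A self-contained sketch of the pattern, with simplified types and an illustrative log message (the real worker dispatches through RateLimiter and logs additional fields):

interface Logger {
  info: (msg: string, data?: Record<string, unknown>) => void
}

async function runWithTracking (
  ctx: Logger,
  workspaces: string[],
  backupOne: (ws: string) => Promise<void>
): Promise<void> {
  const activeWorkspaces = new Set<string>()
  // Periodic status line: when the loop hangs, the set's contents identify
  // the stuck workspaces.
  const infoTo = setInterval(() => {
    ctx.info('backup progress', {
      activeLen: activeWorkspaces.size,
      active: Array.from(activeWorkspaces).join(',')
    })
  }, 10000)
  try {
    for (const ws of workspaces) {
      activeWorkspaces.add(ws)
      try {
        await backupOne(ws)
      } finally {
        // finally guarantees removal even when backupOne throws; a workspace
        // that never resolves stays visible in the status log.
        activeWorkspaces.delete(ws)
      }
    }
  } finally {
    clearInterval(infoTo)
  }
}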
@@ -348,6 +354,14 @@
         // We need to report update for stats to account service
         const token = generateToken(systemAccountEmail, { name: ws.workspace }, { service: 'backup' })
         await updateBackupInfo(token, backupInfo)
+      } else {
+        rootCtx.error('BACKUP FAILED', {
+          workspace: ws.workspace,
+          workspaceUrl: ws.workspaceUrl,
+          workspaceName: ws.workspaceName,
+          time: Math.round((Date.now() - st) / 1000)
+        })
+        return false
       }
     } catch (err: any) {
       rootCtx.error('\n\nFAILED to BACKUP', { workspace: ws.workspace, err })
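
The new else branch makes a silent failure mode explicit: when a per-workspace backup does not complete successfully, the worker logs BACKUP FAILED with the workspace identifiers and elapsed time, then returns false. A sketch of how such a boolean result can feed a retry list; the doBackup and failedWorkspaces names mirror the diff, but this surrounding loop is an assumption, not the repository's actual code:

async function backupAll (
  workspaces: string[],
  doBackup: (ws: string) => Promise<boolean>
): Promise<string[]> {
  const failedWorkspaces: string[] = []
  for (const ws of workspaces) {
    try {
      const ok = await doBackup(ws)
      if (!ok) {
        failedWorkspaces.push(ws) // completed abnormally: logged upstream, retry later
      }
    } catch (err: any) {
      failedWorkspaces.push(ws) // threw: also a retry candidate
    }
  }
  return failedWorkspaces
}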

File 3 of 3:

@@ -64,15 +64,16 @@ import core, {
   type WorkspaceId
 } from '@hcengineering/core'
 import {
+  calcHashHash,
   type DbAdapter,
   type DbAdapterHandler,
   type DomainHelperOperations,
   type ServerFindOptions,
   type StorageAdapter,
-  type TxAdapter,
-  calcHashHash
+  type TxAdapter
 } from '@hcengineering/server-core'
 import {
+  ObjectId,
   type AbstractCursor,
   type AnyBulkWriteOperation,
   type Collection,
@@ -1123,14 +1124,14 @@ abstract class MongoAdapterBase implements DbAdapter {
     const result: DocInfo[] = []
     if (d != null) {
       result.push({
-        id: d._id,
+        id: (d._id as any) instanceof ObjectId ? d._id.toString() : d._id,
         hash: this.strimSize((d as any)['%hash%'])
       })
     }
     if (iterator.bufferedCount() > 0) {
       result.push(
         ...iterator.readBufferedDocuments().map((it) => ({
-          id: it._id,
+          id: (it._id as any) instanceof ObjectId ? it._id.toString() : it._id,
           hash: this.strimSize((it as any)['%hash%'])
         }))
       )
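
The two id changes above address a plausible cause of the hang: if Mongo returns _id as an ObjectId rather than a string, it never compares equal to the string ids stored in the backup digest, so the same documents could be treated as changed on every pass. A minimal sketch of the normalization, assuming the mongodb driver's ObjectId:

import { ObjectId } from 'mongodb'

// Coerce a Mongo _id to a stable string form so it can be compared against
// the string ids kept in backup digests.
function normalizeId (id: unknown): string {
  return id instanceof ObjectId ? id.toString() : String(id)
}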