UBERF-9739: Try to fix backup hang (#8496)

Signed-off-by: Andrey Sobolev <haiodo@gmail.com>
This commit is contained in:
Andrey Sobolev 2025-04-08 16:41:00 +07:00 committed by GitHub
parent f81d1d66f3
commit 17f0aedaf6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 21 additions and 6 deletions

View File

@@ -188,7 +188,7 @@ async function loadDigest (
 result.delete(k as Ref<Doc>)
 }
 } catch (err: any) {
-ctx.error('digest is broken, will do full backup for', { domain, err: err.message, snapshot })
+ctx.warn('digest is broken, will do full backup for', { domain, err: err.message, snapshot })
 }
 }
 // Stop if stop date is matched and provided
// Stop if stop date is matched and provided
@@ -386,7 +386,7 @@ async function updateDigest (
 } catch (err: any) {
 digestToRemove.add(snapshot)
 modifiedFiles.push(snapshot)
-ctx.error('digest is broken, will do full backup for', { domain, err: err.message, snapshot })
+ctx.warn('digest is broken, will do full backup for', { domain, err: err.message, snapshot })
 modified = true
 }
 }

View File

@@ -42,9 +42,9 @@ import {
 } from '@hcengineering/server-core'
 import { getAccountClient } from '@hcengineering/server-client'
 import { generateToken } from '@hcengineering/server-token'
-import { clearInterval } from 'node:timers'
 import { backup, restore } from '.'
 import { createStorageBackupStorage } from './storage'
+import { clearInterval } from 'node:timers'
 export interface BackupConfig {
 AccountsURL: string
 Token: string
@@ -210,6 +210,7 @@ class BackupWorker {
 const rateLimiter = new RateLimiter(this.config.Parallel)
 const times: number[] = []
+const activeWorkspaces = new Set<string>()
 const infoTo = setInterval(() => {
 const avgTime = times.length > 0 ? Math.round(times.reduce((p, c) => p + c, 0) / times.length) / 1000 : 0
@@ -220,7 +221,8 @@
 index,
 Elapsed: (Date.now() - startTime) / 1000,
 ETA: Math.round((workspaces.length - processed) * avgTime),
-active: rateLimiter.processingQueue.size
+activeLen: activeWorkspaces.size,
+active: Array.from(activeWorkspaces).join(',')
 })
 }, 10000)
@@ -228,6 +230,7 @@
 for (const ws of workspaces) {
 await rateLimiter.add(async () => {
 try {
+activeWorkspaces.add(ws.uuid)
 index++
 if (this.canceled || Date.now() - startTime > recheckTimeout) {
 return // If canceled, we should stop
@@ -243,6 +246,9 @@
 processed++
 } catch (err: any) {
 ctx.error('Backup failed', { err })
+failedWorkspaces.push(ws)
+} finally {
+activeWorkspaces.delete(ws.uuid)
 }
 })
 }
@@ -355,6 +361,14 @@ class BackupWorker {
 // We need to report update for stats to account service
 const token = generateToken(systemAccountUuid, ws.uuid, { service: 'backup' })
 await getAccountClient(token).updateBackupInfo(backupInfo)
+} else {
+rootCtx.error('BACKUP FAILED', {
+workspace: ws.uuid,
+workspaceUrl: ws.url,
+workspaceName: ws.name,
+time: Math.round((Date.now() - st) / 1000)
+})
+return false
 }
 } catch (err: any) {
 rootCtx.error('\n\nFAILED to BACKUP', { workspace: ws.uuid, url: ws.url, err })

View File

@@ -75,6 +75,7 @@ import {
 type TxAdapter
 } from '@hcengineering/server-core'
 import {
+ObjectId,
 type AbstractCursor,
 type AnyBulkWriteOperation,
 type Collection,
@@ -1125,14 +1126,14 @@ abstract class MongoAdapterBase implements DbAdapter {
 const result: DocInfo[] = []
 if (d != null) {
 result.push({
-id: d._id,
+id: (d._id as any) instanceof ObjectId ? d._id.toString() : d._id,
 hash: this.strimSize((d as any)['%hash%'])
 })
 }
 if (iterator.bufferedCount() > 0) {
 result.push(
 ...iterator.readBufferedDocuments().map((it) => ({
-id: it._id,
+id: (it._id as any) instanceof ObjectId ? it._id.toString() : it._id,
 hash: this.strimSize((it as any)['%hash%'])
 }))
 )