diff --git a/server/client/src/account.ts b/server/client/src/account.ts
index 3d7ebe465f..c2bac8440e 100644
--- a/server/client/src/account.ts
+++ b/server/client/src/account.ts
@@ -102,6 +102,44 @@ export async function getTransactorEndpoint (
}
}
+/**
+ * Wraps an async function so that connection failures are retried until a deadline.
+ *
+ * The returned function forwards its arguments to `f`. If `f` rejects with an error whose
+ * `cause.code` is `ECONNRESET` or `ECONNREFUSED`, the call is repeated after a one second pause.
+ * Any other error, and any error caught after the deadline has passed, is rethrown unchanged.
+ * The deadline is checked only when a call fails, so a pending call is never interrupted.
+ *
+ * @param f - Async function to wrap.
+ * @param timeoutMs - Retry window in milliseconds, counted from each call of the returned function.
+ * @returns A function taking the same parameters as `f` and resolving to the same result.
+ * @throws The error thrown by `f` when it is not retryable or the retry window has elapsed.
+ */
+export function withRetryUntilTimeout<P extends unknown[], T> (
+  f: (...params: P) => Promise<T>,
+  timeoutMs: number = 5000
+): (...params: P) => Promise<T> {
+  return async function (...params: P): Promise<T> {
+    const timeout = Date.now() + timeoutMs
+    while (true) {
+      try {
+        return await f(...params)
+      } catch (err: any) {
+        if (timeout < Date.now()) {
+          // Retry window is over: surface the most recent failure to the caller
+          throw err
+        }
+        if (err?.cause?.code === 'ECONNRESET' || err?.cause?.code === 'ECONNREFUSED') {
+          // Connection was reset or refused: pause for a second, then try again
+          await new Promise<void>((resolve) => setTimeout(resolve, 1000))
+        } else {
+          throw err
+        }
+      }
+    }
+  }
+}
+
export async function getPendingWorkspace (
token: string,
region: string,
diff --git a/server/workspace-service/src/service.ts b/server/workspace-service/src/service.ts
index dcad505b67..3486edd28d 100644
--- a/server/workspace-service/src/service.ts
+++ b/server/workspace-service/src/service.ts
@@ -24,7 +24,12 @@ import {
getWorkspaceId
} from '@hcengineering/core'
import { type MigrateOperation, type ModelLogger } from '@hcengineering/model'
-import { getPendingWorkspace, updateWorkspaceInfo, workerHandshake } from '@hcengineering/server-client'
+import {
+ getPendingWorkspace,
+ updateWorkspaceInfo,
+ workerHandshake,
+ withRetryUntilTimeout
+} from '@hcengineering/server-client'
import { generateToken } from '@hcengineering/server-token'
import { FileModelLogger } from '@hcengineering/server-tool'
import path from 'path'
@@ -145,15 +150,33 @@ export class WorkspaceWorker {
const branding = getBranding(this.brandings, ws.branding)
const wsId = getWorkspaceId(ws.workspace)
const token = generateToken(systemAccountEmail, wsId, { service: 'workspace' })
- const handleWsEvent = updateWorkspaceInfo.bind(null, token, ws.workspace)
+ const handleWsEventWithRetry = (
+ event: 'ping' | 'create-started' | 'progress' | 'create-done',
+ version: Data,
+ progress: number,
+ message?: string
+ ): Promise => {
+ return withRetryUntilTimeout(
+ () => updateWorkspaceInfo(token, ws.workspace, event, version, progress, message),
+ 5000
+ )()
+ }
if (ws.mode !== 'creating' || (ws.progress ?? 0) < 30) {
- await createWorkspace(ctx, this.version, branding, ws, this.txes, this.migrationOperation, handleWsEvent)
+ await createWorkspace(
+ ctx,
+ this.version,
+ branding,
+ ws,
+ this.txes,
+ this.migrationOperation,
+ handleWsEventWithRetry
+ )
} else {
// The previous attempth failed during init script and we cannot really retry it.
// But it should not be a blocker though. We can just warn user about that if we want.
// So we don't clear the previous error message if any
- await handleWsEvent?.('create-done', this.version, ws.progress ?? 0)
+ await handleWsEventWithRetry?.('create-done', this.version, ws.progress ?? 0)
}
ctx.info('---CREATE-DONE---------', {
@@ -171,7 +194,7 @@ export class WorkspaceWorker {
ctx.error('error', { err })
}
- ctx.info('---CREATE-FAILED---------', {
+ ctx.error('---CREATE-FAILED---------', {
workspace: ws.workspace,
version: this.version,
region: this.region,
@@ -210,7 +233,17 @@ export class WorkspaceWorker {
try {
const wsId = getWorkspaceId(ws.workspace)
const token = generateToken(systemAccountEmail, wsId, { service: 'workspace' })
- const handleWsEvent = updateWorkspaceInfo.bind(null, token, ws.workspace)
+ const handleWsEventWithRetry = (
+ event: 'upgrade-started' | 'progress' | 'upgrade-done' | 'ping',
+ version: Data,
+ progress: number,
+ message?: string
+ ): Promise => {
+ return withRetryUntilTimeout(
+ () => updateWorkspaceInfo(token, ws.workspace, event, version, progress, message),
+ 5000
+ )()
+ }
await upgradeWorkspace(
ctx,
@@ -219,7 +252,7 @@ export class WorkspaceWorker {
this.migrationOperation,
ws,
logger,
- handleWsEvent,
+ handleWsEventWithRetry,
opt.force
)
ctx.info('---UPGRADE-DONE---------', {
@@ -237,7 +270,7 @@ export class WorkspaceWorker {
ctx.error('error', { err })
}
- ctx.info('---UPGRADE-FAILED---------', {
+ ctx.error('---UPGRADE-FAILED---------', {
workspace: ws.workspace,
version: this.version,
region: this.region,