diff --git a/common/config/rush/pnpm-lock.yaml b/common/config/rush/pnpm-lock.yaml index 8f5ea7356e..98600b1128 100644 --- a/common/config/rush/pnpm-lock.yaml +++ b/common/config/rush/pnpm-lock.yaml @@ -1727,8 +1727,8 @@ importers: specifier: ^3.1.0 version: 3.1.0 mammoth: - specifier: ^1.6.0 - version: 1.8.0 + specifier: ^1.9.0 + version: 1.9.0 markdown-it: specifier: ^14.0.0 version: 14.0.0 @@ -4119,7 +4119,7 @@ packages: version: 0.0.0 '@rush-temp/import-tool@file:projects/import-tool.tgz': - resolution: {integrity: sha512-imMlneB1gppaXcJi4brs9jxXdGgK5zUGanT6vjF2+lNnrRCnp1X3Ys6/iRVLlexVOnSsA6BTpaZ1Xo5SSTO/gg==, tarball: file:projects/import-tool.tgz} + resolution: {integrity: sha512-yTXXuY90bmLrEEiipvDCuv+mLPokLCSLajsI3+hEES0iCdCbiJlua2NYv73WVshCcu0oq1acpRPElKxELjNgDw==, tarball: file:projects/import-tool.tgz} version: 0.0.0 '@rush-temp/importer@file:projects/importer.tgz': @@ -4587,7 +4587,7 @@ packages: version: 0.0.0 '@rush-temp/pod-print@file:projects/pod-print.tgz': - resolution: {integrity: sha512-19hcUtJBpca/kmHtIv4Z30rvHIaMLeGm3PdirKFuVS7VAdmYV6Um+m4o+5fq2LUUcda4T10vHscPeAvZR8EPEA==, tarball: file:projects/pod-print.tgz} + resolution: {integrity: sha512-uH6mY0Z3/3bbAe+rGjr2WPJmavKnoNgKZ5KoLqToGGF+iDzZOZGLjd8OrwUI90Z3behW7PPIRuPDGiRmFGOtcw==, tarball: file:projects/pod-print.tgz} version: 0.0.0 '@rush-temp/pod-server@file:projects/pod-server.tgz': @@ -4671,7 +4671,7 @@ packages: version: 0.0.0 '@rush-temp/qms-doc-import-tool@file:projects/qms-doc-import-tool.tgz': - resolution: {integrity: sha512-QqdrovP6ZWs8dmU+6Ly95n8BFMjlvtMcme4uj9XgPqCKMOmuMhtQ0Wn5ae3h8hzgB1K2HK25F0BPrzhRBGlxTA==, tarball: file:projects/qms-doc-import-tool.tgz} + resolution: {integrity: sha512-m7UFAU/1lPMVaVWNf5rvDKrRWTxOzjuWinK48EQ8OSJD3JSB5SP/IHrW9zSckgWBnLYL1vnljAgjM0QZ2qjIlQ==, tarball: file:projects/qms-doc-import-tool.tgz} version: 0.0.0 '@rush-temp/qms-tests-sanity@file:projects/qms-tests-sanity.tgz': @@ -4711,7 +4711,7 @@ packages: version: 0.0.0 '@rush-temp/rekoni-service@file:projects/rekoni-service.tgz': - resolution: {integrity: sha512-c3Vh1CX471Q8N6l5hoftqqktDZ7PuqzOXwcnhy5bNc14ynVi8Q9mTV7hHeP5xCFzQzepXCQ+tF5Etparbn2pdQ==, tarball: file:projects/rekoni-service.tgz} + resolution: {integrity: sha512-KwM2th57U3OVRVgPsNgcakA3gCpIYsASv2TeeqJAbv0cFim1ha1xDaeb8A96O9vTHjHRknDfgqQVYYIUdosQwQ==, tarball: file:projects/rekoni-service.tgz} version: 0.0.0 '@rush-temp/rekoni@file:projects/rekoni.tgz': @@ -10276,8 +10276,8 @@ packages: resolution: {integrity: sha512-lyuxPGr/Wfhrlem2CL/UcnUc1zcqKAImBDzukY7Y5F/yQiNdko6+fRLevlw1HgMySw7f611UIY408EtxRSoK3Q==} hasBin: true - lop@0.4.1: - resolution: {integrity: sha512-9xyho9why2A2tzm5aIcMWKvzqKsnxrf9B5I+8O30olh6lQU8PH978LqZoI4++37RBgS1Em5i54v1TFs/3wnmXQ==} + lop@0.4.2: + resolution: {integrity: sha512-RefILVDQ4DKoRZsJ4Pj22TxE3omDO47yFpkIBoDKzkqPRISs5U1cnAdg/5583YPkWPaLIYHOKRMQSvjFsO26cw==} lower-case@2.0.2: resolution: {integrity: sha512-7fm3l3NAF9WfN6W3JOmf5drwpVqX78JtoGJ3A6W0a6ZnldM41w2fV5D490psKFTpMds8TJse/eHLFFsNHHjHgg==} @@ -10328,8 +10328,8 @@ packages: makeerror@1.0.12: resolution: {integrity: sha512-JmqCvUhmt43madlpFzG4BQzG2Z3m6tvQDNKdClZnO3VbIudJYmxsT0FNJMeiB2+JTSlTQTSbU8QdesVmwJcmLg==} - mammoth@1.8.0: - resolution: {integrity: sha512-pJNfxSk9IEGVpau+tsZFz22ofjUsl2mnA5eT8PjPs2n0BP+rhVte4Nez6FdgEuxv3IGI3afiV46ImKqTGDVlbA==} + mammoth@1.9.0: + resolution: {integrity: sha512-F+0NxzankQV9XSUAuVKvkdQK0GbtGGuqVnND9aVf9VSeUA82LQa29GjLqYU6Eez8LHqSJG3eGiDW3224OKdpZg==} engines: {node: '>=12.0.0'} hasBin: true @@ -18227,6 +18227,7 @@ snapshots: '@rush-temp/import-tool@file:projects/import-tool.tgz(@babel/core@7.23.9)(@jest/types@29.6.3)(babel-jest@29.7.0(@babel/core@7.23.9))': dependencies: '@types/jest': 29.5.12 + '@types/js-yaml': 4.0.9 '@types/node': 20.11.19 '@typescript-eslint/eslint-plugin': 6.21.0(@typescript-eslint/parser@6.21.0(eslint@8.56.0)(typescript@5.3.3))(eslint@8.56.0)(typescript@5.6.2) '@typescript-eslint/parser': 6.21.0(eslint@8.56.0)(typescript@5.6.2) @@ -18239,6 +18240,8 @@ snapshots: eslint-plugin-n: 15.7.0(eslint@8.56.0) eslint-plugin-promise: 6.1.1(eslint@8.56.0) jest: 29.7.0(@types/node@20.11.19)(ts-node@10.9.2(@types/node@20.11.19)(typescript@5.3.3)) + js-yaml: 4.1.0 + mammoth: 1.9.0 prettier: 3.2.5 ts-jest: 29.1.2(@babel/core@7.23.9)(@jest/types@29.6.3)(babel-jest@29.7.0(@babel/core@7.23.9))(esbuild@0.24.2)(jest@29.7.0(@types/node@20.11.19)(ts-node@10.9.2(@types/node@20.11.19)(typescript@5.3.3)))(typescript@5.6.2) ts-node: 10.9.2(@types/node@20.11.19)(typescript@5.6.2) @@ -20760,7 +20763,7 @@ snapshots: eslint-plugin-promise: 6.1.1(eslint@8.56.0) express: 4.21.2 jest: 29.7.0(@types/node@20.11.19)(ts-node@10.9.2(@types/node@20.11.19)(typescript@5.3.3)) - mammoth: 1.8.0 + mammoth: 1.9.0 prettier: 3.2.5 puppeteer: 22.14.0(bufferutil@4.0.8)(typescript@5.3.3)(utf-8-validate@6.0.4) ts-jest: 29.1.2(@babel/core@7.23.9)(@jest/types@29.6.3)(babel-jest@29.7.0(@babel/core@7.23.9))(esbuild@0.24.2)(jest@29.7.0(@types/node@20.11.19)(ts-node@10.9.2(@types/node@20.11.19)(typescript@5.3.3)))(typescript@5.3.3) @@ -21533,7 +21536,7 @@ snapshots: eslint-plugin-promise: 6.1.1(eslint@8.56.0) htmlparser2: 9.1.0 jest: 29.7.0(@types/node@20.11.19)(ts-node@10.9.2(@types/node@20.11.19)(typescript@5.3.3)) - mammoth: 1.8.0 + mammoth: 1.9.0 prettier: 3.2.5 ts-jest: 29.1.2(@babel/core@7.23.9)(@jest/types@29.6.3)(babel-jest@29.7.0(@babel/core@7.23.9))(esbuild@0.24.2)(jest@29.7.0(@types/node@20.11.19)(ts-node@10.9.2(@types/node@20.11.19)(typescript@5.3.3)))(typescript@5.3.3) ts-node: 10.9.2(@types/node@20.11.19)(typescript@5.3.3) @@ -21842,7 +21845,7 @@ snapshots: jimp: 0.16.13 jwt-simple: 0.5.6 libphonenumber-js: 1.10.56 - mammoth: 1.8.0 + mammoth: 1.9.0 mime-types: 2.1.35 morgan: 1.10.0 node-loader: 2.0.0(webpack@5.97.1) @@ -32113,7 +32116,7 @@ snapshots: dependencies: js-tokens: 4.0.0 - lop@0.4.1: + lop@0.4.2: dependencies: duck: 0.1.12 option: 0.2.4 @@ -32185,7 +32188,7 @@ snapshots: dependencies: tmpl: 1.0.5 - mammoth@1.8.0: + mammoth@1.9.0: dependencies: '@xmldom/xmldom': 0.8.10 argparse: 1.0.10 @@ -32193,7 +32196,7 @@ snapshots: bluebird: 3.4.7 dingbat-to-unicode: 1.0.1 jszip: 3.10.1 - lop: 0.4.1 + lop: 0.4.2 path-is-absolute: 1.0.1 underscore: 1.13.7 xmlbuilder: 10.1.1 diff --git a/dev/doc-import-tool/package.json b/dev/doc-import-tool/package.json index b12af8eab4..97589e0d79 100644 --- a/dev/doc-import-tool/package.json +++ b/dev/doc-import-tool/package.json @@ -63,7 +63,7 @@ "domhandler": "^5.0.3", "domutils": "^3.1.0", "htmlparser2": "^9.0.0", - "mammoth": "^1.6.0", + "mammoth": "^1.9.0", "docx4js": "^3.2.20", "zod": "^3.22.4" } diff --git a/dev/import-tool/package.json b/dev/import-tool/package.json index f1cbc1d398..d05f79e63e 100644 --- a/dev/import-tool/package.json +++ b/dev/import-tool/package.json @@ -47,13 +47,16 @@ "eslint-plugin-import": "^2.26.0", "eslint-plugin-n": "^15.4.0", "eslint-plugin-promise": "^6.1.1", - "prettier": "^3.1.0" + "prettier": "^3.1.0", + "@types/js-yaml": "^4.0.9" }, "dependencies": { "@hcengineering/core": "^0.6.32", "@hcengineering/platform": "^0.6.11", "@hcengineering/server-client": "^0.6.0", "@hcengineering/importer": "^0.6.1", - "commander": "^8.1.0" + "commander": "^8.1.0", + "js-yaml": "^4.1.0", + "mammoth": "^1.9.0" } } diff --git a/dev/import-tool/src/index.ts b/dev/import-tool/src/index.ts index ff40a87eaa..31595b29db 100644 --- a/dev/import-tool/src/index.ts +++ b/dev/import-tool/src/index.ts @@ -13,6 +13,18 @@ // limitations under the License. // import { concatLink, TxOperations } from '@hcengineering/core' +import { + ClickupImporter, + defaultDocumentPreprocessors, + DocumentConverter, + FrontFileUploader, + importNotion, + UnifiedFormatImporter, + type DocumentConverterOptions, + type FileUploader, + type Logger +} from '@hcengineering/importer' +import { setMetadata } from '@hcengineering/platform' import serverClientPlugin, { createClient, getUserWorkspaces, @@ -20,15 +32,10 @@ import serverClientPlugin, { selectWorkspace } from '@hcengineering/server-client' import { program } from 'commander' -import { setMetadata } from '@hcengineering/platform' -import { - UnifiedFormatImporter, - ClickupImporter, - importNotion, - FrontFileUploader, - type FileUploader, - type Logger -} from '@hcengineering/importer' +import { readFileSync } from 'fs' +import * as yaml from 'js-yaml' +import mammoth from 'mammoth' +import { join } from 'path' class ConsoleLogger implements Logger { log (msg: string, data?: any): void { @@ -165,5 +172,38 @@ export function importTool (): void { }) }) + program + .command('convert-qms-docx ') + .requiredOption('-o, --out ', 'out') + .option('-c, --config ', 'configPath') + .description('convert QMS document into Unified Huly Format') + .action(async (dir: string, cmd) => { + const { out, configPath } = cmd + const configSearchPath = configPath ?? join(dir, 'import.yaml') + + let config: DocumentConverterOptions + try { + const configYaml = readFileSync(configSearchPath, 'utf-8') + const configFromFile = yaml.load(configYaml) as DocumentConverterOptions + config = { ...configFromFile, outputPath: out } + } catch (e: any) { + console.error(`Unable to load config file from ${configSearchPath}: ${e}`) + return + } + + config.steps = [ + { name: '_extractImages' }, + { name: '_cleanupMarkup' }, + ...config.steps, + { name: '_addStubHeader' } + ] + + config.htmlConverter = async (path) => (await mammoth.convertToHtml({ path })).value + + const converter = new DocumentConverter(config, defaultDocumentPreprocessors) + await converter.processFolder(dir) + await converter.flush() + }) + program.parse(process.argv) } diff --git a/packages/importer/src/docx/docx.ts b/packages/importer/src/docx/docx.ts new file mode 100644 index 0000000000..1670143e5c --- /dev/null +++ b/packages/importer/src/docx/docx.ts @@ -0,0 +1,127 @@ +// +// Copyright © 2025 Hardcore Engineering Inc. +// +// Licensed under the Eclipse Public License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. You may +// obtain a copy of the License at https://www.eclipse.org/legal/epl-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// +// See the License for the specific language governing permissions and +// limitations under the License. +// + +import { defaultExtensions, htmlToJSON, MarkupNode, serializeMessage } from '@hcengineering/text' +import { mkdir, readdir, readFile, writeFile } from 'fs/promises' +import * as yaml from 'js-yaml' +import { basename, dirname, extname, join, relative } from 'path' +import { UnifiedControlledDocumentHeader, UnifiedDocumentTemplateHeader } from '../huly/unified' + +export interface DocumentConverterOptions { + outputPath: string + owner: string + steps: DocumentPreprocessorOptions[] + htmlConverter: (path: string) => Promise +} + +export interface DocumentState { + name: string + path: string + root: string + markup: MarkupNode + header?: UnifiedControlledDocumentHeader | UnifiedDocumentTemplateHeader +} + +export interface DocumentPreprocessorOptions { + name: string + options?: T +} + +export type DocumentPreprocessor = (document: DocumentState) => DocumentState | undefined +export type DocumentPreprocessorSpec = (converter: DocumentConverter, options?: T) => DocumentPreprocessor + +export class DocumentConverter { + documents = new Map() + output = new Map() + preprocessors: DocumentPreprocessor[] + + options: DocumentConverterOptions + + constructor (options: DocumentConverterOptions, specs: Record>) { + this.options = options + this.preprocessors = [] + + for (const step of options.steps) { + const spec = specs[step.name] + if (spec === undefined) { + throw new Error(`Unknown step: ${step.name}`) + } + this.preprocessors.push(spec(this, step.options)) + } + } + + async processFolder (root: string): Promise { + const files = await scanFiles(root) + for (const path of files) { + const ext = extname(path) + if (ext === '.docx') await this.processDocument(path, root) + else if (ext === '.md') this.addOutputFile(relative(root, path), await readFile(path, 'utf-8')) + } + } + + async processDocument (path: string, root: string): Promise { + const htmlString = await this.options.htmlConverter(path) + const markup = htmlToJSON(htmlString, defaultExtensions) + + let document: DocumentState = { + name: fileNameNoExt(path), + path, + root, + markup + } + + for (const processor of this.preprocessors) { + document = processor(document) ?? document + } + + this.documents.set(path, document) + + const content = compileMarkdown(document) + this.addOutputFile(join(relative(root, dirname(path)), fileNameNoExt(path)) + '.md', content) + } + + addOutputFile (rel: string, content: string | Buffer): void { + this.output.set(join(this.options.outputPath, rel), content) + } + + async flush (): Promise { + for (const [path, content] of this.output) { + await mkdir(dirname(path), { recursive: true }) + await writeFile(path, content as any) + } + } +} + +function compileMarkdown (file: DocumentState): string { + const markdown = serializeMessage(file.markup, 'ref://', '') + + const headerYaml = yaml.dump(file.header) + const headerString = '---\n' + headerYaml + '---\n' + + const finalContent = headerString + markdown + return finalContent +} + +function fileNameNoExt (path: string): string { + const bname = basename(path) + const ext = extname(path) + return bname.slice(0, bname.length - ext.length) +} + +async function scanFiles (dir: string): Promise { + const filesAndDirs = await readdir(dir, { recursive: true, withFileTypes: true }) + const files = filesAndDirs.filter((file) => !file.isDirectory()).map((f) => join(f.path, f.name)) + return files +} diff --git a/packages/importer/src/docx/preprocessors.ts b/packages/importer/src/docx/preprocessors.ts new file mode 100644 index 0000000000..bd11d05004 --- /dev/null +++ b/packages/importer/src/docx/preprocessors.ts @@ -0,0 +1,126 @@ +// +// Copyright © 2025 Hardcore Engineering Inc. +// +// Licensed under the Eclipse Public License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. You may +// obtain a copy of the License at https://www.eclipse.org/legal/epl-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// +// See the License for the specific language governing permissions and +// limitations under the License. +// + +import { AttrValue, MarkupNode, MarkupNodeType } from '@hcengineering/text' +import { dirname, join, relative } from 'path' +import { DocumentPreprocessorSpec, DocumentState } from './docx' +import documents from '@hcengineering/controlled-documents' + +const _addStubHeader: DocumentPreprocessorSpec = (converter, inputOptions) => { + return (document) => { + const options: DocumentState['header'] = inputOptions ?? { + class: 'documents:class:ControlledDocument', + title: document.name, + template: documents.template.ProductChangeControl, + author: converter.options.owner, + owner: converter.options.owner + } + const header = document.header ?? options + return { ...document, header } + } +} + +interface ExtractImagesOptions { + folder?: string + extensions?: Record +} + +const _extractImages: DocumentPreprocessorSpec = (converter, inputOptions) => { + const options = { + folder: 'files', + extensions: { + 'image/jpeg': '.jpeg', + 'image/jpg': '.jpeg', + 'image/png': '.png' + }, + ...inputOptions + } + + let imageCount = 0 + interface Image { + extension: string + buffer: Buffer + } + + const extractBase64Image = (imageContent: AttrValue): Image | undefined => { + if (typeof imageContent !== 'string' || !imageContent.startsWith('data:')) { + return + } + + const buffer = Buffer.from(imageContent.split(',')[1], 'base64') + const type = imageContent.split(';')[0].split(':')[1] + + const extension = options.extensions[type] + if (extension === undefined) { + return + } + + return { buffer, extension } + } + + const transformImage = (dir: string, node: MarkupNode): MarkupNode => { + if (node.type !== MarkupNodeType.image) { + return node + } + + const image = extractBase64Image(node.attrs?.src ?? '') + if (image === undefined) { + return node + } + + imageCount++ + const path = join(options.folder, 'image_' + imageCount + image.extension) + + node = { ...node, attrs: { ...node.attrs, src: relative(dir, path) } } + converter.addOutputFile(path, image.buffer) + + return node + } + + return (document) => { + const dir = relative(document.root, dirname(document.path)) + const markup = transformMarkupRecursive(document.markup, (node) => transformImage(dir, node)) + return { ...document, markup } + } +} + +const _cleanupMarkup: DocumentPreprocessorSpec = (converter) => { + const transform = (node: MarkupNode): MarkupNode => { + if (node.type === MarkupNodeType.table_header) { + node = { ...node, type: MarkupNodeType.table_cell } + } + return node + } + + return (document) => { + const markup = transformMarkupRecursive(document.markup, transform) + return { ...document, markup } + } +} + +export const defaultDocumentPreprocessors = { + _addStubHeader, + _extractImages, + _cleanupMarkup +} + +function transformMarkupRecursive (node: MarkupNode, transformer: (node: MarkupNode) => MarkupNode): MarkupNode { + let content = node.content + if (content !== undefined) { + content = content.map((node) => transformMarkupRecursive(node, transformer)) + node = { ...node, content } + } + return transformer(node) +} diff --git a/packages/importer/src/huly/unified.ts b/packages/importer/src/huly/unified.ts index a07892931d..38c013637c 100644 --- a/packages/importer/src/huly/unified.ts +++ b/packages/importer/src/huly/unified.ts @@ -50,13 +50,13 @@ import documents, { DocumentMeta } from '@hcengineering/controlled-documents' -interface UnifiedComment { +export interface UnifiedComment { author: string text: string attachments?: string[] } -interface UnifiedIssueHeader { +export interface UnifiedIssueHeader { class: 'tracker:class:Issue' title: string status: string @@ -67,7 +67,7 @@ interface UnifiedIssueHeader { comments?: UnifiedComment[] } -interface UnifiedSpaceSettings { +export interface UnifiedSpaceSettings { class: 'tracker:class:Project' | 'document:class:Teamspace' | 'documents:class:OrgSpace' title: string private?: boolean @@ -79,7 +79,7 @@ interface UnifiedSpaceSettings { emoji?: string } -interface UnifiedProjectSettings extends UnifiedSpaceSettings { +export interface UnifiedProjectSettings extends UnifiedSpaceSettings { class: 'tracker:class:Project' identifier: string id?: 'tracker:project:DefaultProject' @@ -87,16 +87,16 @@ interface UnifiedProjectSettings extends UnifiedSpaceSettings { defaultIssueStatus?: string } -interface UnifiedTeamspaceSettings extends UnifiedSpaceSettings { +export interface UnifiedTeamspaceSettings extends UnifiedSpaceSettings { class: 'document:class:Teamspace' } -interface UnifiedDocumentHeader { +export interface UnifiedDocumentHeader { class: 'document:class:Document' title: string } -interface UnifiedWorkspaceSettings { +export interface UnifiedWorkspaceSettings { projectTypes?: Array<{ name: string taskTypes?: Array<{ @@ -110,13 +110,13 @@ interface UnifiedWorkspaceSettings { }> } -interface UnifiedChangeControlHeader { +export interface UnifiedChangeControlHeader { description?: string reason?: string impact?: string } -interface UnifiedControlledDocumentHeader { +export interface UnifiedControlledDocumentHeader { class: 'documents:class:ControlledDocument' title: string template: string @@ -129,7 +129,7 @@ interface UnifiedControlledDocumentHeader { changeControl?: UnifiedChangeControlHeader } -interface UnifiedDocumentTemplateHeader { +export interface UnifiedDocumentTemplateHeader { class: 'documents:mixin:DocumentTemplate' title: string category: string @@ -143,7 +143,7 @@ interface UnifiedDocumentTemplateHeader { changeControl?: UnifiedChangeControlHeader } -interface UnifiedOrgSpaceSettings extends UnifiedSpaceSettings { +export interface UnifiedOrgSpaceSettings extends UnifiedSpaceSettings { class: 'documents:class:OrgSpace' qualified?: string manager?: string diff --git a/packages/importer/src/index.ts b/packages/importer/src/index.ts index 83ab51cf82..e7bbadd6d4 100644 --- a/packages/importer/src/index.ts +++ b/packages/importer/src/index.ts @@ -16,6 +16,8 @@ export * from './huly/unified' export * from './clickup/clickup' export * from './notion/notion' +export * from './docx/docx' +export * from './docx/preprocessors' export * from './importer/uploader' export * from './importer/storageUploader' diff --git a/pods/backup/src/index.ts b/pods/backup/src/index.ts index 7c9a3d41bd..e14c6e0dbb 100644 --- a/pods/backup/src/index.ts +++ b/pods/backup/src/index.ts @@ -27,9 +27,14 @@ import { } from '@hcengineering/server-pipeline' import { join } from 'path' +import { createMongoAdapter, createMongoDestroyAdapter, createMongoTxAdapter } from '@hcengineering/mongo' +import { + createPostgreeDestroyAdapter, + createPostgresAdapter, + createPostgresTxAdapter, + setDBExtraOptions +} from '@hcengineering/postgres' import { readFileSync } from 'node:fs' -import { createMongoTxAdapter, createMongoAdapter, createMongoDestroyAdapter } from '@hcengineering/mongo' -import { createPostgresTxAdapter, createPostgresAdapter, createPostgreeDestroyAdapter } from '@hcengineering/postgres' const model = JSON.parse(readFileSync(process.env.MODEL_JSON ?? 'model.json').toString()) as Tx[] const metricsContext = initStatisticsContext('backup', { @@ -51,6 +56,12 @@ const sentryDSN = process.env.SENTRY_DSN configureAnalytics(sentryDSN, {}) Analytics.setTag('application', 'backup-service') +const usePrepare = process.env.DB_PREPARE === 'true' + +setDBExtraOptions({ + prepare: usePrepare // We override defaults +}) + registerTxAdapterFactory('mongodb', createMongoTxAdapter) registerAdapterFactory('mongodb', createMongoAdapter) registerDestroyFactory('mongodb', createMongoDestroyAdapter) diff --git a/pods/fulltext/src/server.ts b/pods/fulltext/src/server.ts index 605a28c37e..a87b7b9c82 100644 --- a/pods/fulltext/src/server.ts +++ b/pods/fulltext/src/server.ts @@ -32,7 +32,12 @@ import { } from '@hcengineering/middleware' import { createMongoAdapter, createMongoDestroyAdapter, createMongoTxAdapter } from '@hcengineering/mongo' import { PlatformError, setMetadata, unknownError } from '@hcengineering/platform' -import { createPostgreeDestroyAdapter, createPostgresAdapter, createPostgresTxAdapter } from '@hcengineering/postgres' +import { + createPostgreeDestroyAdapter, + createPostgresAdapter, + createPostgresTxAdapter, + setDBExtraOptions +} from '@hcengineering/postgres' import serverClientPlugin, { getTransactorEndpoint, getWorkspaceInfo } from '@hcengineering/server-client' import serverCore, { createContentAdapter, @@ -215,6 +220,12 @@ export async function startIndexer ( ): Promise<() => void> { const closeTimeout = 5 * 60 * 1000 + const usePrepare = process.env.DB_PREPARE === 'true' + + setDBExtraOptions({ + prepare: usePrepare // We override defaults + }) + setMetadata(serverToken.metadata.Secret, opt.serverSecret) setMetadata(serverCore.metadata.ElasticIndexName, opt.elasticIndexName) setMetadata(serverClientPlugin.metadata.Endpoint, opt.accountsUrl) diff --git a/pods/server/src/__start.ts b/pods/server/src/__start.ts index 955fb897ad..04a15b4e1d 100644 --- a/pods/server/src/__start.ts +++ b/pods/server/src/__start.ts @@ -28,6 +28,7 @@ import { startHttpServer } from '@hcengineering/server-ws' import { join } from 'path' import { start } from '.' import { profileStart, profileStop } from './inspector' +import { setDBExtraOptions } from '@hcengineering/postgres' configureAnalytics(process.env.SENTRY_DSN, {}) Analytics.setTag('application', 'transactor') @@ -58,6 +59,12 @@ setOperationLogProfiling(process.env.OPERATION_PROFILING === 'true') const config = serverConfigFromEnv() const storageConfig: StorageConfiguration = storageConfigFromEnv() +const usePrepare = process.env.DB_PREPARE === 'true' + +setDBExtraOptions({ + prepare: usePrepare // We override defaults +}) + const lastNameFirst = process.env.LAST_NAME_FIRST === 'true' setMetadata(contactPlugin.metadata.LastNameFirst, lastNameFirst) setMetadata(serverCore.metadata.FrontUrl, config.frontUrl) diff --git a/server/postgres/src/storage.ts b/server/postgres/src/storage.ts index e8b3c4fe79..fc65099f42 100644 --- a/server/postgres/src/storage.ts +++ b/server/postgres/src/storage.ts @@ -117,6 +117,7 @@ async function * createCursorGenerator ( } } catch (err: any) { console.error('failed to recieve data', { err }) + throw err // Rethrow the error after logging } } @@ -156,7 +157,11 @@ class ConnectionInfo { throw err } finally { if (this.released) { - reserved?.release() + try { + reserved?.release() + } catch (err: any) { + console.error('failed to release', err) + } } else { // after use we put into available if (reserved !== undefined) { @@ -168,7 +173,11 @@ class ConnectionInfo { const toRelease = this.available.splice(1, this.available.length - 1) for (const r of toRelease) { - r.release() + try { + r.release() + } catch (err: any) { + console.error('failed to relase', err) + } } } } @@ -176,7 +185,7 @@ class ConnectionInfo { } release (): void { - for (const c of this.available) { + for (const c of [...this.available]) { c.release() } this.available = [] @@ -302,7 +311,11 @@ class ConnectionMgr { ([, it]: [string, ConnectionInfo]) => it.mgrId === this.mgrId )) { connections.delete(k) - conn.release() + try { + conn.release() + } catch (err: any) { + console.error('failed to release connection') + } } } @@ -1336,7 +1349,7 @@ abstract class PostgresAdapterBase implements DbAdapter { case '$options': break case '$all': - res.push(`${tkey} @> ARRAY[${value}]`) + res.push(`${tkey} @> ${vars.addArray(value, inferType(value))}`) break default: res.push(`${tkey} @> '[${JSON.stringify(value)}]'`) @@ -1542,64 +1555,39 @@ abstract class PostgresAdapterBase implements DbAdapter { return ctx.with('upload', { domain }, async (ctx) => { const schemaFields = getSchemaAndFields(domain) const filedsWithData = [...schemaFields.fields, 'data'] - const insertFields: string[] = [] - const onConflict: string[] = [] - for (const field of filedsWithData) { - insertFields.push(`"${field}"`) - if (handleConflicts) { - onConflict.push(`"${field}" = EXCLUDED."${field}"`) - } - } + + const insertFields = filedsWithData.map((field) => `"${field}"`) + const onConflict = handleConflicts ? filedsWithData.map((field) => `"${field}" = EXCLUDED."${field}"`) : [] + const insertStr = insertFields.join(', ') const onConflictStr = onConflict.join(', ') try { - const toUpload = [...docs] const tdomain = translateDomain(domain) - while (toUpload.length > 0) { - const part = toUpload.splice(0, 200) + const batchSize = 200 + for (let i = 0; i < docs.length; i += batchSize) { + const part = docs.slice(i, i + batchSize) const values = new ValuesVariables() const vars: string[] = [] const wsId = values.add(this.workspaceId.name, '::uuid') - for (let i = 0; i < part.length; i++) { - const doc = part[i] - const variables: string[] = [] - + for (const doc of part) { if (!('%hash%' in doc) || doc['%hash%'] === '' || doc['%hash%'] == null) { ;(doc as any)['%hash%'] = this.curHash() // We need to set current hash } const d = convertDoc(domain, doc, this.workspaceId.name, schemaFields) - variables.push(wsId) - for (const field of schemaFields.fields) { - variables.push(values.add(d[field], `::${schemaFields.schema[field].type}`)) - } - variables.push(values.add(d.data, '::json')) + const variables = [ + wsId, + ...schemaFields.fields.map((field) => values.add(d[field], `::${schemaFields.schema[field].type}`)), + values.add(d.data, '::json') + ] vars.push(`(${variables.join(', ')})`) } const vals = vars.join(',') - if (handleConflicts) { - await this.mgr.retry( - ctx.id, - async (client) => - await client.unsafe( - `INSERT INTO ${tdomain} ("workspaceId", ${insertStr}) VALUES ${vals} - ON CONFLICT ("workspaceId", _id) DO UPDATE SET ${onConflictStr};`, - values.getValues(), - getPrepare() - ) - ) - } else { - await this.mgr.retry( - ctx.id, - async (client) => - await client.unsafe( - `INSERT INTO ${tdomain} ("workspaceId", ${insertStr}) VALUES ${vals};`, - values.getValues(), - getPrepare() - ) - ) - } + const query = `INSERT INTO ${tdomain} ("workspaceId", ${insertStr}) VALUES ${vals} ${ + handleConflicts ? `ON CONFLICT ("workspaceId", _id) DO UPDATE SET ${onConflictStr}` : '' + };` + await this.mgr.retry(ctx.id, async (client) => await client.unsafe(query, values.getValues(), getPrepare())) } } catch (err: any) { ctx.error('failed to upload', { err }) @@ -1610,17 +1598,14 @@ abstract class PostgresAdapterBase implements DbAdapter { async clean (ctx: MeasureContext, domain: Domain, docs: Ref[]): Promise { const tdomain = translateDomain(domain) - const toClean = [...docs] - while (toClean.length > 0) { - const part = toClean.splice(0, 2500) + const batchSize = 2500 + const query = `DELETE FROM ${tdomain} WHERE "workspaceId" = $1 AND _id = ANY($2::text[])` + + for (let i = 0; i < docs.length; i += batchSize) { + const part = docs.slice(i, i + batchSize) await ctx.with('clean', {}, () => { - return this.mgr.retry(ctx.id, (client) => - client.unsafe( - `DELETE FROM ${tdomain} WHERE "workspaceId" = $1 AND _id = ANY($2::text[])`, - [this.workspaceId.name, part], - getPrepare() - ) - ) + const params = [this.workspaceId.name, part] + return this.mgr.retry(ctx.id, (client) => client.unsafe(query, params, getPrepare())) }) } } @@ -1635,10 +1620,16 @@ abstract class PostgresAdapterBase implements DbAdapter { return ctx.with('groupBy', { domain }, async (ctx) => { try { const vars = new ValuesVariables() - const finalSql = `SELECT DISTINCT ${key} as ${field}, Count(*) AS count FROM ${translateDomain(domain)} WHERE ${this.buildRawQuery(vars, domain, query ?? {})} GROUP BY ${key}` + const sqlChunks: string[] = [ + `SELECT ${key} as ${field}, Count(*) AS count`, + `FROM ${translateDomain(domain)}`, + `WHERE ${this.buildRawQuery(vars, domain, query ?? {})}`, + `GROUP BY ${key}` + ] + const finalSql = sqlChunks.join(' ') return await this.mgr.retry(ctx.id, async (connection) => { const result = await connection.unsafe(finalSql, vars.getValues(), getPrepare()) - return new Map(result.map((r) => [r[field.toLocaleLowerCase()], parseInt(r.count)])) + return new Map(result.map((r) => [r[field.toLowerCase()], r.count])) }) } catch (err) { ctx.error('Error while grouping by', { domain, field }) @@ -1920,10 +1911,10 @@ class PostgresAdapter extends PostgresAdapterBase { const result: TxResult[] = [] try { const schema = getSchema(domain) - const updates = groupByArray(operations, (it) => it.fields.join(',')) - for (const upds of updates.values()) { - while (upds.length > 0) { - const part = upds.splice(0, 200) + const groupedUpdates = groupByArray(operations, (it) => it.fields.join(',')) + for (const groupedOps of groupedUpdates.values()) { + for (let i = 0; i < groupedOps.length; i += 200) { + const part = groupedOps.slice(i, i + 200) let idx = 1 const indexes: string[] = [] const data: any[] = [] @@ -2021,7 +2012,9 @@ class PostgresTxAdapter extends PostgresAdapterBase implements TxAdapter { async getModel (ctx: MeasureContext): Promise { const res: DBDoc[] = await this.mgr.retry(undefined, (client) => { return client.unsafe( - `SELECT * FROM "${translateDomain(DOMAIN_MODEL_TX)}" WHERE "workspaceId" = '${this.workspaceId.name}'::uuid ORDER BY _id::text ASC, "modifiedOn"::bigint ASC` + `SELECT * FROM "${translateDomain(DOMAIN_MODEL_TX)}" WHERE "workspaceId" = '${this.workspaceId.name}'::uuid ORDER BY _id::text ASC, "modifiedOn"::bigint ASC`, + undefined, + getPrepare() ) }) diff --git a/server/postgres/src/utils.ts b/server/postgres/src/utils.ts index 2938070b22..f00d2967ba 100644 --- a/server/postgres/src/utils.ts +++ b/server/postgres/src/utils.ts @@ -317,6 +317,11 @@ export function getDBClient (connectionString: string, database?: string): Postg }, database, max: 10, + min: 2, + connect_timeout: 10, + idle_timeout: 30, + max_lifetime: 300, + fetch_types: true, transform: { undefined: null }, diff --git a/server/workspace-service/src/index.ts b/server/workspace-service/src/index.ts index 02a34b8a60..ec0c59cd19 100644 --- a/server/workspace-service/src/index.ts +++ b/server/workspace-service/src/index.ts @@ -122,20 +122,24 @@ export function serveWorkspaceAccount ( brandings ) - void worker.start( - measureCtx, - { - errorHandler: async (ws, err) => { - Analytics.handleError(err) + void worker + .start( + measureCtx, + { + errorHandler: async (ws, err) => { + Analytics.handleError(err) + }, + force: false, + console: false, + logs: 'upgrade-logs', + waitTimeout, + backup }, - force: false, - console: false, - logs: 'upgrade-logs', - waitTimeout, - backup - }, - () => canceled - ) + () => canceled + ) + .catch((err) => { + measureCtx.error('failed to start', { err }) + }) const close = (): void => { canceled = true diff --git a/server/workspace-service/src/service.ts b/server/workspace-service/src/service.ts index e7e3590b9e..5ff47cebdf 100644 --- a/server/workspace-service/src/service.ts +++ b/server/workspace-service/src/service.ts @@ -40,6 +40,8 @@ import { FileModelLogger, prepareTools } from '@hcengineering/server-tool' import path from 'path' import { Analytics } from '@hcengineering/analytics' +import { createMongoAdapter, createMongoDestroyAdapter, createMongoTxAdapter } from '@hcengineering/mongo' +import { createPostgreeDestroyAdapter, createPostgresAdapter, createPostgresTxAdapter } from '@hcengineering/postgres' import { doBackupWorkspace, doRestoreWorkspace } from '@hcengineering/server-backup' import type { PipelineFactory, StorageAdapter } from '@hcengineering/server-core' import { @@ -54,8 +56,6 @@ import { } from '@hcengineering/server-pipeline' import { buildStorageFromConfig, storageConfigFromEnv } from '@hcengineering/server-storage' import { createWorkspace, upgradeWorkspace } from './ws-operations' -import { createMongoTxAdapter, createMongoAdapter, createMongoDestroyAdapter } from '@hcengineering/mongo' -import { createPostgresTxAdapter, createPostgresAdapter, createPostgreeDestroyAdapter } from '@hcengineering/postgres' export interface WorkspaceOptions { errorHandler: (workspace: BaseWorkspaceInfo, error: any) => Promise @@ -115,7 +115,14 @@ export class WorkspaceWorker { ctx.info('Sending a handshake to the account service...') - await withRetryConnUntilSuccess(workerHandshake)(token, this.region, this.version, this.operation) + while (true) { + try { + await withRetryConnUntilSuccess(workerHandshake)(token, this.region, this.version, this.operation) + break + } catch (err: any) { + ctx.error('error', { err }) + } + } ctx.info('Successfully connected to the account service') @@ -150,7 +157,10 @@ export class WorkspaceWorker { }), workspace, opt - ) + ).catch((err) => { + Analytics.handleError(err) + ctx.error('error', { err }) + }) }) } } diff --git a/services/print/pod-print/package.json b/services/print/pod-print/package.json index d68ea81e37..e7df51780a 100644 --- a/services/print/pod-print/package.json +++ b/services/print/pod-print/package.json @@ -63,7 +63,7 @@ "dotenv": "~16.0.0", "express": "^4.21.2", "puppeteer": "^22.6.1", - "mammoth": "^1.6.0", + "mammoth": "^1.9.0", "ws": "^8.18.0" } } diff --git a/services/rekoni/package.json b/services/rekoni/package.json index 87d6e293db..fabca28b56 100644 --- a/services/rekoni/package.json +++ b/services/rekoni/package.json @@ -81,7 +81,7 @@ "jimp": "^0.16.1", "jwt-simple": "^0.5.6", "libphonenumber-js": "^1.9.46", - "mammoth": "^1.6.0", + "mammoth": "^1.9.0", "mime-types": "~2.1.34", "pdfjs-dist": "2.12.313", "sharp": "~0.32.0",