mirror of
https://github.com/hcengineering/platform.git
synced 2025-04-30 12:15:51 +00:00
Basic workflow for converting docx to unified format (#7779)
* Basic workflow for converting docx to unified format Signed-off-by: Victor Ilyushchenko <alt13ri@gmail.com> * dep fix Signed-off-by: Victor Ilyushchenko <alt13ri@gmail.com> * @vercel/webpack-asset-relocator-loader can't handle mammoth for some reason, moved dep to import-tool Signed-off-by: Victor Ilyushchenko <alt13ri@gmail.com> --------- Signed-off-by: Victor Ilyushchenko <alt13ri@gmail.com>
This commit is contained in:
parent
9a29cde7a6
commit
d6b7a38af9
@ -1727,8 +1727,8 @@ importers:
|
||||
specifier: ^3.1.0
|
||||
version: 3.1.0
|
||||
mammoth:
|
||||
specifier: ^1.6.0
|
||||
version: 1.8.0
|
||||
specifier: ^1.9.0
|
||||
version: 1.9.0
|
||||
markdown-it:
|
||||
specifier: ^14.0.0
|
||||
version: 14.0.0
|
||||
@ -4119,7 +4119,7 @@ packages:
|
||||
version: 0.0.0
|
||||
|
||||
'@rush-temp/import-tool@file:projects/import-tool.tgz':
|
||||
resolution: {integrity: sha512-imMlneB1gppaXcJi4brs9jxXdGgK5zUGanT6vjF2+lNnrRCnp1X3Ys6/iRVLlexVOnSsA6BTpaZ1Xo5SSTO/gg==, tarball: file:projects/import-tool.tgz}
|
||||
resolution: {integrity: sha512-yTXXuY90bmLrEEiipvDCuv+mLPokLCSLajsI3+hEES0iCdCbiJlua2NYv73WVshCcu0oq1acpRPElKxELjNgDw==, tarball: file:projects/import-tool.tgz}
|
||||
version: 0.0.0
|
||||
|
||||
'@rush-temp/importer@file:projects/importer.tgz':
|
||||
@ -4587,7 +4587,7 @@ packages:
|
||||
version: 0.0.0
|
||||
|
||||
'@rush-temp/pod-print@file:projects/pod-print.tgz':
|
||||
resolution: {integrity: sha512-19hcUtJBpca/kmHtIv4Z30rvHIaMLeGm3PdirKFuVS7VAdmYV6Um+m4o+5fq2LUUcda4T10vHscPeAvZR8EPEA==, tarball: file:projects/pod-print.tgz}
|
||||
resolution: {integrity: sha512-uH6mY0Z3/3bbAe+rGjr2WPJmavKnoNgKZ5KoLqToGGF+iDzZOZGLjd8OrwUI90Z3behW7PPIRuPDGiRmFGOtcw==, tarball: file:projects/pod-print.tgz}
|
||||
version: 0.0.0
|
||||
|
||||
'@rush-temp/pod-server@file:projects/pod-server.tgz':
|
||||
@ -4671,7 +4671,7 @@ packages:
|
||||
version: 0.0.0
|
||||
|
||||
'@rush-temp/qms-doc-import-tool@file:projects/qms-doc-import-tool.tgz':
|
||||
resolution: {integrity: sha512-QqdrovP6ZWs8dmU+6Ly95n8BFMjlvtMcme4uj9XgPqCKMOmuMhtQ0Wn5ae3h8hzgB1K2HK25F0BPrzhRBGlxTA==, tarball: file:projects/qms-doc-import-tool.tgz}
|
||||
resolution: {integrity: sha512-m7UFAU/1lPMVaVWNf5rvDKrRWTxOzjuWinK48EQ8OSJD3JSB5SP/IHrW9zSckgWBnLYL1vnljAgjM0QZ2qjIlQ==, tarball: file:projects/qms-doc-import-tool.tgz}
|
||||
version: 0.0.0
|
||||
|
||||
'@rush-temp/qms-tests-sanity@file:projects/qms-tests-sanity.tgz':
|
||||
@ -4711,7 +4711,7 @@ packages:
|
||||
version: 0.0.0
|
||||
|
||||
'@rush-temp/rekoni-service@file:projects/rekoni-service.tgz':
|
||||
resolution: {integrity: sha512-c3Vh1CX471Q8N6l5hoftqqktDZ7PuqzOXwcnhy5bNc14ynVi8Q9mTV7hHeP5xCFzQzepXCQ+tF5Etparbn2pdQ==, tarball: file:projects/rekoni-service.tgz}
|
||||
resolution: {integrity: sha512-KwM2th57U3OVRVgPsNgcakA3gCpIYsASv2TeeqJAbv0cFim1ha1xDaeb8A96O9vTHjHRknDfgqQVYYIUdosQwQ==, tarball: file:projects/rekoni-service.tgz}
|
||||
version: 0.0.0
|
||||
|
||||
'@rush-temp/rekoni@file:projects/rekoni.tgz':
|
||||
@ -10276,8 +10276,8 @@ packages:
|
||||
resolution: {integrity: sha512-lyuxPGr/Wfhrlem2CL/UcnUc1zcqKAImBDzukY7Y5F/yQiNdko6+fRLevlw1HgMySw7f611UIY408EtxRSoK3Q==}
|
||||
hasBin: true
|
||||
|
||||
lop@0.4.1:
|
||||
resolution: {integrity: sha512-9xyho9why2A2tzm5aIcMWKvzqKsnxrf9B5I+8O30olh6lQU8PH978LqZoI4++37RBgS1Em5i54v1TFs/3wnmXQ==}
|
||||
lop@0.4.2:
|
||||
resolution: {integrity: sha512-RefILVDQ4DKoRZsJ4Pj22TxE3omDO47yFpkIBoDKzkqPRISs5U1cnAdg/5583YPkWPaLIYHOKRMQSvjFsO26cw==}
|
||||
|
||||
lower-case@2.0.2:
|
||||
resolution: {integrity: sha512-7fm3l3NAF9WfN6W3JOmf5drwpVqX78JtoGJ3A6W0a6ZnldM41w2fV5D490psKFTpMds8TJse/eHLFFsNHHjHgg==}
|
||||
@ -10328,8 +10328,8 @@ packages:
|
||||
makeerror@1.0.12:
|
||||
resolution: {integrity: sha512-JmqCvUhmt43madlpFzG4BQzG2Z3m6tvQDNKdClZnO3VbIudJYmxsT0FNJMeiB2+JTSlTQTSbU8QdesVmwJcmLg==}
|
||||
|
||||
mammoth@1.8.0:
|
||||
resolution: {integrity: sha512-pJNfxSk9IEGVpau+tsZFz22ofjUsl2mnA5eT8PjPs2n0BP+rhVte4Nez6FdgEuxv3IGI3afiV46ImKqTGDVlbA==}
|
||||
mammoth@1.9.0:
|
||||
resolution: {integrity: sha512-F+0NxzankQV9XSUAuVKvkdQK0GbtGGuqVnND9aVf9VSeUA82LQa29GjLqYU6Eez8LHqSJG3eGiDW3224OKdpZg==}
|
||||
engines: {node: '>=12.0.0'}
|
||||
hasBin: true
|
||||
|
||||
@ -18227,6 +18227,7 @@ snapshots:
|
||||
'@rush-temp/import-tool@file:projects/import-tool.tgz(@babel/core@7.23.9)(@jest/types@29.6.3)(babel-jest@29.7.0(@babel/core@7.23.9))':
|
||||
dependencies:
|
||||
'@types/jest': 29.5.12
|
||||
'@types/js-yaml': 4.0.9
|
||||
'@types/node': 20.11.19
|
||||
'@typescript-eslint/eslint-plugin': 6.21.0(@typescript-eslint/parser@6.21.0(eslint@8.56.0)(typescript@5.3.3))(eslint@8.56.0)(typescript@5.6.2)
|
||||
'@typescript-eslint/parser': 6.21.0(eslint@8.56.0)(typescript@5.6.2)
|
||||
@ -18239,6 +18240,8 @@ snapshots:
|
||||
eslint-plugin-n: 15.7.0(eslint@8.56.0)
|
||||
eslint-plugin-promise: 6.1.1(eslint@8.56.0)
|
||||
jest: 29.7.0(@types/node@20.11.19)(ts-node@10.9.2(@types/node@20.11.19)(typescript@5.3.3))
|
||||
js-yaml: 4.1.0
|
||||
mammoth: 1.9.0
|
||||
prettier: 3.2.5
|
||||
ts-jest: 29.1.2(@babel/core@7.23.9)(@jest/types@29.6.3)(babel-jest@29.7.0(@babel/core@7.23.9))(esbuild@0.24.2)(jest@29.7.0(@types/node@20.11.19)(ts-node@10.9.2(@types/node@20.11.19)(typescript@5.3.3)))(typescript@5.6.2)
|
||||
ts-node: 10.9.2(@types/node@20.11.19)(typescript@5.6.2)
|
||||
@ -20760,7 +20763,7 @@ snapshots:
|
||||
eslint-plugin-promise: 6.1.1(eslint@8.56.0)
|
||||
express: 4.21.2
|
||||
jest: 29.7.0(@types/node@20.11.19)(ts-node@10.9.2(@types/node@20.11.19)(typescript@5.3.3))
|
||||
mammoth: 1.8.0
|
||||
mammoth: 1.9.0
|
||||
prettier: 3.2.5
|
||||
puppeteer: 22.14.0(bufferutil@4.0.8)(typescript@5.3.3)(utf-8-validate@6.0.4)
|
||||
ts-jest: 29.1.2(@babel/core@7.23.9)(@jest/types@29.6.3)(babel-jest@29.7.0(@babel/core@7.23.9))(esbuild@0.24.2)(jest@29.7.0(@types/node@20.11.19)(ts-node@10.9.2(@types/node@20.11.19)(typescript@5.3.3)))(typescript@5.3.3)
|
||||
@ -21533,7 +21536,7 @@ snapshots:
|
||||
eslint-plugin-promise: 6.1.1(eslint@8.56.0)
|
||||
htmlparser2: 9.1.0
|
||||
jest: 29.7.0(@types/node@20.11.19)(ts-node@10.9.2(@types/node@20.11.19)(typescript@5.3.3))
|
||||
mammoth: 1.8.0
|
||||
mammoth: 1.9.0
|
||||
prettier: 3.2.5
|
||||
ts-jest: 29.1.2(@babel/core@7.23.9)(@jest/types@29.6.3)(babel-jest@29.7.0(@babel/core@7.23.9))(esbuild@0.24.2)(jest@29.7.0(@types/node@20.11.19)(ts-node@10.9.2(@types/node@20.11.19)(typescript@5.3.3)))(typescript@5.3.3)
|
||||
ts-node: 10.9.2(@types/node@20.11.19)(typescript@5.3.3)
|
||||
@ -21842,7 +21845,7 @@ snapshots:
|
||||
jimp: 0.16.13
|
||||
jwt-simple: 0.5.6
|
||||
libphonenumber-js: 1.10.56
|
||||
mammoth: 1.8.0
|
||||
mammoth: 1.9.0
|
||||
mime-types: 2.1.35
|
||||
morgan: 1.10.0
|
||||
node-loader: 2.0.0(webpack@5.97.1)
|
||||
@ -32113,7 +32116,7 @@ snapshots:
|
||||
dependencies:
|
||||
js-tokens: 4.0.0
|
||||
|
||||
lop@0.4.1:
|
||||
lop@0.4.2:
|
||||
dependencies:
|
||||
duck: 0.1.12
|
||||
option: 0.2.4
|
||||
@ -32185,7 +32188,7 @@ snapshots:
|
||||
dependencies:
|
||||
tmpl: 1.0.5
|
||||
|
||||
mammoth@1.8.0:
|
||||
mammoth@1.9.0:
|
||||
dependencies:
|
||||
'@xmldom/xmldom': 0.8.10
|
||||
argparse: 1.0.10
|
||||
@ -32193,7 +32196,7 @@ snapshots:
|
||||
bluebird: 3.4.7
|
||||
dingbat-to-unicode: 1.0.1
|
||||
jszip: 3.10.1
|
||||
lop: 0.4.1
|
||||
lop: 0.4.2
|
||||
path-is-absolute: 1.0.1
|
||||
underscore: 1.13.7
|
||||
xmlbuilder: 10.1.1
|
||||
|
@ -63,7 +63,7 @@
|
||||
"domhandler": "^5.0.3",
|
||||
"domutils": "^3.1.0",
|
||||
"htmlparser2": "^9.0.0",
|
||||
"mammoth": "^1.6.0",
|
||||
"mammoth": "^1.9.0",
|
||||
"docx4js": "^3.2.20",
|
||||
"zod": "^3.22.4"
|
||||
}
|
||||
|
@ -47,13 +47,16 @@
|
||||
"eslint-plugin-import": "^2.26.0",
|
||||
"eslint-plugin-n": "^15.4.0",
|
||||
"eslint-plugin-promise": "^6.1.1",
|
||||
"prettier": "^3.1.0"
|
||||
"prettier": "^3.1.0",
|
||||
"@types/js-yaml": "^4.0.9"
|
||||
},
|
||||
"dependencies": {
|
||||
"@hcengineering/core": "^0.6.32",
|
||||
"@hcengineering/platform": "^0.6.11",
|
||||
"@hcengineering/server-client": "^0.6.0",
|
||||
"@hcengineering/importer": "^0.6.1",
|
||||
"commander": "^8.1.0"
|
||||
"commander": "^8.1.0",
|
||||
"js-yaml": "^4.1.0",
|
||||
"mammoth": "^1.9.0"
|
||||
}
|
||||
}
|
||||
|
@ -13,6 +13,18 @@
|
||||
// limitations under the License.
|
||||
//
|
||||
import { concatLink, TxOperations } from '@hcengineering/core'
|
||||
import {
|
||||
ClickupImporter,
|
||||
defaultDocumentPreprocessors,
|
||||
DocumentConverter,
|
||||
FrontFileUploader,
|
||||
importNotion,
|
||||
UnifiedFormatImporter,
|
||||
type DocumentConverterOptions,
|
||||
type FileUploader,
|
||||
type Logger
|
||||
} from '@hcengineering/importer'
|
||||
import { setMetadata } from '@hcengineering/platform'
|
||||
import serverClientPlugin, {
|
||||
createClient,
|
||||
getUserWorkspaces,
|
||||
@ -20,15 +32,10 @@ import serverClientPlugin, {
|
||||
selectWorkspace
|
||||
} from '@hcengineering/server-client'
|
||||
import { program } from 'commander'
|
||||
import { setMetadata } from '@hcengineering/platform'
|
||||
import {
|
||||
UnifiedFormatImporter,
|
||||
ClickupImporter,
|
||||
importNotion,
|
||||
FrontFileUploader,
|
||||
type FileUploader,
|
||||
type Logger
|
||||
} from '@hcengineering/importer'
|
||||
import { readFileSync } from 'fs'
|
||||
import * as yaml from 'js-yaml'
|
||||
import mammoth from 'mammoth'
|
||||
import { join } from 'path'
|
||||
|
||||
class ConsoleLogger implements Logger {
|
||||
log (msg: string, data?: any): void {
|
||||
@ -165,5 +172,38 @@ export function importTool (): void {
|
||||
})
|
||||
})
|
||||
|
||||
// Registers the `convert-qms-docx` command: converts every .docx file found under <dir>
// into Unified Huly Format files written below the --out directory.
program
  .command('convert-qms-docx <dir>')
  .requiredOption('-o, --out <dir>', 'out')
  .option('-c, --config <file>', 'configPath')
  .description('convert QMS document into Unified Huly Format')
  .action(async (dir: string, cmd) => {
    // commander derives the option key from the long flag, so `--config <file>` is stored
    // as `cmd.config`; the second argument of `.option()` is only the help text.
    const { out, config: configPath } = cmd
    // Without an explicit --config, look for `import.yaml` next to the documents.
    const configSearchPath = configPath ?? join(dir, 'import.yaml')

    let config: DocumentConverterOptions
    try {
      const configYaml = readFileSync(configSearchPath, 'utf-8')
      const configFromFile = yaml.load(configYaml) as DocumentConverterOptions
      // The output directory always comes from the command line, never from the file.
      config = { ...configFromFile, outputPath: out }
    } catch (e: any) {
      console.error(`Unable to load config file from ${configSearchPath}: ${e}`)
      return
    }

    // Built-in steps wrap the user-defined ones: images and markup are normalized first,
    // and a stub header is added last.
    config.steps = [
      { name: '_extractImages' },
      { name: '_cleanupMarkup' },
      // A config file without a `steps` key leaves this undefined at runtime;
      // spreading undefined into an array would throw.
      ...(config.steps ?? []),
      { name: '_addStubHeader' }
    ]

    // mammoth performs the actual .docx -> HTML conversion.
    config.htmlConverter = async (path) => (await mammoth.convertToHtml({ path })).value

    const converter = new DocumentConverter(config, defaultDocumentPreprocessors)
    await converter.processFolder(dir)
    await converter.flush()
  })
|
||||
|
||||
program.parse(process.argv)
|
||||
}
|
||||
|
127
packages/importer/src/docx/docx.ts
Normal file
127
packages/importer/src/docx/docx.ts
Normal file
@ -0,0 +1,127 @@
|
||||
//
|
||||
// Copyright © 2025 Hardcore Engineering Inc.
|
||||
//
|
||||
// Licensed under the Eclipse Public License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License. You may
|
||||
// obtain a copy of the License at https://www.eclipse.org/legal/epl-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
//
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
|
||||
import { defaultExtensions, htmlToJSON, MarkupNode, serializeMessage } from '@hcengineering/text'
|
||||
import { mkdir, readdir, readFile, writeFile } from 'fs/promises'
|
||||
import * as yaml from 'js-yaml'
|
||||
import { basename, dirname, extname, join, relative } from 'path'
|
||||
import { UnifiedControlledDocumentHeader, UnifiedDocumentTemplateHeader } from '../huly/unified'
|
||||
|
||||
/**
 * Settings consumed by {@link DocumentConverter}.
 */
export interface DocumentConverterOptions {
  /** Directory that every output file path is joined onto. */
  outputPath: string

  /** Value the built-in stub header step writes into both `author` and `owner`. */
  owner: string

  /** Ordered preprocessing steps; each `name` is resolved against the spec registry. */
  steps: Array<DocumentPreprocessorOptions<any>>

  /** Converts the .docx file at `path` into an HTML string. */
  htmlConverter: (path: string) => Promise<string>
}
|
||||
|
||||
/**
 * A document as it travels through the preprocessing pipeline.
 */
export interface DocumentState {
  /** Source file name without its extension. */
  name: string

  /** Path of the source file. */
  path: string

  /** Root folder the conversion was started from. */
  root: string

  /** Document body parsed from the converter's HTML output. */
  markup: MarkupNode

  /** Header that is dumped as YAML front matter; may be absent until a step sets it. */
  header?: UnifiedControlledDocumentHeader | UnifiedDocumentTemplateHeader
}
|
||||
|
||||
/**
 * One configured pipeline step.
 */
export interface DocumentPreprocessorOptions<T> {
  /** Key of the preprocessor spec to instantiate. */
  name: string

  /** Step-specific settings handed to the spec as its second argument. */
  options?: T
}
|
||||
|
||||
/**
 * A single pipeline step. Returning `undefined` keeps the incoming state unchanged.
 */
export type DocumentPreprocessor = (document: DocumentState) => DocumentState | undefined

/**
 * Factory that builds a {@link DocumentPreprocessor} bound to a converter and its
 * optional step settings.
 */
export type DocumentPreprocessorSpec<T> = (
  converter: DocumentConverter,
  options?: T
) => DocumentPreprocessor
|
||||
|
||||
/**
 * Converts a folder of .docx files into markdown files with a YAML header.
 * Output is buffered in memory and only written to disk by {@link DocumentConverter.flush}.
 */
export class DocumentConverter {
  /** Final state of every processed document, keyed by its source path. */
  documents = new Map<string, DocumentState>()
  /** Pending output files, keyed by destination path (output directory already applied). */
  output = new Map<string, Buffer | string>()
  /** Pipeline steps in the order given by `options.steps`. */
  preprocessors: DocumentPreprocessor[]

  options: DocumentConverterOptions

  /**
   * @param options - converter settings, including the ordered list of steps
   * @param specs - registry of available preprocessor specs, keyed by step name
   * @throws Error when a configured step name has no entry in `specs`
   */
  constructor (options: DocumentConverterOptions, specs: Record<string, DocumentPreprocessorSpec<any>>) {
    this.options = options
    this.preprocessors = []

    options.steps.forEach((step) => {
      const factory = specs[step.name]
      if (factory === undefined) {
        throw new Error(`Unknown step: ${step.name}`)
      }
      this.preprocessors.push(factory(this, step.options))
    })
  }

  /**
   * Walks `root` recursively: .docx files are converted, .md files are copied through
   * unchanged, everything else is ignored.
   */
  async processFolder (root: string): Promise<void> {
    for (const file of await scanFiles(root)) {
      switch (extname(file)) {
        case '.docx':
          await this.processDocument(file, root)
          break
        case '.md':
          this.addOutputFile(relative(root, file), await readFile(file, 'utf-8'))
          break
      }
    }
  }

  /**
   * Converts one .docx file, runs it through all preprocessors and queues the resulting
   * markdown at the same relative location with an `.md` extension.
   */
  async processDocument (path: string, root: string): Promise<void> {
    const html = await this.options.htmlConverter(path)
    const parsed = htmlToJSON(html, defaultExtensions)

    const initial: DocumentState = { name: fileNameNoExt(path), path, root, markup: parsed }

    // A step that returns undefined leaves the state untouched.
    const processed = this.preprocessors.reduce<DocumentState>(
      (state, step) => step(state) ?? state,
      initial
    )

    this.documents.set(path, processed)

    const markdown = compileMarkdown(processed)
    const target = join(relative(root, dirname(path)), fileNameNoExt(path)) + '.md'
    this.addOutputFile(target, markdown)
  }

  /**
   * Queues a file for writing; `rel` is resolved against the configured output path.
   */
  addOutputFile (rel: string, content: string | Buffer): void {
    const destination = join(this.options.outputPath, rel)
    this.output.set(destination, content)
  }

  /**
   * Writes every queued file to disk, creating parent directories as needed.
   */
  async flush (): Promise<void> {
    for (const entry of this.output.entries()) {
      const destination = entry[0]
      const data = entry[1]
      await mkdir(dirname(destination), { recursive: true })
      await writeFile(destination, data as any)
    }
  }
}
|
||||
|
||||
/**
 * Renders a document as markdown preceded by its header in a `---` delimited YAML block.
 */
function compileMarkdown (file: DocumentState): string {
  const body = serializeMessage(file.markup, 'ref://', '')
  const frontMatter = `---\n${yaml.dump(file.header)}---\n`
  return `${frontMatter}${body}`
}
|
||||
|
||||
/**
 * Returns the last segment of `path` with its extension (as reported by `extname`) removed.
 */
function fileNameNoExt (path: string): string {
  const base = basename(path)
  const extLength = extname(path).length
  // No extension: the base name is already the answer.
  return extLength === 0 ? base : base.slice(0, -extLength)
}
|
||||
|
||||
/**
 * Recursively lists every non-directory entry below `dir`, joining each entry's
 * containing path with its name.
 */
async function scanFiles (dir: string): Promise<string[]> {
  const entries = await readdir(dir, { recursive: true, withFileTypes: true })

  const found: string[] = []
  for (const entry of entries) {
    if (entry.isDirectory()) continue
    found.push(join(entry.path, entry.name))
  }
  return found
}
|
126
packages/importer/src/docx/preprocessors.ts
Normal file
126
packages/importer/src/docx/preprocessors.ts
Normal file
@ -0,0 +1,126 @@
|
||||
//
|
||||
// Copyright © 2025 Hardcore Engineering Inc.
|
||||
//
|
||||
// Licensed under the Eclipse Public License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License. You may
|
||||
// obtain a copy of the License at https://www.eclipse.org/legal/epl-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
//
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
|
||||
import { AttrValue, MarkupNode, MarkupNodeType } from '@hcengineering/text'
|
||||
import { dirname, join, relative } from 'path'
|
||||
import { DocumentPreprocessorSpec, DocumentState } from './docx'
|
||||
import documents from '@hcengineering/controlled-documents'
|
||||
|
||||
/**
 * Step that guarantees a document has a header. An existing header is kept; otherwise the
 * step options are used as the header, and failing that a controlled-document stub is
 * built from the document name and the converter's owner.
 */
const _addStubHeader: DocumentPreprocessorSpec<DocumentState['header']> = (converter, inputOptions) => {
  const buildStub = (document: DocumentState): NonNullable<DocumentState['header']> => ({
    class: 'documents:class:ControlledDocument',
    title: document.name,
    template: documents.template.ProductChangeControl,
    author: converter.options.owner,
    owner: converter.options.owner
  })

  return (document) => ({
    ...document,
    header: document.header ?? inputOptions ?? buildStub(document)
  })
}
|
||||
|
||||
/**
 * Settings of the image extraction step.
 */
interface ExtractImagesOptions {
  /** Folder, relative to the output directory, that receives the extracted images. */
  folder?: string

  /** Maps an image MIME type to the file extension (dot included) used when saving it. */
  extensions?: Record<string, string>
}
|
||||
|
||||
/**
 * Step that moves inline base64 images out of the markup.
 *
 * Every image node whose `src` is a base64 `data:` URI of a known MIME type is queued as a
 * separate output file named `image_<n><ext>` inside `options.folder`, and its `src` is
 * rewritten to point at that file relative to the document's own folder. All other nodes,
 * including images that cannot be decoded, are left untouched.
 *
 * The counter is shared by all documents handled by one converter, so generated file names
 * do not collide between documents.
 */
const _extractImages: DocumentPreprocessorSpec<ExtractImagesOptions> = (converter, inputOptions) => {
  // `??` rather than object spread: a key that is present but empty (for instance an empty
  // value in a YAML config, which parses to null) must not wipe out the default.
  const folder: string = inputOptions?.folder ?? 'files'
  const extensions: Record<string, string> = inputOptions?.extensions ?? {
    'image/jpeg': '.jpeg',
    'image/jpg': '.jpeg',
    'image/png': '.png'
  }

  let imageCount = 0

  interface ExtractedImage {
    extension: string
    buffer: Buffer
  }

  // Decodes `data:<mime>[;param...];base64,<payload>`; returns undefined for anything else.
  const extractBase64Image = (imageContent: AttrValue): ExtractedImage | undefined => {
    if (typeof imageContent !== 'string' || !imageContent.startsWith('data:')) {
      return
    }

    // A data URI without a comma has no payload at all.
    const separator = imageContent.indexOf(',')
    if (separator === -1) {
      return
    }

    const [type, ...params] = imageContent.slice('data:'.length, separator).split(';')
    // Without the `base64` marker the payload is percent-encoded text, not base64.
    if (!params.includes('base64')) {
      return
    }

    // Check the type first so unsupported images are never decoded.
    const extension = extensions[type]
    if (extension === undefined) {
      return
    }

    const buffer = Buffer.from(imageContent.slice(separator + 1), 'base64')
    return { buffer, extension }
  }

  // Replaces one inline image node with a reference to an extracted file.
  const transformImage = (dir: string, node: MarkupNode): MarkupNode => {
    if (node.type !== MarkupNodeType.image) {
      return node
    }

    const image = extractBase64Image(node.attrs?.src ?? '')
    if (image === undefined) {
      return node
    }

    imageCount++
    // `path` is relative to the output directory; the node needs it relative to the document.
    const path = join(folder, 'image_' + imageCount + image.extension)

    node = { ...node, attrs: { ...node.attrs, src: relative(dir, path) } }
    converter.addOutputFile(path, image.buffer)

    return node
  }

  return (document) => {
    // Folder of the document relative to the conversion root; the output mirrors that layout.
    const dir = relative(document.root, dirname(document.path))
    const markup = transformMarkupRecursive(document.markup, (node) => transformImage(dir, node))
    return { ...document, markup }
  }
}
|
||||
|
||||
/**
 * Step that normalizes the markup: every table header cell is turned into a regular
 * table cell. The converter argument is not used.
 */
const _cleanupMarkup: DocumentPreprocessorSpec<any> = (converter) => {
  const demoteHeaderCell = (node: MarkupNode): MarkupNode =>
    node.type === MarkupNodeType.table_header ? { ...node, type: MarkupNodeType.table_cell } : node

  return (document) => {
    const cleaned = transformMarkupRecursive(document.markup, demoteHeaderCell)
    return { ...document, markup: cleaned }
  }
}
|
||||
|
||||
/**
 * Registry of the built-in preprocessor specs, keyed by the step name used in configs.
 */
export const defaultDocumentPreprocessors = {
  _addStubHeader: _addStubHeader,
  _extractImages: _extractImages,
  _cleanupMarkup: _cleanupMarkup
}
|
||||
|
||||
/**
 * Applies `transformer` to every node of the tree, children before their parent.
 * Nodes with children are copied with the transformed children before being passed on,
 * so the input tree is not modified; a node without `content` is passed through as is.
 */
function transformMarkupRecursive (node: MarkupNode, transformer: (node: MarkupNode) => MarkupNode): MarkupNode {
  const children = node.content
  if (children === undefined) {
    return transformer(node)
  }

  const transformedChildren = children.map((child) => transformMarkupRecursive(child, transformer))
  return transformer({ ...node, content: transformedChildren })
}
|
@ -50,13 +50,13 @@ import documents, {
|
||||
DocumentMeta
|
||||
} from '@hcengineering/controlled-documents'
|
||||
|
||||
interface UnifiedComment {
|
||||
export interface UnifiedComment {
|
||||
author: string
|
||||
text: string
|
||||
attachments?: string[]
|
||||
}
|
||||
|
||||
interface UnifiedIssueHeader {
|
||||
export interface UnifiedIssueHeader {
|
||||
class: 'tracker:class:Issue'
|
||||
title: string
|
||||
status: string
|
||||
@ -67,7 +67,7 @@ interface UnifiedIssueHeader {
|
||||
comments?: UnifiedComment[]
|
||||
}
|
||||
|
||||
interface UnifiedSpaceSettings {
|
||||
export interface UnifiedSpaceSettings {
|
||||
class: 'tracker:class:Project' | 'document:class:Teamspace' | 'documents:class:OrgSpace'
|
||||
title: string
|
||||
private?: boolean
|
||||
@ -79,7 +79,7 @@ interface UnifiedSpaceSettings {
|
||||
emoji?: string
|
||||
}
|
||||
|
||||
interface UnifiedProjectSettings extends UnifiedSpaceSettings {
|
||||
export interface UnifiedProjectSettings extends UnifiedSpaceSettings {
|
||||
class: 'tracker:class:Project'
|
||||
identifier: string
|
||||
id?: 'tracker:project:DefaultProject'
|
||||
@ -87,16 +87,16 @@ interface UnifiedProjectSettings extends UnifiedSpaceSettings {
|
||||
defaultIssueStatus?: string
|
||||
}
|
||||
|
||||
interface UnifiedTeamspaceSettings extends UnifiedSpaceSettings {
|
||||
export interface UnifiedTeamspaceSettings extends UnifiedSpaceSettings {
|
||||
class: 'document:class:Teamspace'
|
||||
}
|
||||
|
||||
interface UnifiedDocumentHeader {
|
||||
export interface UnifiedDocumentHeader {
|
||||
class: 'document:class:Document'
|
||||
title: string
|
||||
}
|
||||
|
||||
interface UnifiedWorkspaceSettings {
|
||||
export interface UnifiedWorkspaceSettings {
|
||||
projectTypes?: Array<{
|
||||
name: string
|
||||
taskTypes?: Array<{
|
||||
@ -110,13 +110,13 @@ interface UnifiedWorkspaceSettings {
|
||||
}>
|
||||
}
|
||||
|
||||
interface UnifiedChangeControlHeader {
|
||||
export interface UnifiedChangeControlHeader {
|
||||
description?: string
|
||||
reason?: string
|
||||
impact?: string
|
||||
}
|
||||
|
||||
interface UnifiedControlledDocumentHeader {
|
||||
export interface UnifiedControlledDocumentHeader {
|
||||
class: 'documents:class:ControlledDocument'
|
||||
title: string
|
||||
template: string
|
||||
@ -129,7 +129,7 @@ interface UnifiedControlledDocumentHeader {
|
||||
changeControl?: UnifiedChangeControlHeader
|
||||
}
|
||||
|
||||
interface UnifiedDocumentTemplateHeader {
|
||||
export interface UnifiedDocumentTemplateHeader {
|
||||
class: 'documents:mixin:DocumentTemplate'
|
||||
title: string
|
||||
category: string
|
||||
@ -143,7 +143,7 @@ interface UnifiedDocumentTemplateHeader {
|
||||
changeControl?: UnifiedChangeControlHeader
|
||||
}
|
||||
|
||||
interface UnifiedOrgSpaceSettings extends UnifiedSpaceSettings {
|
||||
export interface UnifiedOrgSpaceSettings extends UnifiedSpaceSettings {
|
||||
class: 'documents:class:OrgSpace'
|
||||
qualified?: string
|
||||
manager?: string
|
||||
|
@ -16,6 +16,8 @@
|
||||
export * from './huly/unified'
|
||||
export * from './clickup/clickup'
|
||||
export * from './notion/notion'
|
||||
export * from './docx/docx'
|
||||
export * from './docx/preprocessors'
|
||||
|
||||
export * from './importer/uploader'
|
||||
export * from './importer/storageUploader'
|
||||
|
@ -63,7 +63,7 @@
|
||||
"dotenv": "~16.0.0",
|
||||
"express": "^4.21.2",
|
||||
"puppeteer": "^22.6.1",
|
||||
"mammoth": "^1.6.0",
|
||||
"mammoth": "^1.9.0",
|
||||
"ws": "^8.18.0"
|
||||
}
|
||||
}
|
||||
|
@ -81,7 +81,7 @@
|
||||
"jimp": "^0.16.1",
|
||||
"jwt-simple": "^0.5.6",
|
||||
"libphonenumber-js": "^1.9.46",
|
||||
"mammoth": "^1.6.0",
|
||||
"mammoth": "^1.9.0",
|
||||
"mime-types": "~2.1.34",
|
||||
"pdfjs-dist": "2.12.313",
|
||||
"sharp": "~0.32.0",
|
||||
|
Loading…
Reference in New Issue
Block a user