EZQMS-1145: Fixes doc import tool (#6204)

* EZQMS-1145: Fixes doc import tool
Signed-off-by: Alexey Zinoviev <alexey.zinoviev@xored.com>
This commit is contained in:
Alexey Zinoviev 2024-08-01 09:05:07 +04:00 committed by GitHub
parent 0bb4bb04ac
commit c55e6f00b0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
15 changed files with 346 additions and 60 deletions

View File

@ -1403,6 +1403,9 @@ dependencies:
diff2html:
specifier: ~3.4.35
version: 3.4.48
docx4js:
specifier: ^3.2.20
version: 3.2.20
domhandler:
specifier: ^5.0.3
version: 5.0.3
@ -11414,6 +11417,15 @@ packages:
- debug
dev: false
/cfb@0.12.1:
resolution: {integrity: sha512-cP+4A0tTqtyza5gJwNwDetZ8FPjl0gPLE7mIxGKyUzOS6HkM23WaAWW/l3t7jIQSMqVXroa09Ey0lo7gV8LNxw==}
engines: {node: '>=0.8'}
hasBin: true
dependencies:
commander: 2.11.0
printj: 1.1.2
dev: false
/chalk@2.4.2:
resolution: {integrity: sha512-Mti+f9lpJNcwF4tWV8/OrTTtF1gZi+f8FqlyAdouralcFWFQWF2+NgCHShjkCb+IFBLq9buZwE1xckQU4peSuQ==}
engines: {node: '>=4'}
@ -11436,6 +11448,28 @@ packages:
engines: {node: '>=10'}
dev: false
/cheerio@0.22.0:
resolution: {integrity: sha512-8/MzidM6G/TgRelkzDG13y3Y9LxBjCb+8yOEZ9+wwq5gVF2w2pV0wmHvjfT0RvuxGyR7UEuK36r+yYMbT4uKgA==}
engines: {node: '>= 0.6'}
dependencies:
css-select: 1.2.0
dom-serializer: 0.1.1
entities: 1.1.2
htmlparser2: 3.10.1
lodash.assignin: 4.2.0
lodash.bind: 4.2.1
lodash.defaults: 4.2.0
lodash.filter: 4.6.0
lodash.flatten: 4.4.0
lodash.foreach: 4.5.0
lodash.map: 4.6.0
lodash.merge: 4.6.2
lodash.pick: 4.4.0
lodash.reduce: 4.6.0
lodash.reject: 4.6.0
lodash.some: 4.6.0
dev: false
/chokidar@3.6.0:
resolution: {integrity: sha512-7VT13fmjotKpGipCW9JEQAusEPE+Ei8nl6/g4FBAmIm0GOOLMua9NDDo/DWp0ZAxCr3cPq5ZpBqmPAQgDda2Pw==}
engines: {node: '>= 8.10.0'}
@ -11686,6 +11720,10 @@ packages:
engines: {node: '>=14'}
dev: false
/commander@2.11.0:
resolution: {integrity: sha512-b0553uYA5YAEGgyYIGYROzKQ7X5RAqedkfjiZxwi0kL1g3bOaBNNZfYkzt/CL0umgD5wc9Jec2FbB98CjkMRvQ==}
dev: false
/commander@2.20.3:
resolution: {integrity: sha512-GpVkmM8vF2vQUkj2LvZmD35JxeJOLCwJ9cUkugyk2nuhbv3+mJvpLYYt+0+USMxE+oj+ey/lJEnhZw75x/OMcQ==}
dev: false
@ -12110,6 +12148,15 @@ packages:
webpack: 5.90.3(esbuild@0.20.1)(webpack-cli@5.1.4)
dev: false
/css-select@1.2.0:
resolution: {integrity: sha512-dUQOBoqdR7QwV90WysXPLXG5LO7nhYBgiWVfxF80DKPF8zx1t/pUd2FYy73emg3zrjtM6dzmYgbHKfV2rxiHQA==}
dependencies:
boolbase: 1.0.0
css-what: 2.1.3
domutils: 1.5.1
nth-check: 1.0.2
dev: false
/css-select@4.3.0:
resolution: {integrity: sha512-wPpOYtnsVontu2mODhA19JrqWxNsfdatRKd64kmpRbQgh1KtItko5sTnEpPdpSaJszTOhEMlF/RPz28qj4HqhQ==}
dependencies:
@ -12136,6 +12183,10 @@ packages:
source-map-js: 1.0.2
dev: false
/css-what@2.1.3:
resolution: {integrity: sha512-a+EPoD+uZiNfh+5fxw2nO9QwFa6nJe2Or35fGY6Ipw1R3R4AGz1d1TEZrCegvw2YTmZ0jXirGYlzxxpYSHwpEg==}
dev: false
/css-what@6.1.0:
resolution: {integrity: sha512-HTUrgRJ7r4dsZKU6GjmpfRK1O76h97Z8MfS1G0FozR+oF2kG6Vfe8JE6zwrkbxigziPHinCJ+gCPjA9EaBDtRw==}
engines: {node: '>= 6'}
@ -12636,6 +12687,16 @@ packages:
esutils: 2.0.3
dev: false
/docx4js@3.2.20:
resolution: {integrity: sha512-u7kfMRYAHsczusgnrRAnZ0bXSF1HqiQmKtAI5LNIoQfrYXUHmaukcXnJR98KEFW3fSVdYKuLyHP9vKhrJKzetw==}
dependencies:
cfb: 0.12.1
cheerio: 0.22.0
color: 3.2.1
htmlparser2: 3.10.1
jszip: 2.7.0
dev: false
/dom-accessibility-api@0.5.16:
resolution: {integrity: sha512-X7BJ2yElsnOJ30pZF4uIIDfBEVgF4XEBxL9Bxhy6dnrm5hkzqmsWHGTiHqRiITNhMyFLyAiWndIJP7Z1NTteDg==}
dev: false
@ -12646,6 +12707,20 @@ packages:
utila: 0.4.0
dev: false
/dom-serializer@0.1.1:
resolution: {integrity: sha512-l0IU0pPzLWSHBcieZbpOKgkIn3ts3vAh7ZuFyXNwJxJXk/c4Gwj9xaTJwIDVQCXawWD0qb3IzMGH5rglQaO0XA==}
dependencies:
domelementtype: 1.3.1
entities: 1.1.2
dev: false
/dom-serializer@0.2.2:
resolution: {integrity: sha512-2/xPb3ORsQ42nHYiSunXkDjPLBaEj/xTwUO4B7XCZQTRk7EBtTOPaygh10YAAh2OI1Qrp6NWfpAhzswj0ydt9g==}
dependencies:
domelementtype: 2.3.0
entities: 2.2.0
dev: false
/dom-serializer@1.4.1:
resolution: {integrity: sha512-VHwB3KfrcOOkelEG2ZOfxqLZdfkil8PtJi4P8N2MMXucZq2yLp75ClViUlOVwyoHEDjYU433Aq+5zWP61+RGag==}
dependencies:
@ -12709,6 +12784,20 @@ packages:
domelementtype: 2.3.0
dev: false
/domutils@1.5.1:
resolution: {integrity: sha512-gSu5Oi/I+3wDENBsOWBiRK1eoGxcywYSqg3rR960/+EfY0CF4EX1VPkgHOZ3WiS/Jg2DtliF6BhWcHlfpYUcGw==}
dependencies:
dom-serializer: 0.1.1
domelementtype: 1.3.1
dev: false
/domutils@1.7.0:
resolution: {integrity: sha512-Lgd2XcJ/NjEw+7tFvfKxOzCYKZsdct5lczQ2ZaQY8Djz7pfAD3Gbp8ySJWtreII/vDlMVmxwa6pHmdxIYgttDg==}
dependencies:
dom-serializer: 0.2.2
domelementtype: 1.3.1
dev: false
/domutils@2.8.0:
resolution: {integrity: sha512-w96Cjofp72M5IIhpjgobBimYEfoPjx1Vx0BSX9P30WBdZW2WIKU0T1Bd0kz2eNZ9ikjKgHbEyKx8BB6H1L3h3A==}
dependencies:
@ -13056,6 +13145,10 @@ packages:
strip-ansi: 6.0.1
dev: false
/entities@1.1.2:
resolution: {integrity: sha512-f2LZMYl1Fzu7YSBKg+RoROelpOaNrcGmE9AZubeDfrCEia483oW4MI4VyFd5VNHIgQ/7qm1I0wUHK1eJnn2y2w==}
dev: false
/entities@2.2.0:
resolution: {integrity: sha512-p92if5Nz619I0w+akJrLZH0MX0Pb5DX39XOwQTtXSdQQOaYH03S1uIQp4mhOZtAXrxq4ViO67YTiLBo2638o9A==}
dev: false
@ -15402,6 +15495,17 @@ packages:
entities: 2.2.0
dev: false
/htmlparser2@3.10.1:
resolution: {integrity: sha512-IgieNijUMbkDovyoKObU1DUhm1iwNYE/fuifEoEHfd1oZKZDaONBSkal7Y01shxsM49R4XaMdGez3WnF9UfiCQ==}
dependencies:
domelementtype: 1.3.1
domhandler: 2.4.2
domutils: 1.7.0
entities: 1.1.2
inherits: 2.0.4
readable-stream: 3.6.2
dev: false
/htmlparser2@6.1.0:
resolution: {integrity: sha512-gyyPk6rgonLFEDGoeRgQNaEUvdJ4ktTmmUh/h2t7s+M8oPpIPxgNACWa+6ESR57kXstwqPiCut0V8NRpcwgU7A==}
dependencies:
@ -16991,6 +17095,12 @@ packages:
object.values: 1.2.0
dev: false
/jszip@2.7.0:
resolution: {integrity: sha512-JIsRKRVC3gTRo2vM4Wy9WBC3TRcfnIZU8k65Phi3izkvPH975FowRYtKGT6PxevA0XnJ/yO8b0QwV0ydVyQwfw==}
dependencies:
pako: 1.0.11
dev: false
/jszip@3.10.1:
resolution: {integrity: sha512-xXDvecyTpGLrqFrvkrUSoxxfJI5AH7U8zxxtVclpsUtMCq4JQ290LY8AW5c7Ggnr/Y/oK+bQMbqK2qmtk3pN4g==}
dependencies:
@ -17384,14 +17494,38 @@ packages:
p-locate: 6.0.0
dev: false
/lodash.assignin@4.2.0:
resolution: {integrity: sha512-yX/rx6d/UTVh7sSVWVSIMjfnz95evAgDFdb1ZozC35I9mSFCkmzptOzevxjgbQUsc78NR44LVHWjsoMQXy9FDg==}
dev: false
/lodash.bind@4.2.1:
resolution: {integrity: sha512-lxdsn7xxlCymgLYo1gGvVrfHmkjDiyqVv62FAeF2i5ta72BipE1SLxw8hPEPLhD4/247Ijw07UQH7Hq/chT5LA==}
dev: false
/lodash.debounce@4.0.8:
resolution: {integrity: sha512-FT1yDzDYEoYWhnSGnpE/4Kj1fLZkDFyqRb7fNt6FdYOSxlUWAtp42Eh6Wb0rGIv/m9Bgo7x4GhQbm5Ys4SG5ow==}
dev: false
/lodash.defaults@4.2.0:
resolution: {integrity: sha512-qjxPLHd3r5DnsdGacqOMU6pb/avJzdh9tFX2ymgoZE27BmjXrNy/y4LoaiTeAb+O3gL8AfpJGtqfX/ae2leYYQ==}
dev: false
/lodash.escaperegexp@4.1.2:
resolution: {integrity: sha512-TM9YBvyC84ZxE3rgfefxUWiQKLilstD6k7PTGt6wfbtXF8ixIJLOL3VYyV/z+ZiPLsVxAsKAFVwWlWeb2Y8Yyw==}
dev: false
/lodash.filter@4.6.0:
resolution: {integrity: sha512-pXYUy7PR8BCLwX5mgJ/aNtyOvuJTdZAo9EQFUvMIYugqmJxnrYaANvTbgndOzHSCSR0wnlBBfRXJL5SbWxo3FQ==}
dev: false
/lodash.flatten@4.4.0:
resolution: {integrity: sha512-C5N2Z3DgnnKr0LOpv/hKCgKdb7ZZwafIrsesve6lmzvZIRZRGaZ/l6Q8+2W7NaT+ZwO3fFlSCzCzrDCFdJfZ4g==}
dev: false
/lodash.foreach@4.5.0:
resolution: {integrity: sha512-aEXTF4d+m05rVOAUG3z4vZZ4xVexLKZGF0lIxuHZ1Hplpk/3B6Z1+/ICICYRLm7c41Z2xiejbkCkJoTlypoXhQ==}
dev: false
/lodash.includes@4.3.0:
resolution: {integrity: sha512-W3Bx6mdkRTGtlJISOvVD/lbqjTlPPUDTMnlXZFnVwi9NKJ6tiAk6LVdlhZMm17VZisqhKcgzpO5Wz91PCt5b0w==}
dev: false
@ -17420,6 +17554,10 @@ packages:
resolution: {integrity: sha512-0wJxfxH1wgO3GrbuP+dTTk7op+6L41QCXbGINEmD+ny/G/eCqGzxyCsh7159S+mgDDcoarnBw6PC1PS5+wUGgw==}
dev: false
/lodash.map@4.6.0:
resolution: {integrity: sha512-worNHGKLDetmcEYDvh2stPCrrQRkP20E4l0iIS7F8EvzMqBBi7ltvFN5m1HvTf1P7Jk1txKhvFcmYsCr8O2F1Q==}
dev: false
/lodash.memoize@4.1.2:
resolution: {integrity: sha512-t7j+NzmgnQzTAYXcsHYLgimltOV1MXHtlOWf6GjL9Kj8GK5FInw5JotxvbOs+IvV1/Dzo04/fCGfLVs7aXb4Ag==}
dev: false
@ -17432,6 +17570,22 @@ packages:
resolution: {integrity: sha512-Sb487aTOCr9drQVL8pIxOzVhafOjZN9UU54hiN8PU3uAiSV7lx1yYNpbNmex2PK6dSJoNTSJUUswT651yww3Mg==}
dev: false
/lodash.pick@4.4.0:
resolution: {integrity: sha512-hXt6Ul/5yWjfklSGvLQl8vM//l3FtyHZeuelpzK6mm99pNvN9yTDruNZPEJZD1oWrqo+izBmB7oUfWgcCX7s4Q==}
dev: false
/lodash.reduce@4.6.0:
resolution: {integrity: sha512-6raRe2vxCYBhpBu+B+TtNGUzah+hQjVdu3E17wfusjyrXBka2nBS8OH/gjVZ5PvHOhWmIZTYri09Z6n/QfnNMw==}
dev: false
/lodash.reject@4.6.0:
resolution: {integrity: sha512-qkTuvgEzYdyhiJBx42YPzPo71R1aEr0z79kAv7Ixg8wPFEjgRgJdUsGMG3Hf3OYSF/kHI79XhNlt+5Ar6OzwxQ==}
dev: false
/lodash.some@4.6.0:
resolution: {integrity: sha512-j7MJE+TuT51q9ggt4fSgVqro163BEFjAt3u97IqU+JA2DkWl80nFTrowzLpZ/BnpN7rrl0JA/593NAdd8p/scQ==}
dev: false
/lodash@4.17.21:
resolution: {integrity: sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==}
dev: false
@ -18271,6 +18425,12 @@ packages:
path-key: 4.0.0
dev: false
/nth-check@1.0.2:
resolution: {integrity: sha512-WeBOdju8SnzPN5vTUJYxYUxLeXpCaVP5i5e0LF8fg7WORF2Wd7wFX/pk0tYZk7s8T+J7VLy0Da6J1+wCT0AtHg==}
dependencies:
boolbase: 1.0.0
dev: false
/nth-check@2.1.1:
resolution: {integrity: sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w==}
dependencies:
@ -19358,6 +19518,12 @@ packages:
engines: {node: '>= 0.8'}
dev: false
/printj@1.1.2:
resolution: {integrity: sha512-zA2SmoLaxZyArQTOPj5LXecR+RagfPSU5Kw1qP+jkWeNlrq+eJZyY2oS68SU1Z/7/myXM4lo9716laOFAVStCQ==}
engines: {node: '>=0.8'}
hasBin: true
dev: false
/process-nextick-args@2.0.1:
resolution: {integrity: sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag==}
dev: false
@ -28764,7 +28930,7 @@ packages:
dev: false
file:projects/pod-calendar.tgz(bufferutil@4.0.8)(ts-node@10.9.2)(utf-8-validate@6.0.4):
resolution: {integrity: sha512-a3IO+LV4RLUrLCIwlzK6HfwU0q4QkepgRaQi4/ybG+He0Oqy/eWvZnHHXry0FjArSrZjr2QOnxOlYsipXv/zfg==, tarball: file:projects/pod-calendar.tgz}
resolution: {integrity: sha512-DOvMMTPpDOAuO8Vi8gEBTXBMQATYdZgtNWxvUyG0xbCArW9jEmKbrOVBV1ydZGOJqfqhennjv09+uWR/YUR+SQ==, tarball: file:projects/pod-calendar.tgz}
id: file:projects/pod-calendar.tgz
name: '@rush-temp/pod-calendar'
version: 0.0.0
@ -28994,7 +29160,7 @@ packages:
dev: false
file:projects/pod-gmail.tgz(bufferutil@4.0.8)(ts-node@10.9.2)(utf-8-validate@6.0.4):
resolution: {integrity: sha512-EgPrNV5SyE8PVkKfTc30asvvzcTaDEZnQ1krUOi8rIzQcgLuPIRgMwTc6yuqu3SAzC2zPrwudTMqzDPpwJTRxw==, tarball: file:projects/pod-gmail.tgz}
resolution: {integrity: sha512-ybpB+uVlKzWvEVZnrp6iPuDYR7OwWBEsHl3ivSmte8BhfH9Q5QfzBm/FrEUsyEBIN4KS/cGDUoNGv5Us5OVWVw==, tarball: file:projects/pod-gmail.tgz}
id: file:projects/pod-gmail.tgz
name: '@rush-temp/pod-gmail'
version: 0.0.0
@ -29056,7 +29222,7 @@ packages:
dev: false
file:projects/pod-love.tgz(bufferutil@4.0.8)(utf-8-validate@6.0.4):
resolution: {integrity: sha512-h64U5de7eDYWI5zbLFQtUEArWz+OVeQ3BWtCd2yy9Yw3uzgPOE+etmib6KtcC0MEcDSl3ZNN0UOrV1IDQzy+nQ==, tarball: file:projects/pod-love.tgz}
resolution: {integrity: sha512-g12q7ZxpvWr3/5nAZazdcKgw2rC4LVpWLPYs7qYtm4avIIGeu4FIT8WfV0E4wXPVdjdV4CX2WwZ7+1zSgDxYCA==, tarball: file:projects/pod-love.tgz}
id: file:projects/pod-love.tgz
name: '@rush-temp/pod-love'
version: 0.0.0
@ -29282,7 +29448,7 @@ packages:
dev: false
file:projects/pod-telegram.tgz(bufferutil@4.0.8)(ts-node@10.9.2)(utf-8-validate@6.0.4):
resolution: {integrity: sha512-qt/UwzvrmgaVI2yI5vdGPHaghv5JF+ysgUY9GwwarkzqlZNBScnUo262CYNWy26iPdhVqNbvGYPrAgVecGHBFg==, tarball: file:projects/pod-telegram.tgz}
resolution: {integrity: sha512-mBbMXZhgRl0R9gvFN+uxfdFHgXdF9cFyC200rXnS4PgGkGVm9jRmE8lJQTxiOOJyX8rPDm8qlLhCrlKyof+pEQ==, tarball: file:projects/pod-telegram.tgz}
id: file:projects/pod-telegram.tgz
name: '@rush-temp/pod-telegram'
version: 0.0.0
@ -29733,7 +29899,7 @@ packages:
dev: false
file:projects/qms-doc-import-tool.tgz:
resolution: {integrity: sha512-s2EDYV09exzo01lbGDlzMq9D0cx2OILHS8bqQCgntVVWEK9nQypMd4gi/nCbmVhyHRapQih7D81Nf6zuSUohJg==, tarball: file:projects/qms-doc-import-tool.tgz}
resolution: {integrity: sha512-jxeLHsk5jNj+ABvXoCiX9VTv7ovVKEdkwddcmwpdTGjt7hBgtYsirnVxkryV8tfWMs5y99wjroKGfwSqtyzzOg==, tarball: file:projects/qms-doc-import-tool.tgz}
name: '@rush-temp/qms-doc-import-tool'
version: 0.0.0
dependencies:
@ -29747,6 +29913,7 @@ packages:
'@typescript-eslint/parser': 6.21.0(eslint@8.56.0)(typescript@5.3.3)
commander: 8.3.0
cross-env: 7.0.3
docx4js: 3.2.20
domhandler: 5.0.3
domutils: 3.1.0
esbuild: 0.20.1

View File

@ -65,6 +65,7 @@
"form-data": "^4.0.0",
"htmlparser2": "^9.0.0",
"mammoth": "^1.6.0",
"docx4js": "^3.2.20",
"node-fetch": "^2.6.6",
"zod": "^3.22.4"
}

View File

@ -0,0 +1,8 @@
_Note: if VS Code fails to resolve the `docx4js.d.ts` types, the following fragment needs to be added to `compilerOptions` in `tsconfig.json`:_
```
"typeRoots": [
"./src/type",
"./node_modules/@types"
],
```

View File

@ -14,6 +14,4 @@
//
import { docImportTool } from '.'
const productId = process.env.PRODUCT_ID ?? 'ezqms'
docImportTool(productId)
docImportTool()

View File

@ -1,5 +1,8 @@
import docx4js from 'docx4js'
import { AnyNode } from 'domhandler'
import extract from './extract/extract'
import { read } from './extract/types'
import { MetadataContainer, read } from './extract/types'
import importExtractedFile from './import'
import convert from './convert/convert'
import { Config } from './config'
@ -10,9 +13,15 @@ export async function importDoc (config: Config): Promise<void> {
const spec = await read(specFile)
console.log(`Spec: ${JSON.stringify(spec, undefined, 2)}`)
let headerRoot: AnyNode | undefined
if (spec.metadata.in === MetadataContainer.PageHeaderTableRow) {
const headerIdx = spec.metadata.headerIdx ?? 1
const docx = await docx4js.load(config.doc)
headerRoot = docx.getObjectPart(`word/header${headerIdx}.xml`).root()[0]
}
const contents = await convert(doc, backend)
const extractedFile = await extract(contents, spec)
// console.log(`Extracted data: ${JSON.stringify(extractedFile, undefined, 2)}`)
const extractedFile = await extract(contents, spec, headerRoot)
await importExtractedFile(config, extractedFile)
}

View File

@ -1,5 +1,5 @@
import { parseDocument } from 'htmlparser2'
import { Document } from 'domhandler'
import { AnyNode, Document } from 'domhandler'
import { FileSpec, FileSpecType, TocFileSpec } from './types'
import { createMetadataExtractor } from './meta'
@ -28,10 +28,10 @@ class TocContentExtractor implements ContentExtractor {
readonly type = FileSpecType.TOC
) {}
extract (doc: Document): ExtractedFile {
extract (doc: Document, headerRoot?: AnyNode): ExtractedFile {
const metadataExtractor = createMetadataExtractor(this.spec.metadata)
const title = metadataExtractor.extractName(doc)
const oldId = metadataExtractor.extractId(doc)
const title = metadataExtractor.extractName(doc, headerRoot)
const oldId = metadataExtractor.extractId(doc, headerRoot)
const docSpec = this.spec.spec
@ -59,10 +59,10 @@ class TocContentExtractor implements ContentExtractor {
* @public
* Extracts HTML file contents
*/
export async function extract (contents: string, spec: FileSpec): Promise<ExtractedFile> {
export async function extract (contents: string, spec: FileSpec, headerRoot?: AnyNode): Promise<ExtractedFile> {
const extractor = new TocContentExtractor(spec)
const doc = parseDocument(contents)
return extractor.extract(doc)
return extractor.extract(doc, headerRoot)
}
export default extract

View File

@ -1,8 +1,15 @@
import { Document, Element } from 'domhandler'
import { AnyNode, Document, Element, Text } from 'domhandler'
import { find } from 'domutils'
import { ElementType } from 'htmlparser2'
import { DocMetadataSpec, MetadataContainer, DocTableRowMetadata, DocMetaTagsMetadata } from './types'
import {
DocMetadataSpec,
MetadataContainer,
DocTableRowMetadata,
DocMetaTagsMetadata,
PageHeaderTableRowMetadata,
MetadataTableCell
} from './types'
import { ELEMENT_LIMIT } from './common'
import { TableNodeExtractor } from './nodes'
import { TableContainer } from './container'
@ -73,7 +80,37 @@ export class TableRowDocMetadataExtractor implements DocMetadataExtractor {
}
}
type AnyDocMetadataExtractor = MetaTagsDocMetadataExtractor | TableRowDocMetadataExtractor
const maxElems = 10000
/**
 * Extracts the document name and id from a table located in a page header
 * part of the original document (an XML tree with `w:tr` rows and `w:tc` cells),
 * rather than from the converted HTML body.
 */
export class PageHeaderTableRowDocMetadataExtractor implements DocMetadataExtractor {
  constructor (readonly tableMetadata: PageHeaderTableRowMetadata) {}

  /**
   * Returns the text of the header table cell addressed by `meta.extract`.
   *
   * Rows are all `w:tr` elements found recursively under `headerRoot`; cells are
   * all `w:tc` elements found recursively under the selected row. The text of
   * every text node inside the cell is concatenated, and the optional `slice`
   * is applied to the result.
   *
   * @param meta - cell coordinates (`row`, `col`) and optional `slice` bounds
   * @param headerRoot - root node of the parsed page header; when absent an empty string is returned
   * @returns the (optionally sliced) cell text, or `''` when there is no header root
   * @throws Error when the requested row or column does not exist in the header table
   */
  private getCellText (meta: MetadataTableCell, headerRoot?: AnyNode): string {
    if (headerRoot === undefined) {
      return ''
    }

    const { row, col, slice } = meta.extract

    const rows = find((n) => n.type === ElementType.Tag && n.name === 'w:tr', [headerRoot], true, maxElems)
    // Index access is typed as always defined, but a spec pointing outside the
    // table yields undefined here; fail with a clear message instead of letting
    // the predicate below dereference undefined.
    const rowNode: AnyNode | undefined = rows[row]
    if (rowNode === undefined) {
      throw new Error(`Page header table has no row ${row} (rows found: ${rows.length})`)
    }

    const cells = find((n) => n.type === ElementType.Tag && n.name === 'w:tc', [rowNode], true, maxElems)
    const cell: AnyNode | undefined = cells[col]
    if (cell === undefined) {
      throw new Error(`Page header table row ${row} has no column ${col} (cells found: ${cells.length})`)
    }

    const textNodes = find((n) => n.type === ElementType.Text, [cell], true, maxElems) as Text[]
    const text = textNodes.map((n) => n.data).join('')

    return slice === undefined ? text : text.slice(slice.start, slice.end)
  }

  /**
   * Reads the document name from the header cell configured in `tableMetadata.docName`.
   * The `doc` argument is not used; the value comes from `headerRoot` only.
   */
  extractName (doc: Document, headerRoot?: AnyNode): string {
    return this.getCellText(this.tableMetadata.docName, headerRoot)
  }

  /**
   * Reads the document id from the header cell configured in `tableMetadata.docId`.
   * The `doc` argument is not used; the value comes from `headerRoot` only.
   */
  extractId (doc: Document, headerRoot?: AnyNode): string {
    return this.getCellText(this.tableMetadata.docId, headerRoot)
  }
}
type AnyDocMetadataExtractor =
| MetaTagsDocMetadataExtractor
| TableRowDocMetadataExtractor
| PageHeaderTableRowDocMetadataExtractor
export function createMetadataExtractor (metadata: DocMetadataSpec): AnyDocMetadataExtractor {
switch (metadata.in) {
@ -81,5 +118,7 @@ export function createMetadataExtractor (metadata: DocMetadataSpec): AnyDocMetad
return new MetaTagsDocMetadataExtractor(metadata)
case MetadataContainer.TableRow:
return new TableRowDocMetadataExtractor(metadata)
case MetadataContainer.PageHeaderTableRow:
return new PageHeaderTableRowDocMetadataExtractor(metadata)
}
}

View File

@ -103,7 +103,7 @@ export class TableNodeExtractor implements NodeExtractor {
private parseRows (table: AnyDomNode): AnyNode[][] {
const header = findOne((n) => n.tagName === 'thead', [table])
const body = findOne((n) => n.tagName === 'tbody', [table])
const bodyRows =
let bodyRows =
body != null
? getChildren(body).filter((n) => clean(innerText(n)) !== '')
: findAll((n) => n.tagName === 'tr' && clean(innerText(n)) !== '', [table])
@ -111,14 +111,28 @@ export class TableNodeExtractor implements NodeExtractor {
if (header != null) {
const firstRow = findOne((n) => n.tagName === 'tr', [header])
if (bodyRows.length > 0) {
if (getChildren(bodyRows[0]).find((n) => n.type === ElementType.Tag && n.tagName === 'th') != null) {
bodyRows = bodyRows.slice(1)
}
}
return [
findAll((n) => n.tagName === 'th', firstRow != null ? [firstRow] : []),
...bodyRows.map((r) => getChildren(r).filter((n) => n.type === ElementType.Tag && n.tagName === 'td'))
...bodyRows.map((r) =>
getChildren(r).filter((n) => n.type === ElementType.Tag && (n.tagName === 'td' || n.tagName === 'th'))
)
]
} else if (bodyRows.length > 0) {
return [
getChildren(bodyRows[0]).filter((n) => n.type === ElementType.Tag && n.tagName === 'td'),
...bodyRows.slice(1).map((r) => getChildren(r).filter((n) => n.type === ElementType.Tag && n.tagName === 'td'))
getChildren(bodyRows[0]).filter(
(n) => n.type === ElementType.Tag && (n.tagName === 'td' || n.tagName === 'th')
),
...bodyRows
.slice(1)
.map((r) =>
getChildren(r).filter((n) => n.type === ElementType.Tag && (n.tagName === 'td' || n.tagName === 'th'))
)
]
}

View File

@ -114,7 +114,8 @@ export type TocSectionSpec = z.infer<typeof tocSection>
export enum MetadataContainer {
MetaTags = 'meta-tags',
TableRow = 'table-row'
TableRow = 'table-row',
PageHeaderTableRow = 'page-header-table-row'
}
const metaTagsMetadata = z.object({
@ -126,9 +127,16 @@ const metaTagsMetadata = z.object({
const metadataTableCell = z.object({
extract: z.object({
row: z.number().min(0),
col: z.number().min(0)
col: z.number().min(0),
slice: z
.object({
start: z.number().min(0).optional(),
end: z.number().min(0).optional()
})
.optional()
})
})
export type MetadataTableCell = z.infer<typeof metadataTableCell>
const tableRowMetadata = z.object({
in: z.literal(MetadataContainer.TableRow),
@ -137,11 +145,19 @@ const tableRowMetadata = z.object({
docId: metadataTableCell
})
const docMetadata = z.union([metaTagsMetadata, tableRowMetadata])
const pageHeaderTableRowMetadata = z.object({
in: z.literal(MetadataContainer.PageHeaderTableRow),
headerIdx: z.number().min(1).optional(),
docName: metadataTableCell,
docId: metadataTableCell
})
const docMetadata = z.union([metaTagsMetadata, tableRowMetadata, pageHeaderTableRowMetadata])
export type DocMetadataSpec = z.infer<typeof docMetadata>
export type DocMetaTagsMetadata = z.infer<typeof metaTagsMetadata>
export type DocTableRowMetadata = z.infer<typeof tableRowMetadata>
export type PageHeaderTableRowMetadata = z.infer<typeof pageHeaderTableRowMetadata>
// #endregion

View File

@ -41,18 +41,13 @@ import { compareStrExact, uploadFile } from './helpers'
export default async function importExtractedFile (config: Config, extractedFile: ExtractedFile): Promise<void> {
const { workspaceId } = config
const token = generateToken(systemAccountEmail, workspaceId)
const transactorUrl = await getTransactorEndpoint(token)
const transactorUrl = await getTransactorEndpoint(token, 'external')
console.log(`Connecting to transactor: ${transactorUrl} (ws: '${workspaceId.name}')`)
const connection = (await createClient(transactorUrl, token)) as CoreClient & BackupClient
try {
console.log(`Connected to ${transactorUrl}`)
const txops = new TxOperations(connection, core.account.System)
try {
@ -73,17 +68,18 @@ async function createDocument (
config: Config
): Promise<Ref<Document>> {
const { owner, space } = config
console.log('Creating document from extracted data')
const templateId = await createTemplateIfNotExist(txops, extractedFile.prefix, config)
const { title, prefix } = extractedFile
const { title, prefix, oldId } = extractedFile
const docId: Ref<ControlledDocument> = generateId()
const ccRecordId = generateId<ChangeControl>()
const data: AttachedData<ControlledDocument> = {
title,
prefix,
code: '',
code: oldId,
seqNumber: 0,
major: 0,
minor: 1,
@ -95,7 +91,7 @@ async function createDocument (
reviewers: [],
approvers: [],
coAuthors: [],
changeControl: '' as Ref<ChangeControl>,
changeControl: ccRecordId,
author: owner,
owner,
category: '' as Ref<DocumentCategory>,
@ -103,13 +99,13 @@ async function createDocument (
effectiveDate: 0,
reviewInterval: DEFAULT_PERIODIC_REVIEW_INTERVAL,
content: getCollaborativeDoc(generateId()),
snapshots: 0
snapshots: 0,
plannedEffectiveDate: 0
}
const ccRecordId = generateId<ChangeControl>()
const ccRecord: Data<ChangeControl> = {
description: '',
reason: '', // TODO: move to config
reason: 'Imported document', // TODO: move to config
impact: '',
impactedDocuments: []
}
@ -135,29 +131,28 @@ async function createTemplateIfNotExist (
): Promise<Ref<DocumentTemplate>> {
const { owner, space } = config
console.log(`Getting template ${prefix}`)
console.log(`Getting template with doc ${prefix}`)
const template = await txops.findOne(documents.mixin.DocumentTemplate, { prefix })
const template = await txops.findOne(documents.mixin.DocumentTemplate, { docPrefix: prefix })
if (template != null) {
return template._id
}
console.log(`Creating template with prefix: ${prefix}`)
console.log(`Creating template with doc prefix: ${prefix}`)
const ccRecordId = generateId<ChangeControl>()
const ccRecord: Data<ChangeControl> = {
description: '',
reason: '', // TODO: move to config
reason: 'Imported template', // TODO: move to config
impact: '',
impactedDocuments: []
}
const templateId: Ref<ControlledDocument> = generateId()
const category = '' as Ref<DocumentCategory> // TODO: move to config
const data: AttachedData<ControlledDocument> = {
prefix: 'IMP',
const data = {
title: 'Import template',
code: templateId,
code: '',
seqNumber: 0,
sections: 0,
category,
@ -172,12 +167,13 @@ async function createTemplateIfNotExist (
coAuthors: [],
changeControl: ccRecordId,
content: getCollaborativeDoc(generateId()),
snapshots: 0
snapshots: 0,
plannedEffectiveDate: 0
}
const { success } = await createDocumentTemplate(
txops,
documents.class.Document,
documents.class.ControlledDocument,
space,
documents.mixin.DocumentTemplate,
documents.ids.NoProject,

View File

@ -28,16 +28,16 @@ import { getBackend } from './convert/convert'
/**
* @public
*/
export function docImportTool (productId: string): void {
export function docImportTool (): void {
const serverSecret = process.env.SERVER_SECRET
if (serverSecret === undefined) {
console.error('please provide server secret')
process.exit(1)
}
const accountUrl = process.env.ACCOUNT_URL
const accountUrl = process.env.ACCOUNTS_URL
if (accountUrl === undefined) {
console.error('please provide transactor url')
console.error('please provide account url')
process.exit(1)
}
@ -64,7 +64,7 @@ export function docImportTool (productId: string): void {
program
.command('import <doc> <workspace> <owner>')
.description('import doc into workspace')
.option('-s|--spec <specFile>', 'Specification file')
.option('-s|--spec <spec>', 'Specification file')
.option('-b|--backend <backend>', 'Conversion backend', 'pandoc')
.option('--space <space>', 'Doc space ID', documents.space.QualityDocuments)
.action(
@ -72,22 +72,22 @@ export function docImportTool (productId: string): void {
doc: string,
workspace: string,
owner: Ref<Employee>,
cmd: { backend: string, space: Ref<DocumentSpace>, specFile?: string }
cmd: { backend: string, space: Ref<DocumentSpace>, spec?: string }
) => {
console.log(
`Importing document '${doc}' into workspace '${workspace}', owner: ${JSON.stringify(owner)}, spec: ${
cmd.specFile
cmd.spec
}, space: ${cmd.space}, backend: ${cmd.backend}`
)
try {
const workspaceId = getWorkspaceId(workspace, productId)
const workspaceId = getWorkspaceId(workspace)
const config: Config = {
doc,
workspaceId,
owner,
backend: getBackend(cmd.backend),
specFile: cmd.specFile,
specFile: cmd.spec,
space: cmd.space,
uploadURL: uploadUrl,
collaboratorApiURL: collaboratorApiUrl,

View File

@ -0,0 +1,3 @@
// Ambient module declaration for the 'docx4js' package so it can be imported
// from TypeScript code.
// NOTE(review): `export = any` presumably intends to type the whole module as
// `any`; confirm this type-checks when declaration files are checked
// (i.e. without `skipLibCheck`), since `any` is used here in value position.
declare module 'docx4js' {
export = any
}

View File

@ -0,0 +1,34 @@
{
"prefix": "SOP5",
"type": "toc",
"metadata": {
"in": "page-header-table-row",
"headerIdx": 2,
"docName": {
"extract": { "row": 2, "col": 1 }
},
"docId": {
"extract": { "row": 2, "col": 0 }
}
},
"spec": {
"toc": {
"type": "toc",
"node": {
"type": "toc-paragraph-seq",
"params": {
"sectionHeaders": {
"tags": ["h1"]
},
"start": {
"patterns": ["Table of contents", "Table des matières", "CONTENTS"],
"tags": ["h1", "h2", "h3", "p"]
},
"end": {
"tags": ["h1"]
}
}
}
}
}
}

View File

@ -148,7 +148,6 @@ async function createProductChangeControlTemplate (tx: TxOperations): Promise<vo
approvers: [],
coAuthors: [],
code: `TMPL-${seq.sequence + 1}`,
prefix: '',
seqNumber: 0,
major: 0,
minor: 1,

View File

@ -206,7 +206,7 @@ export async function createDocumentTemplate (
parent: Ref<ProjectDocument> | undefined,
templateId: Ref<ControlledDocument>,
prefix: string,
spec: AttachedData<ControlledDocument>,
spec: Omit<AttachedData<ControlledDocument>, 'prefix'>,
category: Ref<DocumentCategory>,
author?: Ref<Employee>,
defaultSection?: { title: string }
@ -224,6 +224,7 @@ export async function createDocumentTemplate (
)
const seqNumber = (incResult as any).object.sequence as number
const collaborativeDocId = getCollaborativeDocForDocument('TPL-DOC', seqNumber, 0, 1)
const code = spec.code === '' ? `${TEMPLATE_PREFIX}-${seqNumber}` : spec.code
let path: Array<Ref<DocumentMeta>> = []
@ -239,7 +240,7 @@ export async function createDocumentTemplate (
})
ops.notMatch(documents.class.Document, {
code: spec.code
code
})
ops.notMatch(documents.mixin.DocumentTemplate, {
@ -280,6 +281,7 @@ export async function createDocumentTemplate (
'documents',
{
...spec,
code,
seqNumber,
category,
prefix: TEMPLATE_PREFIX,