mirror of
https://github.com/hcengineering/platform.git
synced 2025-04-22 00:10:37 +00:00
ezqms-1171: drop h4-h6 during import of controlled doc (#6487)
Signed-off-by: Alexey Zinoviev <alexey.zinoviev@xored.com>
This commit is contained in:
parent
3eb7d6e965
commit
b8b196ecd9
@ -1,5 +1,6 @@
|
|||||||
import { parseDocument } from 'htmlparser2'
|
import { parseDocument } from 'htmlparser2'
|
||||||
import { AnyNode, Document } from 'domhandler'
|
import { AnyNode, Document } from 'domhandler'
|
||||||
|
import { findAll } from 'domutils'
|
||||||
|
|
||||||
import { FileSpec, FileSpecType, TocFileSpec } from './types'
|
import { FileSpec, FileSpecType, TocFileSpec } from './types'
|
||||||
import { createMetadataExtractor } from './meta'
|
import { createMetadataExtractor } from './meta'
|
||||||
@ -62,6 +63,13 @@ class TocContentExtractor implements ContentExtractor {
|
|||||||
export async function extract (contents: string, spec: FileSpec, headerRoot?: AnyNode): Promise<ExtractedFile> {
|
export async function extract (contents: string, spec: FileSpec, headerRoot?: AnyNode): Promise<ExtractedFile> {
|
||||||
const extractor = new TocContentExtractor(spec)
|
const extractor = new TocContentExtractor(spec)
|
||||||
const doc = parseDocument(contents)
|
const doc = parseDocument(contents)
|
||||||
|
|
||||||
|
// Heading levels deeper than h3 are not supported, so
|
||||||
|
// traverse all of the document's descendants and replace every h4-h6 with a paragraph
|
||||||
|
findAll((n) => ['h4', 'h5', 'h6'].includes(n.tagName), doc.childNodes).forEach((node) => {
|
||||||
|
node.name = 'p'
|
||||||
|
})
|
||||||
|
|
||||||
return extractor.extract(doc, headerRoot)
|
return extractor.extract(doc, headerRoot)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user