From c809de247d75cb251856b3e23e07769489ed8823 Mon Sep 17 00:00:00 2001
From: akhismat
Date: Tue, 10 Sep 2024 00:32:51 +0700
Subject: [PATCH] Support import from Notion (#6498)

---
 .vscode/launch.json               |  25 ++
 dev/tool/package.json             |   1 +
 dev/tool/src/index.ts             |  84 +++-
 dev/tool/src/notion.ts            | 653 ++++++++++++++++++++++++++++++
 packages/text/src/markup/model.ts |   1 +
 5 files changed, 763 insertions(+), 1 deletion(-)
 create mode 100644 dev/tool/src/notion.ts

diff --git a/.vscode/launch.json b/.vscode/launch.json
index 17bdb15d4c..7f3d2c0ac7 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -4,6 +4,30 @@
   // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
   "version": "0.2.0",
   "configurations": [
+    {
+      "name": "Debug notion import",
+      "type": "node",
+      "request": "launch",
+      // "args": ["src/__start.ts", "import-notion-to-teamspace", "/home/anna/work/notion/natalya/Export-fad9ecb4-a1a5-4623-920d-df32dd423743", "-ws", "w-user1-ws1-66d8018b-ce1e0c3164-006bb0", "-ts", "notion"],
+      "args": ["src/__start.ts", "import-notion", "/home/anna/work/notion/natalya/Export-fad9ecb4-a1a5-4623-920d-df32dd423743", "-ws", "w-user1-ws1-66d8018b-ce1e0c3164-006bb0"],
+      "env": {
+        "SERVER_SECRET": "secret",
+        "MINIO_ACCESS_KEY": "minioadmin",
+        "MINIO_SECRET_KEY": "minioadmin",
+        "MINIO_ENDPOINT": "localhost",
+        "TRANSACTOR_URL": "ws://localhost:3333",
+        "MONGO_URL": "mongodb://localhost:27017",
+        "ACCOUNTS_URL": "http://localhost:3000",
+        "TELEGRAM_DATABASE": "telegram-service",
+        "ELASTIC_URL": "http://localhost:9200",
+        "REKONI_URL": "http://localhost:4004"
+      },
+      "runtimeVersion": "20",
+      "runtimeArgs": ["--nolazy", "-r", "ts-node/register"],
+      "sourceMaps": true,
+      "outputCapture": "std",
+      "cwd": "${workspaceRoot}/dev/tool"
+    },
     {
       "address": "127.0.0.1",
       "localRoot": "${workspaceFolder}",
@@ -229,6 +253,7 @@
         "ELASTIC_URL": "http://localhost:9200",
         "REKONI_URL": "http://localhost:4004"
       },
+      "runtimeVersion": "20",
       "runtimeArgs": ["--nolazy", "-r", "ts-node/register"],
       "sourceMaps": true,
       "outputCapture": "std",
diff --git a/dev/tool/package.json b/dev/tool/package.json
index ca8e811c1c..700f121318 100644
--- a/dev/tool/package.json
+++ b/dev/tool/package.json
@@ -64,6 +64,7 @@
     "@hcengineering/client-resources": "^0.6.27",
     "@hcengineering/contact": "^0.6.24",
     "@hcengineering/core": "^0.6.32",
+    "@hcengineering/document": "^0.6.0",
     "@hcengineering/elastic": "^0.6.0",
     "@hcengineering/lead": "^0.6.0",
     "@hcengineering/minio": "^0.6.0",
diff --git a/dev/tool/src/index.ts b/dev/tool/src/index.ts
index bca374fafe..30a53c1296 100644
--- a/dev/tool/src/index.ts
+++ b/dev/tool/src/index.ts
@@ -48,7 +48,7 @@ import {
 } from '@hcengineering/server-backup'
 import serverClientPlugin, { BlobClient, createClient, getTransactorEndpoint } from '@hcengineering/server-client'
 import serverToken, { decodeToken, generateToken } from '@hcengineering/server-token'
-import toolPlugin, { FileModelLogger } from '@hcengineering/server-tool'
+import toolPlugin, { connect, FileModelLogger } from '@hcengineering/server-tool'
 import path from 'path'

 import { buildStorageFromConfig, storageConfigFromEnv } from '@hcengineering/server-storage'
@@ -63,7 +63,10 @@ import core, {
   MeasureMetricsContext,
   metricsToString,
   systemAccountEmail,
+  TxOperations,
   versionToString,
+  type WorkspaceIdWithUrl,
+  type Client as CoreClient,
   type Data,
   type Doc,
   type Ref,
@@ -95,6 +98,7 @@ import { fixJsonMarkup, migrateMarkup } from './markup'
 import { fixMixinForeignAttributes, showMixinForeignAttributes } from './mixin'
 import { fixAccountEmails, renameAccount } from './renameAccount'
 import { moveFiles, syncFiles } from './storage'
+import { importNotion, importToTeamspace } from './notion'

 const colorConstants = {
   colorRed: '\u001b[31m',
@@ -202,6 +206,84 @@ export function devTool (
     })
   })

+  // import-notion /home/anna/work/notion/pages/exported --workspace workspace
+  program
+    .command('import-notion <dir>')
+    .description('import extracted archive exported from Notion as "Markdown & CSV"')
+    .requiredOption('-ws, --workspace <workspace>', 'workspace where the documents should be imported to')
+    .action(async (dir: string, cmd) => {
+      if (cmd.workspace === '') return
+
+      const { mongodbUri } = prepareTools()
+
+      await withDatabase(mongodbUri, async (db) => {
+        const ws = await getWorkspaceById(db, cmd.workspace)
+        if (ws === null) {
+          console.log('Workspace not found: ', cmd.workspace)
+          return
+        }
+
+        const wsUrl: WorkspaceIdWithUrl = {
+          name: ws.workspace,
+          workspaceName: ws.workspaceName ?? '',
+          workspaceUrl: ws.workspaceUrl ?? ''
+        }
+
+        await withStorage(mongodbUri, async (storageAdapter) => {
+          const token = generateToken(systemAccountEmail, { name: ws.workspace })
+          const endpoint = await getTransactorEndpoint(token, 'external')
+          const connection = (await connect(endpoint, wsUrl, undefined, {
+            mode: 'backup'
+          })) as unknown as CoreClient
+          const client = new TxOperations(connection, core.account.System)
+
+          await importNotion(toolCtx, client, storageAdapter, dir, wsUrl)
+
+          await connection.close()
+        })
+      })
+    })
+
+  // import-notion-to-teamspace /home/anna/work/notion/pages/exported --workspace workspace --teamspace notion
+  program
+    .command('import-notion-to-teamspace <dir>')
+    .description('import extracted archive exported from Notion as "Markdown & CSV"')
+    .requiredOption('-ws, --workspace <workspace>', 'workspace where the documents should be imported to')
+    .requiredOption('-ts, --teamspace <teamspace>', 'teamspace where the documents should be imported to')
+    .action(async (dir: string, cmd) => {
+      if (cmd.workspace === '') return
+      if (cmd.teamspace === '') return
+
+      const { mongodbUri } = prepareTools()
+
+      await withDatabase(mongodbUri, async (db) => {
+        const ws = await getWorkspaceById(db, cmd.workspace)
+        if (ws === null) {
+          console.log('Workspace not found: ', cmd.workspace)
+          return
+        }
+
+        const wsUrl: WorkspaceIdWithUrl = {
+          name: ws.workspace,
+          workspaceName: ws.workspaceName ?? '',
+          workspaceUrl: ws.workspaceUrl ?? ''
+        }
+
+        await withStorage(mongodbUri, async (storageAdapter) => {
+          const token = generateToken(systemAccountEmail, { name: ws.workspace })
+          const endpoint = await getTransactorEndpoint(token, 'external')
+          const connection = (await connect(endpoint, wsUrl, undefined, {
+            mode: 'backup'
+          })) as unknown as CoreClient
+          const client = new TxOperations(connection, core.account.System)
+
+          await importToTeamspace(toolCtx, client, storageAdapter, dir, wsUrl, cmd.teamspace)
+
+          await connection.close()
+        })
+      })
+    })
+
   program
     .command('reset-account <email>')
     .description('create user and corresponding account in master database')
diff --git a/dev/tool/src/notion.ts b/dev/tool/src/notion.ts
new file mode 100644
index 0000000000..c99c0f0bc7
--- /dev/null
+++ b/dev/tool/src/notion.ts
@@ -0,0 +1,653 @@
+import {
+  generateId,
+  type AttachedData,
+  type Ref,
+  type WorkspaceIdWithUrl,
+  makeCollaborativeDoc,
+  type MeasureMetricsContext,
+  type TxOperations,
+  type Blob
+} from '@hcengineering/core'
+import { saveCollaborativeDoc } from '@hcengineering/collaboration'
+import document, { type Document, type Teamspace } from '@hcengineering/document'
+import { type StorageAdapter } from '@hcengineering/server-core'
+import {
+  MarkupMarkType,
+  type MarkupNode,
+  MarkupNodeType,
+  parseMessageMarkdown,
+  traverseNode,
+  traverseNodeMarks,
+  jsonToYDocNoSchema
+} from '@hcengineering/text'
+
+import attachment from '@hcengineering/model-attachment'
+import { type Attachment } from '@hcengineering/attachment'
+import { contentType } from 'mime-types'
+import core from '@hcengineering/model-core'
+import { readdir, stat, readFile } from 'fs/promises'
+import { type Dirent } from 'fs'
+import { basename, join, parse } from 'path'
+
+interface DocumentMetadata {
+  id: string
+  name: string
+  notionId: string
+  notionSubRootId?: string
+  notionParentId?: string
+  mimeType?: string
+  size?: number
+}
+
+interface FileMetadata {
+  isFolder: boolean
+  level: number
+  hasChildren: boolean
+  fileName: string
+  extension?: string
+}
+
+const MD_EXTENSION = '.md'
+const CSV_EXTENSION = '.csv'
+const DEFAULT_ATTACHMENT_MIME_TYPE = 'application/octet-stream'
+
+enum NOTION_MD_LINK_TYPES {
+  INTERNAL_LINK,
+  EXTERNAL_LINK,
+  ATTACHMENT,
+  UNKNOWN
+}
+
+export async function importNotion (
+  ctx: MeasureMetricsContext,
+  client: TxOperations,
+  storage: StorageAdapter,
+  dir: string,
+  ws: WorkspaceIdWithUrl
+): Promise<void> {
+  const files = await getFilesForImport(dir)
+
+  const fileMetaMap = new Map<string, FileMetadata>()
+  const documentMetaMap = new Map<string, DocumentMetadata>()
+
+  await collectMetadata(dir, files, fileMetaMap, documentMetaMap)
+  console.log(fileMetaMap)
+  console.log(documentMetaMap)
+
+  const spaceIdMap = await createTeamspaces(fileMetaMap, client)
+  if (spaceIdMap.size === 0) {
+    console.error('No teamspaces found in directory: ', dir)
+    return
+  }
+
+  await importFiles(ctx, client, storage, fileMetaMap, documentMetaMap, spaceIdMap, ws)
+}
+
+async function getFilesForImport (dir: string): Promise<Dirent[]> {
+  const filesAndDirs = await readdir(dir, { recursive: true, withFileTypes: true })
+  const files = filesAndDirs.filter((file) => {
+    return !file.isDirectory() && !(file.name === 'index.html' && file.path === dir)
+  })
+  return files
+}
+
+export async function importToTeamspace (
+  ctx: MeasureMetricsContext,
+  client: TxOperations,
+  storage: StorageAdapter,
+  dir: string,
+  ws: WorkspaceIdWithUrl,
+  teamspace: string
+): Promise<void> {
+  const files = await getFilesForImport(dir)
+
+  const fileMetaMap = new Map<string, FileMetadata>()
+  const documentMetaMap = new Map<string, DocumentMetadata>()
+
+  await collectMetadata(dir, files, fileMetaMap, documentMetaMap)
+  console.log(fileMetaMap)
+  console.log(documentMetaMap)
+
+  const spaceId = await createTeamspace(teamspace, client)
+
+  await importFilesToSpace(ctx, client, storage, fileMetaMap, documentMetaMap, spaceId, ws)
+}
+
+async function collectMetadata (
+  root: string,
+  files: Dirent[],
+  fileMetaMap: Map<string, FileMetadata>,
+  documentMetaMap: Map<string, DocumentMetadata>
+): Promise<void> {
+  for (const file of files) {
+    const st = await stat(file.path)
+    collectFileMetadata(root, file, st.size, fileMetaMap, documentMetaMap)
+  }
+}
+
+function collectFileMetadata (
+  root: string,
+  file: Dirent,
+  fileSize: number,
+  fileMetaMap: Map<string, FileMetadata>,
+  documentMetaMap: Map<string, DocumentMetadata>
+): void {
+  const notionId = getFileId(file.path, file.name)
+  const extension = extractExtension(file.name)
+  const ancestors = getAncestorEntries(root, file.path)
+  const meta = fileMetaMap.get(notionId)
+  fileMetaMap.set(notionId, {
+    level: ancestors.length,
+    isFolder: false,
+    extension,
+    fileName: join(file.path, file.name),
+    hasChildren: meta?.hasChildren ?? false
+  })
+
+  ancestors.forEach((folder, i) => {
+    const id = getFileId('', folder)
+    const meta = fileMetaMap.get(id)
+    fileMetaMap.set(id, {
+      level: meta?.level ?? i,
+      isFolder: meta?.isFolder ?? true,
+      fileName: meta?.fileName ?? folder,
+      extension: meta?.extension,
+      hasChildren: true
+    })
+  })
+
+  const notionParentId =
+    ancestors[ancestors.length - 1] !== undefined ? extractNotionId(ancestors[ancestors.length - 1]) : undefined
+  const notionSubRootId =
+    ancestors[1] !== undefined ? extractNotionId(ancestors[1]) ?? extractOriginalName(ancestors[1]) : undefined
+
+  documentMetaMap.set(notionId, {
+    id: generateId(),
+    name: extractOriginalName(file.name),
+    notionId,
+    notionParentId,
+    notionSubRootId,
+    mimeType: getContentType(file.name),
+    size: fileSize
+  })
+}
+
+async function createTeamspaces (
+  fileMetaMap: Map<string, FileMetadata>,
+  client: TxOperations
+): Promise<Map<string, Ref<Teamspace>>> {
+  const spaceIdMap = new Map<string, Ref<Teamspace>>()
+  for (const [notionId, meta] of fileMetaMap) {
+    if (meta.isFolder && meta.level === 1) {
+      console.log('TEAMSPACE: ', meta.fileName)
+      const teamspaceName = extractOriginalName(meta.fileName)
+      const teamspaceId = await createTeamspace(teamspaceName, client)
+      spaceIdMap.set(notionId, teamspaceId)
+    }
+  }
+  return spaceIdMap
+}
+
+async function createTeamspace (name: string, client: TxOperations): Promise<Ref<Teamspace>> {
+  const teamspaceId = generateId<Teamspace>()
+  const data = {
+    type: document.spaceType.DefaultTeamspaceType,
+    description: 'Imported from Notion',
+    name,
+    private: false,
+    members: [],
+    owners: [],
+    autoJoin: false,
+    archived: false
+  }
+  await client.createDoc(document.class.Teamspace, core.space.Space, data, teamspaceId)
+  return teamspaceId
+}
+
+async function importFilesToSpace (
+  ctx: MeasureMetricsContext,
+  client: TxOperations,
+  storage: StorageAdapter,
+  fileMetaMap: Map<string, FileMetadata>,
+  documentMetaMap: Map<string, DocumentMetadata>,
+  spaceId: Ref<Teamspace>,
+  ws: WorkspaceIdWithUrl
+): Promise<void> {
+  for (const [notionId, fileMeta] of fileMetaMap) {
+    if (!fileMeta.isFolder) {
+      const docMeta = documentMetaMap.get(notionId)
+      if (docMeta === undefined) throw new Error('Cannot find metadata for entry: ' + fileMeta.fileName)
+      await importFile(ctx, client, storage, fileMeta, docMeta, spaceId, documentMetaMap, ws)
+    }
+  }
+}
+
+async function importFiles (
+  ctx: MeasureMetricsContext,
+  client: TxOperations,
+  storage: StorageAdapter,
+  fileMetaMap: Map<string, FileMetadata>,
+  documentMetaMap: Map<string, DocumentMetadata>,
+  spaceIdMap: Map<string, Ref<Teamspace>>,
+  ws: WorkspaceIdWithUrl
+): Promise<void> {
+  for (const [notionId, fileMeta] of fileMetaMap) {
+    if (!fileMeta.isFolder) {
+      const docMeta = documentMetaMap.get(notionId)
+      if (docMeta === undefined) throw new Error('Cannot find metadata for entry: ' + fileMeta.fileName)
+
+      const spaceId = docMeta.notionSubRootId !== undefined && spaceIdMap.get(docMeta.notionSubRootId)
+      if (spaceId === undefined || spaceId === false) {
+        throw new Error('Teamspace not found for document: ' + docMeta.name)
+      }
+
+      await importFile(ctx, client, storage, fileMeta, docMeta, spaceId, documentMetaMap, ws)
+    }
+  }
+}
+
+async function importFile (
+  ctx: MeasureMetricsContext,
+  client: TxOperations,
+  storage: StorageAdapter,
+  fileMeta: FileMetadata,
+  docMeta: DocumentMetadata,
+  spaceId: Ref<Teamspace>,
+  documentMetaMap: Map<string, DocumentMetadata>,
+  ws: WorkspaceIdWithUrl
+): Promise<void> {
+  await new Promise<void>((resolve, reject) => {
+    if (fileMeta.isFolder) throw new Error('Importing folder entry is not supported: ' + fileMeta.fileName)
+
+    console.log('IMPORT STARTED:', fileMeta.fileName)
+    readFile(fileMeta.fileName)
+      .then((data) => {
+        const { notionParentId } = docMeta
+
+        const parentMeta =
+          notionParentId !== undefined && notionParentId !== '' ? documentMetaMap.get(notionParentId) : undefined
+
+        const processFileData = getDataProcessor(fileMeta, docMeta)
+        processFileData(ctx, client, storage, ws, data, docMeta, spaceId, parentMeta, documentMetaMap)
+          .then(() => {
+            console.log('IMPORT SUCCEED:', docMeta.name)
+            console.log('------------------------------------------------------------------')
+            resolve()
+          })
+          .catch((error) => {
+            handleImportFailure(docMeta.name, error, reject)
+          })
+      })
+      .catch((error) => {
+        handleImportFailure(docMeta.name, error, reject)
+      })
+
+    function handleImportFailure (docName: string, error: any, reject: (reason?: any) => void): void {
+      console.warn('IMPORT FAILED:', docName)
+      console.log(error.stack)
+      console.log('------------------------------------------------------------------')
+      reject(error)
+    }
+  })
+}
+
+type DataProcessor = (
+  ctx: MeasureMetricsContext,
+  client: TxOperations,
+  storage: StorageAdapter,
+  ws: WorkspaceIdWithUrl,
+  data: Buffer,
+  docMeta: DocumentMetadata,
+  space: Ref<Teamspace>,
+  parentMeta?: DocumentMetadata,
+  documentMetaMap?: Map<string, DocumentMetadata>
+) => Promise<void>
+
+function getDataProcessor (fileMeta: FileMetadata, docMeta: DocumentMetadata): DataProcessor {
+  if (fileMeta.isFolder && fileMeta.level === 1) {
+    console.log('TEAMSPACE: ', docMeta.name)
+    return skip
+  }
+  if (fileMeta.extension === MD_EXTENSION) {
+    console.log('PAGE: ', docMeta.name)
+    return importPageDocument
+  }
+  if (fileMeta.extension === CSV_EXTENSION && fileMeta.hasChildren) {
+    console.log('DB FILE: ', docMeta.name)
+    return createDBPageWithAttachments
+  }
+  if (fileMeta.extension === CSV_EXTENSION && /[\d\w]*_all$/.test(docMeta.notionId)) {
+    console.log('DB FILE (ALL): ', docMeta.name)
+    return importDBAttachment
+  }
+  if (!fileMeta.isFolder && fileMeta.extension !== '' && docMeta.notionParentId !== undefined) {
+    console.log('ATTACHMENT: ', docMeta.name)
+    return importAttachment
+  }
+  return skip
+}
+
+async function createDBPageWithAttachments (
+  ctx: MeasureMetricsContext,
+  client: TxOperations,
+  storage: StorageAdapter,
+  ws: WorkspaceIdWithUrl,
+  data: Buffer,
+  docMeta: DocumentMetadata,
+  space: Ref<Teamspace>,
+  parentMeta?: DocumentMetadata,
+  documentMetaMap?: Map<string, DocumentMetadata>
+): Promise<void> {
+  const pageId = docMeta.id as Ref<Document>
+  const collabId = makeCollaborativeDoc(pageId, 'content')
+
+  const parentId = parentMeta !== undefined ? (parentMeta.id as Ref<Document>) : document.ids.NoParent
+
+  const object: AttachedData<Document> = {
+    name: docMeta.name,
+    content: collabId,
+    attachments: 0,
+    children: 0,
+    embeddings: 0,
+    labels: 0,
+    comments: 0,
+    references: 0
+  }
+
+  await client.addCollection(
+    document.class.Document,
+    space,
+    parentId,
+    document.class.Document,
+    'children',
+    object,
+    pageId
+  )
+
+  const dbPage: DocumentMetadata = {
+    id: pageId,
+    notionParentId: docMeta.notionParentId,
+    name: docMeta.name,
+    notionId: docMeta.notionId
+  }
+
+  const attachment: DocumentMetadata = {
+    id: generateId(),
+    notionParentId: pageId,
+    name: docMeta.name,
+    notionId: docMeta.notionId,
+    mimeType: docMeta.mimeType,
+    size: docMeta.size
+  }
+
+  await importAttachment(ctx, client, storage, ws, data, attachment, space, dbPage)
+}
+
+async function importDBAttachment (
+  ctx: MeasureMetricsContext,
+  client: TxOperations,
+  storage: StorageAdapter,
+  ws: WorkspaceIdWithUrl,
+  data: Buffer,
+  docMeta: DocumentMetadata,
+  space: Ref<Teamspace>,
+  parentMeta?: DocumentMetadata,
+  documentMetaMap?: Map<string, DocumentMetadata>
+): Promise<void> {
+  const matched = docMeta.notionId.match(/([\d\w]*)_all$/)
+  if (matched == null || matched.length < 2) {
+    throw new Error('DB file not found: ' + docMeta.name)
+  }
+
+  const originalNotionId = matched[1]
+  const dbPage = documentMetaMap?.get(originalNotionId)
+  if (dbPage === undefined) {
+    throw new Error('DB page metadata not found: ' + docMeta.name)
+  }
+
+  const attachment: DocumentMetadata = {
+    id: docMeta.id,
+    notionParentId: dbPage.id,
+    name: docMeta.name,
+    notionId: docMeta.notionId,
+    mimeType: docMeta.mimeType,
+    size: docMeta.size
+  }
+  await importAttachment(ctx, client, storage, ws, data, attachment, space, dbPage)
+}
+
+async function importAttachment (
+  ctx: MeasureMetricsContext,
+  client: TxOperations,
+  storage: StorageAdapter,
+  ws: WorkspaceIdWithUrl,
+  data: Buffer,
+  docMeta: DocumentMetadata,
+  space: Ref<Teamspace>,
+  parentMeta?: DocumentMetadata,
+  documentMetaMap?: Map<string, DocumentMetadata>
+): Promise<void> {
+  if (parentMeta === undefined) {
+    throw new Error('Cannot import attachment without parent doc: ' + docMeta.id)
+  }
+
+  const size = docMeta.size ?? 0
+  const type = docMeta.mimeType ?? DEFAULT_ATTACHMENT_MIME_TYPE
+  await storage.put(ctx, ws, docMeta.id, data, type, size)
+
+  const attachedData: AttachedData<Attachment> = {
+    file: docMeta.id as Ref<Blob>,
+    name: docMeta.name,
+    lastModified: Date.now(),
+    type,
+    size
+  }
+
+  await client.addCollection(
+    attachment.class.Attachment,
+    space,
+    parentMeta.id as Ref<Document>,
+    document.class.Document,
+    'attachments',
+    attachedData,
+    docMeta.id as Ref<Attachment>
+  )
+}
+
+async function importPageDocument (
+  ctx: MeasureMetricsContext,
+  client: TxOperations,
+  storage: StorageAdapter,
+  ws: WorkspaceIdWithUrl,
+  data: Buffer,
+  docMeta: DocumentMetadata,
+  space: Ref<Teamspace>,
+  parentMeta?: DocumentMetadata,
+  documentMetaMap?: Map<string, DocumentMetadata>
+): Promise<void> {
+  const md = data.toString() ?? ''
+  const json = parseMessageMarkdown(md ?? '', 'image://')
+  if (documentMetaMap !== undefined) {
+    preProcessMarkdown(json, documentMetaMap)
+  }
+
+  const id = docMeta.id as Ref<Document>
+  const collabId = makeCollaborativeDoc(id, 'content')
+  const yDoc = jsonToYDocNoSchema(json, 'content')
+  await saveCollaborativeDoc(storage, ws, collabId, yDoc, ctx)
+
+  const parentId = parentMeta?.id ?? document.ids.NoParent
+
+  const attachedData: AttachedData<Document> = {
+    name: docMeta.name,
+    content: collabId,
+    attachments: 0,
+    children: 0,
+    embeddings: 0,
+    labels: 0,
+    comments: 0,
+    references: 0
+  }
+
+  await client.addCollection(
+    document.class.Document,
+    space,
+    parentId as Ref<Document>,
+    document.class.Document,
+    'children',
+    attachedData,
+    id
+  )
+}
+
+function preProcessMarkdown (json: MarkupNode, documentMetaMap: Map<string, DocumentMetadata>): void {
+  traverseNode(json, (node) => {
+    if (node.type === MarkupNodeType.image) {
+      const src = node.attrs?.src
+      if (src !== undefined) {
+        const notionId = getFileId('', src as string)
+        const meta = documentMetaMap.get(notionId)
+        if (meta !== undefined) {
+          alterImageNode(node, meta)
+        }
+      }
+    } else {
+      traverseNodeMarks(node, (mark) => {
+        if (mark.type === MarkupMarkType.link) {
+          const href = mark.attrs.href
+          switch (getLinkType(href)) {
+            case NOTION_MD_LINK_TYPES.UNKNOWN:
+            case NOTION_MD_LINK_TYPES.EXTERNAL_LINK: {
+              console.log('skip this type of link: ', href)
+              return
+            }
+            case NOTION_MD_LINK_TYPES.INTERNAL_LINK: {
+              const notionId = getFileId('', href)
+              const targetMeta = documentMetaMap.get(notionId)
+              console.log('Target HULY page ID:', targetMeta?.id)
+              if (targetMeta !== undefined) {
+                alterInternalLinkNode(node, targetMeta)
+              } else {
+                console.warn('Linked page not found (outside of this import): ' + href)
+              }
+              return
+            }
+            case NOTION_MD_LINK_TYPES.ATTACHMENT: {
+              const notionId = getFileId('', href)
+              const attachmentMeta = documentMetaMap.get(notionId)
+              if (attachmentMeta !== undefined) {
+                console.log('Attachment found: ', attachmentMeta)
+                alterAttachmentNode(node, attachmentMeta, href)
+              } else {
+                console.warn('Attachment not found: ', href)
+              }
+            }
+          }
+        }
+      })
+    }
+    return true
+  })
+}
+
+function getLinkType (href: string): NOTION_MD_LINK_TYPES {
+  console.log('original link href: ' + href)
+  if (isExternalLink(href)) return NOTION_MD_LINK_TYPES.EXTERNAL_LINK
+
+  const notionId = extractNotionId(href)
+  if (notionId !== null && notionId !== undefined && notionId !== '') {
+    return NOTION_MD_LINK_TYPES.INTERNAL_LINK
+  }
+
+  const shortName = extractNameWoExtension(href)
+  if (shortName !== undefined && shortName !== '') {
+    return NOTION_MD_LINK_TYPES.ATTACHMENT
+  }
+
+  return NOTION_MD_LINK_TYPES.UNKNOWN
+}
+
+function alterAttachmentNode (node: MarkupNode, targetMeta: DocumentMetadata, href: string): void {
+  node.type = MarkupNodeType.file
+  node.attrs = {
+    'file-id': targetMeta.id,
+    'data-file-name': targetMeta.name,
+    'data-file-size': targetMeta.size ?? 0,
+    'data-file-type': targetMeta.mimeType ?? DEFAULT_ATTACHMENT_MIME_TYPE,
+    'data-file-href': href
+  }
+}
+
+function alterInternalLinkNode (node: MarkupNode, targetMeta: DocumentMetadata): void {
+  node.type = MarkupNodeType.reference
+  node.attrs = {
+    id: targetMeta.id,
+    label: targetMeta.name,
+    objectclass: document.class.Document,
+    text: '',
+    content: ''
+  }
+}
+
+function alterImageNode (node: MarkupNode, meta: DocumentMetadata): void {
+  node.type = MarkupNodeType.image
+  if (node.attrs !== undefined) {
+    node.attrs['file-id'] = meta.id
+    if (meta.mimeType !== undefined) {
+      node.attrs['data-file-type'] = meta.mimeType
+    }
+  }
+}
+
+async function skip (...args: any): Promise<void> {
+  const docMeta = args[5]
+  console.warn('Unsupported entry type, skipping: ', docMeta)
+}
+
+function isExternalLink (href: any): boolean {
+  return URL.canParse(href)
+}
+
+function extractNotionId (fileName: string): string | undefined {
+  const decoded = decodeURI(fileName).trimEnd()
+  const matched = decoded.match(/ ([\w\d]{32}(_all)?)(\.|$)/)
+  return matched !== null && matched.length >= 2 ? matched[1] : undefined
+}
+
+function extractExtension (fileName: string): string {
+  const decoded = decodeURI(fileName)
+  return parse(decoded).ext.toLowerCase()
+}
+
+function extractNameWoExtension (fileName: string): string {
+  const decoded = decodeURI(fileName)
+  return parse(decoded).name
+}
+
+function extractOriginalName (fileName: string): string {
+  const woExtension = extractNameWoExtension(fileName)
+  const notionId = extractNotionId(woExtension)
+  const nameOnly = notionId !== undefined ? woExtension.replace(notionId, '') : woExtension
+  return nameOnly.trimEnd()
+}
+
+function getFileId (filePath: string, fileName: string): string {
+  const notionId = extractNotionId(fileName)
+  if (notionId !== '' && notionId !== undefined) {
+    return notionId
+  }
+  const decodedPath = decodeURI(filePath)
+  const decodedName = decodeURI(fileName)
+  return join(basename(decodedPath), decodedName)
+}
+
+function getAncestorEntries (root: string, filePath: string): string[] {
+  const relativePath = filePath.replace(root, '')
+  const ancestors = relativePath.split('/')
+  return ancestors
+}
+
+function getContentType (fileName: string): string | undefined {
+  const mimeType = contentType(fileName)
+  return mimeType !== false ? mimeType : undefined
+}
diff --git a/packages/text/src/markup/model.ts b/packages/text/src/markup/model.ts
index 344105b5b9..78fe89d876 100644
--- a/packages/text/src/markup/model.ts
+++ b/packages/text/src/markup/model.ts
@@ -23,6 +23,7 @@ export enum MarkupNodeType {
   code_block = 'codeBlock',
   text = 'text',
   image = 'image',
+  file = 'file',
   reference = 'reference',
   hard_break = 'hardBreak',
   ordered_list = 'orderedList',
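
Note (not part of the patch): a minimal invocation sketch for the two new commands, mirroring the "Debug notion import" launch configuration above. The export path, workspace id, and teamspace name below are placeholders, and a locally running dev stack with the environment variables from that launch entry (SERVER_SECRET, MINIO_*, TRANSACTOR_URL, MONGO_URL, ACCOUNTS_URL, etc.) is assumed.

    cd dev/tool
    # one teamspace per top-level folder of the Notion "Markdown & CSV" export
    node --nolazy -r ts-node/register src/__start.ts import-notion /path/to/notion-export -ws my-workspace
    # or import everything into a single, newly created teamspace
    node --nolazy -r ts-node/register src/__start.ts import-notion-to-teamspace /path/to/notion-export -ws my-workspace -ts notion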