Support import from Notion (#6498)

This commit is contained in:
akhismat 2024-09-10 00:32:51 +07:00 committed by GitHub
parent 92b20ad47f
commit c809de247d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 763 additions and 1 deletions

25
.vscode/launch.json vendored
View File

@ -4,6 +4,30 @@
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Debug notion import",
"type": "node",
"request": "launch",
// "args": ["src/__start.ts", "import-notion-to-teamspace", "/home/anna/work/notion/natalya/Export-fad9ecb4-a1a5-4623-920d-df32dd423743", "-ws", "w-user1-ws1-66d8018b-ce1e0c3164-006bb0", "-ts", "notion"],
"args": ["src/__start.ts", "import-notion", "/home/anna/work/notion/natalya/Export-fad9ecb4-a1a5-4623-920d-df32dd423743", "-ws", "w-user1-ws1-66d8018b-ce1e0c3164-006bb0"],
"env": {
"SERVER_SECRET": "secret",
"MINIO_ACCESS_KEY": "minioadmin",
"MINIO_SECRET_KEY": "minioadmin",
"MINIO_ENDPOINT": "localhost",
"TRANSACTOR_URL": "ws://localhost:3333",
"MONGO_URL": "mongodb://localhost:27017",
"ACCOUNTS_URL": "http://localhost:3000",
"TELEGRAM_DATABASE": "telegram-service",
"ELASTIC_URL": "http://localhost:9200",
"REKONI_URL": "http://localhost:4004"
},
"runtimeVersion": "20",
"runtimeArgs": ["--nolazy", "-r", "ts-node/register"],
"sourceMaps": true,
"outputCapture": "std",
"cwd": "${workspaceRoot}/dev/tool"
},
{
"address": "127.0.0.1",
"localRoot": "${workspaceFolder}",
@ -229,6 +253,7 @@
"ELASTIC_URL": "http://localhost:9200",
"REKONI_URL": "http://localhost:4004"
},
"runtimeVersion": "20",
"runtimeArgs": ["--nolazy", "-r", "ts-node/register"],
"sourceMaps": true,
"outputCapture": "std",

View File

@ -64,6 +64,7 @@
"@hcengineering/client-resources": "^0.6.27",
"@hcengineering/contact": "^0.6.24",
"@hcengineering/core": "^0.6.32",
"@hcengineering/document": "^0.6.0",
"@hcengineering/elastic": "^0.6.0",
"@hcengineering/lead": "^0.6.0",
"@hcengineering/minio": "^0.6.0",

View File

@ -48,7 +48,7 @@ import {
} from '@hcengineering/server-backup'
import serverClientPlugin, { BlobClient, createClient, getTransactorEndpoint } from '@hcengineering/server-client'
import serverToken, { decodeToken, generateToken } from '@hcengineering/server-token'
import toolPlugin, { FileModelLogger } from '@hcengineering/server-tool'
import toolPlugin, { connect, FileModelLogger } from '@hcengineering/server-tool'
import path from 'path'
import { buildStorageFromConfig, storageConfigFromEnv } from '@hcengineering/server-storage'
@ -63,7 +63,10 @@ import core, {
MeasureMetricsContext,
metricsToString,
systemAccountEmail,
TxOperations,
versionToString,
type WorkspaceIdWithUrl,
type Client as CoreClient,
type Data,
type Doc,
type Ref,
@ -95,6 +98,7 @@ import { fixJsonMarkup, migrateMarkup } from './markup'
import { fixMixinForeignAttributes, showMixinForeignAttributes } from './mixin'
import { fixAccountEmails, renameAccount } from './renameAccount'
import { moveFiles, syncFiles } from './storage'
import { importNotion, importToTeamspace } from './notion'
const colorConstants = {
colorRed: '\u001b[31m',
@ -202,6 +206,84 @@ export function devTool (
})
})
// import-notion /home/anna/work/notion/pages/exported --workspace workspace
  // Import an extracted Notion "Markdown & CSV" archive, creating one
  // teamspace per top-level folder of the export (see importNotion).
  program
    .command('import-notion <dir>')
    .description('import extracted archive exported from Notion as "Markdown & CSV"')
    .requiredOption('-ws, --workspace <workspace>', 'workspace where the documents should be imported to')
    .action(async (dir: string, cmd) => {
      // Guard against an explicitly empty workspace value.
      if (cmd.workspace === '') return
      const { mongodbUri } = prepareTools()
      await withDatabase(mongodbUri, async (db) => {
        const ws = await getWorkspaceById(db, cmd.workspace)
        if (ws === null) {
          console.log('Workspace not found: ', cmd.workspace)
          return
        }
        const wsUrl: WorkspaceIdWithUrl = {
          name: ws.workspace,
          workspaceName: ws.workspaceName ?? '',
          workspaceUrl: ws.workspaceUrl ?? ''
        }
        await withStorage(mongodbUri, async (storageAdapter) => {
          // Connect as the system account.
          // NOTE(review): connection mode is 'backup' — confirm it permits the
          // document/attachment writes the import performs.
          const token = generateToken(systemAccountEmail, { name: ws.workspace })
          const endpoint = await getTransactorEndpoint(token, 'external')
          const connection = (await connect(endpoint, wsUrl, undefined, {
            mode: 'backup'
          })) as unknown as CoreClient
          const client = new TxOperations(connection, core.account.System)
          await importNotion(toolCtx, client, storageAdapter, dir, wsUrl)
          // NOTE(review): close is not in a finally — the connection leaks if
          // importNotion throws.
          await connection.close()
        })
      })
    })
// import-notion-to-teamspace /home/anna/work/notion/pages/exported --workspace workspace --teamspace notion
  // Import an extracted Notion "Markdown & CSV" archive into one newly
  // created teamspace (see importToTeamspace).
  program
    .command('import-notion-to-teamspace <dir>')
    .description('import extracted archive exported from Notion as "Markdown & CSV"')
    .requiredOption('-ws, --workspace <workspace>', 'workspace where the documents should be imported to')
    .requiredOption('-ts, --teamspace <teamspace>', 'teamspace where the documents should be imported to')
    .action(async (dir: string, cmd) => {
      // Guard against explicitly empty option values.
      if (cmd.workspace === '') return
      if (cmd.teamspace === '') return
      const { mongodbUri } = prepareTools()
      await withDatabase(mongodbUri, async (db) => {
        const ws = await getWorkspaceById(db, cmd.workspace)
        if (ws === null) {
          console.log('Workspace not found: ', cmd.workspace)
          return
        }
        const wsUrl: WorkspaceIdWithUrl = {
          name: ws.workspace,
          workspaceName: ws.workspaceName ?? '',
          workspaceUrl: ws.workspaceUrl ?? ''
        }
        await withStorage(mongodbUri, async (storageAdapter) => {
          // Connect as the system account.
          // NOTE(review): connection mode is 'backup' — confirm it permits the
          // document/attachment writes the import performs.
          const token = generateToken(systemAccountEmail, { name: ws.workspace })
          const endpoint = await getTransactorEndpoint(token, 'external')
          const connection = (await connect(endpoint, wsUrl, undefined, {
            mode: 'backup'
          })) as unknown as CoreClient
          const client = new TxOperations(connection, core.account.System)
          await importToTeamspace(toolCtx, client, storageAdapter, dir, wsUrl, cmd.teamspace)
          // NOTE(review): close is not in a finally — the connection leaks if
          // importToTeamspace throws.
          await connection.close()
        })
      })
    })
program
.command('reset-account <email>')
.description('create user and corresponding account in master database')

653
dev/tool/src/notion.ts Normal file
View File

@ -0,0 +1,653 @@
import {
generateId,
type AttachedData,
type Ref,
type WorkspaceIdWithUrl,
makeCollaborativeDoc,
type MeasureMetricsContext,
type TxOperations,
type Blob
} from '@hcengineering/core'
import { saveCollaborativeDoc } from '@hcengineering/collaboration'
import document, { type Document, type Teamspace } from '@hcengineering/document'
import { type StorageAdapter } from '@hcengineering/server-core'
import {
MarkupMarkType,
type MarkupNode,
MarkupNodeType,
parseMessageMarkdown,
traverseNode,
traverseNodeMarks,
jsonToYDocNoSchema
} from '@hcengineering/text'
import attachment from '@hcengineering/model-attachment'
import { type Attachment } from '@hcengineering/attachment'
import { contentType } from 'mime-types'
import core from '@hcengineering/model-core'
import { readdir, stat, readFile } from 'fs/promises'
import { type Dirent } from 'fs'
import { basename, join, parse } from 'path'
/** Identity and placement info for one imported document or attachment. */
interface DocumentMetadata {
  id: string // generated Huly document id
  name: string // human-readable name with the Notion id and extension stripped
  notionId: string // 32-char Notion id from the file name, or a path-based fallback (see getFileId)
  notionSubRootId?: string // id/name of the top-level export folder; maps to a teamspace in importFiles
  notionParentId?: string // Notion id of the direct parent folder entry, if any
  mimeType?: string
  size?: number // size in bytes, when known
}

/** Filesystem-level info about one entry of the extracted export. */
interface FileMetadata {
  isFolder: boolean
  level: number // depth of the entry — derived from ancestor count in collectFileMetadata
  hasChildren: boolean // true when other entries live under this one (used to detect DB pages)
  fileName: string // full path for files; bare folder name for folder entries
  extension?: string // lower-cased extension including the dot (see extractExtension)
}

const MD_EXTENSION = '.md'
const CSV_EXTENSION = '.csv'
// Fallback content type when mime-types cannot resolve one from the file name.
const DEFAULT_ATTACHMENT_MIME_TYPE = 'application/octet-stream'

// Classification of link targets found in the exported markdown.
enum NOTION_MD_LINK_TYPES {
  INTERNAL_LINK,
  EXTERNAL_LINK,
  ATTACHMENT,
  UNKNOWN
}
/**
 * Imports a Notion "Markdown & CSV" export located at `dir`,
 * creating one teamspace per top-level folder of the archive.
 */
export async function importNotion (
  ctx: MeasureMetricsContext,
  client: TxOperations,
  storage: StorageAdapter,
  dir: string,
  ws: WorkspaceIdWithUrl
): Promise<void> {
  const entries = await getFilesForImport(dir)

  // First pass: scan the tree and collect filesystem + document metadata.
  const fileMetaMap = new Map<string, FileMetadata>()
  const documentMetaMap = new Map<string, DocumentMetadata>()
  await collectMetadata(dir, entries, fileMetaMap, documentMetaMap)
  console.log(fileMetaMap)
  console.log(documentMetaMap)

  // Second pass: create a teamspace per top-level folder, then import files.
  const spaceIdMap = await createTeamspaces(fileMetaMap, client)
  if (spaceIdMap.size === 0) {
    console.error('No teamspaces found in directory: ', dir)
    return
  }
  await importFiles(ctx, client, storage, fileMetaMap, documentMetaMap, spaceIdMap, ws)
}
/** Recursively lists regular files of the export, dropping the root index.html. */
async function getFilesForImport (dir: string): Promise<Dirent[]> {
  const entries = await readdir(dir, { recursive: true, withFileTypes: true })
  return entries.filter((entry) => {
    if (entry.isDirectory()) return false
    // Skip only the index.html that sits directly in the export root.
    return !(entry.name === 'index.html' && entry.path === dir)
  })
}
/**
 * Imports a Notion "Markdown & CSV" export into a single newly created
 * teamspace named `teamspace`, instead of one teamspace per folder.
 */
export async function importToTeamspace (
  ctx: MeasureMetricsContext,
  client: TxOperations,
  storage: StorageAdapter,
  dir: string,
  ws: WorkspaceIdWithUrl,
  teamspace: string
): Promise<void> {
  const entries = await getFilesForImport(dir)

  // Scan the export tree and collect filesystem + document metadata.
  const fileMetaMap = new Map<string, FileMetadata>()
  const documentMetaMap = new Map<string, DocumentMetadata>()
  await collectMetadata(dir, entries, fileMetaMap, documentMetaMap)
  console.log(fileMetaMap)
  console.log(documentMetaMap)

  const spaceId = await createTeamspace(teamspace, client)
  await importFilesToSpace(ctx, client, storage, fileMetaMap, documentMetaMap, spaceId, ws)
}
/**
 * Collects filesystem and document metadata for every export entry.
 *
 * Fix: `dirent.path` is the path of the entry's PARENT directory (the rest of
 * this file relies on that, e.g. `join(file.path, file.name)`), so the
 * original `stat(file.path)` recorded the directory's size as the file size.
 * Stat the file itself instead.
 */
async function collectMetadata (
  root: string,
  files: Dirent[],
  fileMetaMap: Map<string, FileMetadata>,
  documentMetaMap: Map<string, DocumentMetadata>
): Promise<void> {
  for (const file of files) {
    const st = await stat(join(file.path, file.name))
    collectFileMetadata(root, file, st.size, fileMetaMap, documentMetaMap)
  }
}
/**
 * Records metadata for a single export entry:
 * - a FileMetadata record for the file itself,
 * - FileMetadata records for every ancestor folder (marked hasChildren),
 * - a DocumentMetadata record with a freshly generated Huly id.
 */
function collectFileMetadata (
  root: string,
  file: Dirent,
  fileSize: number,
  fileMetaMap: Map<string, FileMetadata>,
  documentMetaMap: Map<string, DocumentMetadata>
): void {
  const notionId = getFileId(file.path, file.name)
  const extension = extractExtension(file.name)
  // Folder entries between the export root and this file, derived from the
  // parent-directory path. NOTE(review): when root has no trailing separator
  // the first element is an empty string from the leading '/'.
  const ancestors = getAncestorEntries(root, file.path)

  // Preserve hasChildren if a child already registered this entry as a folder.
  const meta = fileMetaMap.get(notionId)
  fileMetaMap.set(notionId, {
    level: ancestors.length,
    isFolder: false,
    extension,
    fileName: join(file.path, file.name),
    hasChildren: meta?.hasChildren ?? false
  })

  // Ensure every ancestor folder has an entry and is flagged as having children.
  ancestors.forEach((folder, i) => {
    const id = getFileId('', folder)
    const meta = fileMetaMap.get(id)
    fileMetaMap.set(id, {
      level: meta?.level ?? i,
      isFolder: meta?.isFolder ?? true,
      fileName: meta?.fileName ?? folder,
      extension: meta?.extension,
      hasChildren: true
    })
  })

  // Direct parent = last ancestor; sub-root = ancestors[1] (ancestors[0] is
  // the empty leading segment — see NOTE above). For the sub-root, fall back
  // to the folder's cleaned name when it carries no Notion id.
  const notionParentId =
    ancestors[ancestors.length - 1] !== undefined ? extractNotionId(ancestors[ancestors.length - 1]) : undefined
  const notionSubRootId =
    ancestors[1] !== undefined ? extractNotionId(ancestors[1]) ?? extractOriginalName(ancestors[1]) : undefined

  documentMetaMap.set(notionId, {
    id: generateId(),
    name: extractOriginalName(file.name),
    notionId,
    notionParentId,
    notionSubRootId,
    mimeType: getContentType(file.name),
    size: fileSize
  })
}
/**
 * Creates one teamspace per top-level folder of the export.
 * Returns a map from the folder's notion id to the new teamspace ref.
 */
async function createTeamspaces (
  fileMetaMap: Map<string, FileMetadata>,
  client: TxOperations
): Promise<Map<string, Ref<Teamspace>>> {
  const teamspaceByNotionId = new Map<string, Ref<Teamspace>>()
  for (const [notionId, meta] of fileMetaMap.entries()) {
    const isTopLevelFolder = meta.isFolder && meta.level === 1
    if (!isTopLevelFolder) continue
    console.log('TEAMSPACE: ', meta.fileName)
    const teamspaceName = extractOriginalName(meta.fileName)
    teamspaceByNotionId.set(notionId, await createTeamspace(teamspaceName, client))
  }
  return teamspaceByNotionId
}
/** Creates a default-type teamspace named `name` and returns its ref. */
async function createTeamspace (name: string, client: TxOperations): Promise<Ref<Teamspace>> {
  const id = generateId<Teamspace>()
  await client.createDoc(
    document.class.Teamspace,
    core.space.Space,
    {
      type: document.spaceType.DefaultTeamspaceType,
      description: 'Imported from Notion',
      name,
      private: false,
      members: [],
      owners: [],
      autoJoin: false,
      archived: false
    },
    id
  )
  return id
}
/** Imports every non-folder entry into a single, pre-created teamspace. */
async function importFilesToSpace (
  ctx: MeasureMetricsContext,
  client: TxOperations,
  storage: StorageAdapter,
  fileMetaMap: Map<string, FileMetadata>,
  documentMetaMap: Map<string, DocumentMetadata>,
  spaceId: Ref<Teamspace>,
  ws: WorkspaceIdWithUrl
): Promise<void> {
  for (const [notionId, fileMeta] of fileMetaMap) {
    if (fileMeta.isFolder) continue
    const docMeta = documentMetaMap.get(notionId)
    if (docMeta === undefined) throw new Error('Cannot find metadata for entry: ' + fileMeta.fileName)
    await importFile(ctx, client, storage, fileMeta, docMeta, spaceId, documentMetaMap, ws)
  }
}
/**
 * Imports every non-folder entry, resolving the target teamspace from the
 * entry's top-level export folder (notionSubRootId).
 *
 * Fix: the original computed `cond && map.get(...)`, producing a
 * `false | Ref | undefined` value checked with `=== false`; replaced with a
 * plain conditional — same outcome, no boolean sentinel in the union.
 *
 * @throws Error when an entry has no metadata or no matching teamspace.
 */
async function importFiles (
  ctx: MeasureMetricsContext,
  client: TxOperations,
  storage: StorageAdapter,
  fileMetaMap: Map<string, FileMetadata>,
  documentMetaMap: Map<string, DocumentMetadata>,
  spaceIdMap: Map<string, Ref<Teamspace>>,
  ws: WorkspaceIdWithUrl
): Promise<void> {
  for (const [notionId, fileMeta] of fileMetaMap) {
    if (fileMeta.isFolder) continue
    const docMeta = documentMetaMap.get(notionId)
    if (docMeta === undefined) throw new Error('Cannot find metadata for entry: ' + fileMeta.fileName)
    const spaceId = docMeta.notionSubRootId === undefined ? undefined : spaceIdMap.get(docMeta.notionSubRootId)
    if (spaceId === undefined) {
      throw new Error('Teamspace not found for document: ' + docMeta.name)
    }
    await importFile(ctx, client, storage, fileMeta, docMeta, spaceId, documentMetaMap, ws)
  }
}
/**
 * Reads one export entry from disk and dispatches it to the matching
 * DataProcessor (page, attachment, DB file, ...). Logs progress and rethrows
 * the original error after logging when import of the entry fails.
 *
 * Fix: the original wrapped already-promise-based `readFile` in a
 * `new Promise` executor and mixed `.then` chains inside an async flow;
 * rewritten as straight async/await with identical logging and rejection
 * behavior.
 *
 * @throws Error for folder entries (not supported) or any processor failure.
 */
async function importFile (
  ctx: MeasureMetricsContext,
  client: TxOperations,
  storage: StorageAdapter,
  fileMeta: FileMetadata,
  docMeta: DocumentMetadata,
  spaceId: Ref<Teamspace>,
  documentMetaMap: Map<string, DocumentMetadata>,
  ws: WorkspaceIdWithUrl
): Promise<void> {
  if (fileMeta.isFolder) throw new Error('Importing folder entry is not supported: ' + fileMeta.fileName)
  console.log('IMPORT STARTED:', fileMeta.fileName)
  try {
    const data = await readFile(fileMeta.fileName)
    const { notionParentId } = docMeta
    const parentMeta =
      notionParentId !== undefined && notionParentId !== '' ? documentMetaMap.get(notionParentId) : undefined
    const processFileData = getDataProcessor(fileMeta, docMeta)
    await processFileData(ctx, client, storage, ws, data, docMeta, spaceId, parentMeta, documentMetaMap)
    console.log('IMPORT SUCCEED:', docMeta.name)
    console.log('------------------------------------------------------------------')
  } catch (error: any) {
    console.warn('IMPORT FAILED:', docMeta.name)
    console.log(error.stack)
    console.log('------------------------------------------------------------------')
    throw error
  }
}
/**
 * Shared signature of the per-entry import handlers returned by
 * getDataProcessor (importPageDocument, importAttachment, ...).
 * `parentMeta` is the metadata of the parent document when known;
 * `documentMetaMap` gives access to every document found during the scan.
 */
type DataProcessor = (
  ctx: MeasureMetricsContext,
  client: TxOperations,
  storage: StorageAdapter,
  ws: WorkspaceIdWithUrl,
  data: Buffer,
  docMeta: DocumentMetadata,
  space: Ref<Teamspace>,
  parentMeta?: DocumentMetadata,
  documentMetaMap?: Map<string, DocumentMetadata>
) => Promise<void>
function getDataProcessor (fileMeta: FileMetadata, docMeta: DocumentMetadata): DataProcessor {
if (fileMeta.isFolder && fileMeta.level === 1) {
console.log('TEAMSPACE: ', docMeta.name)
return skip
}
if (fileMeta.extension === MD_EXTENSION) {
console.log('PAGE: ', docMeta.name)
return importPageDocument
}
if (fileMeta.extension === CSV_EXTENSION && fileMeta.hasChildren) {
console.log('DB FILE: ', docMeta.name)
return createDBPageWithAttachments
}
if (fileMeta.extension === CSV_EXTENSION && /[\d\w]*_all$/.test(docMeta.notionId)) {
console.log('DB FILE (ALL): ', docMeta.name)
return importDBAttachment
}
if (!fileMeta.isFolder && fileMeta.extension !== '' && docMeta.notionParentId !== undefined) {
console.log('ATTACHMENT: ', docMeta.name)
return importAttachment
}
return skip
}
/**
 * Handles a DB .csv that also has an exported subfolder: creates a document
 * page for the DB and attaches the CSV itself to that page.
 * (Local renamed from `attachment` to avoid shadowing the imported module.)
 */
async function createDBPageWithAttachments (
  ctx: MeasureMetricsContext,
  client: TxOperations,
  storage: StorageAdapter,
  ws: WorkspaceIdWithUrl,
  data: Buffer,
  docMeta: DocumentMetadata,
  space: Ref<Teamspace>,
  parentMeta?: DocumentMetadata,
  documentMetaMap?: Map<string, DocumentMetadata>
): Promise<void> {
  const pageId = docMeta.id as Ref<Document>
  const collabId = makeCollaborativeDoc(pageId, 'content')
  const parentId = parentMeta === undefined ? document.ids.NoParent : (parentMeta.id as Ref<Document>)

  const pageData: AttachedData<Document> = {
    name: docMeta.name,
    content: collabId,
    attachments: 0,
    children: 0,
    embeddings: 0,
    labels: 0,
    comments: 0,
    references: 0
  }
  await client.addCollection(
    document.class.Document,
    space,
    parentId,
    document.class.Document,
    'children',
    pageData,
    pageId
  )

  // The CSV payload becomes an attachment of the page created above.
  const dbPageMeta: DocumentMetadata = {
    id: pageId,
    notionParentId: docMeta.notionParentId,
    name: docMeta.name,
    notionId: docMeta.notionId
  }
  const attachmentMeta: DocumentMetadata = {
    id: generateId(),
    notionParentId: pageId,
    name: docMeta.name,
    notionId: docMeta.notionId,
    mimeType: docMeta.mimeType,
    size: docMeta.size
  }
  await importAttachment(ctx, client, storage, ws, data, attachmentMeta, space, dbPageMeta)
}
/**
 * Handles a "<id>_all.csv" full DB dump: attaches it to the page previously
 * created for "<id>.csv" (looked up in documentMetaMap).
 * (Local renamed from `attachment` to avoid shadowing the imported module.)
 */
async function importDBAttachment (
  ctx: MeasureMetricsContext,
  client: TxOperations,
  storage: StorageAdapter,
  ws: WorkspaceIdWithUrl,
  data: Buffer,
  docMeta: DocumentMetadata,
  space: Ref<Teamspace>,
  parentMeta?: DocumentMetadata,
  documentMetaMap?: Map<string, DocumentMetadata>
): Promise<void> {
  const matched = docMeta.notionId.match(/([\d\w]*)_all$/)
  if (matched == null || matched.length < 2) {
    throw new Error('DB file not found: ' + docMeta.name)
  }

  const dbPageMeta = documentMetaMap?.get(matched[1])
  if (dbPageMeta === undefined) {
    throw new Error('DB page metadata not found: ' + docMeta.name)
  }

  const attachmentMeta: DocumentMetadata = {
    id: docMeta.id,
    notionParentId: dbPageMeta.id,
    name: docMeta.name,
    notionId: docMeta.notionId,
    mimeType: docMeta.mimeType,
    size: docMeta.size
  }
  await importAttachment(ctx, client, storage, ws, data, attachmentMeta, space, dbPageMeta)
}
/**
 * Uploads the raw file bytes to blob storage, then attaches the blob to the
 * parent document.
 * @throws Error when no parent document metadata was supplied.
 */
async function importAttachment (
  ctx: MeasureMetricsContext,
  client: TxOperations,
  storage: StorageAdapter,
  ws: WorkspaceIdWithUrl,
  data: Buffer,
  docMeta: DocumentMetadata,
  space: Ref<Teamspace>,
  parentMeta?: DocumentMetadata,
  documentMetaMap?: Map<string, DocumentMetadata>
): Promise<void> {
  if (parentMeta === undefined) {
    throw new Error('Cannot import attachment without parent doc: ' + docMeta.id)
  }

  const fileSize = docMeta.size ?? 0
  const fileType = docMeta.mimeType ?? DEFAULT_ATTACHMENT_MIME_TYPE

  // Upload first, then record the attachment document.
  await storage.put(ctx, ws, docMeta.id, data, fileType, fileSize)

  const attachedData: AttachedData<Attachment> = {
    file: docMeta.id as Ref<Blob>,
    name: docMeta.name,
    lastModified: Date.now(),
    type: fileType,
    size: fileSize
  }
  await client.addCollection(
    attachment.class.Attachment,
    space,
    parentMeta.id as Ref<Document>,
    document.class.Document,
    'attachments',
    attachedData,
    docMeta.id as Ref<Attachment>
  )
}
/**
 * Imports one markdown page: parses it, rewrites Notion-internal
 * links/images/attachments to Huly references, saves the content as a
 * collaborative doc and creates the Document record.
 *
 * Fix: removed the redundant `data.toString() ?? ''` / `md ?? ''` —
 * Buffer.toString() always returns a string, never null/undefined.
 */
async function importPageDocument (
  ctx: MeasureMetricsContext,
  client: TxOperations,
  storage: StorageAdapter,
  ws: WorkspaceIdWithUrl,
  data: Buffer,
  docMeta: DocumentMetadata,
  space: Ref<Teamspace>,
  parentMeta?: DocumentMetadata,
  documentMetaMap?: Map<string, DocumentMetadata>
): Promise<void> {
  const md = data.toString()
  const json = parseMessageMarkdown(md, 'image://')
  if (documentMetaMap !== undefined) {
    // Rewrite links/images/attachments that point inside this import.
    preProcessMarkdown(json, documentMetaMap)
  }

  const id = docMeta.id as Ref<Document>
  const collabId = makeCollaborativeDoc(id, 'content')
  const yDoc = jsonToYDocNoSchema(json, 'content')
  await saveCollaborativeDoc(storage, ws, collabId, yDoc, ctx)

  const parentId = (parentMeta?.id as Ref<Document> | undefined) ?? document.ids.NoParent
  const attachedData: AttachedData<Document> = {
    name: docMeta.name,
    content: collabId,
    attachments: 0,
    children: 0,
    embeddings: 0,
    labels: 0,
    comments: 0,
    references: 0
  }
  await client.addCollection(
    document.class.Document,
    space,
    parentId,
    document.class.Document,
    'children',
    attachedData,
    id
  )
}
/**
 * Walks the parsed markdown tree and rewrites Notion-specific references:
 * image nodes get the uploaded blob's file-id; link marks are converted to
 * Huly document references or file attachments when their target was found
 * in this import, and left alone (with a log line) otherwise.
 */
function preProcessMarkdown (json: MarkupNode, documentMetaMap: Map<string, DocumentMetadata>): void {
  traverseNode(json, (node) => {
    if (node.type === MarkupNodeType.image) {
      const src = node.attrs?.src
      if (src !== undefined) {
        // Map the image src back to the metadata collected during the scan.
        const notionId = getFileId('', src as string)
        const meta = documentMetaMap.get(notionId)
        if (meta !== undefined) {
          alterImageNode(node, meta)
        }
      }
    } else {
      traverseNodeMarks(node, (mark) => {
        if (mark.type === MarkupMarkType.link) {
          const href = mark.attrs.href
          switch (getLinkType(href)) {
            case NOTION_MD_LINK_TYPES.UNKNOWN:
            case NOTION_MD_LINK_TYPES.EXTERNAL_LINK: {
              // External/unrecognized links are kept as-is.
              console.log('skip this type of link: ', href)
              return
            }
            case NOTION_MD_LINK_TYPES.INTERNAL_LINK: {
              const notionId = getFileId('', href)
              const targetMeta = documentMetaMap.get(notionId)
              console.log('Target HULY page ID:', targetMeta?.id)
              if (targetMeta !== undefined) {
                alterInternalLinkNode(node, targetMeta)
              } else {
                console.warn('Linked page not found (outside of this import): ' + href)
              }
              return
            }
            case NOTION_MD_LINK_TYPES.ATTACHMENT: {
              const notionId = getFileId('', href)
              const attachmentMeta = documentMetaMap.get(notionId)
              if (attachmentMeta !== undefined) {
                console.log('Attachment found: ', attachmentMeta)
                alterAttachmentNode(node, attachmentMeta, href)
              } else {
                console.warn('Attachment not found: ', href)
              }
            }
          }
        }
      })
    }
    // NOTE(review): assumes returning true tells traverseNode to descend into
    // children — confirm against @hcengineering/text.
    return true
  })
}
/** Classifies an exported markdown link target (see NOTION_MD_LINK_TYPES). */
function getLinkType (href: string): NOTION_MD_LINK_TYPES {
  console.log('original link href: ' + href)
  if (isExternalLink(href)) {
    return NOTION_MD_LINK_TYPES.EXTERNAL_LINK
  }
  // Links carrying a Notion id point at pages inside this export.
  const notionId = extractNotionId(href)
  const hasNotionId = notionId !== null && notionId !== undefined && notionId !== ''
  if (hasNotionId) {
    return NOTION_MD_LINK_TYPES.INTERNAL_LINK
  }
  // Anything else with a non-empty base name is treated as an attachment.
  const baseName = extractNameWoExtension(href)
  if (baseName !== undefined && baseName !== '') {
    return NOTION_MD_LINK_TYPES.ATTACHMENT
  }
  return NOTION_MD_LINK_TYPES.UNKNOWN
}
/** Converts a markup node into a file node pointing at the imported blob. */
function alterAttachmentNode (node: MarkupNode, targetMeta: DocumentMetadata, href: string): void {
  const { id, name, size, mimeType } = targetMeta
  node.type = MarkupNodeType.file
  node.attrs = {
    'file-id': id,
    'data-file-name': name,
    'data-file-size': size ?? 0,
    'data-file-type': mimeType ?? DEFAULT_ATTACHMENT_MIME_TYPE,
    'data-file-href': href
  }
}
/** Converts a markup node into a reference to the imported Huly document. */
function alterInternalLinkNode (node: MarkupNode, targetMeta: DocumentMetadata): void {
  const { id, name } = targetMeta
  node.type = MarkupNodeType.reference
  node.attrs = {
    id,
    label: name,
    objectclass: document.class.Document,
    text: '',
    content: ''
  }
}
/** Points an image node at the uploaded blob via its file-id attribute. */
function alterImageNode (node: MarkupNode, meta: DocumentMetadata): void {
  node.type = MarkupNodeType.image
  const attrs = node.attrs
  if (attrs === undefined) return
  attrs['file-id'] = meta.id
  if (meta.mimeType !== undefined) {
    attrs['data-file-type'] = meta.mimeType
  }
}
/**
 * No-op DataProcessor for entries that cannot be imported.
 * Positional argument 5 is the entry's DocumentMetadata (see DataProcessor).
 *
 * Fix: rest parameter typed `any[]` instead of `any` — a rest parameter is
 * always an array; `...args: any` only compiles because `any` disables the
 * check.
 */
async function skip (...args: any[]): Promise<void> {
  const docMeta = args[5]
  console.warn('Unsupported entry type, skipping: ', docMeta)
}
/** A link is external when it parses as an absolute URL on its own. */
function isExternalLink (href: any): boolean {
  // Equivalent to URL.canParse(href): constructing a URL without a base
  // succeeds exactly for absolute URLs.
  try {
    // eslint-disable-next-line no-new
    new URL(href)
    return true
  } catch {
    return false
  }
}
/**
 * Pulls the 32-char Notion id (optionally suffixed "_all" for DB dumps) out
 * of a URI-encoded file name; the id is separated from the name by a space
 * and followed by an extension dot or the end of the string.
 */
function extractNotionId (fileName: string): string | undefined {
  const normalized = decodeURI(fileName).trimEnd()
  const match = / ([\w\d]{32}(_all)?)(\.|$)/.exec(normalized)
  return match?.[1]
}
/** Lower-cased extension (including the dot) of a URI-encoded file name. */
function extractExtension (fileName: string): string {
  return parse(decodeURI(fileName)).ext.toLowerCase()
}
/** Base name (no directory, no extension) of a URI-encoded file name. */
function extractNameWoExtension (fileName: string): string {
  return parse(decodeURI(fileName)).name
}
/** Human-readable name: extension and (if present) the Notion id removed. */
function extractOriginalName (fileName: string): string {
  const baseName = extractNameWoExtension(fileName)
  const notionId = extractNotionId(baseName)
  const cleaned = notionId === undefined ? baseName : baseName.replace(notionId, '')
  return cleaned.trimEnd()
}
/**
 * Stable key for an export entry: the Notion id when the name carries one,
 * otherwise "<parent folder>/<name>" (both URI-decoded).
 */
function getFileId (filePath: string, fileName: string): string {
  const notionId = extractNotionId(fileName)
  if (notionId !== undefined && notionId !== '') {
    return notionId
  }
  return join(basename(decodeURI(filePath)), decodeURI(fileName))
}
/**
 * Splits the portion of `filePath` below `root` into path segments.
 *
 * Fix: `filePath.replace(root, '')` removes the FIRST occurrence of `root`
 * anywhere in the string, not a guaranteed prefix; strip the prefix
 * explicitly when it is one, keeping the old behavior as a fallback.
 * NOTE(review): splitting on '/' assumes POSIX separators — verify before
 * running this tool on Windows paths.
 */
function getAncestorEntries (root: string, filePath: string): string[] {
  const relativePath = filePath.startsWith(root) ? filePath.slice(root.length) : filePath.replace(root, '')
  return relativePath.split('/')
}
/** Resolves a MIME content type from the file name; undefined when unknown. */
function getContentType (fileName: string): string | undefined {
  const resolved = contentType(fileName)
  return resolved === false ? undefined : resolved
}

View File

@ -23,6 +23,7 @@ export enum MarkupNodeType {
code_block = 'codeBlock',
text = 'text',
image = 'image',
file = 'file',
reference = 'reference',
hard_break = 'hardBreak',
ordered_list = 'orderedList',