mirror of
https://github.com/hcengineering/platform.git
synced 2025-04-13 19:58:09 +00:00
Support import fom Notion (#6498)
This commit is contained in:
parent
92b20ad47f
commit
c809de247d
25
.vscode/launch.json
vendored
25
.vscode/launch.json
vendored
@ -4,6 +4,30 @@
|
|||||||
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||||
"version": "0.2.0",
|
"version": "0.2.0",
|
||||||
"configurations": [
|
"configurations": [
|
||||||
|
{
|
||||||
|
"name": "Debug notion import",
|
||||||
|
"type": "node",
|
||||||
|
"request": "launch",
|
||||||
|
// "args": ["src/__start.ts", "import-notion-to-teamspace", "/home/anna/work/notion/natalya/Export-fad9ecb4-a1a5-4623-920d-df32dd423743", "-ws", "w-user1-ws1-66d8018b-ce1e0c3164-006bb0", "-ts", "notion"],
|
||||||
|
"args": ["src/__start.ts", "import-notion", "/home/anna/work/notion/natalya/Export-fad9ecb4-a1a5-4623-920d-df32dd423743", "-ws", "w-user1-ws1-66d8018b-ce1e0c3164-006bb0"],
|
||||||
|
"env": {
|
||||||
|
"SERVER_SECRET": "secret",
|
||||||
|
"MINIO_ACCESS_KEY": "minioadmin",
|
||||||
|
"MINIO_SECRET_KEY": "minioadmin",
|
||||||
|
"MINIO_ENDPOINT": "localhost",
|
||||||
|
"TRANSACTOR_URL": "ws://localhost:3333",
|
||||||
|
"MONGO_URL": "mongodb://localhost:27017",
|
||||||
|
"ACCOUNTS_URL": "http://localhost:3000",
|
||||||
|
"TELEGRAM_DATABASE": "telegram-service",
|
||||||
|
"ELASTIC_URL": "http://localhost:9200",
|
||||||
|
"REKONI_URL": "http://localhost:4004"
|
||||||
|
},
|
||||||
|
"runtimeVersion": "20",
|
||||||
|
"runtimeArgs": ["--nolazy", "-r", "ts-node/register"],
|
||||||
|
"sourceMaps": true,
|
||||||
|
"outputCapture": "std",
|
||||||
|
"cwd": "${workspaceRoot}/dev/tool"
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"address": "127.0.0.1",
|
"address": "127.0.0.1",
|
||||||
"localRoot": "${workspaceFolder}",
|
"localRoot": "${workspaceFolder}",
|
||||||
@ -229,6 +253,7 @@
|
|||||||
"ELASTIC_URL": "http://localhost:9200",
|
"ELASTIC_URL": "http://localhost:9200",
|
||||||
"REKONI_URL": "http://localhost:4004"
|
"REKONI_URL": "http://localhost:4004"
|
||||||
},
|
},
|
||||||
|
"runtimeVersion": "20",
|
||||||
"runtimeArgs": ["--nolazy", "-r", "ts-node/register"],
|
"runtimeArgs": ["--nolazy", "-r", "ts-node/register"],
|
||||||
"sourceMaps": true,
|
"sourceMaps": true,
|
||||||
"outputCapture": "std",
|
"outputCapture": "std",
|
||||||
|
@ -64,6 +64,7 @@
|
|||||||
"@hcengineering/client-resources": "^0.6.27",
|
"@hcengineering/client-resources": "^0.6.27",
|
||||||
"@hcengineering/contact": "^0.6.24",
|
"@hcengineering/contact": "^0.6.24",
|
||||||
"@hcengineering/core": "^0.6.32",
|
"@hcengineering/core": "^0.6.32",
|
||||||
|
"@hcengineering/document": "^0.6.0",
|
||||||
"@hcengineering/elastic": "^0.6.0",
|
"@hcengineering/elastic": "^0.6.0",
|
||||||
"@hcengineering/lead": "^0.6.0",
|
"@hcengineering/lead": "^0.6.0",
|
||||||
"@hcengineering/minio": "^0.6.0",
|
"@hcengineering/minio": "^0.6.0",
|
||||||
|
@ -48,7 +48,7 @@ import {
|
|||||||
} from '@hcengineering/server-backup'
|
} from '@hcengineering/server-backup'
|
||||||
import serverClientPlugin, { BlobClient, createClient, getTransactorEndpoint } from '@hcengineering/server-client'
|
import serverClientPlugin, { BlobClient, createClient, getTransactorEndpoint } from '@hcengineering/server-client'
|
||||||
import serverToken, { decodeToken, generateToken } from '@hcengineering/server-token'
|
import serverToken, { decodeToken, generateToken } from '@hcengineering/server-token'
|
||||||
import toolPlugin, { FileModelLogger } from '@hcengineering/server-tool'
|
import toolPlugin, { connect, FileModelLogger } from '@hcengineering/server-tool'
|
||||||
import path from 'path'
|
import path from 'path'
|
||||||
|
|
||||||
import { buildStorageFromConfig, storageConfigFromEnv } from '@hcengineering/server-storage'
|
import { buildStorageFromConfig, storageConfigFromEnv } from '@hcengineering/server-storage'
|
||||||
@ -63,7 +63,10 @@ import core, {
|
|||||||
MeasureMetricsContext,
|
MeasureMetricsContext,
|
||||||
metricsToString,
|
metricsToString,
|
||||||
systemAccountEmail,
|
systemAccountEmail,
|
||||||
|
TxOperations,
|
||||||
versionToString,
|
versionToString,
|
||||||
|
type WorkspaceIdWithUrl,
|
||||||
|
type Client as CoreClient,
|
||||||
type Data,
|
type Data,
|
||||||
type Doc,
|
type Doc,
|
||||||
type Ref,
|
type Ref,
|
||||||
@ -95,6 +98,7 @@ import { fixJsonMarkup, migrateMarkup } from './markup'
|
|||||||
import { fixMixinForeignAttributes, showMixinForeignAttributes } from './mixin'
|
import { fixMixinForeignAttributes, showMixinForeignAttributes } from './mixin'
|
||||||
import { fixAccountEmails, renameAccount } from './renameAccount'
|
import { fixAccountEmails, renameAccount } from './renameAccount'
|
||||||
import { moveFiles, syncFiles } from './storage'
|
import { moveFiles, syncFiles } from './storage'
|
||||||
|
import { importNotion, importToTeamspace } from './notion'
|
||||||
|
|
||||||
const colorConstants = {
|
const colorConstants = {
|
||||||
colorRed: '\u001b[31m',
|
colorRed: '\u001b[31m',
|
||||||
@ -202,6 +206,84 @@ export function devTool (
|
|||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
|
// import-notion /home/anna/work/notion/pages/exported --workspace workspace
|
||||||
|
program
|
||||||
|
.command('import-notion <dir>')
|
||||||
|
.description('import extracted archive exported from Notion as "Markdown & CSV"')
|
||||||
|
.requiredOption('-ws, --workspace <workspace>', 'workspace where the documents should be imported to')
|
||||||
|
.action(async (dir: string, cmd) => {
|
||||||
|
if (cmd.workspace === '') return
|
||||||
|
|
||||||
|
const { mongodbUri } = prepareTools()
|
||||||
|
|
||||||
|
await withDatabase(mongodbUri, async (db) => {
|
||||||
|
const ws = await getWorkspaceById(db, cmd.workspace)
|
||||||
|
if (ws === null) {
|
||||||
|
console.log('Workspace not found: ', cmd.workspace)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
const wsUrl: WorkspaceIdWithUrl = {
|
||||||
|
name: ws.workspace,
|
||||||
|
workspaceName: ws.workspaceName ?? '',
|
||||||
|
workspaceUrl: ws.workspaceUrl ?? ''
|
||||||
|
}
|
||||||
|
|
||||||
|
await withStorage(mongodbUri, async (storageAdapter) => {
|
||||||
|
const token = generateToken(systemAccountEmail, { name: ws.workspace })
|
||||||
|
const endpoint = await getTransactorEndpoint(token, 'external')
|
||||||
|
const connection = (await connect(endpoint, wsUrl, undefined, {
|
||||||
|
mode: 'backup'
|
||||||
|
})) as unknown as CoreClient
|
||||||
|
const client = new TxOperations(connection, core.account.System)
|
||||||
|
|
||||||
|
await importNotion(toolCtx, client, storageAdapter, dir, wsUrl)
|
||||||
|
|
||||||
|
await connection.close()
|
||||||
|
})
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
// import-notion-to-teamspace /home/anna/work/notion/pages/exported --workspace workspace --teamspace notion
|
||||||
|
program
|
||||||
|
.command('import-notion-to-teamspace <dir>')
|
||||||
|
.description('import extracted archive exported from Notion as "Markdown & CSV"')
|
||||||
|
.requiredOption('-ws, --workspace <workspace>', 'workspace where the documents should be imported to')
|
||||||
|
.requiredOption('-ts, --teamspace <teamspace>', 'teamspace where the documents should be imported to')
|
||||||
|
.action(async (dir: string, cmd) => {
|
||||||
|
if (cmd.workspace === '') return
|
||||||
|
if (cmd.teamspace === '') return
|
||||||
|
|
||||||
|
const { mongodbUri } = prepareTools()
|
||||||
|
|
||||||
|
await withDatabase(mongodbUri, async (db) => {
|
||||||
|
const ws = await getWorkspaceById(db, cmd.workspace)
|
||||||
|
if (ws === null) {
|
||||||
|
console.log('Workspace not found: ', cmd.workspace)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
const wsUrl: WorkspaceIdWithUrl = {
|
||||||
|
name: ws.workspace,
|
||||||
|
workspaceName: ws.workspaceName ?? '',
|
||||||
|
workspaceUrl: ws.workspaceUrl ?? ''
|
||||||
|
}
|
||||||
|
|
||||||
|
await withStorage(mongodbUri, async (storageAdapter) => {
|
||||||
|
const token = generateToken(systemAccountEmail, { name: ws.workspace })
|
||||||
|
const endpoint = await getTransactorEndpoint(token, 'external')
|
||||||
|
const connection = (await connect(endpoint, wsUrl, undefined, {
|
||||||
|
mode: 'backup'
|
||||||
|
})) as unknown as CoreClient
|
||||||
|
const client = new TxOperations(connection, core.account.System)
|
||||||
|
|
||||||
|
await importToTeamspace(toolCtx, client, storageAdapter, dir, wsUrl, cmd.teamspace)
|
||||||
|
|
||||||
|
await connection.close()
|
||||||
|
})
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
program
|
program
|
||||||
.command('reset-account <email>')
|
.command('reset-account <email>')
|
||||||
.description('create user and corresponding account in master database')
|
.description('create user and corresponding account in master database')
|
||||||
|
653
dev/tool/src/notion.ts
Normal file
653
dev/tool/src/notion.ts
Normal file
@ -0,0 +1,653 @@
|
|||||||
|
import {
|
||||||
|
generateId,
|
||||||
|
type AttachedData,
|
||||||
|
type Ref,
|
||||||
|
type WorkspaceIdWithUrl,
|
||||||
|
makeCollaborativeDoc,
|
||||||
|
type MeasureMetricsContext,
|
||||||
|
type TxOperations,
|
||||||
|
type Blob
|
||||||
|
} from '@hcengineering/core'
|
||||||
|
import { saveCollaborativeDoc } from '@hcengineering/collaboration'
|
||||||
|
import document, { type Document, type Teamspace } from '@hcengineering/document'
|
||||||
|
import { type StorageAdapter } from '@hcengineering/server-core'
|
||||||
|
import {
|
||||||
|
MarkupMarkType,
|
||||||
|
type MarkupNode,
|
||||||
|
MarkupNodeType,
|
||||||
|
parseMessageMarkdown,
|
||||||
|
traverseNode,
|
||||||
|
traverseNodeMarks,
|
||||||
|
jsonToYDocNoSchema
|
||||||
|
} from '@hcengineering/text'
|
||||||
|
|
||||||
|
import attachment from '@hcengineering/model-attachment'
|
||||||
|
import { type Attachment } from '@hcengineering/attachment'
|
||||||
|
import { contentType } from 'mime-types'
|
||||||
|
import core from '@hcengineering/model-core'
|
||||||
|
import { readdir, stat, readFile } from 'fs/promises'
|
||||||
|
import { type Dirent } from 'fs'
|
||||||
|
import { basename, join, parse } from 'path'
|
||||||
|
|
||||||
|
interface DocumentMetadata {
|
||||||
|
id: string
|
||||||
|
name: string
|
||||||
|
notionId: string
|
||||||
|
notionSubRootId?: string
|
||||||
|
notionParentId?: string
|
||||||
|
mimeType?: string
|
||||||
|
size?: number
|
||||||
|
}
|
||||||
|
|
||||||
|
interface FileMetadata {
|
||||||
|
isFolder: boolean
|
||||||
|
level: number
|
||||||
|
hasChildren: boolean
|
||||||
|
fileName: string
|
||||||
|
extension?: string
|
||||||
|
}
|
||||||
|
|
||||||
|
const MD_EXTENSION = '.md'
|
||||||
|
const CSV_EXTENSION = '.csv'
|
||||||
|
const DEFAULT_ATTACHMENT_MIME_TYPE = 'application/octet-stream'
|
||||||
|
|
||||||
|
enum NOTION_MD_LINK_TYPES {
|
||||||
|
INTERNAL_LINK,
|
||||||
|
EXTERNAL_LINK,
|
||||||
|
ATTACHMENT,
|
||||||
|
UNKNOWN
|
||||||
|
}
|
||||||
|
|
||||||
|
export async function importNotion (
|
||||||
|
ctx: MeasureMetricsContext,
|
||||||
|
client: TxOperations,
|
||||||
|
storage: StorageAdapter,
|
||||||
|
dir: string,
|
||||||
|
ws: WorkspaceIdWithUrl
|
||||||
|
): Promise<void> {
|
||||||
|
const files = await getFilesForImport(dir)
|
||||||
|
|
||||||
|
const fileMetaMap = new Map<string, FileMetadata>()
|
||||||
|
const documentMetaMap = new Map<string, DocumentMetadata>()
|
||||||
|
|
||||||
|
await collectMetadata(dir, files, fileMetaMap, documentMetaMap)
|
||||||
|
console.log(fileMetaMap)
|
||||||
|
console.log(documentMetaMap)
|
||||||
|
|
||||||
|
const spaceIdMap = await createTeamspaces(fileMetaMap, client)
|
||||||
|
if (spaceIdMap.size === 0) {
|
||||||
|
console.error('No teamspaces found in directory: ', dir)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
await importFiles(ctx, client, storage, fileMetaMap, documentMetaMap, spaceIdMap, ws)
|
||||||
|
}
|
||||||
|
|
||||||
|
async function getFilesForImport (dir: string): Promise<Dirent[]> {
|
||||||
|
const filesAndDirs = await readdir(dir, { recursive: true, withFileTypes: true })
|
||||||
|
const files = filesAndDirs.filter((file) => {
|
||||||
|
return !file.isDirectory() && !(file.name === 'index.html' && file.path === dir)
|
||||||
|
})
|
||||||
|
return files
|
||||||
|
}
|
||||||
|
|
||||||
|
export async function importToTeamspace (
|
||||||
|
ctx: MeasureMetricsContext,
|
||||||
|
client: TxOperations,
|
||||||
|
storage: StorageAdapter,
|
||||||
|
dir: string,
|
||||||
|
ws: WorkspaceIdWithUrl,
|
||||||
|
teamspace: string
|
||||||
|
): Promise<void> {
|
||||||
|
const files = await getFilesForImport(dir)
|
||||||
|
|
||||||
|
const fileMetaMap = new Map<string, FileMetadata>()
|
||||||
|
const documentMetaMap = new Map<string, DocumentMetadata>()
|
||||||
|
|
||||||
|
await collectMetadata(dir, files, fileMetaMap, documentMetaMap)
|
||||||
|
console.log(fileMetaMap)
|
||||||
|
console.log(documentMetaMap)
|
||||||
|
|
||||||
|
const spaceId = await createTeamspace(teamspace, client)
|
||||||
|
|
||||||
|
await importFilesToSpace(ctx, client, storage, fileMetaMap, documentMetaMap, spaceId, ws)
|
||||||
|
}
|
||||||
|
|
||||||
|
async function collectMetadata (
|
||||||
|
root: string,
|
||||||
|
files: Dirent[],
|
||||||
|
fileMetaMap: Map<string, FileMetadata>,
|
||||||
|
documentMetaMap: Map<string, DocumentMetadata>
|
||||||
|
): Promise<void> {
|
||||||
|
for (const file of files) {
|
||||||
|
const st = await stat(file.path)
|
||||||
|
collectFileMetadata(root, file, st.size, fileMetaMap, documentMetaMap)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function collectFileMetadata (
|
||||||
|
root: string,
|
||||||
|
file: Dirent,
|
||||||
|
fileSize: number,
|
||||||
|
fileMetaMap: Map<string, FileMetadata>,
|
||||||
|
documentMetaMap: Map<string, DocumentMetadata>
|
||||||
|
): void {
|
||||||
|
const notionId = getFileId(file.path, file.name)
|
||||||
|
const extension = extractExtension(file.name)
|
||||||
|
const ancestors = getAncestorEntries(root, file.path)
|
||||||
|
const meta = fileMetaMap.get(notionId)
|
||||||
|
fileMetaMap.set(notionId, {
|
||||||
|
level: ancestors.length,
|
||||||
|
isFolder: false,
|
||||||
|
extension,
|
||||||
|
fileName: join(file.path, file.name),
|
||||||
|
hasChildren: meta?.hasChildren ?? false
|
||||||
|
})
|
||||||
|
|
||||||
|
ancestors.forEach((folder, i) => {
|
||||||
|
const id = getFileId('', folder)
|
||||||
|
const meta = fileMetaMap.get(id)
|
||||||
|
fileMetaMap.set(id, {
|
||||||
|
level: meta?.level ?? i,
|
||||||
|
isFolder: meta?.isFolder ?? true,
|
||||||
|
fileName: meta?.fileName ?? folder,
|
||||||
|
extension: meta?.extension,
|
||||||
|
hasChildren: true
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
const notionParentId =
|
||||||
|
ancestors[ancestors.length - 1] !== undefined ? extractNotionId(ancestors[ancestors.length - 1]) : undefined
|
||||||
|
const notionSubRootId =
|
||||||
|
ancestors[1] !== undefined ? extractNotionId(ancestors[1]) ?? extractOriginalName(ancestors[1]) : undefined
|
||||||
|
|
||||||
|
documentMetaMap.set(notionId, {
|
||||||
|
id: generateId(),
|
||||||
|
name: extractOriginalName(file.name),
|
||||||
|
notionId,
|
||||||
|
notionParentId,
|
||||||
|
notionSubRootId,
|
||||||
|
mimeType: getContentType(file.name),
|
||||||
|
size: fileSize
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
async function createTeamspaces (
|
||||||
|
fileMetaMap: Map<string, FileMetadata>,
|
||||||
|
client: TxOperations
|
||||||
|
): Promise<Map<string, Ref<Teamspace>>> {
|
||||||
|
const spaceIdMap = new Map<string, Ref<Teamspace>>()
|
||||||
|
for (const [notionId, meta] of fileMetaMap) {
|
||||||
|
if (meta.isFolder && meta.level === 1) {
|
||||||
|
console.log('TEAMSPACE: ', meta.fileName)
|
||||||
|
const teamspacName = extractOriginalName(meta.fileName)
|
||||||
|
const teamspaceId = await createTeamspace(teamspacName, client)
|
||||||
|
spaceIdMap.set(notionId, teamspaceId)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return spaceIdMap
|
||||||
|
}
|
||||||
|
|
||||||
|
async function createTeamspace (name: string, client: TxOperations): Promise<Ref<Teamspace>> {
|
||||||
|
const teamspaceId = generateId<Teamspace>()
|
||||||
|
const data = {
|
||||||
|
type: document.spaceType.DefaultTeamspaceType,
|
||||||
|
description: 'Imported from Notion',
|
||||||
|
name,
|
||||||
|
private: false,
|
||||||
|
members: [],
|
||||||
|
owners: [],
|
||||||
|
autoJoin: false,
|
||||||
|
archived: false
|
||||||
|
}
|
||||||
|
await client.createDoc(document.class.Teamspace, core.space.Space, data, teamspaceId)
|
||||||
|
return teamspaceId
|
||||||
|
}
|
||||||
|
|
||||||
|
async function importFilesToSpace (
|
||||||
|
ctx: MeasureMetricsContext,
|
||||||
|
client: TxOperations,
|
||||||
|
storage: StorageAdapter,
|
||||||
|
fileMetaMap: Map<string, FileMetadata>,
|
||||||
|
documentMetaMap: Map<string, DocumentMetadata>,
|
||||||
|
spaceId: Ref<Teamspace>,
|
||||||
|
ws: WorkspaceIdWithUrl
|
||||||
|
): Promise<void> {
|
||||||
|
for (const [notionId, fileMeta] of fileMetaMap) {
|
||||||
|
if (!fileMeta.isFolder) {
|
||||||
|
const docMeta = documentMetaMap.get(notionId)
|
||||||
|
if (docMeta === undefined) throw new Error('Cannot find metadata for entry: ' + fileMeta.fileName)
|
||||||
|
await importFile(ctx, client, storage, fileMeta, docMeta, spaceId, documentMetaMap, ws)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async function importFiles (
|
||||||
|
ctx: MeasureMetricsContext,
|
||||||
|
client: TxOperations,
|
||||||
|
storage: StorageAdapter,
|
||||||
|
fileMetaMap: Map<string, FileMetadata>,
|
||||||
|
documentMetaMap: Map<string, DocumentMetadata>,
|
||||||
|
spaceIdMap: Map<string, Ref<Teamspace>>,
|
||||||
|
ws: WorkspaceIdWithUrl
|
||||||
|
): Promise<void> {
|
||||||
|
for (const [notionId, fileMeta] of fileMetaMap) {
|
||||||
|
if (!fileMeta.isFolder) {
|
||||||
|
const docMeta = documentMetaMap.get(notionId)
|
||||||
|
if (docMeta === undefined) throw new Error('Cannot find metadata for entry: ' + fileMeta.fileName)
|
||||||
|
|
||||||
|
const spaceId = docMeta.notionSubRootId !== undefined && spaceIdMap.get(docMeta.notionSubRootId)
|
||||||
|
if (spaceId === undefined || spaceId === false) {
|
||||||
|
throw new Error('Teamspace not found for document: ' + docMeta.name)
|
||||||
|
}
|
||||||
|
|
||||||
|
await importFile(ctx, client, storage, fileMeta, docMeta, spaceId, documentMetaMap, ws)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async function importFile (
|
||||||
|
ctx: MeasureMetricsContext,
|
||||||
|
client: TxOperations,
|
||||||
|
storage: StorageAdapter,
|
||||||
|
fileMeta: FileMetadata,
|
||||||
|
docMeta: DocumentMetadata,
|
||||||
|
spaceId: Ref<Teamspace>,
|
||||||
|
documentMetaMap: Map<string, DocumentMetadata>,
|
||||||
|
ws: WorkspaceIdWithUrl
|
||||||
|
): Promise<void> {
|
||||||
|
await new Promise<void>((resolve, reject) => {
|
||||||
|
if (fileMeta.isFolder) throw new Error('Importing folder entry is not supported: ' + fileMeta.fileName)
|
||||||
|
|
||||||
|
console.log('IMPORT STARTED:', fileMeta.fileName)
|
||||||
|
readFile(fileMeta.fileName)
|
||||||
|
.then((data) => {
|
||||||
|
const { notionParentId } = docMeta
|
||||||
|
|
||||||
|
const parentMeta =
|
||||||
|
notionParentId !== undefined && notionParentId !== '' ? documentMetaMap.get(notionParentId) : undefined
|
||||||
|
|
||||||
|
const processFileData = getDataProcessor(fileMeta, docMeta)
|
||||||
|
processFileData(ctx, client, storage, ws, data, docMeta, spaceId, parentMeta, documentMetaMap)
|
||||||
|
.then(() => {
|
||||||
|
console.log('IMPORT SUCCEED:', docMeta.name)
|
||||||
|
console.log('------------------------------------------------------------------')
|
||||||
|
resolve()
|
||||||
|
})
|
||||||
|
.catch((error) => {
|
||||||
|
handleImportFailure(docMeta.name, error, reject)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.catch((error) => {
|
||||||
|
handleImportFailure(docMeta.name, error, reject)
|
||||||
|
})
|
||||||
|
|
||||||
|
function handleImportFailure (docName: string, error: any, reject: (reason?: any) => void): void {
|
||||||
|
console.warn('IMPORT FAILED:', docName)
|
||||||
|
console.log(error.stack)
|
||||||
|
console.log('------------------------------------------------------------------')
|
||||||
|
reject(error)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
type DataProcessor = (
|
||||||
|
ctx: MeasureMetricsContext,
|
||||||
|
client: TxOperations,
|
||||||
|
storage: StorageAdapter,
|
||||||
|
ws: WorkspaceIdWithUrl,
|
||||||
|
data: Buffer,
|
||||||
|
docMeta: DocumentMetadata,
|
||||||
|
space: Ref<Teamspace>,
|
||||||
|
parentMeta?: DocumentMetadata,
|
||||||
|
documentMetaMap?: Map<string, DocumentMetadata>
|
||||||
|
) => Promise<void>
|
||||||
|
|
||||||
|
function getDataProcessor (fileMeta: FileMetadata, docMeta: DocumentMetadata): DataProcessor {
|
||||||
|
if (fileMeta.isFolder && fileMeta.level === 1) {
|
||||||
|
console.log('TEAMSPACE: ', docMeta.name)
|
||||||
|
return skip
|
||||||
|
}
|
||||||
|
if (fileMeta.extension === MD_EXTENSION) {
|
||||||
|
console.log('PAGE: ', docMeta.name)
|
||||||
|
return importPageDocument
|
||||||
|
}
|
||||||
|
if (fileMeta.extension === CSV_EXTENSION && fileMeta.hasChildren) {
|
||||||
|
console.log('DB FILE: ', docMeta.name)
|
||||||
|
return createDBPageWithAttachments
|
||||||
|
}
|
||||||
|
if (fileMeta.extension === CSV_EXTENSION && /[\d\w]*_all$/.test(docMeta.notionId)) {
|
||||||
|
console.log('DB FILE (ALL): ', docMeta.name)
|
||||||
|
return importDBAttachment
|
||||||
|
}
|
||||||
|
if (!fileMeta.isFolder && fileMeta.extension !== '' && docMeta.notionParentId !== undefined) {
|
||||||
|
console.log('ATTACHMENT: ', docMeta.name)
|
||||||
|
return importAttachment
|
||||||
|
}
|
||||||
|
return skip
|
||||||
|
}
|
||||||
|
|
||||||
|
async function createDBPageWithAttachments (
|
||||||
|
ctx: MeasureMetricsContext,
|
||||||
|
client: TxOperations,
|
||||||
|
storage: StorageAdapter,
|
||||||
|
ws: WorkspaceIdWithUrl,
|
||||||
|
data: Buffer,
|
||||||
|
docMeta: DocumentMetadata,
|
||||||
|
space: Ref<Teamspace>,
|
||||||
|
parentMeta?: DocumentMetadata,
|
||||||
|
documentMetaMap?: Map<string, DocumentMetadata>
|
||||||
|
): Promise<void> {
|
||||||
|
const pageId = docMeta.id as Ref<Document>
|
||||||
|
const collabId = makeCollaborativeDoc(pageId, 'content')
|
||||||
|
|
||||||
|
const parentId = parentMeta !== undefined ? (parentMeta.id as Ref<Document>) : document.ids.NoParent
|
||||||
|
|
||||||
|
const object: AttachedData<Document> = {
|
||||||
|
name: docMeta.name,
|
||||||
|
content: collabId,
|
||||||
|
attachments: 0,
|
||||||
|
children: 0,
|
||||||
|
embeddings: 0,
|
||||||
|
labels: 0,
|
||||||
|
comments: 0,
|
||||||
|
references: 0
|
||||||
|
}
|
||||||
|
|
||||||
|
await client.addCollection(
|
||||||
|
document.class.Document,
|
||||||
|
space,
|
||||||
|
parentId,
|
||||||
|
document.class.Document,
|
||||||
|
'children',
|
||||||
|
object,
|
||||||
|
pageId
|
||||||
|
)
|
||||||
|
|
||||||
|
const dbPage: DocumentMetadata = {
|
||||||
|
id: pageId,
|
||||||
|
notionParentId: docMeta.notionParentId,
|
||||||
|
name: docMeta.name,
|
||||||
|
notionId: docMeta.notionId
|
||||||
|
}
|
||||||
|
|
||||||
|
const attachment: DocumentMetadata = {
|
||||||
|
id: generateId(),
|
||||||
|
notionParentId: pageId,
|
||||||
|
name: docMeta.name,
|
||||||
|
notionId: docMeta.notionId,
|
||||||
|
mimeType: docMeta.mimeType,
|
||||||
|
size: docMeta.size
|
||||||
|
}
|
||||||
|
|
||||||
|
await importAttachment(ctx, client, storage, ws, data, attachment, space, dbPage)
|
||||||
|
}
|
||||||
|
|
||||||
|
async function importDBAttachment (
|
||||||
|
ctx: MeasureMetricsContext,
|
||||||
|
client: TxOperations,
|
||||||
|
storage: StorageAdapter,
|
||||||
|
ws: WorkspaceIdWithUrl,
|
||||||
|
data: Buffer,
|
||||||
|
docMeta: DocumentMetadata,
|
||||||
|
space: Ref<Teamspace>,
|
||||||
|
parentMeta?: DocumentMetadata,
|
||||||
|
documentMetaMap?: Map<string, DocumentMetadata>
|
||||||
|
): Promise<void> {
|
||||||
|
const matched = docMeta.notionId.match(/([\d\w]*)_all$/)
|
||||||
|
if (matched == null || matched.length < 2) {
|
||||||
|
throw new Error('DB file not found: ' + docMeta.name)
|
||||||
|
}
|
||||||
|
|
||||||
|
const originalNotionId = matched[1]
|
||||||
|
const dbPage = documentMetaMap?.get(originalNotionId)
|
||||||
|
if (dbPage === undefined) {
|
||||||
|
throw new Error('DB page metadata not found: ' + docMeta.name)
|
||||||
|
}
|
||||||
|
|
||||||
|
const attachment: DocumentMetadata = {
|
||||||
|
id: docMeta.id,
|
||||||
|
notionParentId: dbPage.id,
|
||||||
|
name: docMeta.name,
|
||||||
|
notionId: docMeta.notionId,
|
||||||
|
mimeType: docMeta.mimeType,
|
||||||
|
size: docMeta.size
|
||||||
|
}
|
||||||
|
await importAttachment(ctx, client, storage, ws, data, attachment, space, dbPage)
|
||||||
|
}
|
||||||
|
|
||||||
|
async function importAttachment (
|
||||||
|
ctx: MeasureMetricsContext,
|
||||||
|
client: TxOperations,
|
||||||
|
storage: StorageAdapter,
|
||||||
|
ws: WorkspaceIdWithUrl,
|
||||||
|
data: Buffer,
|
||||||
|
docMeta: DocumentMetadata,
|
||||||
|
space: Ref<Teamspace>,
|
||||||
|
parentMeta?: DocumentMetadata,
|
||||||
|
documentMetaMap?: Map<string, DocumentMetadata>
|
||||||
|
): Promise<void> {
|
||||||
|
if (parentMeta === undefined) {
|
||||||
|
throw new Error('Cannot import attachment without parent doc: ' + docMeta.id)
|
||||||
|
}
|
||||||
|
|
||||||
|
const size = docMeta.size ?? 0
|
||||||
|
const type = docMeta.mimeType ?? DEFAULT_ATTACHMENT_MIME_TYPE
|
||||||
|
await storage.put(ctx, ws, docMeta.id, data, type, size)
|
||||||
|
|
||||||
|
const attachedData: AttachedData<Attachment> = {
|
||||||
|
file: docMeta.id as Ref<Blob>,
|
||||||
|
name: docMeta.name,
|
||||||
|
lastModified: Date.now(),
|
||||||
|
type,
|
||||||
|
size
|
||||||
|
}
|
||||||
|
|
||||||
|
await client.addCollection(
|
||||||
|
attachment.class.Attachment,
|
||||||
|
space,
|
||||||
|
parentMeta.id as Ref<Document>,
|
||||||
|
document.class.Document,
|
||||||
|
'attachments',
|
||||||
|
attachedData,
|
||||||
|
docMeta.id as Ref<Attachment>
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
async function importPageDocument (
|
||||||
|
ctx: MeasureMetricsContext,
|
||||||
|
client: TxOperations,
|
||||||
|
storage: StorageAdapter,
|
||||||
|
ws: WorkspaceIdWithUrl,
|
||||||
|
data: Buffer,
|
||||||
|
docMeta: DocumentMetadata,
|
||||||
|
space: Ref<Teamspace>,
|
||||||
|
parentMeta?: DocumentMetadata,
|
||||||
|
documentMetaMap?: Map<string, DocumentMetadata>
|
||||||
|
): Promise<void> {
|
||||||
|
const md = data.toString() ?? ''
|
||||||
|
const json = parseMessageMarkdown(md ?? '', 'image://')
|
||||||
|
if (documentMetaMap !== undefined) {
|
||||||
|
preProcessMarkdown(json, documentMetaMap)
|
||||||
|
}
|
||||||
|
|
||||||
|
const id = docMeta.id as Ref<Document>
|
||||||
|
const collabId = makeCollaborativeDoc(id, 'content')
|
||||||
|
const yDoc = jsonToYDocNoSchema(json, 'content')
|
||||||
|
await saveCollaborativeDoc(storage, ws, collabId, yDoc, ctx)
|
||||||
|
|
||||||
|
const parentId = parentMeta?.id ?? document.ids.NoParent
|
||||||
|
|
||||||
|
const attachedData: AttachedData<Document> = {
|
||||||
|
name: docMeta.name,
|
||||||
|
content: collabId,
|
||||||
|
attachments: 0,
|
||||||
|
children: 0,
|
||||||
|
embeddings: 0,
|
||||||
|
labels: 0,
|
||||||
|
comments: 0,
|
||||||
|
references: 0
|
||||||
|
}
|
||||||
|
|
||||||
|
await client.addCollection(
|
||||||
|
document.class.Document,
|
||||||
|
space,
|
||||||
|
parentId as Ref<Document>,
|
||||||
|
document.class.Document,
|
||||||
|
'children',
|
||||||
|
attachedData,
|
||||||
|
id
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
function preProcessMarkdown (json: MarkupNode, documentMetaMap: Map<string, DocumentMetadata>): void {
|
||||||
|
traverseNode(json, (node) => {
|
||||||
|
if (node.type === MarkupNodeType.image) {
|
||||||
|
const src = node.attrs?.src
|
||||||
|
if (src !== undefined) {
|
||||||
|
const notionId = getFileId('', src as string)
|
||||||
|
const meta = documentMetaMap.get(notionId)
|
||||||
|
if (meta !== undefined) {
|
||||||
|
alterImageNode(node, meta)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
traverseNodeMarks(node, (mark) => {
|
||||||
|
if (mark.type === MarkupMarkType.link) {
|
||||||
|
const href = mark.attrs.href
|
||||||
|
switch (getLinkType(href)) {
|
||||||
|
case NOTION_MD_LINK_TYPES.UNKNOWN:
|
||||||
|
case NOTION_MD_LINK_TYPES.EXTERNAL_LINK: {
|
||||||
|
console.log('skip this type of link: ', href)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
case NOTION_MD_LINK_TYPES.INTERNAL_LINK: {
|
||||||
|
const notionId = getFileId('', href)
|
||||||
|
const targetMeta = documentMetaMap.get(notionId)
|
||||||
|
console.log('Target HULY page ID:', targetMeta?.id)
|
||||||
|
if (targetMeta !== undefined) {
|
||||||
|
alterInternalLinkNode(node, targetMeta)
|
||||||
|
} else {
|
||||||
|
console.warn('Linked page not found (outside of this import): ' + href)
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
case NOTION_MD_LINK_TYPES.ATTACHMENT: {
|
||||||
|
const notionId = getFileId('', href)
|
||||||
|
const attachmentMeta = documentMetaMap.get(notionId)
|
||||||
|
if (attachmentMeta !== undefined) {
|
||||||
|
console.log('Attachment found: ', attachmentMeta)
|
||||||
|
alterAttachmentNode(node, attachmentMeta, href)
|
||||||
|
} else {
|
||||||
|
console.warn('Attachment not found: ', href)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
function getLinkType (href: string): NOTION_MD_LINK_TYPES {
|
||||||
|
console.log('original link href: ' + href)
|
||||||
|
if (isExternalLink(href)) return NOTION_MD_LINK_TYPES.EXTERNAL_LINK
|
||||||
|
|
||||||
|
const notionId = extractNotionId(href)
|
||||||
|
if (notionId !== null && notionId !== undefined && notionId !== '') {
|
||||||
|
return NOTION_MD_LINK_TYPES.INTERNAL_LINK
|
||||||
|
}
|
||||||
|
|
||||||
|
const shortName = extractNameWoExtension(href)
|
||||||
|
if (shortName !== undefined && shortName !== '') {
|
||||||
|
return NOTION_MD_LINK_TYPES.ATTACHMENT
|
||||||
|
}
|
||||||
|
|
||||||
|
return NOTION_MD_LINK_TYPES.UNKNOWN
|
||||||
|
}
|
||||||
|
|
||||||
|
function alterAttachmentNode (node: MarkupNode, targetMeta: DocumentMetadata, href: string): void {
|
||||||
|
node.type = MarkupNodeType.file
|
||||||
|
node.attrs = {
|
||||||
|
'file-id': targetMeta.id,
|
||||||
|
'data-file-name': targetMeta.name,
|
||||||
|
'data-file-size': targetMeta.size ?? 0,
|
||||||
|
'data-file-type': targetMeta.mimeType ?? DEFAULT_ATTACHMENT_MIME_TYPE,
|
||||||
|
'data-file-href': href
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function alterInternalLinkNode (node: MarkupNode, targetMeta: DocumentMetadata): void {
|
||||||
|
node.type = MarkupNodeType.reference
|
||||||
|
node.attrs = {
|
||||||
|
id: targetMeta.id,
|
||||||
|
label: targetMeta.name,
|
||||||
|
objectclass: document.class.Document,
|
||||||
|
text: '',
|
||||||
|
content: ''
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function alterImageNode (node: MarkupNode, meta: DocumentMetadata): void {
|
||||||
|
node.type = MarkupNodeType.image
|
||||||
|
if (node.attrs !== undefined) {
|
||||||
|
node.attrs['file-id'] = meta.id
|
||||||
|
if (meta.mimeType !== undefined) {
|
||||||
|
node.attrs['data-file-type'] = meta.mimeType
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async function skip (...args: any): Promise<void> {
|
||||||
|
const docMeta = args[5]
|
||||||
|
console.warn('Unsupported entry type, skipping: ', docMeta)
|
||||||
|
}
|
||||||
|
|
||||||
|
function isExternalLink (href: any): boolean {
|
||||||
|
return URL.canParse(href)
|
||||||
|
}
|
||||||
|
|
||||||
|
function extractNotionId (fileName: string): string | undefined {
|
||||||
|
const decoded = decodeURI(fileName).trimEnd()
|
||||||
|
const matched = decoded.match(/ ([\w\d]{32}(_all)?)(\.|$)/)
|
||||||
|
return matched !== null && matched.length >= 2 ? matched[1] : undefined
|
||||||
|
}
|
||||||
|
|
||||||
|
function extractExtension (fileName: string): string {
|
||||||
|
const decoded = decodeURI(fileName)
|
||||||
|
return parse(decoded).ext.toLowerCase()
|
||||||
|
}
|
||||||
|
|
||||||
|
function extractNameWoExtension (fileName: string): string {
|
||||||
|
const decoded = decodeURI(fileName)
|
||||||
|
return parse(decoded).name
|
||||||
|
}
|
||||||
|
|
||||||
|
function extractOriginalName (fileName: string): string {
|
||||||
|
const woExtension = extractNameWoExtension(fileName)
|
||||||
|
const notionId = extractNotionId(woExtension)
|
||||||
|
const nameOnly = notionId !== undefined ? woExtension.replace(notionId, '') : woExtension
|
||||||
|
return nameOnly.trimEnd()
|
||||||
|
}
|
||||||
|
|
||||||
|
function getFileId (filePath: string, fileName: string): string {
|
||||||
|
const notionId = extractNotionId(fileName)
|
||||||
|
if (notionId !== '' && notionId !== undefined) {
|
||||||
|
return notionId
|
||||||
|
}
|
||||||
|
const decodedPath = decodeURI(filePath)
|
||||||
|
const decodedName = decodeURI(fileName)
|
||||||
|
return join(basename(decodedPath), decodedName)
|
||||||
|
}
|
||||||
|
|
||||||
|
function getAncestorEntries (root: string, filePath: string): string[] {
|
||||||
|
const relativePath = filePath.replace(root, '')
|
||||||
|
const ancestors = relativePath.split('/')
|
||||||
|
return ancestors
|
||||||
|
}
|
||||||
|
|
||||||
|
function getContentType (fileName: string): string | undefined {
|
||||||
|
const mimeType = contentType(fileName)
|
||||||
|
return mimeType !== false ? mimeType : undefined
|
||||||
|
}
|
@ -23,6 +23,7 @@ export enum MarkupNodeType {
|
|||||||
code_block = 'codeBlock',
|
code_block = 'codeBlock',
|
||||||
text = 'text',
|
text = 'text',
|
||||||
image = 'image',
|
image = 'image',
|
||||||
|
file = 'file',
|
||||||
reference = 'reference',
|
reference = 'reference',
|
||||||
hard_break = 'hardBreak',
|
hard_break = 'hardBreak',
|
||||||
ordered_list = 'orderedList',
|
ordered_list = 'orderedList',
|
||||||
|
Loading…
Reference in New Issue
Block a user