diff --git a/.vscode/launch.json b/.vscode/launch.json
index 17bdb15d4c..7f3d2c0ac7 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -4,6 +4,30 @@
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
+ {
+ "name": "Debug notion import", // launches the dev tool entry point (src/__start.ts) with the import-notion command
+ "type": "node",
+ "request": "launch",
+ // Alternative invocation (import-notion-to-teamspace with an extra -ts argument): "args": ["src/__start.ts", "import-notion-to-teamspace", "/home/anna/work/notion/natalya/Export-fad9ecb4-a1a5-4623-920d-df32dd423743", "-ws", "w-user1-ws1-66d8018b-ce1e0c3164-006bb0", "-ts", "notion"],
+ "args": ["src/__start.ts", "import-notion", "/home/anna/work/notion/natalya/Export-fad9ecb4-a1a5-4623-920d-df32dd423743", "-ws", "w-user1-ws1-66d8018b-ce1e0c3164-006bb0"], // NOTE(review): the export path and workspace id are machine-specific; adjust them before running
+ "env": {
+ "SERVER_SECRET": "secret",
+ "MINIO_ACCESS_KEY": "minioadmin",
+ "MINIO_SECRET_KEY": "minioadmin",
+ "MINIO_ENDPOINT": "localhost",
+ "TRANSACTOR_URL": "ws://localhost:3333",
+ "MONGO_URL": "mongodb://localhost:27017",
+ "ACCOUNTS_URL": "http://localhost:3000",
+ "TELEGRAM_DATABASE": "telegram-service",
+ "ELASTIC_URL": "http://localhost:9200",
+ "REKONI_URL": "http://localhost:4004"
+ },
+ "runtimeVersion": "20",
+ "runtimeArgs": ["--nolazy", "-r", "ts-node/register"], // ts-node/register lets node execute the TypeScript entry point directly
+ "sourceMaps": true,
+ "outputCapture": "std",
+ "cwd": "${workspaceRoot}/dev/tool" // the relative src/__start.ts in args is resolved against this directory
+ },
{
"address": "127.0.0.1",
"localRoot": "${workspaceFolder}",
@@ -229,6 +253,7 @@
"ELASTIC_URL": "http://localhost:9200",
"REKONI_URL": "http://localhost:4004"
},
+ "runtimeVersion": "20",
"runtimeArgs": ["--nolazy", "-r", "ts-node/register"],
"sourceMaps": true,
"outputCapture": "std",
diff --git a/dev/tool/package.json b/dev/tool/package.json
index ca8e811c1c..700f121318 100644
--- a/dev/tool/package.json
+++ b/dev/tool/package.json
@@ -64,6 +64,7 @@
"@hcengineering/client-resources": "^0.6.27",
"@hcengineering/contact": "^0.6.24",
"@hcengineering/core": "^0.6.32",
+ "@hcengineering/document": "^0.6.0",
"@hcengineering/elastic": "^0.6.0",
"@hcengineering/lead": "^0.6.0",
"@hcengineering/minio": "^0.6.0",
diff --git a/dev/tool/src/index.ts b/dev/tool/src/index.ts
index bca374fafe..30a53c1296 100644
--- a/dev/tool/src/index.ts
+++ b/dev/tool/src/index.ts
@@ -48,7 +48,7 @@ import {
} from '@hcengineering/server-backup'
import serverClientPlugin, { BlobClient, createClient, getTransactorEndpoint } from '@hcengineering/server-client'
import serverToken, { decodeToken, generateToken } from '@hcengineering/server-token'
-import toolPlugin, { FileModelLogger } from '@hcengineering/server-tool'
+import toolPlugin, { connect, FileModelLogger } from '@hcengineering/server-tool'
import path from 'path'
import { buildStorageFromConfig, storageConfigFromEnv } from '@hcengineering/server-storage'
@@ -63,7 +63,10 @@ import core, {
MeasureMetricsContext,
metricsToString,
systemAccountEmail,
+ TxOperations,
versionToString,
+ type WorkspaceIdWithUrl,
+ type Client as CoreClient,
type Data,
type Doc,
type Ref,
@@ -95,6 +98,7 @@ import { fixJsonMarkup, migrateMarkup } from './markup'
import { fixMixinForeignAttributes, showMixinForeignAttributes } from './mixin'
import { fixAccountEmails, renameAccount } from './renameAccount'
import { moveFiles, syncFiles } from './storage'
+import { importNotion, importToTeamspace } from './notion'
const colorConstants = {
colorRed: '\u001b[31m',
@@ -202,6 +206,84 @@ export function devTool (
})
})
+ // Usage: import-notion /path/to/extracted/export --workspace <workspace id>
+ // Imports an extracted Notion "Markdown & CSV" export into the given workspace (see importNotion).
+ program
+   .command('import-notion <dir>')
+   .description('import extracted archive exported from Notion as "Markdown & CSV"')
+   .requiredOption('-ws, --workspace <workspace>', 'workspace where the documents should be imported to')
+   .action(async (dir: string, cmd) => {
+     if (cmd.workspace === '') return
+
+     const { mongodbUri } = prepareTools()
+
+     await withDatabase(mongodbUri, async (db) => {
+       const ws = await getWorkspaceById(db, cmd.workspace)
+       if (ws === null) {
+         console.log('Workspace not found: ', cmd.workspace)
+         return
+       }
+
+       const wsUrl: WorkspaceIdWithUrl = {
+         name: ws.workspace,
+         workspaceName: ws.workspaceName ?? '',
+         workspaceUrl: ws.workspaceUrl ?? ''
+       }
+
+       await withStorage(mongodbUri, async (storageAdapter) => {
+         const token = generateToken(systemAccountEmail, { name: ws.workspace })
+         const endpoint = await getTransactorEndpoint(token, 'external')
+         const connection = (await connect(endpoint, wsUrl, undefined, {
+           mode: 'backup'
+         })) as unknown as CoreClient
+
+         try {
+           const client = new TxOperations(connection, core.account.System)
+           await importNotion(toolCtx, client, storageAdapter, dir, wsUrl)
+         } finally {
+           // Close the transactor connection even when the import throws.
+           await connection.close()
+         }
+       })
+     })
+   })
+
+ // Usage: import-notion-to-teamspace /path/to/extracted/export --workspace <workspace id> --teamspace <name>
+ // Imports an extracted Notion "Markdown & CSV" export into one teamspace (see importToTeamspace).
+ program
+   .command('import-notion-to-teamspace <dir>')
+   .description('import extracted archive exported from Notion as "Markdown & CSV"')
+   .requiredOption('-ws, --workspace <workspace>', 'workspace where the documents should be imported to')
+   .requiredOption('-ts, --teamspace <teamspace>', 'teamspace where the documents should be imported to')
+   .action(async (dir: string, cmd) => {
+     if (cmd.workspace === '') return
+     if (cmd.teamspace === '') return
+
+     const { mongodbUri } = prepareTools()
+
+     await withDatabase(mongodbUri, async (db) => {
+       const ws = await getWorkspaceById(db, cmd.workspace)
+       if (ws === null) {
+         console.log('Workspace not found: ', cmd.workspace)
+         return
+       }
+
+       const wsUrl: WorkspaceIdWithUrl = {
+         name: ws.workspace,
+         workspaceName: ws.workspaceName ?? '',
+         workspaceUrl: ws.workspaceUrl ?? ''
+       }
+
+       await withStorage(mongodbUri, async (storageAdapter) => {
+         const token = generateToken(systemAccountEmail, { name: ws.workspace })
+         const endpoint = await getTransactorEndpoint(token, 'external')
+         const connection = (await connect(endpoint, wsUrl, undefined, {
+           mode: 'backup'
+         })) as unknown as CoreClient
+
+         try {
+           const client = new TxOperations(connection, core.account.System)
+           await importToTeamspace(toolCtx, client, storageAdapter, dir, wsUrl, cmd.teamspace)
+         } finally {
+           // Close the transactor connection even when the import throws.
+           await connection.close()
+         }
+       })
+     })
+   })
+
program
.command('reset-account ')
.description('create user and corresponding account in master database')
diff --git a/dev/tool/src/notion.ts b/dev/tool/src/notion.ts
new file mode 100644
index 0000000000..c99c0f0bc7
--- /dev/null
+++ b/dev/tool/src/notion.ts
@@ -0,0 +1,653 @@
+import {
+ generateId,
+ type AttachedData,
+ type Ref,
+ type WorkspaceIdWithUrl,
+ makeCollaborativeDoc,
+ type MeasureMetricsContext,
+ type TxOperations,
+ type Blob
+} from '@hcengineering/core'
+import { saveCollaborativeDoc } from '@hcengineering/collaboration'
+import document, { type Document, type Teamspace } from '@hcengineering/document'
+import { type StorageAdapter } from '@hcengineering/server-core'
+import {
+ MarkupMarkType,
+ type MarkupNode,
+ MarkupNodeType,
+ parseMessageMarkdown,
+ traverseNode,
+ traverseNodeMarks,
+ jsonToYDocNoSchema
+} from '@hcengineering/text'
+
+import attachment from '@hcengineering/model-attachment'
+import { type Attachment } from '@hcengineering/attachment'
+import { contentType } from 'mime-types'
+import core from '@hcengineering/model-core'
+import { readdir, stat, readFile } from 'fs/promises'
+import { type Dirent } from 'fs'
+import { basename, join, parse } from 'path'
+
+ interface DocumentMetadata { // per-file record stored in documentMetaMap by collectFileMetadata, keyed by notionId
+ id: string // fresh generateId() value assigned when the metadata is collected
+ name: string // result of extractOriginalName(file.name)
+ notionId: string // result of getFileId(file.path, file.name); also the map key
+ notionSubRootId?: string // extractNotionId of the ancestor entry at index 1, falling back to its extractOriginalName; undefined when there is no such entry
+ notionParentId?: string // extractNotionId of the last ancestor entry; undefined when the file has no ancestors
+ mimeType?: string // result of getContentType(file.name)
+ size?: number // size passed in from stat() by collectMetadata
+ }
+
+ interface FileMetadata { // entry of fileMetaMap; collectFileMetadata writes one per file and one per ancestor folder
+ isFolder: boolean // false for file entries; true for entries first created from an ancestor folder
+ level: number // number of ancestor entries for a file; index within the ancestor list for a folder
+ hasChildren: boolean // set to true once the entry's id is seen as an ancestor of some file
+ fileName: string // join(file.path, file.name) for files; the bare ancestor entry name for folders
+ extension?: string // result of extractExtension(file.name); not set for entries created from folders
+ }
+
+ const MD_EXTENSION = '.md' // extension of Markdown files
+ const CSV_EXTENSION = '.csv' // extension of CSV files
+ const DEFAULT_ATTACHMENT_MIME_TYPE = 'application/octet-stream' // NOTE(review): presumably the fallback MIME type for attachments; usage is outside this view
+
+ enum NOTION_MD_LINK_TYPES { // numeric enum, members are implicitly 0..3; NOTE(review): presumably classifies links found in exported markdown, usage is outside this view
+ INTERNAL_LINK,
+ EXTERNAL_LINK,
+ ATTACHMENT,
+ UNKNOWN
+ }
+
+ /**
+  * Imports an extracted Notion export located in `dir` into the workspace `ws`.
+  *
+  * Collects file and document metadata for every file returned by getFilesForImport,
+  * asks createTeamspaces for the target spaces and hands everything to importFiles.
+  * When createTeamspaces returns an empty map, an error is logged and nothing is imported.
+  *
+  * @param ctx - metrics context forwarded to importFiles
+  * @param client - operations client used to create teamspaces and documents
+  * @param storage - storage adapter forwarded to importFiles
+  * @param dir - root directory of the extracted export
+  * @param ws - target workspace
+  */
+ export async function importNotion (
+   ctx: MeasureMetricsContext,
+   client: TxOperations,
+   storage: StorageAdapter,
+   dir: string,
+   ws: WorkspaceIdWithUrl
+ ): Promise<void> {
+   const files = await getFilesForImport(dir)
+
+   // Both maps are keyed by the id that collectFileMetadata derives via getFileId.
+   const fileMetaMap = new Map<string, FileMetadata>()
+   const documentMetaMap = new Map<string, DocumentMetadata>()
+
+   await collectMetadata(dir, files, fileMetaMap, documentMetaMap)
+   // Diagnostic dump of the collected metadata.
+   console.log(fileMetaMap)
+   console.log(documentMetaMap)
+
+   const spaceIdMap = await createTeamspaces(fileMetaMap, client)
+   if (spaceIdMap.size === 0) {
+     console.error('No teamspaces found in directory: ', dir)
+     return
+   }
+
+   await importFiles(ctx, client, storage, fileMetaMap, documentMetaMap, spaceIdMap, ws)
+ }
+
+ /**
+  * Recursively lists `dir` and returns every non-directory entry.
+  *
+  * An `index.html` whose Dirent path equals `dir` (i.e. one located directly in the
+  * export root) is skipped; `index.html` files in nested folders are kept.
+  *
+  * @param dir - root directory of the extracted export
+  * @returns directory entries of the files to import
+  */
+ async function getFilesForImport (dir: string): Promise<Dirent[]> {
+   const entries = await readdir(dir, { recursive: true, withFileTypes: true })
+   return entries.filter((entry) => {
+     if (entry.isDirectory()) return false
+     const isRootIndex = entry.name === 'index.html' && entry.path === dir
+     return !isRootIndex
+   })
+ }
+
+ /**
+  * Imports an extracted Notion export located in `dir` into a single teamspace.
+  *
+  * Collects file and document metadata for every file returned by getFilesForImport,
+  * obtains the target space id from createTeamspace(teamspace, client) and hands
+  * everything to importFilesToSpace.
+  *
+  * @param ctx - metrics context forwarded to importFilesToSpace
+  * @param client - operations client used to create the teamspace and documents
+  * @param storage - storage adapter forwarded to importFilesToSpace
+  * @param dir - root directory of the extracted export
+  * @param ws - target workspace
+  * @param teamspace - teamspace name passed to createTeamspace
+  */
+ export async function importToTeamspace (
+   ctx: MeasureMetricsContext,
+   client: TxOperations,
+   storage: StorageAdapter,
+   dir: string,
+   ws: WorkspaceIdWithUrl,
+   teamspace: string
+ ): Promise<void> {
+   const files = await getFilesForImport(dir)
+
+   // Both maps are keyed by the id that collectFileMetadata derives via getFileId.
+   const fileMetaMap = new Map<string, FileMetadata>()
+   const documentMetaMap = new Map<string, DocumentMetadata>()
+
+   await collectMetadata(dir, files, fileMetaMap, documentMetaMap)
+   // Diagnostic dump of the collected metadata.
+   console.log(fileMetaMap)
+   console.log(documentMetaMap)
+
+   const spaceId = await createTeamspace(teamspace, client)
+
+   await importFilesToSpace(ctx, client, storage, fileMetaMap, documentMetaMap, spaceId, ws)
+ }
+
+ /**
+  * Fills `fileMetaMap` and `documentMetaMap` for every entry in `files`.
+  *
+  * Each file is stat'ed to obtain its size, which is then passed to collectFileMetadata.
+  *
+  * @param root - root directory of the export, forwarded to collectFileMetadata
+  * @param files - directory entries to process
+  * @param fileMetaMap - map updated in place with file and folder metadata
+  * @param documentMetaMap - map updated in place with document metadata
+  */
+ async function collectMetadata (
+   root: string,
+   files: Dirent[],
+   fileMetaMap: Map<string, FileMetadata>,
+   documentMetaMap: Map<string, DocumentMetadata>
+ ): Promise<void> {
+   for (const file of files) {
+     // Dirent.path is the directory containing the entry, so the file itself is path + name.
+     // Stat'ing file.path alone would report the size of the parent directory.
+     const st = await stat(join(file.path, file.name))
+     collectFileMetadata(root, file, st.size, fileMetaMap, documentMetaMap)
+   }
+ }
+
+ /**
+  * Records metadata for one file and for every ancestor entry returned by getAncestorEntries.
+  *
+  * - `fileMetaMap` receives an entry for the file itself (keeping a previously recorded
+  *   `hasChildren` flag) and one entry per ancestor, each marked `hasChildren: true`.
+  *   Existing ancestor entries keep their level, isFolder, fileName and extension.
+  * - `documentMetaMap` receives a record with a freshly generated id, the original name,
+  *   the parent and sub-root ids, the content type and the size.
+  *
+  * @param root - root directory of the export
+  * @param file - directory entry of the file being processed
+  * @param fileSize - size to store in the document metadata
+  * @param fileMetaMap - map updated in place with file and folder metadata
+  * @param documentMetaMap - map updated in place with document metadata
+  */
+ function collectFileMetadata (
+   root: string,
+   file: Dirent,
+   fileSize: number,
+   fileMetaMap: Map<string, FileMetadata>,
+   documentMetaMap: Map<string, DocumentMetadata>
+ ): void {
+   const notionId = getFileId(file.path, file.name)
+   const extension = extractExtension(file.name)
+   const ancestors = getAncestorEntries(root, file.path)
+
+   // The same id may already have been registered as an ancestor of an earlier file;
+   // keep its hasChildren flag in that case.
+   const knownFileMeta = fileMetaMap.get(notionId)
+   fileMetaMap.set(notionId, {
+     level: ancestors.length,
+     isFolder: false,
+     extension,
+     fileName: join(file.path, file.name),
+     hasChildren: knownFileMeta?.hasChildren ?? false
+   })
+
+   ancestors.forEach((folder, depth) => {
+     const folderId = getFileId('', folder)
+     const knownFolderMeta = fileMetaMap.get(folderId)
+     fileMetaMap.set(folderId, {
+       level: knownFolderMeta?.level ?? depth,
+       isFolder: knownFolderMeta?.isFolder ?? true,
+       fileName: knownFolderMeta?.fileName ?? folder,
+       extension: knownFolderMeta?.extension,
+       hasChildren: true
+     })
+   })
+
+   const parentEntry = ancestors[ancestors.length - 1]
+   const subRootEntry = ancestors[1]
+
+   const notionParentId = parentEntry !== undefined ? extractNotionId(parentEntry) : undefined
+   const notionSubRootId =
+     subRootEntry !== undefined ? extractNotionId(subRootEntry) ?? extractOriginalName(subRootEntry) : undefined
+
+   documentMetaMap.set(notionId, {
+     id: generateId(),
+     name: extractOriginalName(file.name),
+     notionId,
+     notionParentId,
+     notionSubRootId,
+     mimeType: getContentType(file.name),
+     size: fileSize
+   })
+ }
+
+async function createTeamspaces (
+ fileMetaMap: Map,
+ client: TxOperations
+): Promise