From c15f253c15911681111b52971feb43bba0f87a90 Mon Sep 17 00:00:00 2001
From: Andrey Sobolev <haiodo@users.noreply.github.com>
Date: Thu, 5 Oct 2023 23:45:11 +0700
Subject: [PATCH] Optimize model (#3795)

Signed-off-by: Andrey Sobolev <haiodo@gmail.com>
---
 dev/tool/src/clean.ts      | 74 ++++++++++++++++++++++++++++++++++++++
 dev/tool/src/index.ts      | 16 ++++++++-
 packages/core/src/memdb.ts |  5 +++
 3 files changed, 94 insertions(+), 1 deletion(-)

diff --git a/dev/tool/src/clean.ts b/dev/tool/src/clean.ts
index 6761480548..ea00e7c6b0 100644
--- a/dev/tool/src/clean.ts
+++ b/dev/tool/src/clean.ts
@@ -16,6 +16,7 @@
 import attachment from '@hcengineering/attachment'
 import chunter, { Comment } from '@hcengineering/chunter'
 import contact from '@hcengineering/contact'
+import { deepEqual } from 'fast-equals'
 import core, {
   BackupClient,
   Client as CoreClient,
@@ -23,6 +24,7 @@ import core, {
   Doc,
   Domain,
   Ref,
+  SortingOrder,
   TxCreateDoc,
   TxOperations,
   TxProcessor,
@@ -177,6 +179,78 @@ export async function cleanRemovedTransactions (workspaceId: WorkspaceId, transa
     await connection.close()
   }
 }
+
+/**
+ * Removes model-space `TxUpdateDoc` transactions that no longer contribute to the model: updates of
+ * documents absent from the model, and updates whose every plain (non `$`) field is assigned again
+ * by a later update or differs from the value the document holds now. One run looks at up to 5000
+ * transactions sorted by `_id`, then `modifiedOn`. Errors are traced to the console, not rethrown.
+ */
+export async function optimizeModel (workspaceId: WorkspaceId, transactorUrl: string): Promise<void> {
+  const connection = (await connect(transactorUrl, workspaceId, undefined, {
+    mode: 'backup',
+    model: 'upgrade'
+  })) as unknown as CoreClient & BackupClient
+  try {
+    const model = connection.getModel()
+
+    const updateTransactions = await connection.findAll(
+      core.class.TxUpdateDoc,
+      {
+        objectSpace: core.space.Model,
+        _class: core.class.TxUpdateDoc
+      },
+      { sort: { _id: SortingOrder.Ascending, modifiedOn: SortingOrder.Ascending }, limit: 5000 }
+    )
+
+    // Per document: field -> index of the last fetched tx assigning it; avoids rescanning later txs.
+    const lastAssigned = new Map<Ref<Doc>, Map<string, number>>()
+    for (const [idx, tx] of updateTransactions.entries()) {
+      const fields = lastAssigned.get(tx.objectId) ?? new Map<string, number>()
+      lastAssigned.set(tx.objectId, fields)
+      for (const [k, v] of Object.entries(tx.operations)) {
+        if (v !== undefined) fields.set(k, idx)
+      }
+    }
+
+    const toRemove: Ref<Doc>[] = []
+
+    for (const [idx, tx] of updateTransactions.entries()) {
+      const doc = model.findObject(tx.objectId)
+      if (doc === undefined) {
+        // Document is removed, so its update transaction can be removed as well.
+        toRemove.push(tx._id)
+        console.log('marking update tx to remove', tx)
+        continue
+      }
+      const opt: any = { ...tx.operations }
+      const adoc = doc as any
+      const fields = lastAssigned.get(tx.objectId)
+
+      for (const [k, v] of Object.entries(opt)) {
+        // Drop a field when the document no longer holds the value written here, or when a
+        // later transaction assigns the field again. `$` operators are always kept.
+        if (!k.startsWith('$') && (!deepEqual(adoc[k], v) || (fields?.get(k) ?? -1) > idx)) {
+          // eslint-disable-next-line @typescript-eslint/no-dynamic-delete
+          delete opt[k]
+        }
+      }
+      if (Object.keys(opt).length === 0) {
+        // No effective operations left, remove update tx.
+        toRemove.push(tx._id)
+        console.log('marking update tx to remove, since not real update is performed', tx)
+      }
+    }
+
+    await connection.clean(DOMAIN_TX, toRemove)
+    console.log('processed', toRemove.length)
+    console.log('total docs with remove', toRemove.length)
+  } catch (err: any) {
+    console.trace(err)
+  } finally {
+    await connection.close()
+  }
+}
 export async function cleanArchivedSpaces (workspaceId: WorkspaceId, transactorUrl: string): Promise<void> {
   const connection = (await connect(transactorUrl, workspaceId, undefined, {
     mode: 'backup'
diff --git a/dev/tool/src/index.ts b/dev/tool/src/index.ts
index d484e7f565..2504387fc6 100644
--- a/dev/tool/src/index.ts
+++ b/dev/tool/src/index.ts
@@ -52,7 +52,13 @@ import { MinioService } from '@hcengineering/minio'
 import { MigrateOperation } from '@hcengineering/model'
 import { openAIConfigDefaults } from '@hcengineering/openai'
 import { benchmark } from './benchmark'
-import { cleanArchivedSpaces, cleanRemovedTransactions, cleanWorkspace, fixCommentDoubleIdCreate } from './clean'
+import {
+  cleanArchivedSpaces,
+  cleanRemovedTransactions,
+  cleanWorkspace,
+  fixCommentDoubleIdCreate,
+  optimizeModel
+} from './clean'
 import { changeConfiguration } from './configuration'
 import { fixMixinForeignAttributes, showMixinForeignAttributes } from './mixin'
 import { openAIConfig } from './openai'
@@ -499,6 +505,14 @@ export function devTool (
       await changeConfiguration(getWorkspaceId(workspace, productId), transactorUrl, cmd)
     })
 
+  program
+    .command('optimize-model <workspace>')
+    .description('optimize model')
+    .action(async (workspace: string) => {
+      // The command declares no options, so only the positional workspace argument is consumed.
+      await optimizeModel(getWorkspaceId(workspace, productId), transactorUrl)
+    })
+
   program
     .command('benchmark')
     .description('clean archived spaces')
diff --git a/packages/core/src/memdb.ts b/packages/core/src/memdb.ts
index 3aa3d0a473..572c87361f 100644
--- a/packages/core/src/memdb.ts
+++ b/packages/core/src/memdb.ts
@@ -77,6 +77,11 @@ export abstract class MemDb extends TxProcessor implements Storage {
     return doc as T
   }
 
+  /** Looks up a document by `_id` in `objectById`, yielding `undefined` when no entry exists. */
+  findObject<T extends Doc>(_id: Ref<T>): T | undefined {
+    return this.objectById.get(_id) as T | undefined
+  }
+
   private async getLookupValue<T extends Doc>(
     _class: Ref<Class<T>>,
     doc: T,