//
// Copyright © 2023 Hardcore Engineering Inc.
//
// Licensed under the Eclipse Public License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. You may
// obtain a copy of the License at https://www.eclipse.org/legal/epl-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//
// See the License for the specific language governing permissions and
// limitations under the License.
//

import attachment from '@hcengineering/attachment'
import contact from '@hcengineering/contact'
import { deepEqual } from 'fast-equals'
import core, {
  type BackupClient,
  type Client as CoreClient,
  DOMAIN_TX,
  type Doc,
  type Domain,
  type Ref,
  SortingOrder,
  type TxCreateDoc,
  TxOperations,
  TxProcessor,
  type WorkspaceId,
  generateId,
  getObjectValue
} from '@hcengineering/core'
import { type MinioService } from '@hcengineering/minio'
import { getWorkspaceDB } from '@hcengineering/mongo'
import recruit from '@hcengineering/recruit'
import { connect } from '@hcengineering/server-tool'
import tracker from '@hcengineering/tracker'
import tags, { type TagCategory, type TagElement, type TagReference } from '@hcengineering/tags'
import { MongoClient } from 'mongodb'
import chunter, { type ChatMessage } from '@hcengineering/chunter'

export const DOMAIN_ACTIVITY = 'activity' as Domain

/**
 * Cleans a workspace: removes blobs that no attachment or contact avatar refers to and,
 * depending on `opt`, removes recruiting candidates, tracker issues and the transactions
 * of documents that have already been removed.
 *
 * Errors are traced to the console and not rethrown; the transactor connection is always closed.
 *
 * @param mongoUrl - MongoDB connection string (used for the `removedTx` stage).
 * @param workspaceId - workspace to clean.
 * @param minio - blob storage of the workspace.
 * @param elasticUrl - not read by this function; kept so existing callers keep working.
 * @param transactorUrl - transactor endpoint to connect to.
 * @param opt - switches for the optional stages.
 */
export async function cleanWorkspace (
  mongoUrl: string,
  workspaceId: WorkspaceId,
  minio: MinioService,
  elasticUrl: string,
  transactorUrl: string,
  opt: { recruit: boolean, tracker: boolean, removedTx: boolean }
): Promise<void> {
  const connection = (await connect(transactorUrl, workspaceId, undefined, {
    mode: 'backup',
    model: 'upgrade'
  })) as unknown as CoreClient & BackupClient
  try {
    const ops = new TxOperations(connection, core.account.System)
    const hierarchy = ops.getHierarchy()

    const attachments = await ops.findAll(attachment.class.Attachment, {})
    const contacts = await ops.findAll(contact.class.Contact, {})

    // Every blob name that is still referenced by an attachment or by a contact avatar.
    const files = new Set(
      attachments.map((it) => it.file).concat(contacts.map((it) => it.avatar).filter((it) => it) as string[])
    )

    const minioList = await minio.list(workspaceId)
    const toClean: string[] = []
    for (const mv of minioList) {
      if (!files.has(mv.name)) {
        toClean.push(mv.name)
      }
    }
    await minio.remove(workspaceId, toClean)
    // connection.loadChunk(DOMAIN_BLOB, idx = )

    if (opt.recruit) {
      const candidates = await ops.findAll(recruit.mixin.Candidate, {})
      console.log('removing Talents', candidates.length)
      // Employees are kept, only pure candidates are removed.
      const filter = candidates.filter((it) => !hierarchy.isDerived(it._class, contact.mixin.Employee))

      while (filter.length > 0) {
        const part = filter.splice(0, 100)
        const op = ops.apply('')
        for (const c of part) {
          await op.remove(c)
        }
        const t = Date.now()
        console.log('remove:', part.map((it) => it.name).join(', '))
        await op.commit()
        const t2 = Date.now()
        console.log('remove time:', t2 - t, filter.length)
      }

      // const vacancies = await ops.findAll(recruit.class.Vacancy, {})
      // console.log('removing vacancies', vacancies.length)
      // for (const c of vacancies) {
      //   console.log('Remove', c.name)
      //   await ops.remove(c)
      // }
    }

    if (opt.tracker) {
      const issues = await ops.findAll(tracker.class.Issue, {})
      console.log('removing Issues', issues.length)

      while (issues.length > 0) {
        const part = issues.splice(0, 5)
        const op = ops.apply('')
        for (const c of part) {
          await op.remove(c)
        }
        const t = Date.now()
        await op.commit()
        const t2 = Date.now()
        console.log('remove time:', t2 - t, issues.length)
      }
    }

    const client = new MongoClient(mongoUrl)
    try {
      await client.connect()
      const db = getWorkspaceDB(client, workspaceId)

      if (opt.removedTx) {
        // Let MongoDB select the remove transactions instead of loading the whole
        // transaction collection into memory and filtering it here.
        const removeTxes = await db.collection(DOMAIN_TX).find({ _class: core.class.TxRemoveDoc }).toArray()

        for (const tx of removeTxes) {
          // We need to remove all update and create operations for document
          await db.collection(DOMAIN_TX).deleteMany({ objectId: tx.objectId })
        }
      }
    } finally {
      await client.close()
    }
  } catch (err: unknown) {
    console.trace(err)
  } finally {
    await connection.close()
  }
}

/**
 * Removes blobs whose name contains `%size%` and that were modified during the last 7 days.
 * Progress is logged after every 100 removed blobs.
 *
 * @param workspaceId - workspace whose blobs are inspected.
 * @param minio - blob storage of the workspace.
 */
export async function fixMinioBW (workspaceId: WorkspaceId, minio: MinioService): Promise<void> {
  console.log('try clean bw miniature for ', workspaceId.name)
  const from = new Date()
  from.setDate(from.getDate() - 7)
  const list = await minio.list(workspaceId)
  console.log('found', list.length)
  let removed = 0
  for (const obj of list) {
    // Blobs modified before the 7 day window are left untouched.
    if (obj.lastModified < from) continue
    if (obj.name.includes('%size%')) {
      await minio.remove(workspaceId, [obj.name])
      removed++
      if (removed % 100 === 0) {
        console.log('removed: ', removed)
      }
    }
  }
  console.log('FINISH, removed: ', removed)
}

/**
 * Removes collection transactions (create, update and remove) of attached documents
 * that have a remove transaction, in batches of up to 1000 remove transactions.
 *
 * Errors are traced to the console and not rethrown; the connection is always closed.
 *
 * @param workspaceId - workspace to process.
 * @param transactorUrl - transactor endpoint to connect to.
 */
export async function cleanRemovedTransactions (workspaceId: WorkspaceId, transactorUrl: string): Promise<void> {
  const connection = (await connect(transactorUrl, workspaceId, undefined, {
    mode: 'backup'
  })) as unknown as CoreClient & BackupClient
  try {
    let count = 0
    while (true) {
      const removedDocs = await connection.findAll(
        core.class.TxCollectionCUD,
        { 'tx._class': core.class.TxRemoveDoc },
        { limit: 1000 }
      )
      if (removedDocs.length === 0) {
        break
      }

      // The remove transactions themselves are part of this result, so every
      // iteration shrinks the set matched by the query above.
      const toRemove = await connection.findAll(core.class.TxCollectionCUD, {
        'tx._class': { $in: [core.class.TxCreateDoc, core.class.TxRemoveDoc, core.class.TxUpdateDoc] },
        'tx.objectId': { $in: removedDocs.map((it) => it.tx.objectId) }
      })
      await connection.clean(
        DOMAIN_TX,
        toRemove.map((it) => it._id)
      )

      count += toRemove.length
      console.log('processed', count)
    }

    console.log('total docs with remove', count)
  } catch (err: unknown) {
    console.trace(err)
  } finally {
    await connection.close()
  }
}

/**
 * Removes model update transactions that no longer contribute to the model: updates of
 * objects missing from the model and updates whose every plain field is either different
 * from the current model value or written again by a later update of the same object.
 * At most 5000 update transactions are inspected per call.
 *
 * Errors are traced to the console and not rethrown; the connection is always closed.
 *
 * @param workspaceId - workspace to process.
 * @param transactorUrl - transactor endpoint to connect to.
 */
export async function optimizeModel (workspaceId: WorkspaceId, transactorUrl: string): Promise<void> {
  const connection = (await connect(transactorUrl, workspaceId, undefined, {
    mode: 'backup',
    model: 'upgrade'
  })) as unknown as CoreClient & BackupClient
  try {
    const model = connection.getModel()

    const updateTransactions = await connection.findAll(
      core.class.TxUpdateDoc,
      {
        objectSpace: core.space.Model,
        _class: core.class.TxUpdateDoc
      },
      { sort: { _id: SortingOrder.Ascending, modifiedOn: SortingOrder.Ascending }, limit: 5000 }
    )

    const toRemove: Ref<Doc>[] = []

    for (const [i, tx] of updateTransactions.entries()) {
      const doc = model.findObject(tx.objectId)
      if (doc === undefined) {
        // Document is removed, we could remove update transaction at all
        toRemove.push(tx._id)
        console.log('marking update tx to remove', tx)
        continue
      }
      const opt: any = { ...tx.operations }
      const adoc = doc as any

      // Merge the operations of all later updates of the same document.
      let uDoc: any = {}
      for (const later of updateTransactions.slice(i + 1).filter((it) => it.objectId === tx.objectId)) {
        uDoc = { ...uDoc, ...later.operations }
      }

      for (const [k, v] of Object.entries(opt)) {
        // Drop a plain field when the model holds another value for it or a later update
        // writes it again. Operator keys (starting with `$`) are always kept.
        if (!k.startsWith('$') && (!deepEqual(adoc[k], v) || uDoc[k] !== undefined)) {
          // eslint-disable-next-line @typescript-eslint/no-dynamic-delete
          delete opt[k]
        }
      }
      if (Object.keys(opt).length === 0) {
        // No operations pending, remove update tx.
        toRemove.push(tx._id)
        console.log('marking update tx to remove, since not real update is performed', tx)
      }
    }

    await connection.clean(DOMAIN_TX, toRemove)

    const count = toRemove.length
    console.log('processed', count)

    console.log('total docs with remove', count)
  } catch (err: unknown) {
    console.trace(err)
  } finally {
    await connection.close()
  }
}

/**
 * Removes every archived space together with the documents stored in it.
 * Spaces are processed in batches of up to 1000.
 *
 * Errors are traced to the console and not rethrown; the connection is always closed.
 *
 * @param workspaceId - workspace to process.
 * @param transactorUrl - transactor endpoint to connect to.
 */
export async function cleanArchivedSpaces (workspaceId: WorkspaceId, transactorUrl: string): Promise<void> {
  const connection = (await connect(transactorUrl, workspaceId, undefined, {
    mode: 'backup'
  })) as unknown as CoreClient & BackupClient
  try {
    // Number of documents (spaces included) removed so far, reported at the end.
    let count = 0
    const ops = new TxOperations(connection, core.account.System)

    // Classes that are stored in some domain; mixins are skipped.
    const h = connection.getHierarchy()
    const withDomain = h
      .getDescendants(core.class.Doc)
      .filter((it) => h.findDomain(it) !== undefined)
      .filter((it) => !h.isMixin(it))

    while (true) {
      const spaces = await connection.findAll(core.class.Space, { archived: true }, { limit: 1000 })
      if (spaces.length === 0) {
        break
      }
      const spaceIds = spaces.map((it) => it._id)

      for (const c of withDomain) {
        while (true) {
          const docs = await connection.findAll(c, { space: { $in: spaceIds } })
          if (docs.length === 0) {
            break
          }
          console.log('removing:', c, docs.length)
          for (const d of docs) {
            await ops.remove(d)
            count++
          }
        }
      }
      for (const s of spaces) {
        await ops.remove(s)
        count++
      }
    }

    console.log('total docs with remove', count)
  } catch (err: unknown) {
    console.trace(err)
  } finally {
    await connection.close()
  }
}

/**
 * Repairs chat messages that were created more than once with the same identifier.
 * For every repeated, non removed create transaction with a non empty message the
 * transaction is cleaned; unless its message equals the one remembered for that
 * identifier, it is uploaded again under a freshly generated identifier together
 * with a document snapshot in the activity domain.
 *
 * Errors are traced to the console and not rethrown; the connection is always closed.
 *
 * @param workspaceId - workspace to process.
 * @param transactorUrl - transactor endpoint to connect to.
 */
export async function fixCommentDoubleIdCreate (workspaceId: WorkspaceId, transactorUrl: string): Promise<void> {
  const connection = (await connect(transactorUrl, workspaceId, undefined, {
    mode: 'backup'
  })) as unknown as CoreClient & BackupClient
  try {
    const commentTxes = await connection.findAll(core.class.TxCollectionCUD, {
      'tx._class': core.class.TxCreateDoc,
      'tx.objectClass': chunter.class.ChatMessage
    })
    const commentTxesRemoved = await connection.findAll(core.class.TxCollectionCUD, {
      'tx._class': core.class.TxRemoveDoc,
      'tx.objectClass': chunter.class.ChatMessage
    })
    // Removed messages are not checked.
    const removed = new Set(commentTxesRemoved.map((it) => it.tx.objectId))
    const objSet = new Set<Ref<Doc>>()
    const oldValue = new Map<Ref<Doc>, string>()
    for (const c of commentTxes) {
      const cid = c.tx.objectId
      if (removed.has(cid)) {
        continue
      }
      const has = objSet.has(cid)
      objSet.add(cid)
      if (!has) {
        continue
      }
      // We have found duplicate one, let's rename it.
      const doc = TxProcessor.createDoc2Doc<ChatMessage>(c.tx as unknown as TxCreateDoc<ChatMessage>)
      // NOTE(review): the literal compared here was garbled in the reviewed copy of this file
      // (its markup was stripped); '<p></p>' is a reconstruction - confirm against the repository.
      if (doc.message !== '' && doc.message.trim() !== '<p></p>') {
        await connection.clean(DOMAIN_TX, [c._id])
        // NOTE(review): the value is stored untrimmed below but compared trimmed here, so messages
        // with surrounding whitespace never match - confirm whether that is intended.
        if (oldValue.get(cid) === doc.message.trim()) {
          console.log('delete tx', cid, doc.message)
        } else {
          oldValue.set(doc._id, doc.message)
          console.log('renaming', cid, doc.message)
          // Remove previous transaction.
          c.tx.objectId = generateId()
          doc._id = c.tx.objectId as Ref<ChatMessage>
          await connection.upload(DOMAIN_TX, [c])
          // Also we need to create snapshot
          await connection.upload(DOMAIN_ACTIVITY, [doc])
        }
      }
    }
  } catch (err: unknown) {
    console.trace(err)
  } finally {
    await connection.close()
  }
}

const DOMAIN_TAGS = 'tags' as Domain

// Categories whose tags are treated as candidates for cleanup by `fixSkills`.
const OTHER_CATEGORIES = [
  'recruit:category:Other',
  'document:category:Other',
  'tracker:category:Other'
] as Ref<TagCategory>[]

/**
 * Runs one step of the skills (tags) cleanup. Each step is selected by `step`:
 *
 * - `'1'` trims and upper-cases titles of tags in the "Other" categories;
 * - `'2'` merges tags with equal titles into the first one of each group;
 * - `'3'` replaces parts written with `+`/`-` by a matching good tag;
 * - `'4'` replaces good tag titles found inside a tag title by references to the good tags;
 * - `'5'` removes tags with an empty or single space title, then recalculates reference counts;
 * - `'6'` removes tags with fewer than 10 references;
 * - `'7'` removes tags without any letter in the title, then recalculates reference counts.
 *
 * Any other value does nothing besides connecting and disconnecting.
 * Errors are traced to the console and not rethrown; both connections are always closed.
 *
 * @param mongoUrl - MongoDB connection string.
 * @param workspaceId - workspace to process.
 * @param transactorUrl - transactor endpoint to connect to.
 * @param step - step to run, `'1'` to `'7'`.
 */
export async function fixSkills (
  mongoUrl: string,
  workspaceId: WorkspaceId,
  transactorUrl: string,
  step: string
): Promise<void> {
  const connection = (await connect(transactorUrl, workspaceId, undefined, {
    mode: 'backup'
  })) as unknown as CoreClient & BackupClient
  const client = new MongoClient(mongoUrl)
  try {
    await client.connect()
    const db = getWorkspaceDB(client, workspaceId)

    // Tags of the "Other" categories, the ones being cleaned.
    async function findOtherTags (): Promise<TagElement[]> {
      return (await connection.findAll(tags.class.TagElement, {
        category: { $in: OTHER_CATEGORIES }
      })) as TagElement[]
    }

    // Tags outside of the "Other" categories, longest title first, titles of 3+ characters only.
    async function findGoodTags (): Promise<TagElement[]> {
      const found = (await connection.findAll(tags.class.TagElement, {
        category: { $nin: OTHER_CATEGORIES }
      })) as TagElement[]
      return found.sort((a, b) => b.title.length - a.title.length).filter((t) => t.title.length > 2)
    }

    // References from candidates to the given tag.
    async function findCandidateReferences (tag: TagElement): Promise<TagReference[]> {
      return (await connection.findAll(tags.class.TagReference, {
        attachedToClass: recruit.mixin.Candidate,
        tag: tag._id
      })) as TagReference[]
    }

    // Deletes the tag and every candidate reference to it directly in the database.
    async function removeTagWithReferences (tag: TagElement): Promise<void> {
      const references = await findCandidateReferences(tag)
      const ids = references.map((r) => r._id)
      await db.collection<Doc>(DOMAIN_TAGS).deleteMany({ _id: { $in: ids } })
      await db.collection<Doc>(DOMAIN_TAGS).deleteOne({ _id: tag._id })
    }

    // Stores the actual number of references into `refCount` of every tag.
    async function fixCount (): Promise<void> {
      console.log('fixing ref-count...')
      const allTags = (await connection.findAll(tags.class.TagElement, {})) as TagElement[]
      for (const [index, tag] of allTags.entries()) {
        console.log('progress: ', ((index + 1) * 100) / allTags.length)
        const references = await connection.findAll(tags.class.TagReference, { tag: tag._id }, { total: true })
        if (references.total >= 0) {
          await db.collection<Doc>(DOMAIN_TAGS).updateOne({ _id: tag._id }, { $set: { refCount: references.total } })
        }
      }
      console.log('DONE: fixing ref-count')
    }

    // STEP 1: all to Upper Case
    if (step === '1') {
      console.log('converting case')
      const tagsToClean = await findOtherTags()
      for (const tag of tagsToClean) {
        await db
          .collection<Doc>(DOMAIN_TAGS)
          .updateOne({ _id: tag._id }, { $set: { title: tag.title.trim().toUpperCase() } })
      }
      console.log('DONE: converting case')
    }

    // STEP 2: Replace with same titles
    if (step === '2') {
      console.log('fixing titles')
      const tagsToClean = await findOtherTags()
      const groupped = groupBy(tagsToClean, 'title')
      console.log('STEP2: Done grouping')
      for (const key in groupped) {
        const values = groupped[key]
        if (values.length === 1) continue
        // console.log('duplicates: ', values)
        // The first tag of the group survives, the others are merged into it.
        const goodTag = values[0]
        for (const t of values) {
          if (t._id === goodTag._id) continue
          const references = await findCandidateReferences(t)
          goodTag.refCount = (goodTag.refCount ?? 0) + references.length
          for (const reference of references) {
            await db
              .collection<Doc>(DOMAIN_TAGS)
              .updateOne(
                { _id: reference._id },
                { $set: { tag: goodTag._id, color: goodTag.color, title: goodTag.title } }
              )
          }
          await db.collection<Doc>(DOMAIN_TAGS).deleteOne({ _id: t._id })
        }
        await db
          .collection<Doc>(DOMAIN_TAGS)
          .updateOne({ _id: goodTag._id }, { $set: { refCount: goodTag.refCount } })
      }
      console.log('STEP2 DONE')
    }

    // fix skills with + and -
    if (step === '3') {
      console.log('STEP 3')
      const ops = new TxOperations(connection, core.account.System)
      // `test` on a global regex remembers `lastIndex` between calls, so reusing it across
      // different titles skips matches. A non global pattern is used for testing; the
      // global one is kept only for `match`, which needs it to return every occurrence.
      const signedPart = /\S+(?:[-+]\S+)+/
      const signedParts = /\S+(?:[-+]\S+)+/g
      const tagsToClean = await findOtherTags()
      const tagsMatchingRegex = tagsToClean.filter((tag) => signedPart.test(tag.title))
      const goodTags = await findGoodTags()
      for (const wrongTag of tagsMatchingRegex) {
        const incorrectStrings = wrongTag.title.match(signedParts)
        if (incorrectStrings == null) continue
        for (const str of incorrectStrings) {
          const withoutSigns = str.replaceAll(/[+-]/g, '')
          const goodTag = goodTags.find((t) => t.title.toUpperCase() === withoutSigns)
          if (goodTag === undefined) continue
          const references = await findCandidateReferences(wrongTag)
          for (const ref of references) {
            await ops.addCollection(
              tags.class.TagReference,
              ref.space,
              ref.attachedTo,
              ref.attachedToClass,
              ref.collection,
              {
                title: goodTag.title,
                tag: goodTag._id,
                color: ref.color
              }
            )
            await db
              .collection<Doc>(DOMAIN_TAGS)
              .updateOne({ _id: ref._id }, { $set: { title: ref.title.replace(str, '') } })
          }
          await db
            .collection<Doc>(DOMAIN_TAGS)
            .updateOne({ _id: wrongTag._id }, { $set: { title: wrongTag.title.replace(str, goodTag.title) } })
        }
      }
      console.log('DONE: STEP 3')
    }

    // change incorrect skills and add good one
    if (step === '4') {
      console.log('step 4')
      const goodTags = await findGoodTags()
      const ops = new TxOperations(connection, core.account.System)
      const tagsToClean = await findOtherTags()
      for (const [index, incorrectTag] of tagsToClean.entries()) {
        console.log('tag progress: ', ((index + 1) * 100) / tagsToClean.length)
        const toReplace = goodTags.filter((t) => incorrectTag.title.includes(t.title.toUpperCase()))
        if (toReplace.length === 0) continue
        const references = await findCandidateReferences(incorrectTag)
        // Shared by all references: good tag titles are cut out of it step by step.
        let title = incorrectTag.title
        for (const ref of references) {
          const refsForCand = (
            (await connection.findAll(tags.class.TagReference, {
              attachedToClass: recruit.mixin.Candidate,
              attachedTo: ref.attachedTo
            })) as TagReference[]
          ).map((r) => r.tag)
          for (const gTag of toReplace) {
            title = title.replace(gTag.title.toUpperCase(), '')
            // The candidate already has this good tag.
            if (refsForCand.includes(gTag._id)) continue
            await ops.addCollection(
              tags.class.TagReference,
              ref.space,
              ref.attachedTo,
              ref.attachedToClass,
              ref.collection,
              {
                title: gTag.title,
                tag: gTag._id,
                color: ref.color
              }
            )
          }
          await db.collection<Doc>(DOMAIN_TAGS).updateOne({ _id: ref._id }, { $set: { title } })
        }
        await db.collection<Doc>(DOMAIN_TAGS).updateOne({ _id: incorrectTag._id }, { $set: { title } })
      }
      console.log('STEP4 DONE')
    }

    // remove skills with space or empty string
    if (step === '5') {
      console.log('STEP 5')
      const tagsToClean = (await connection.findAll(tags.class.TagElement, {
        category: { $in: OTHER_CATEGORIES },
        title: { $in: [' ', ''] }
      })) as TagElement[]
      for (const t of tagsToClean) {
        await removeTagWithReferences(t)
      }
      await fixCount()
      console.log('DONE 5 STEP')
    }

    // remove skills with ref count less than 10
    if (step === '6') {
      console.log('STEP 6')
      const tagsToClean = await findOtherTags()
      for (const t of tagsToClean) {
        if ((t?.refCount ?? 0) >= 10) continue
        await removeTagWithReferences(t)
      }
      console.log('DONE 6 STEP')
    }

    // remove all skills that don't have letters in it
    if (step === '7') {
      console.log('STEP 7')
      const tagsToClean = await findOtherTags()
      // Not global on purpose: a global regex keeps `lastIndex` after a successful `test`,
      // which made the next matching title fail the check and stay in the database.
      const withoutLetters = /^((?![a-zA-Zа-яА-Я]).)*$/
      for (const t of tagsToClean) {
        if (!withoutLetters.test(t.title)) continue
        await removeTagWithReferences(t)
      }
      await fixCount()
      console.log('DONE 7 STEP')
    }
  } catch (err: unknown) {
    console.trace(err)
  } finally {
    // Close the transactor connection even when closing the Mongo client fails.
    try {
      await client.close()
    } finally {
      await connection.close()
    }
  }
}

/**
 * Groups documents by the value found under `key`.
 * Documents without a value (null or undefined) end up in the `'undefined'` group.
 *
 * @param docs - documents to group.
 * @param key - name of the attribute to group by, resolved with `getObjectValue`.
 * @returns groups of documents by attribute value, in order of first appearance.
 */
function groupBy<T extends Doc> (docs: T[], key: string): Record<any, T[]> {
  return docs.reduce((storage: Record<string, T[]>, item: T) => {
    const group = getObjectValue(key, item) ?? undefined
    storage[group] = storage[group] ?? []
    storage[group].push(item)
    return storage
  }, {})
}