platform/server-plugins/collaboration/src/fulltext.ts
Andrey Sobolev c42923c2b1
Fix Indexer use of $ne (#6264)
Signed-off-by: Andrey Sobolev <haiodo@gmail.com>
2024-08-06 21:36:03 +07:00

168 lines
5.1 KiB
TypeScript

//
// Copyright © 2024 Hardcore Engineering Inc.
//
// Licensed under the Eclipse Public License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. You may
// obtain a copy of the License at https://www.eclipse.org/legal/epl-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//
// See the License for the specific language governing permissions and
// limitations under the License.
//
import core, {
Blob,
Class,
CollaborativeDoc,
Doc,
DocIndexState,
DocumentQuery,
DocumentUpdate,
MeasureContext,
Ref,
WorkspaceId,
collaborativeDocParse,
getFullTextIndexableAttributes
} from '@hcengineering/core'
import {
ContentTextAdapter,
DbAdapter,
DocUpdateHandler,
FullTextPipeline,
FullTextPipelineStage,
IndexedDoc,
StorageAdapter,
collabStageId,
contentStageId,
docKey,
docUpdKey,
fieldStateId
} from '@hcengineering/server-core'
/**
 * @public
 *
 * Full-text indexing pipeline stage that extracts plain text from
 * collaborative documents kept in the storage adapter and attaches it
 * (base64 encoded) to the document index state. Content is re-extracted
 * only when the stored blob's digest differs from the one recorded on
 * the document, so unchanged documents are skipped cheaply.
 */
export class CollaborativeContentRetrievalStage implements FullTextPipelineStage {
  require = []
  stageId = collabStageId

  // Extra key segments appended to the indexed attribute name for extracted content.
  extra = ['content', 'base64']
  enabled = true

  // Clear all except following.
  clearExcept: string[] = [fieldStateId, contentStageId]

  updateFields: DocUpdateHandler[] = []

  // Maximum number of characters of extracted text kept per attribute.
  textLimit = 100 * 1024

  constructor (
    readonly storageAdapter: StorageAdapter | undefined,
    readonly workspace: WorkspaceId,
    readonly metrics: MeasureContext,
    private readonly contentAdapter: ContentTextAdapter
  ) {}

  async initialize (ctx: MeasureContext, storage: DbAdapter, pipeline: FullTextPipeline): Promise<void> {
    // Just do nothing
  }

  /**
   * This stage contributes no search results of its own; pass through to the next stage.
   */
  async search (
    _classes: Ref<Class<Doc>>[],
    search: DocumentQuery<Doc>,
    size?: number,
    from?: number
  ): Promise<{ docs: IndexedDoc[], pass: boolean }> {
    return { docs: [], pass: true }
  }

  /**
   * Process a batch of documents sequentially, stopping early if the pipeline
   * is being cancelled.
   */
  async collect (toIndex: DocIndexState[], pipeline: FullTextPipeline): Promise<void> {
    for (const doc of toIndex) {
      if (pipeline.cancelling) {
        return
      }
      await this.updateContent(doc, pipeline)
    }
  }

  /**
   * Extract and index text content for every collaborative-doc attribute of `doc`.
   *
   * On failure the error is recorded on the document index state (and logged to
   * the console only the first time) instead of failing the whole pipeline.
   */
  async updateContent (doc: DocIndexState, pipeline: FullTextPipeline): Promise<void> {
    const attributes = getFullTextIndexableAttributes(pipeline.hierarchy, doc.objectClass)

    // Copy content attributes as well.
    const update: DocumentUpdate<DocIndexState> = {}

    if (pipeline.cancelling) {
      return
    }

    try {
      for (const [, val] of Object.entries(attributes)) {
        if (val.type._class === core.class.TypeCollaborativeDoc) {
          const collaborativeDoc = doc.attributes[docKey(val.name, { _class: val.attributeOf })] as CollaborativeDoc

          if (collaborativeDoc !== undefined && collaborativeDoc !== '') {
            const { documentId } = collaborativeDocParse(collaborativeDoc)
            const docInfo: Blob | undefined = await this.storageAdapter?.stat(this.metrics, this.workspace, documentId)

            if (docInfo !== undefined) {
              const digest = docInfo.etag
              const digestKey = docKey(val.name, { _class: val.attributeOf, digest: true })

              // Skip re-extraction when the stored blob has not changed.
              if (doc.attributes[digestKey] !== digest) {
                ;(update as any)[docUpdKey(digestKey)] = digest

                const contentType = (docInfo.contentType ?? '').split(';')[0]
                const readable = await this.storageAdapter?.get(this.metrics, this.workspace, documentId)

                if (readable !== undefined) {
                  let textContent: string
                  try {
                    textContent = await this.metrics.with(
                      'fetch',
                      {},
                      async () => await this.contentAdapter.content(documentId, contentType, readable)
                    )
                  } finally {
                    // Always release the stream — including when content
                    // extraction throws — to avoid leaking the underlying
                    // storage connection (the outer catch would otherwise
                    // swallow the error with the stream still open).
                    readable.destroy()
                  }

                  // Normalize whitespace: collapse runs of spaces/tabs/form-feeds
                  // and squash multiple blank lines into one newline.
                  textContent = textContent
                    .split(/ +|\t+|\f+/)
                    .filter((it) => it)
                    .join(' ')
                    .split(/\n\n+/)
                    .join('\n')

                  // trim to large content
                  if (textContent.length > this.textLimit) {
                    textContent = textContent.slice(0, this.textLimit)
                  }

                  textContent = Buffer.from(textContent).toString('base64')

                  ;(update as any)[docUpdKey(val.name, { _class: val.attributeOf, extra: this.extra })] = textContent
                }
              }
            }
          }
        }
      }
    } catch (err: any) {
      const wasError = (doc as any).error !== undefined

      await pipeline.update(doc._id, false, { [docKey('error')]: JSON.stringify({ message: err.message, err }) })
      if (wasError) {
        return
      }
      // Print error only first time, and update it in doc index
      console.error(err)
      return
    }

    await pipeline.update(doc._id, true, update)
  }

  async remove (docs: DocIndexState[], pipeline: FullTextPipeline): Promise<void> {
    // will be handled by field processor
    for (const doc of docs) {
      await pipeline.update(doc._id, true, {})
    }
  }
}