UBER-921: Improve full text search (#3848)

Signed-off-by: Andrey Sobolev <haiodo@gmail.com>
This commit is contained in:
Andrey Sobolev 2023-10-17 15:21:59 +07:00 committed by GitHub
parent 3c0ff4c049
commit c199ab8e00
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 68 additions and 36 deletions

View File

@ -161,12 +161,15 @@ export class TApplicant extends TTask implements Applicant {
startDate!: Timestamp | null
@Prop(TypeRef(contact.mixin.Employee), recruit.string.AssignedRecruiter)
@Index(IndexKind.Indexed)
declare assignee: Ref<Employee> | null
@Prop(TypeRef(task.class.State), task.string.TaskState, { _id: recruit.attribute.State })
@Index(IndexKind.Indexed)
declare status: Ref<State>
@Prop(TypeRef(task.class.DoneState), task.string.TaskStateDone, { _id: recruit.attribute.DoneState })
@Index(IndexKind.Indexed)
declare doneState: Ref<DoneState>
}

View File

@ -90,9 +90,11 @@ export class TLostState extends TDoneState implements LostState {}
@UX(task.string.Task, task.icon.Task, task.string.Task)
export class TTask extends TAttachedDoc implements Task {
@Prop(TypeRef(core.class.Status), task.string.TaskState, { _id: task.attribute.State })
@Index(IndexKind.Indexed)
status!: Ref<Status>
@Prop(TypeRef(task.class.DoneState), task.string.TaskStateDone, { _id: task.attribute.DoneState })
@Index(IndexKind.Indexed)
doneState!: Ref<DoneState> | null
@Prop(TypeString(), task.string.TaskNumber)

View File

@ -102,6 +102,8 @@ export enum IndexKind {
FullText,
/**
* An index should be created in the Mongo database for attributes with this annotation.
*
* Also means the attribute is included in Elasticsearch.
*/
Indexed
}

View File

@ -179,6 +179,13 @@ export function isFullTextAttribute (attr: AnyAttribute): boolean {
)
}
/**
 * Checks whether an attribute is annotated with `IndexKind.Indexed`.
 *
 * @param attr - attribute whose `index` field is inspected.
 * @returns `true` only when `attr.index` strictly equals `IndexKind.Indexed`.
 * @public
 */
export function isIndexedAttribute (attr: AnyAttribute): boolean {
  const { index } = attr
  return index === IndexKind.Indexed
}
/**
* @public
*/

View File

@ -58,7 +58,7 @@
<span class="font-medium">Summary:</span>
{#each summary.split('\n') as line}
{@const hl = search.length > 0 && line.toLowerCase().includes(search.toLowerCase())}
<span class:text-md={!hl} class:highlight={hl}>{line}</span>
<span class="select-text" class:text-md={!hl} class:highlight={hl}>{line}</span>
{/each}
{:else if indexDoc}
{#each attributes as attr}
@ -77,13 +77,13 @@
{#if search.length > 0}
<span class="font-medium">Result:</span>
{#each doc.filter((line) => line.toLowerCase().includes(search.toLowerCase())) as line}
<span class:highlight={true}>{line}</span>
<span class="select-text" class:highlight={true}>{line}</span>
{/each}
<br />
{/if}
{#each doc as line}
{@const hl = search.length > 0 && line.toLowerCase().includes(search.toLowerCase())}
<span class:text-md={!hl} class:highlight={hl}>{line}</span>
<span class="select-text" class:text-md={!hl} class:highlight={hl}>{line}</span>
{/each}
</div>
{/each}

View File

@ -19,11 +19,13 @@ import core, {
Class,
Doc,
DocIndexState,
docKey,
DocumentQuery,
FindOptions,
FindResult,
Hierarchy,
IndexKind,
isFullTextAttribute,
isIndexedAttribute,
MeasureContext,
ObjQueryType,
Ref,
@ -135,10 +137,22 @@ export class FullTextIndex implements WithFind {
}
try {
for (const [k, attr] of attrs) {
if (attr.index === IndexKind.FullText) {
if (isFullTextAttribute(attr) || isIndexedAttribute(attr)) {
const vv = (query as any)[k]
if (vv != null) {
findQuery[k] = vv
if (
k === '_class' ||
k === 'modifiedBy' ||
k === 'modifiedOn' ||
k === 'space' ||
k === 'attachedTo' ||
k === 'attachedToClass'
) {
findQuery[k] = vv
} else {
const docKeyValue = docKey(attr.name, { _class: attr.attributeOf })
findQuery[docKeyValue] = vv
}
}
}
if (attr.type._class === core.class.Collection) {
@ -165,12 +179,12 @@ export class FullTextIndex implements WithFind {
return true
})
const fullTextLimit = options?.limit ?? 200
const fullTextLimit = Math.min(5000, (options?.limit ?? 200) * 100)
let { docs, pass } = await this.indexer.search(classes, findQuery, fullTextLimit)
if (docs.length === 0 && pass) {
docs = await this.adapter.search(classes, query, fullTextLimit)
docs = await this.adapter.search(classes, findQuery, fullTextLimit)
}
const indexedDocMap = new Map<Ref<Doc>, IndexedDoc>()

View File

@ -27,7 +27,7 @@ import core, {
import { MinioService } from '@hcengineering/minio'
import { ContentTextAdapter, IndexedDoc } from '../types'
import { contentStageId, DocUpdateHandler, fieldStateId, FullTextPipeline, FullTextPipelineStage } from './types'
import { docKey, docUpdKey, getFullTextAttributes } from './utils'
import { docKey, docUpdKey, getFullTextIndexableAttributes } from './utils'
/**
* @public
@ -80,7 +80,7 @@ export class ContentRetrievalStage implements FullTextPipelineStage {
}
async updateContent (doc: DocIndexState, pipeline: FullTextPipeline): Promise<void> {
const attributes = getFullTextAttributes(pipeline.hierarchy, doc.objectClass)
const attributes = getFullTextIndexableAttributes(pipeline.hierarchy, doc.objectClass)
// Copy content attributes as well.
const update: DocumentUpdate<DocIndexState> = {}

View File

@ -34,7 +34,7 @@ import {
docKey,
docUpdKey,
getContent,
getFullTextAttributes,
getFullTextIndexableAttributes,
getFullTextContext,
isFullTextAttribute,
loadIndexStageStage
@ -112,7 +112,7 @@ export class IndexedFieldStage implements FullTextPipelineStage {
const docs = await this.dbStorage.findAll(metrics, objClass, {
_id: { $in: Array.from(valueIds.keys()) }
})
const attributes = getFullTextAttributes(pipeline.hierarchy, objClass)
const attributes = getFullTextIndexableAttributes(pipeline.hierarchy, objClass)
// Child docs.

View File

@ -22,7 +22,8 @@ import core, {
DocumentQuery,
DocumentUpdate,
extractDocKey,
IndexKind,
isFullTextAttribute,
isIndexedAttribute,
MeasureContext,
Ref,
ServerStorage,
@ -110,7 +111,7 @@ export class FullTextPushStage implements FullTextPipelineStage {
if (
attrObj !== null &&
attrObj !== undefined &&
attrObj.index === IndexKind.FullText &&
(isFullTextAttribute(attrObj) || isIndexedAttribute(attrObj)) &&
(attrObj.type._class === core.class.RefTo ||
(attrObj.type._class === core.class.ArrOf && (attrObj.type as ArrOf<any>).of._class === core.class.RefTo))
) {

View File

@ -102,7 +102,7 @@ export const contentStageId = 'cnt-v2b'
/**
* @public
*/
export const fieldStateId = 'fld-v5'
export const fieldStateId = 'fld-v6'
/**
* @public

View File

@ -33,6 +33,7 @@ import core, {
Hierarchy,
IndexStageState,
isFullTextAttribute,
isIndexedAttribute,
Obj,
Ref,
Space,
@ -45,11 +46,11 @@ import { FullTextPipeline } from './types'
/**
* @public
*/
export function getFullTextAttributes (hierarchy: Hierarchy, clazz: Ref<Class<Obj>>): AnyAttribute[] {
export function getFullTextIndexableAttributes (hierarchy: Hierarchy, clazz: Ref<Class<Obj>>): AnyAttribute[] {
const allAttributes = hierarchy.getAllAttributes(clazz)
const result: AnyAttribute[] = []
for (const [, attr] of allAttributes) {
if (isFullTextAttribute(attr)) {
if (isFullTextAttribute(attr) || isIndexedAttribute(attr)) {
result.push(attr)
}
}
@ -59,7 +60,7 @@ export function getFullTextAttributes (hierarchy: Hierarchy, clazz: Ref<Class<Ob
.filter((m) => hierarchy.getClass(m).kind === ClassifierKind.MIXIN)
.forEach((m) => {
for (const [, v] of hierarchy.getAllAttributes(m, clazz)) {
if (isFullTextAttribute(v)) {
if (isFullTextAttribute(v) || isIndexedAttribute(v)) {
result.push(v)
}
}
@ -119,10 +120,10 @@ export function isClassIndexable (hierarchy: Hierarchy, c: Ref<Class<Doc>>): boo
hierarchy.setClassifierProp(c, 'class_indexed', false)
return false
}
const attrs = getFullTextAttributes(hierarchy, c)
const attrs = getFullTextIndexableAttributes(hierarchy, c)
for (const d of hierarchy.getDescendants(c)) {
if (hierarchy.isMixin(d)) {
attrs.push(...getFullTextAttributes(hierarchy, d))
attrs.push(...getFullTextIndexableAttributes(hierarchy, d))
}
}

View File

@ -133,26 +133,28 @@ class ElasticAdapter implements FullTextAdapter {
}
}
if (query.space != null) {
if (typeof query.space === 'object') {
if (query.space.$in !== undefined) {
for (const [q, v] of Object.entries(query)) {
if (!q.startsWith('$')) {
if (typeof v === 'object') {
if (v.$in !== undefined) {
request.bool.should.push({
terms: {
[q]: v.$in,
boost: 100.0
}
})
}
} else {
request.bool.should.push({
terms: {
space: query.space.$in.map((c) => c.toLowerCase()),
boost: 2.0
term: {
[q]: {
value: v,
boost: 100.0,
case_insensitive: true
}
}
})
}
} else {
request.bool.should.push({
term: {
space: {
value: query.space,
boost: 2.0,
case_insensitive: true
}
}
})
}
}