//
// Copyright © 2022 Hardcore Engineering Inc.
//
// Licensed under the Eclipse Public License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. You may
// obtain a copy of the License at https://www.eclipse.org/legal/epl-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//
// See the License for the specific language governing permissions and
// limitations under the License.
//

import {
  Class,
  Doc,
  DocIndexState,
  DocumentQuery,
  DocumentUpdate,
  MeasureContext,
  Ref,
  WorkspaceId,
  type LowLevelStorage
} from '@hcengineering/core'
import {
  contentStageId,
  docKey,
  DocUpdateHandler,
  docUpdKey,
  extractDocKey,
  fieldStateId,
  FullTextPipeline,
  IndexedDoc
} from '@hcengineering/server-core'
import { translateStateId, TranslationStage } from './types'

/**
 * Full text pipeline stage that re-translates non-English document attributes into English
 * using a LibreTranslate compatible service.
 *
 * @public
 */
export class LibRetranslateStage implements TranslationStage {
  require = [fieldStateId, contentStageId]
  stageId = translateStateId

  updateFields: DocUpdateHandler[] = []

  // Extra key suffix used to mark translated (English) attribute values.
  langExtra = 'en'

  clearExcept?: string[] = undefined

  enabled = false

  token: string = ''
  endpoint: string = ''

  constructor (readonly workspaceId: WorkspaceId) {}

  async initialize (ctx: MeasureContext, storage: LowLevelStorage, pipeline: FullTextPipeline): Promise<void> {}

  async search (
    _classes: Ref<Class<Doc>>[],
    query: DocumentQuery<Doc>,
    size: number | undefined,
    from?: number
  ): Promise<{ docs: IndexedDoc[], pass: boolean }> {
    return { docs: [], pass: true }
  }

  async update (doc: DocIndexState, update: DocumentUpdate<DocIndexState>): Promise<void> {
    for (const k of Object.keys(update)) {
      const { _class, attr, docId, extra } = extractDocKey(k)
      if (!extra.includes('en')) {
        // Fill translation document update request.
        ;(update as any)[docUpdKey(attr, { _class, docId, extra: [...extra, ''] })] = ''
      }
    }
  }

  async collect (toIndex: DocIndexState[], pipeline: FullTextPipeline, metrics: MeasureContext): Promise<void> {
    if (!this.enabled) {
      return
    }
    for (const doc of toIndex) {
      if (pipeline.cancelling) {
        return
      }
      await this.retranslate(doc, pipeline)
    }
  }

  async isEnglish (text: string): Promise<boolean> {
    let english = false
    try {
      if (text.length > 0) {
        // LibreTranslate's /detect endpoint expects a POST request and returns
        // an array of { language, confidence } entries.
        const langResponse: any[] = await (
          await fetch(this.endpoint + '/detect', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({
              q: text,
              api_key: this.token
            })
          })
        ).json()
        english = langResponse.some((it: any) => it.language === 'en' && it.confidence * 100 > 90)
      }
    } catch (err: any) {
      // Could not detect language
      // console.error(err)
    }
    return english
  }

  async retranslate (doc: DocIndexState, pipeline: FullTextPipeline): Promise<void> {
    // Copy content attributes as well.
    const update: DocumentUpdate<DocIndexState> = {}
    const elasticUpdate: Partial<IndexedDoc> = {}

    if (pipeline.cancelling) {
      return
    }

    try {
      for (const [attrKey, v] of Object.entries(doc.attributes)) {
        if (pipeline.cancelling) {
          return
        }
        const { _class, attr, docId, extra } = extractDocKey(attrKey)

        // Translate only non-English values and non-child values.
        if (!extra.includes(this.langExtra) && docId === undefined) {
          const enContent = doc.attributes[`${docKey(attr, { _class, docId })}`]
          let sourceContent = v as string
          if (extra.includes('base64')) {
            sourceContent = Buffer.from(sourceContent, 'base64').toString()
          }
          // If value is cleared
          if (enContent === undefined || enContent === '') {
            let toTranslate = `${sourceContent}\n`

            // Remove extra spaces and extra new lines.
            toTranslate = toTranslate
              .split(/ |\t|\f/)
              .filter((it) => it)
              .join(' ')
              .split(/\n+/)
              .join('\n')

            let english = false
            try {
              if (toTranslate.length > 0) {
                english = await this.isEnglish(toTranslate)
              }
            } catch (err: any) {
              // Could not detect language
              console.error(err)
            }

            let translatedText = ''
            if (!english) {
              try {
                const st = Date.now()
                console.log('retranslate:begin: ', doc._id, attr)
                // LibreTranslate's /translate endpoint expects a POST request and
                // returns { translatedText: string }.
                const response: any = await (
                  await fetch(this.endpoint + '/translate', {
                    method: 'POST',
                    headers: { 'Content-Type': 'application/json' },
                    body: JSON.stringify({
                      q: toTranslate,
                      source: 'auto',
                      target: 'en',
                      format: 'text',
                      api_key: this.token
                    })
                  })
                ).json()
                console.log('retranslate:', doc._id, attr, Date.now() - st, response.translatedText.length)
                translatedText = response.translatedText
              } catch (err: any) {
                console.error(err)
              }
            } else {
              translatedText = ''
              console.log('retranslate: Not required', doc._id, attr)
            }

            const base64Content = Buffer.from(translatedText).toString('base64')
            ;(update as any)[`${docUpdKey(attr, { _class, extra: [...extra, this.langExtra] })}`] = base64Content
            elasticUpdate[`${docKey(attr, { _class, extra: [...extra, this.langExtra] })}`] = base64Content
            if (doc.attachedTo != null) {
              // Propagate the translated value to the parent document as a child attribute.
              const parentUpdate: DocumentUpdate<DocIndexState> = {}
              ;(parentUpdate as any)[docUpdKey(attr, { _class, docId: doc._id, extra: [...extra, this.langExtra] })] =
                base64Content
              await pipeline.update(doc.attachedTo as Ref<DocIndexState>, false, parentUpdate)
            }
          }
        }
      }
    } catch (err: any) {
      const wasError = doc.attributes.error !== undefined

      await pipeline.update(doc._id, false, { [docKey('error')]: JSON.stringify(err) })
      if (wasError) {
        return
      }
      // Print error only first time, and update it in doc index
      console.error(err)
      return
    }

    await pipeline.update(doc._id, true, update, true)
  }

  async remove (docs: DocIndexState[], pipeline: FullTextPipeline): Promise<void> {
    // will be handled by field processor
    for (const doc of docs) {
      await pipeline.update(doc._id, true, {})
    }
  }
}
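
// Illustrative wiring sketch (not part of the original module): shows how the stage might be
// configured before being registered with the full text indexing pipeline. The environment
// variable names below are assumptions, and the actual pipeline registration call depends on
// the server setup, so it is not shown here.
//
//   const stage = new LibRetranslateStage(workspaceId)
//   stage.endpoint = process.env.TRANSLATE_ENDPOINT ?? '' // e.g. a LibreTranslate instance URL (assumed variable name)
//   stage.token = process.env.TRANSLATE_API_KEY ?? '' // assumed variable name
//   stage.enabled = stage.endpoint !== ''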