diff --git a/services/mail/pod-inbound-mail/src/__tests__/__mocks__/base64Message.json b/services/mail/pod-inbound-mail/src/__tests__/__mocks__/base64Message.json new file mode 100644 index 0000000000..f08cadf670 --- /dev/null +++ b/services/mail/pod-inbound-mail/src/__tests__/__mocks__/base64Message.json @@ -0,0 +1,81 @@ +{ + "envelope": { + "from": { + "address": "example1@test.com" + }, + "to": [ + { + "address": "recipient2@example.com" + } + ] + }, + "message": { + "contents": "VGVzdCBlbmNvZGVkIGVtYWlsIGNvbnRlbnQ=", + "headers": [ + [ + "Received", + " from mail-nwsmtp-mxback-production-main-38.iva.yp-c.yandex.net (mail-nwsmtp-mxback-production-main-38.iva.yp-c.yandex.net [IPv6:2a02:6b8:c0c:1724:0:640:dee6:0])\r\n\tby forward500b.mail.yandex.net (Yandex) with ESMTPS id 65A23611AA\r\n\tfor ; Tue, 3 Jun 2025 06:35:44 +0300 (MSK)\r\n" + ], + [ + "Received", + " from mail.yandex.ru (2a02:6b8:c0c:b187:0:640:6b88:0 [2a02:6b8:c0c:b187:0:640:6b88:0])\r\n\tby mail-nwsmtp-mxback-production-main-38.iva.yp-c.yandex.net (mxback/Yandex) with HTTPS id YZRhuVDL1eA0-m7PJdtq8;\r\n\tTue, 03 Jun 2025 06:35:44 +0300\r\n" + ], + [ + "X-Yandex-Fwd", + " 1\r\n" + ], + [ + "DKIM-Signature", + " v=1; a=rsa-sha256; c=relaxed/relaxed; d=yandex.ru; s=mail;\r\n\tt=1748921744; bh=DnseDjFgmtsB1kN2sgMKhzeGZ1TcOQm0aEN3ux6v8k0=;\r\n\th=Message-Id:Date:Subject:In-Reply-To:To:From;\r\n\tb=mOwsmrIzUHfrnHY6fjABtgU2IHkXKyHjoEmbKNGHPkFFdq9fqtNiw7rwX7HYJIFwN\r\n\t Jx9ZGkGNLpDGElAXs67xexAp6t/mAebaInU/5/C7nJd8YMlkauUGTKmQDD4rtOrBSG\r\n\t 2LAfXrsAyEVaeqnIjhNEir+sAWHyA1+kDPpA4jCc=\r\n" + ], + [ + "Authentication-Results", + " mail-nwsmtp-mxback-production-main-38.iva.yp-c.yandex.net; dkim=pass header.i=@yandex.ru\r\n" + ], + [ + "Received", + " by qvxj4z7i6zm4ub2j.iva.yp-c.yandex.net with HTTP;\r\n\tTue, 03 Jun 2025 06:35:43 +0300\r\n" + ], + [ + "From", + " =?utf-8?B?RXhhbXBsZSBVc2VyMQ==?= \r\n" + ], + [ + "To", + " \"Example Recipient2\" \r\n" + ], + [ + "In-Reply-To", + " 
YUvuZoQ3ypgAAAAAAfVzrQAAAAAAAAQu\r\n" + ], + [ + "Subject", + " =?utf-8?B?VGhpcyBpcyBlbmNvZGVkIGVtYWlsIHN1YmplY3Q=?=\r\n" + ], + [ + "MIME-Version", + " 1.0\r\n" + ], + [ + "X-Mailer", + " Yamail [ http://yandex.ru ] 5.0\r\n" + ], + [ + "Date", + " Tue, 03 Jun 2025 06:35:43 +0300\r\n" + ], + [ + "Message-Id", + " <1296141748921736@mail.yandex.ru>\r\n" + ], + [ + "Content-Transfer-Encoding", + " base64\r\n" + ], + [ + "Content-Type", + " text/html; charset=utf-8\r\n" + ] + ] + } +} \ No newline at end of file diff --git a/services/mail/pod-inbound-mail/src/__tests__/decode.test.ts b/services/mail/pod-inbound-mail/src/__tests__/decode.test.ts new file mode 100644 index 0000000000..30d0049e46 --- /dev/null +++ b/services/mail/pod-inbound-mail/src/__tests__/decode.test.ts @@ -0,0 +1,230 @@ +// +// Copyright © 2025 Hardcore Engineering Inc. +// +// Licensed under the Eclipse Public License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. You may +// obtain a copy of the License at https://www.eclipse.org/legal/epl-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +import { decodeContent, decodeEncodedWords } from '../decode' +import { MeasureContext } from '@hcengineering/core' + +jest.mock( + '../config', + () => ({ + hookToken: 'test-hook-token', + ignoredAddresses: ['ignored@example.com'], + storageConfig: 'test-storage-config', + workspaceUrl: 'test-workspace' + }), + { virtual: true } +) + +const mockCtx: MeasureContext = { + info: jest.fn(), + error: jest.fn(), + warn: jest.fn() +} as any + +describe('decodeContent', () => { + test('should return original content when encoding is undefined', () => { + const content = 'Hello World' + const result = decodeContent(mockCtx, content, undefined) + expect(result).toBe(content) + }) + + test('should return original content when encoding is empty string', () => { + const content = 'Hello World' + const result = decodeContent(mockCtx, content, '') + expect(result).toBe(content) + }) + + test('should decode base64 content', () => { + const base64Content = 'SGVsbG8gV29ybGQ=' // "Hello World" in base64 + const result = decodeContent(mockCtx, base64Content, 'base64') + expect(result).toBe('Hello World') + }) + + test('should decode base64 content with case insensitive encoding', () => { + const base64Content = 'SGVsbG8gV29ybGQ=' + const result = decodeContent(mockCtx, base64Content, 'BASE64') + expect(result).toBe('Hello World') + }) + + test('should decode quoted-printable content', () => { + const qpContent = 'Hello=20World=21' + const result = decodeContent(mockCtx, qpContent, 'quoted-printable') + expect(result).toBe('Hello World!') + }) + + test('should handle quoted-printable with soft line breaks', () => { + const qpContent = 'This is a very long line that needs=\r\nto be wrapped' + const result = decodeContent(mockCtx, qpContent, 'quoted-printable') + expect(result).toBe('This is a very long line that needsto be wrapped') + }) + + test('should return original content for 7bit encoding', () => { + const content = 'Plain text content' + const result = 
decodeContent(mockCtx, content, '7bit') + expect(result).toBe(content) + }) + + test('should return original content for 8bit encoding', () => { + const content = 'Plain text with émojis 🎉' + const result = decodeContent(mockCtx, content, '8bit') + expect(result).toBe(content) + }) + + test('should return original content for binary encoding', () => { + const content = 'Binary content' + const result = decodeContent(mockCtx, content, 'binary') + expect(result).toBe(content) + }) + + test('should return original content for unknown encoding', () => { + const content = 'Unknown encoding content' + const result = decodeContent(mockCtx, content, 'unknown-encoding') + expect(result).toBe(content) + }) +}) + +describe('decodeEncodedWords', () => { + test('should return original text when no encoded words present', () => { + const text = 'Plain text without encoding' + const result = decodeEncodedWords(mockCtx, text) + expect(result).toBe(text) + }) + + test('should decode base64 encoded word', () => { + const text = '=?utf-8?B?SGVsbG8gV29ybGQ=?=' + const result = decodeEncodedWords(mockCtx, text) + expect(result).toBe('Hello World') + }) + + test('should decode quoted-printable encoded word', () => { + const text = '=?utf-8?Q?Hello=20World?=' + const result = decodeEncodedWords(mockCtx, text) + expect(result).toBe('Hello World') + }) + + test('should decode quoted-printable with underscores as spaces', () => { + const text = '=?utf-8?Q?Hello_World?=' + const result = decodeEncodedWords(mockCtx, text) + expect(result).toBe('Hello World') + }) + + test('should handle multiple encoded words in same text', () => { + const text = '=?utf-8?B?SGVsbG8=?= =?utf-8?B?V29ybGQ=?=' + const result = decodeEncodedWords(mockCtx, text) + expect(result).toBe('Hello World') + }) + + test('should handle mixed encoded and plain text', () => { + const text = 'Subject: =?utf-8?B?SGVsbG8=?= from sender' + const result = decodeEncodedWords(mockCtx, text) + expect(result).toBe('Subject: Hello from 
sender') + }) + + test('should handle case insensitive encoding (lowercase b)', () => { + const text = '=?utf-8?b?SGVsbG8gV29ybGQ=?=' + const result = decodeEncodedWords(mockCtx, text) + expect(result).toBe('Hello World') + }) + + test('should handle case insensitive encoding (lowercase q)', () => { + const text = '=?utf-8?q?Hello_World?=' + const result = decodeEncodedWords(mockCtx, text) + expect(result).toBe('Hello World') + }) + + test('should handle unknown encoding gracefully', () => { + const text = '=?utf-8?X?unknown?=' + const result = decodeEncodedWords(mockCtx, text) + expect(result).toBe(text) // Should return original + }) + + test('should decode real-world email subject', () => { + const text = '=?UTF-8?B?8J+OiSBXZWxjb21lIHRvIG91ciBwbGF0Zm9ybSE=?=' + const result = decodeEncodedWords(mockCtx, text) + expect(result).toBe('🎉 Welcome to our platform!') + }) + + test('should handle empty encoded text', () => { + const text = '=?utf-8?B??=' + const result = decodeEncodedWords(mockCtx, text) + expect(result).toBe('') + }) + + test('should handle different charset - ISO-8859-1', () => { + const text = '=?iso-8859-1?B?SGVsbG8gV29ybGQ=?=' + const result = decodeEncodedWords(mockCtx, text) + expect(result).toBe('Hello World') + }) + + test('should handle different charset - latin1', () => { + const text = '=?latin1?B?SGVsbG8gV29ybGQ=?=' + const result = decodeEncodedWords(mockCtx, text) + expect(result).toBe('Hello World') + }) + + test('should handle different charset - windows-1252', () => { + const text = '=?windows-1252?B?SGVsbG8gV29ybGQ=?=' + const result = decodeEncodedWords(mockCtx, text) + expect(result).toBe('Hello World') + }) + + test('should handle ASCII charset', () => { + const text = '=?us-ascii?B?SGVsbG8gV29ybGQ=?=' + const result = decodeEncodedWords(mockCtx, text) + expect(result).toBe('Hello World') + }) + + test('should handle case insensitive charset names', () => { + const text = '=?UTF-8?B?SGVsbG8gV29ybGQ=?=' + const result = 
decodeEncodedWords(mockCtx, text) + expect(result).toBe('Hello World') + }) + + test('should handle charset with whitespace', () => { + const text = '=? utf-8 ?B?SGVsbG8gV29ybGQ=?=' + const result = decodeEncodedWords(mockCtx, text) + expect(result).toBe('Hello World') + }) + + test('should default to utf8 for unsupported charset', () => { + const text = '=?gb2312?B?SGVsbG8gV29ybGQ=?=' + const result = decodeEncodedWords(mockCtx, text) + expect(result).toBe('Hello World') // Should still decode as utf8 + }) + + test('should handle mixed charsets in same text', () => { + const text = '=?utf-8?B?SGVsbG8=?= =?iso-8859-1?B?V29ybGQ=?=' + const result = decodeEncodedWords(mockCtx, text) + expect(result).toBe('Hello World') + }) + + test('should handle quoted-printable with different charset', () => { + const text = '=?iso-8859-1?Q?caf=E9?=' + const result = decodeEncodedWords(mockCtx, text) + expect(result).toBe('café') + }) + + test('should handle error in charset conversion gracefully', () => { + const consoleSpy = jest.spyOn(mockCtx, 'warn') + // This might cause an encoding issue depending on the content + const text = '=?invalid-charset?B?invalid-content?=' + + const result = decodeEncodedWords(mockCtx, text) + + // Should either decode successfully with fallback or return original + expect(typeof result).toBe('string') + + consoleSpy.mockRestore() + }) +}) diff --git a/services/mail/pod-inbound-mail/src/__tests__/handlerMta.test.ts b/services/mail/pod-inbound-mail/src/__tests__/handlerMta.test.ts index 2bba73dd38..b39b052ab1 100644 --- a/services/mail/pod-inbound-mail/src/__tests__/handlerMta.test.ts +++ b/services/mail/pod-inbound-mail/src/__tests__/handlerMta.test.ts @@ -13,6 +13,8 @@ // limitations under the License. 
// +import fs from 'fs/promises' +import path from 'path' import { Request, Response } from 'express' import { MeasureContext } from '@hcengineering/core' import { createMessages } from '@hcengineering/mail-common' @@ -486,4 +488,41 @@ This is an **HTML** test email` [] ) }) + + it('should decode encoded content in email', async () => { + // Create a multipart email with both text and HTML + const base64MessageData = await fs.readFile(path.join(__dirname, '__mocks__/base64Message.json'), 'utf-8') + const mtaMessage: MtaMessage = JSON.parse(base64MessageData) + + mockReq = { + headers: { 'x-hook-token': 'test-hook-token' }, + body: mtaMessage + } + + await handleMtaHook(mockReq as Request, mockRes as Response, mockCtx) + + // Should return 200 + expect(mockStatus).toHaveBeenCalledWith(200) + expect(mockSend).toHaveBeenCalledWith({ action: 'accept' }) + + // Should process the message with both content types + expect(createMessages).toHaveBeenCalledWith( + client.baseConfig, + mockCtx, + mockTxOperations, + {}, + {}, + client.mailServiceToken, + mockLoginInfo, + expect.objectContaining({ + mailId: expect.any(String), + from: { email: 'example1@test.com', firstName: 'Example', lastName: 'User1' }, + to: [{ email: 'recipient2@example.com', firstName: 'Example', lastName: 'Recipient2' }], + subject: 'This is encoded email subject', + content: 'Test encoded email content', + incoming: true + }), + [] + ) + }) }) diff --git a/services/mail/pod-inbound-mail/src/decode.ts b/services/mail/pod-inbound-mail/src/decode.ts new file mode 100644 index 0000000000..2bd843fe87 --- /dev/null +++ b/services/mail/pod-inbound-mail/src/decode.ts @@ -0,0 +1,130 @@ +// +// Copyright © 2025 Hardcore Engineering Inc. +// +// Licensed under the Eclipse Public License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
/**
 * Charset labels that map directly onto a Node.js Buffer encoding.
 * `windows-1252`/`cp1252` are approximated by `latin1`, as they were before.
 */
const BUFFER_CHARSETS: ReadonlyMap<string, BufferEncoding> = new Map<string, BufferEncoding>([
  ['utf-8', 'utf8'],
  ['utf8', 'utf8'],
  ['iso-8859-1', 'latin1'],
  ['latin1', 'latin1'],
  ['cp1252', 'latin1'],
  ['windows-1252', 'latin1'],
  ['ascii', 'ascii'],
  ['us-ascii', 'ascii'],
  ['utf-16', 'utf16le'],
  ['utf-16le', 'utf16le'],
  ['ucs-2', 'utf16le'],
  ['ucs2', 'utf16le'],
  ['base64', 'base64'],
  ['hex', 'hex']
])

/**
 * Returns the body of an MTA message with its transfer encoding undone.
 *
 * @param ctx - Measure context, used only to log decoding warnings.
 * @param mta - Incoming message; the encoding is taken from its
 *   `Content-Transfer-Encoding` header.
 * @returns The decoded body, or the raw body when the header is missing or
 *   names an encoding that needs no decoding.
 */
export function getDecodedContent (ctx: MeasureContext, mta: MtaMessage): string {
  const contentEncoding = getHeader(mta, 'Content-Transfer-Encoding')
  return decodeContent(ctx, mta.message.contents, contentEncoding)
}

/**
 * Decodes message content according to a Content-Transfer-Encoding value.
 *
 * `base64` and `quoted-printable` are decoded (case-insensitively, surrounding
 * whitespace ignored); everything else - including `7bit`, `8bit`, `binary`,
 * an empty or missing value and unknown encodings - returns `content` as is.
 *
 * @param ctx - Measure context, used only to log decoding warnings.
 * @param content - Raw (still encoded) content.
 * @param encoding - Value of the Content-Transfer-Encoding header, if any.
 * @returns The decoded text; decoded bytes are interpreted as UTF-8.
 */
export function decodeContent (ctx: MeasureContext, content: string, encoding: string | undefined): string {
  const normalizedEncoding = encoding?.toLowerCase().trim() ?? ''

  switch (normalizedEncoding) {
    case 'base64':
      try {
        return Buffer.from(content, 'base64').toString('utf-8')
      } catch (error: unknown) {
        ctx.warn('Failed to decode base64 content:', { error: errorMessage(error) })
        return content
      }

    case 'quoted-printable':
      return decodeQuotedPrintable(content)

    default:
      // '', 7bit, 8bit, binary and anything unknown need no decoding
      return content
  }
}

/**
 * Decodes a quoted-printable body.
 *
 * Soft line breaks and stray trailing `=` are removed BEFORE `=XX` escapes are
 * expanded, so an escaped literal `=` (`=3D`) at the end of a line is not
 * mistaken for a soft line break. The resulting octets are decoded as UTF-8
 * (matching the base64 branch of decodeContent); when they are not valid UTF-8
 * each escaped octet is mapped to the character with that code instead, which
 * keeps Latin-1 encoded bodies readable.
 */
function decodeQuotedPrintable (content: string): string {
  const unfolded = content
    .replace(/=\r?\n/g, '') // Remove soft line breaks
    .replace(/=$/gm, '') // Remove trailing = at end of lines

  if (!/=[0-9A-F]{2}/i.test(unfolded)) {
    // Nothing escaped: avoid a pointless encode/decode round trip
    return unfolded
  }

  try {
    // fatal: reject invalid UTF-8 instead of silently inserting U+FFFD
    return new TextDecoder('utf-8', { fatal: true, ignoreBOM: true }).decode(escapedOctetsToBuffer(unfolded, 'utf8'))
  } catch {
    return unfolded.replace(/=([0-9A-F]{2})/gi, (_match: string, hex: string) =>
      String.fromCharCode(parseInt(hex, 16))
    )
  }
}

/**
 * Decodes RFC 2047 encoded words (`=?charset?encoding?encoded_text?=`) found
 * in a header value. Text outside encoded words is left untouched, and an
 * encoded word that cannot be decoded is kept verbatim.
 *
 * NOTE: whitespace between two adjacent encoded words is preserved; RFC 2047
 * asks for it to be dropped, but existing callers and tests rely on the
 * current output.
 *
 * @param ctx - Measure context, used only to log decoding warnings.
 * @param text - Header value that may contain encoded words.
 * @returns The text with every decodable encoded word replaced by its value.
 */
export function decodeEncodedWords (ctx: MeasureContext, text: string): string {
  // RFC 2047 encoded word pattern: =?charset?encoding?encoded_text?=
  const encodedWordPattern = /=\?([^?]+)\?([BQbq])\?([^?]*)\?=/g

  return text.replace(
    encodedWordPattern,
    (match: string, charset: string, encoding: string, encodedText: string) => {
      try {
        let decodedBytes: Buffer

        switch (encoding.toLowerCase()) {
          case 'b':
            // Base64 encoding
            decodedBytes = Buffer.from(encodedText, 'base64')
            break
          case 'q':
            // Quoted-printable variant: underscores stand for spaces
            decodedBytes = escapedOctetsToBuffer(encodedText.replace(/_/g, ' '), 'latin1')
            break
          default:
            // Unknown encoding, return original
            return match
        }

        return decodeBytes(decodedBytes, charset)
      } catch (error: unknown) {
        ctx.warn('Failed to decode encoded word:', { match, error: errorMessage(error) })
        return match // Return original if decoding fails
      }
    }
  )
}

/**
 * Turns text containing `=XX` escapes into the octets it represents.
 * Escapes become single bytes; the text between them is encoded with
 * `literalEncoding`.
 */
function escapedOctetsToBuffer (text: string, literalEncoding: BufferEncoding): Buffer {
  // split() with a capture group yields [literal, hex, literal, hex, ..., literal]
  const parts = text.split(/=([0-9A-F]{2})/i)
  return Buffer.concat(
    parts.map((part, index) =>
      index % 2 === 1 ? Buffer.from([parseInt(part, 16)]) : Buffer.from(part, literalEncoding)
    )
  )
}

/**
 * Converts bytes to a string using the given charset label.
 * Labels listed in BUFFER_CHARSETS use Buffer decoding; any other label is
 * tried with the runtime's TextDecoder (which charsets are available depends
 * on the runtime build) and falls back to UTF-8 when it is not supported.
 */
function decodeBytes (bytes: Buffer, charset: string): string {
  const label = charset.toLowerCase().trim()

  if (!BUFFER_CHARSETS.has(label)) {
    try {
      return new TextDecoder(label, { ignoreBOM: true }).decode(bytes)
    } catch {
      // Label unknown to TextDecoder: fall through to the UTF-8 default below
    }
  }

  return bytes.toString(normalizeCharset(label))
}

/** Maps a charset label to a Buffer encoding, defaulting to utf8. */
function normalizeCharset (charset: string): BufferEncoding {
  return BUFFER_CHARSETS.get(charset.toLowerCase().trim()) ?? 'utf8'
}

/** Extracts a printable message from a caught value of unknown type. */
function errorMessage (error: unknown): string {
  return error instanceof Error ? error.message : String(error)
}
from './config' import { MtaMessage } from './types' import { getHeader, parseContent } from './utils' +import { decodeEncodedWords } from './decode' export async function handleMtaHook (req: Request, res: Response, ctx: MeasureContext): Promise { try { @@ -60,7 +61,7 @@ export async function handleMtaHook (req: Request, res: Response, ctx: MeasureCo } } - const subject = getHeader(mta, 'Subject') ?? '' + const subject = decodeEncodedWords(ctx, getHeader(mta, 'Subject') ?? '') const inReplyTo = getHeader(mta, 'In-Reply-To') const { content, attachments } = await parseContent(ctx, mta) @@ -132,7 +133,7 @@ function extractContactName ( // Match name part that appears before an email in angle brackets const nameMatch = fromHeader.match(/^\s*"?([^"<]+?)"?\s*<.+?>/) const encodedName = nameMatch?.[1].trim() ?? '' - const name = encodedName.length > 0 ? decodeMimeWord(ctx, encodedName) : '' + const name = encodedName.length > 0 ? decodeEncodedWords(ctx, encodedName) : '' let [firstName, lastName] = name.split(' ') if (firstName === undefined || firstName.length === 0) { firstName = email.split('@')[0] @@ -143,28 +144,6 @@ function extractContactName ( return { firstName, lastName } } -function decodeMimeWord (ctx: MeasureContext, text: string): string { - return text.replace(/=\?([^?]+)\?([BQ])\?([^?]+)\?=/gi, (match, charset, encoding, content) => { - try { - if (encoding.toUpperCase() === 'B') { - // Base64 encoding - const buffer = Buffer.from(content, 'base64') - return buffer.toString(charset as BufferEncoding) - } else if (encoding.toUpperCase() === 'Q') { - // Quoted-printable encoding - const decoded = content - .replace(/_/g, ' ') - .replace(/=([0-9A-F]{2})/gi, (_: any, hex: string) => String.fromCharCode(parseInt(hex, 16))) - return Buffer.from(decoded).toString(charset as BufferEncoding) - } - return match - } catch (error) { - ctx.warn('Failed to decode encoded word', { error }) - return match - } - }) -} - function stripTags (email: string): string { const 
[name, domain] = email.split('@') const tagStart = name.indexOf('+') diff --git a/services/mail/pod-inbound-mail/src/utils.ts b/services/mail/pod-inbound-mail/src/utils.ts index a0b41d2ce2..a71b81a4a9 100644 --- a/services/mail/pod-inbound-mail/src/utils.ts +++ b/services/mail/pod-inbound-mail/src/utils.ts @@ -21,6 +21,7 @@ import { type Attachment } from '@hcengineering/mail-common' import { MtaMessage } from './types' import config from './config' +import { getDecodedContent } from './decode' export async function parseContent ( ctx: MeasureContext, @@ -34,10 +35,10 @@ export async function parseContent ( } if (contentType.toLowerCase().startsWith('text/plain')) { - return { content: mta.message.contents, attachments: [] } + return { content: getDecodedContent(ctx, mta), attachments: [] } } - const email = await getEmailContent(mta) + const email = await getEmailContent(ctx, mta) let content = email.text ?? '' let isMarkdown = false @@ -83,14 +84,14 @@ export async function parseContent ( return { content, attachments } } -export function convertMtaToEml (mta: MtaMessage): string { +export function convertMtaToEml (ctx: MeasureContext, mta: MtaMessage): string { return `MIME-Version: 1.0 Date: ${new Date().toUTCString()} From: ${mta.envelope.from.address} To: ${mta.envelope.to.map((to) => to.address).join(', ')} Content-Type: ${getHeader(mta, 'Content-Type') ?? 
'text/plain; charset=utf-8'} -${unescapeString(mta.message.contents)}` +${unescapeString(getDecodedContent(ctx, mta))}` } function unescapeString (str: string): string { @@ -107,8 +108,8 @@ export function getHeader (mta: MtaMessage, header: string): string | undefined return mta.message.headers.find((header) => header[0].toLowerCase() === h)?.[1]?.trim() } -async function getEmailContent (mta: MtaMessage): Promise { - const eml = convertMtaToEml(mta) +async function getEmailContent (ctx: MeasureContext, mta: MtaMessage): Promise { + const eml = convertMtaToEml(ctx, mta) const email = await new Promise((resolve, reject) => { readEml(eml, (err, json) => { if (err !== undefined && err !== null) { @@ -123,7 +124,7 @@ async function getEmailContent (mta: MtaMessage): Promise { if (isEmptyString(email.text) && isEmptyString(email.html)) { return { ...email, - text: removeContentTypeHeader(mta.message.contents) + text: removeContentTypeHeader(getDecodedContent(ctx, mta)) } } return email