diff --git a/services/mail/pod-inbound-mail/src/__tests__/handlerMta.test.ts b/services/mail/pod-inbound-mail/src/__tests__/handlerMta.test.ts index e162b3835a..c87c5c3557 100644 --- a/services/mail/pod-inbound-mail/src/__tests__/handlerMta.test.ts +++ b/services/mail/pod-inbound-mail/src/__tests__/handlerMta.test.ts @@ -16,10 +16,12 @@ import { Request, Response } from 'express' import { MeasureContext } from '@hcengineering/core' import { createMessages } from '@hcengineering/mail-common' -import { type MtaMessage, handleMtaHook } from '../handlerMta' -import * as client from '../client' import { createRestTxOperations } from '@hcengineering/api-client' +import { handleMtaHook } from '../handlerMta' +import * as client from '../client' +import { type MtaMessage } from '../types' + // Mock dependencies jest.mock('@hcengineering/mail-common', () => ({ createMessages: jest.fn(), @@ -388,4 +390,175 @@ describe('handleMtaHook', () => { } } } + + it('should process HTML email correctly', async () => { + // Mock request with HTML content + const htmlContent = '

Hello

This is an HTML test email

' + mockReq = { + headers: { 'x-hook-token': 'test-hook-token' }, + body: createValidMtaMessage('sender@example.com', ['recipient@example.com'], { + subject: 'HTML Test Subject', + contentType: 'text/html; charset=utf-8', + content: htmlContent + }) + } + + await handleMtaHook(mockReq as Request, mockRes as Response, mockCtx) + + // Should return 200 + expect(mockStatus).toHaveBeenCalledWith(200) + expect(mockSend).toHaveBeenCalledWith({ action: 'accept' }) + + // Should process the message with both HTML and text content + expect(createMessages).toHaveBeenCalledWith( + client.baseConfig, + mockCtx, + mockTxOperations, + {}, + {}, + client.mailServiceToken, + mockLoginInfo, + expect.objectContaining({ + mailId: expect.any(String), + from: { email: 'sender@example.com', firstName: 'sender', lastName: 'example.com' }, + to: [{ email: 'recipient@example.com', firstName: 'recipient', lastName: 'example.com' }], + subject: 'HTML Test Subject', + content: htmlContent, + incoming: true + }), + [] // attachments + ) + }) + + it('should process multipart email with both HTML and text correctly', async () => { + // Create a multipart email with both text and HTML + const textContent = 'This is the plain text version' + const htmlContent = '

This is the HTML version

' + + // Mock message with multipart content by setting multiple headers and contents + const multipartMessage = { + envelope: { + from: { address: 'sender@example.com' }, + to: [{ address: 'recipient@example.com' }] + }, + message: { + headers: [ + ['Content-Type', 'multipart/alternative; boundary="boundary-string"'], + ['Subject', 'Multipart Test Email'], + ['From', 'Sender '], + ['To', 'Recipient '] + ], + contents: [ + { + headers: [['Content-Type', 'text/plain; charset=utf-8']], + content: textContent + }, + { + headers: [['Content-Type', 'text/html; charset=utf-8']], + content: htmlContent + } + ] + } + } + + mockReq = { + headers: { 'x-hook-token': 'test-hook-token' }, + body: multipartMessage + } + + await handleMtaHook(mockReq as Request, mockRes as Response, mockCtx) + + // Should return 200 + expect(mockStatus).toHaveBeenCalledWith(200) + expect(mockSend).toHaveBeenCalledWith({ action: 'accept' }) + + // Should process the message with both content types + expect(createMessages).toHaveBeenCalledWith( + client.baseConfig, + mockCtx, + mockTxOperations, + {}, + {}, + client.mailServiceToken, + mockLoginInfo, + expect.objectContaining({ + mailId: expect.any(String), + from: { email: 'sender@example.com', firstName: 'Sender', lastName: '' }, + to: [{ email: 'recipient@example.com', firstName: 'Recipient', lastName: '' }], + subject: 'Multipart Test Email', + content: 'This is the HTML version', + incoming: true + }), + [] + ) + }) + + it('should handle HTML email with inline images correctly', async () => { + // HTML content with embedded image reference + const htmlWithImage = + '

Test with image:

Test Image' + + // Create image attachment + const imageAttachment = { + headers: [ + ['Content-Type', 'image/jpeg'], + ['Content-Disposition', 'inline; filename="image.jpg"'], + ['Content-ID', ''] + ], + content: 'base64encodedcontent' // Would normally be a Base64 string + } + + // Create multipart message with HTML and image + const multipartMessage = { + envelope: { + from: { address: 'sender@example.com' }, + to: [{ address: 'recipient@example.com' }] + }, + message: { + headers: [ + ['Content-Type', 'multipart/related; boundary="boundary-string"'], + ['Subject', 'Email with Inline Image'], + ['From', 'Sender '], + ['To', 'Recipient '] + ], + contents: [ + { + headers: [['Content-Type', 'text/html; charset=utf-8']], + content: htmlWithImage + }, + imageAttachment + ] + } + } + + mockReq = { + headers: { 'x-hook-token': 'test-hook-token' }, + body: multipartMessage + } + + await handleMtaHook(mockReq as Request, mockRes as Response, mockCtx) + + // Should process message with attachments + expect(createMessages).toHaveBeenCalledWith( + client.baseConfig, + mockCtx, + mockTxOperations, + {}, + {}, + client.mailServiceToken, + mockLoginInfo, + expect.objectContaining({ + htmlContent: htmlWithImage + // Other fields as expected + }), + expect.arrayContaining([ + expect.objectContaining({ + contentType: 'image/jpeg', + name: 'image.jpg', + contentId: '' + // Other attachment fields + }) + ]) + ) + }) }) diff --git a/services/mail/pod-inbound-mail/src/handlerMta.ts b/services/mail/pod-inbound-mail/src/handlerMta.ts index 976a60e687..7b7d7fdbe4 100644 --- a/services/mail/pod-inbound-mail/src/handlerMta.ts +++ b/services/mail/pod-inbound-mail/src/handlerMta.ts @@ -12,14 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. 
// -import { createHash, randomUUID } from 'crypto' -import { readEml, ReadedEmlJson } from 'eml-parse-js' +import { createHash } from 'crypto' import { Request, Response } from 'express' -import TurndownService from 'turndown' -import sanitizeHtml from 'sanitize-html' import { MeasureContext } from '@hcengineering/core' import { - type Attachment, type EmailContact, type EmailMessage, createMessages, @@ -30,26 +26,8 @@ import { createRestTxOperations } from '@hcengineering/api-client' import { mailServiceToken, baseConfig, kvsClient } from './client' import config from './config' - -export interface MtaMessage { - envelope: { - from: { - address: string - } - to: { - address: string - }[] - } - message: { - headers: string[][] - contents: string - } -} - -function getHeader (mta: MtaMessage, header: string): string | undefined { - const h = header.toLowerCase() - return mta.message.headers.find((header) => header[0].toLowerCase() === h)?.[1]?.trim() -} +import { MtaMessage } from './types' +import { getHeader, parseContent } from './utils' export async function handleMtaHook (req: Request, res: Response, ctx: MeasureContext): Promise { try { @@ -142,75 +120,6 @@ export async function handleMtaHook (req: Request, res: Response, ctx: MeasureCo } } -async function parseContent ( - ctx: MeasureContext, - mta: MtaMessage -): Promise<{ content: string, attachments: Attachment[] }> { - const contentType = getHeader(mta, 'Content-Type') - if (contentType === undefined) { - throw new Error('Content-Type header not found') - } - - if (contentType.toLowerCase().startsWith('text/plain')) { - return { content: mta.message.contents, attachments: [] } - } - - const contents = `Content-Type: ${contentType}\r\n${mta.message.contents}` - const email = await new Promise((resolve, reject) => { - readEml(contents, (err, json) => { - if (err !== undefined && err !== null) { - reject(err) - } else if (json === undefined) { - reject(new Error('Failed to parse email')) - } else { - 
resolve(json) - } - }) - }) - - let content = email.text ?? '' - let isMarkdown = false - if (email.html !== undefined) { - try { - const html = sanitizeHtml(email.html) - const tds = new TurndownService() - content = tds.turndown(html) - isMarkdown = true - } catch (error) { - ctx.warn('Failed to parse html content', { error }) - } - } - - const attachments: Attachment[] = [] - if (config.storageConfig !== undefined) { - for (const a of email.attachments ?? []) { - if (a.name === undefined || a.name.length === 0) { - // EML parser returns attachments with empty name for parts of content - // that do not have "Content-Disposition: attachment" e.g. for part - // Content-Type: text/calendar; charset="UTF-8"; method=REQUEST - continue - } - const attachment: Attachment = { - id: randomUUID(), - name: a.name, - data: Buffer.from(a.data64, 'base64'), - contentType: a.contentType.split(';')[0].trim() - } - attachments.push(attachment) - - // For inline images, replace the CID references with the blob id - if (isMarkdown && a.inline && a.id !== undefined) { - const cid = a.id.replace(/[<>]/g, '') - content = content.replaceAll( - new RegExp(`!\\[.*?\\]\\(cid:${cid}\\)`, 'g'), - `![${a.name}](cid:${attachment.id})` - ) - } - } - } - return { content, attachments } -} - function getEmailContact (email: string): EmailContact { const parts = stripTags(email).split('@') return { diff --git a/services/mail/pod-inbound-mail/src/types.ts b/services/mail/pod-inbound-mail/src/types.ts new file mode 100644 index 0000000000..12a07d5504 --- /dev/null +++ b/services/mail/pod-inbound-mail/src/types.ts @@ -0,0 +1,29 @@ +// +// Copyright © 2025 Hardcore Engineering Inc. +// +// Licensed under the Eclipse Public License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
You may +// obtain a copy of the License at https://www.eclipse.org/legal/epl-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// +// See the License for the specific language governing permissions and +// limitations under the License. +// + +export interface MtaMessage { + envelope: { + from: { + address: string + } + to: { + address: string + }[] + } + message: { + headers: string[][] + contents: string + } +} diff --git a/services/mail/pod-inbound-mail/src/utils.ts b/services/mail/pod-inbound-mail/src/utils.ts new file mode 100644 index 0000000000..da879ad69b --- /dev/null +++ b/services/mail/pod-inbound-mail/src/utils.ts @@ -0,0 +1,135 @@ +// +// Copyright © 2025 Hardcore Engineering Inc. +// +// Licensed under the Eclipse Public License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. You may +// obtain a copy of the License at https://www.eclipse.org/legal/epl-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// +// See the License for the specific language governing permissions and +// limitations under the License. 
+//
+import { randomUUID } from 'crypto'
+import { readEml, ReadedEmlJson } from 'eml-parse-js'
+import TurndownService from 'turndown'
+import sanitizeHtml from 'sanitize-html'
+import { MeasureContext } from '@hcengineering/core'
+import {
+  type Attachment
+} from '@hcengineering/mail-common'
+
+import { MtaMessage } from './types'
+import config from './config'
+
+/**
+ * Extracts the message body and the attachments from an MTA hook payload.
+ *
+ * - `text/plain` messages are returned verbatim with no attachments.
+ * - Any other content type is run through the EML parser. When the parsed
+ *   message has an HTML part, it is sanitized and converted with Turndown and
+ *   that result replaces the plain-text part.
+ * - Attachments are collected only when `config.storageConfig` is defined.
+ *   For inline attachments, `cid:` image references in the converted content
+ *   are rewritten to point at the generated attachment id.
+ *
+ * @param ctx - measure context, used to report HTML conversion failures
+ * @param mta - message received from the MTA hook
+ * @returns the message content and the list of named attachments
+ * @throws Error when the message has no Content-Type header, or when the EML
+ *   parser fails
+ */
+export async function parseContent (
+  ctx: MeasureContext,
+  mta: MtaMessage
+): Promise<{ content: string, attachments: Attachment[] }> {
+  const contentType = getHeader(mta, 'Content-Type')
+  if (contentType === undefined) {
+    throw new Error('Content-Type header not found')
+  }
+
+  if (contentType.toLowerCase().startsWith('text/plain')) {
+    return { content: mta.message.contents, attachments: [] }
+  }
+
+  // NOTE(review): the message-level Content-Type value read above is not
+  // forwarded to getEmailContent, which only guesses a type when the body does
+  // not mention one. Confirm multipart bodies still parse without it.
+  const email = await getEmailContent(mta.message.contents)
+
+  // Mail bodies are deliberately not logged here: they are user data.
+  let content = email.text ?? ''
+  let isMarkdown = false
+  if (email.html !== undefined) {
+    try {
+      const html = sanitizeHtml(email.html)
+      const tds = new TurndownService()
+      content = tds.turndown(html)
+      isMarkdown = true
+    } catch (error) {
+      // Keep the plain-text part when the HTML part cannot be converted
+      ctx.warn('Failed to parse html content', { error })
+    }
+  }
+
+  const attachments: Attachment[] = []
+  if (config.storageConfig !== undefined) {
+    for (const a of email.attachments ?? []) {
+      if (a.name === undefined || a.name.length === 0) {
+        // EML parser returns attachments with empty name for parts of content
+        // that do not have "Content-Disposition: attachment" e.g. for part
+        // Content-Type: text/calendar; charset="UTF-8"; method=REQUEST
+        continue
+      }
+      const attachment: Attachment = {
+        id: randomUUID(),
+        name: a.name,
+        data: Buffer.from(a.data64, 'base64'),
+        contentType: a.contentType.split(';')[0].trim()
+      }
+      attachments.push(attachment)
+
+      // For inline images, replace the CID references with the blob id
+      if (isMarkdown && a.inline && a.id !== undefined) {
+        // The Content-ID is untrusted input, so it is escaped before being
+        // embedded in a pattern.
+        const cid = escapeRegExp(a.id.replace(/[<>]/g, ''))
+        // The alt text may not run past a "](" sequence, so a match can never
+        // start at an earlier image on the same line and swallow it.
+        const cidImage = new RegExp(`!\\[(?:[^\\]\\n]|\\](?!\\())*\\]\\(cid:${cid}\\)`, 'g')
+        const replacement = `![${a.name}](cid:${attachment.id})`
+        // A replacer function keeps "$" sequences in the name literal
+        content = content.replaceAll(cidImage, () => replacement)
+      }
+    }
+  }
+  return { content, attachments }
+}
+
+/**
+ * Looks up a message header by name, ignoring case.
+ *
+ * @param mta - message received from the MTA hook
+ * @param header - header name to look for
+ * @returns the trimmed value of the first matching header, or undefined when
+ *   there is no such header
+ */
+export function getHeader (mta: MtaMessage, header: string): string | undefined {
+  const wanted = header.toLowerCase()
+  return mta.message.headers.find((entry) => entry[0].toLowerCase() === wanted)?.[1]?.trim()
+}
+
+/**
+ * Parses raw message contents with the EML parser.
+ *
+ * A Content-Type line is prepended when the contents do not mention one. If
+ * the parser yields neither text nor HTML, the raw contents are returned as
+ * the text part.
+ *
+ * @param mtaContent - raw contents taken from the MTA message
+ * @returns the parsed message
+ * @throws Error when the parser reports an error or returns no result
+ */
+async function getEmailContent (mtaContent: string): Promise<ReadedEmlJson> {
+  if (mtaContent == null) {
+    // Only the fields read by parseContent are provided, hence the cast
+    return {
+      text: '',
+      html: '',
+      attachments: []
+    } as any
+  }
+  const contentRegex = /Content-Type/i
+  const content = contentRegex.test(mtaContent)
+    ? mtaContent
+    : `Content-Type: ${guessContentType(mtaContent)}\r\n${mtaContent}`
+  const email = await new Promise<ReadedEmlJson>((resolve, reject) => {
+    readEml(content, (err, json) => {
+      if (err !== undefined && err !== null) {
+        reject(new Error(`Email parsing error: ${err.message}`))
+      } else if (json === undefined) {
+        reject(new Error('Email parser returned undefined result'))
+      } else {
+        resolve(json)
+      }
+    })
+  })
+  if (isEmptyString(email.text) && isEmptyString(email.html)) {
+    return {
+      ...email,
+      text: mtaContent
+    }
+  }
+  return email
+}
+
+/**
+ * Picks a Content-Type value for contents that do not declare one.
+ *
+ * @param content - raw message contents
+ * @returns an HTML content type when the contents contain something that
+ *   looks like a tag, a plain-text content type otherwise
+ */
+function guessContentType (content: string): string {
+  // Simple heuristic - if it contains HTML tags, it's likely HTML
+  if (/<[a-z][\s\S]*>/i.test(content)) {
+    return 'text/html; charset="UTF-8"'
+  }
+  return 'text/plain; charset="UTF-8"'
+}
+
+/**
+ * Tells whether a string is missing or contains only whitespace.
+ */
+function isEmptyString (str: string | undefined): boolean {
+  return str == null || str.trim() === ''
+}
+
+/**
+ * Escapes the characters that have a special meaning in a regular expression
+ * so that the value can be embedded in a pattern and matched literally.
+ */
+function escapeRegExp (value: string): string {
+  return value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
+}