UBERF-11004: Fix mta-hook email content

Signed-off-by: Artem Savchenko <armisav@gmail.com>
This commit is contained in:
Artem Savchenko 2025-05-23 10:38:32 +07:00
parent 02f584811f
commit 18f00908f1
4 changed files with 342 additions and 96 deletions

View File

@ -16,10 +16,12 @@
import { Request, Response } from 'express'
import { MeasureContext } from '@hcengineering/core'
import { createMessages } from '@hcengineering/mail-common'
import { type MtaMessage, handleMtaHook } from '../handlerMta'
import * as client from '../client'
import { createRestTxOperations } from '@hcengineering/api-client'
import { handleMtaHook } from '../handlerMta'
import * as client from '../client'
import { type MtaMessage } from '../types'
// Mock dependencies
jest.mock('@hcengineering/mail-common', () => ({
createMessages: jest.fn(),
@ -388,4 +390,175 @@ describe('handleMtaHook', () => {
}
}
}
// Verifies the hook's handling of a single-part HTML message: the request body is
// built by createValidMtaMessage with a text/html content type, and the handler is
// expected to answer 200 / { action: 'accept' } and call createMessages once with
// the parsed message and an empty attachment list.
// NOTE(review): the expectation below is that `content` equals the raw HTML string;
// confirm this matches what the handler produces after its HTML processing.
it('should process HTML email correctly', async () => {
// Mock request with HTML content
const htmlContent = '<html><body><h1>Hello</h1><p>This is an <b>HTML</b> test email</p></body></html>'
mockReq = {
headers: { 'x-hook-token': 'test-hook-token' },
body: createValidMtaMessage('sender@example.com', ['recipient@example.com'], {
subject: 'HTML Test Subject',
contentType: 'text/html; charset=utf-8',
content: htmlContent
})
}
await handleMtaHook(mockReq as Request, mockRes as Response, mockCtx)
// Should return 200
expect(mockStatus).toHaveBeenCalledWith(200)
expect(mockSend).toHaveBeenCalledWith({ action: 'accept' })
// Should forward the message with the HTML string as its content
expect(createMessages).toHaveBeenCalledWith(
client.baseConfig,
mockCtx,
mockTxOperations,
{},
{},
client.mailServiceToken,
mockLoginInfo,
expect.objectContaining({
mailId: expect.any(String),
from: { email: 'sender@example.com', firstName: 'sender', lastName: 'example.com' },
to: [{ email: 'recipient@example.com', firstName: 'recipient', lastName: 'example.com' }],
subject: 'HTML Test Subject',
content: htmlContent,
incoming: true
}),
[] // attachments
)
})
// Verifies a multipart/alternative message that carries a text/plain part and a
// text/html part: the handler is expected to answer 200 / { action: 'accept' } and
// pass createMessages a message whose content is the text of the HTML part, with
// sender/recipient names taken from the From/To display names and no attachments.
// NOTE(review): `message.contents` is an array of part objects here, while the
// MtaMessage interface declares `contents: string` — confirm which shape the hook
// actually receives.
it('should process multipart email with both HTML and text correctly', async () => {
// Create a multipart email with both text and HTML
const textContent = 'This is the plain text version'
const htmlContent = '<html><body><p>This is the HTML version</p></body></html>'
// Mock message with multipart content by setting multiple headers and contents
const multipartMessage = {
envelope: {
from: { address: 'sender@example.com' },
to: [{ address: 'recipient@example.com' }]
},
message: {
headers: [
['Content-Type', 'multipart/alternative; boundary="boundary-string"'],
['Subject', 'Multipart Test Email'],
['From', 'Sender <sender@example.com>'],
['To', 'Recipient <recipient@example.com>']
],
contents: [
{
headers: [['Content-Type', 'text/plain; charset=utf-8']],
content: textContent
},
{
headers: [['Content-Type', 'text/html; charset=utf-8']],
content: htmlContent
}
]
}
}
mockReq = {
headers: { 'x-hook-token': 'test-hook-token' },
body: multipartMessage
}
await handleMtaHook(mockReq as Request, mockRes as Response, mockCtx)
// Should return 200
expect(mockStatus).toHaveBeenCalledWith(200)
expect(mockSend).toHaveBeenCalledWith({ action: 'accept' })
// Should forward the text of the HTML part (not the plain-text part) as content
expect(createMessages).toHaveBeenCalledWith(
client.baseConfig,
mockCtx,
mockTxOperations,
{},
{},
client.mailServiceToken,
mockLoginInfo,
expect.objectContaining({
mailId: expect.any(String),
from: { email: 'sender@example.com', firstName: 'Sender', lastName: '' },
to: [{ email: 'recipient@example.com', firstName: 'Recipient', lastName: '' }],
subject: 'Multipart Test Email',
content: 'This is the HTML version',
incoming: true
}),
[]
)
})
// Verifies a multipart/related message made of an HTML part that references an
// image by Content-ID and an inline image/jpeg part carrying that Content-ID: the
// handler is expected to call createMessages with the HTML and with an attachment
// entry describing the image.
// NOTE(review): unlike the other HTML tests in this suite, this one does not assert
// the 200 / { action: 'accept' } response — confirm whether that is intentional.
// NOTE(review): the assertions rely on an `htmlContent` field on the message and a
// `contentId` field on the attachment; confirm the handler populates both.
it('should handle HTML email with inline images correctly', async () => {
// HTML content with embedded image reference
const htmlWithImage =
'<html><body><p>Test with image:</p><img src="cid:image1@example.com" alt="Test Image"></body></html>'
// Create image attachment
const imageAttachment = {
headers: [
['Content-Type', 'image/jpeg'],
['Content-Disposition', 'inline; filename="image.jpg"'],
['Content-ID', '<image1@example.com>']
],
content: 'base64encodedcontent' // Would normally be a Base64 string
}
// Create multipart message with HTML and image
const multipartMessage = {
envelope: {
from: { address: 'sender@example.com' },
to: [{ address: 'recipient@example.com' }]
},
message: {
headers: [
['Content-Type', 'multipart/related; boundary="boundary-string"'],
['Subject', 'Email with Inline Image'],
['From', 'Sender <sender@example.com>'],
['To', 'Recipient <recipient@example.com>']
],
contents: [
{
headers: [['Content-Type', 'text/html; charset=utf-8']],
content: htmlWithImage
},
imageAttachment
]
}
}
mockReq = {
headers: { 'x-hook-token': 'test-hook-token' },
body: multipartMessage
}
await handleMtaHook(mockReq as Request, mockRes as Response, mockCtx)
// Should process message with attachments
expect(createMessages).toHaveBeenCalledWith(
client.baseConfig,
mockCtx,
mockTxOperations,
{},
{},
client.mailServiceToken,
mockLoginInfo,
expect.objectContaining({
htmlContent: htmlWithImage
// Other fields as expected
}),
expect.arrayContaining([
expect.objectContaining({
contentType: 'image/jpeg',
name: 'image.jpg',
contentId: '<image1@example.com>'
// Other attachment fields
})
])
)
})
})

View File

@ -12,14 +12,10 @@
// See the License for the specific language governing permissions and
// limitations under the License.
//
import { createHash, randomUUID } from 'crypto'
import { readEml, ReadedEmlJson } from 'eml-parse-js'
import { createHash } from 'crypto'
import { Request, Response } from 'express'
import TurndownService from 'turndown'
import sanitizeHtml from 'sanitize-html'
import { MeasureContext } from '@hcengineering/core'
import {
type Attachment,
type EmailContact,
type EmailMessage,
createMessages,
@ -30,26 +26,8 @@ import { createRestTxOperations } from '@hcengineering/api-client'
import { mailServiceToken, baseConfig, kvsClient } from './client'
import config from './config'
/**
 * Body of an MTA hook request: the envelope addresses plus the message
 * headers and contents.
 */
export interface MtaMessage {
  envelope: {
    from: { address: string }
    to: Array<{ address: string }>
  }
  message: {
    // Header entries; index 0 is read as the header name and index 1 as its value
    headers: string[][]
    // Message body as a single string
    contents: string
  }
}
/**
 * Case-insensitive header lookup on an MTA message.
 *
 * @param mta - message whose `message.headers` entries are searched in order
 * @param header - header name to look for
 * @returns the trimmed value of the first entry whose name matches, or
 * undefined when nothing matches or the matching entry carries no value
 */
function getHeader (mta: MtaMessage, header: string): string | undefined {
  const target = header.toLowerCase()
  const match = mta.message.headers.find(([name]) => name.toLowerCase() === target)
  return match?.[1]?.trim()
}
import { MtaMessage } from './types'
import { getHeader, parseContent } from './utils'
export async function handleMtaHook (req: Request, res: Response, ctx: MeasureContext): Promise<void> {
try {
@ -142,75 +120,6 @@ export async function handleMtaHook (req: Request, res: Response, ctx: MeasureCo
}
}
/**
 * Extracts the body text and attachments from an MTA hook message.
 *
 * - Throws when the message has no Content-Type header.
 * - For text/plain messages the contents are returned as-is with no attachments.
 * - Otherwise the contents are prefixed with the Content-Type header and parsed
 *   with the EML parser; an HTML body, when present, is sanitized and converted
 *   with Turndown, falling back to the plain-text body if that conversion throws.
 * - Attachments are collected only when `config.storageConfig` is defined.
 *
 * @param ctx - context used to log a warning when the HTML conversion fails
 * @param mta - message received by the hook
 * @returns the message content and the list of named attachments
 * @throws Error when the Content-Type header is missing or the EML parser fails
 */
async function parseContent (
ctx: MeasureContext,
mta: MtaMessage
): Promise<{ content: string, attachments: Attachment[] }> {
const contentType = getHeader(mta, 'Content-Type')
if (contentType === undefined) {
throw new Error('Content-Type header not found')
}
if (contentType.toLowerCase().startsWith('text/plain')) {
return { content: mta.message.contents, attachments: [] }
}
// The parser needs the Content-Type header in front of the contents it is given
const contents = `Content-Type: ${contentType}\r\n${mta.message.contents}`
// Wrap the callback-style parser in a promise
const email = await new Promise<ReadedEmlJson>((resolve, reject) => {
readEml(contents, (err, json) => {
if (err !== undefined && err !== null) {
reject(err)
} else if (json === undefined) {
reject(new Error('Failed to parse email'))
} else {
resolve(json)
}
})
})
// Start from the plain-text body; replaced below when an HTML body converts cleanly
let content = email.text ?? ''
let isMarkdown = false
if (email.html !== undefined) {
try {
const html = sanitizeHtml(email.html)
const tds = new TurndownService()
content = tds.turndown(html)
isMarkdown = true
} catch (error) {
ctx.warn('Failed to parse html content', { error })
}
}
const attachments: Attachment[] = []
if (config.storageConfig !== undefined) {
for (const a of email.attachments ?? []) {
if (a.name === undefined || a.name.length === 0) {
// EML parser returns attachments with empty name for parts of content
// that do not have "Content-Disposition: attachment" e.g. for part
// Content-Type: text/calendar; charset="UTF-8"; method=REQUEST
continue
}
const attachment: Attachment = {
id: randomUUID(),
name: a.name,
data: Buffer.from(a.data64, 'base64'),
// Keep only the media type, dropping parameters such as charset
contentType: a.contentType.split(';')[0].trim()
}
attachments.push(attachment)
// For inline images, replace the CID references with the blob id
// NOTE(review): `cid` is interpolated into the RegExp unescaped, so characters
// such as '.' or '+' in a content id act as regex metacharacters — confirm.
if (isMarkdown && a.inline && a.id !== undefined) {
const cid = a.id.replace(/[<>]/g, '')
content = content.replaceAll(
new RegExp(`!\\[.*?\\]\\(cid:${cid}\\)`, 'g'),
`![${a.name}](cid:${attachment.id})`
)
}
}
}
return { content, attachments }
}
function getEmailContact (email: string): EmailContact {
const parts = stripTags(email).split('@')
return {

View File

@ -0,0 +1,29 @@
//
// Copyright © 2025 Hardcore Engineering Inc.
//
// Licensed under the Eclipse Public License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. You may
// obtain a copy of the License at https://www.eclipse.org/legal/epl-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//
// See the License for the specific language governing permissions and
// limitations under the License.
//
/** A single address entry of the envelope. */
interface MtaAddress {
  address: string
}

/**
 * Body of an MTA hook request: the envelope addresses plus the message
 * headers and contents.
 */
export interface MtaMessage {
  envelope: {
    from: MtaAddress
    to: MtaAddress[]
  }
  message: {
    // Header entries, each an array of strings
    headers: string[][]
    // Message body as a single string
    contents: string
  }
}

View File

@ -0,0 +1,135 @@
//
// Copyright © 2025 Hardcore Engineering Inc.
//
// Licensed under the Eclipse Public License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. You may
// obtain a copy of the License at https://www.eclipse.org/legal/epl-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//
// See the License for the specific language governing permissions and
// limitations under the License.
//
import { randomUUID } from 'crypto'
import { readEml, ReadedEmlJson } from 'eml-parse-js'
import TurndownService from 'turndown'
import sanitizeHtml from 'sanitize-html'
import { MeasureContext } from '@hcengineering/core'
import {
type Attachment
} from '@hcengineering/mail-common'
import { MtaMessage } from './types'
import config from './config'
/**
 * Extracts the body text and attachments from an MTA hook message.
 *
 * - text/plain messages are returned as-is, without attachments.
 * - Any other content type is parsed with the EML parser. A non-empty HTML body
 *   is sanitized and converted with Turndown; if the conversion throws, a
 *   warning is logged and the plain-text body is used instead.
 * - Attachments are collected only when `config.storageConfig` is defined.
 *   Parts without a name are skipped. For inline attachments, image references
 *   to their content id in the converted body are re-pointed at the generated
 *   attachment id.
 *
 * @param ctx - context used to log a warning when the HTML conversion fails
 * @param mta - message received by the hook
 * @returns the message content and the list of named attachments
 * @throws Error when the Content-Type header is missing or the EML parser fails
 */
export async function parseContent (
  ctx: MeasureContext,
  mta: MtaMessage
): Promise<{ content: string, attachments: Attachment[] }> {
  const contentType = getHeader(mta, 'Content-Type')
  if (contentType === undefined) {
    throw new Error('Content-Type header not found')
  }

  if (contentType.toLowerCase().startsWith('text/plain')) {
    return { content: mta.message.contents, attachments: [] }
  }

  const email = await getEmailContent(mta.message.contents)

  let content = email.text ?? ''
  let isMarkdown = false
  // Convert only when there is actual HTML: an empty HTML part must not replace
  // a usable plain-text body with an empty string.
  if (email.html != null && email.html.trim() !== '') {
    try {
      const html = sanitizeHtml(email.html)
      content = new TurndownService().turndown(html)
      isMarkdown = true
    } catch (error) {
      ctx.warn('Failed to parse html content', { error })
    }
  }

  const attachments: Attachment[] = []
  if (config.storageConfig !== undefined) {
    for (const a of email.attachments ?? []) {
      if (a.name === undefined || a.name.length === 0) {
        // EML parser returns attachments with empty name for parts of content
        // that do not have "Content-Disposition: attachment" e.g. for part
        // Content-Type: text/calendar; charset="UTF-8"; method=REQUEST
        continue
      }
      const attachment: Attachment = {
        id: randomUUID(),
        name: a.name,
        data: Buffer.from(a.data64, 'base64'),
        // Keep only the media type, dropping parameters such as charset
        contentType: a.contentType.split(';')[0].trim()
      }
      attachments.push(attachment)

      // For inline images, replace the CID references with the blob id
      if (isMarkdown && a.inline && a.id !== undefined) {
        const cid = a.id.replace(/[<>]/g, '')
        // The content id comes from the email, so it is escaped before being used
        // in a pattern. `[^\]]*` keeps the match inside a single image reference.
        const cidImage = new RegExp(`!\\[[^\\]]*\\]\\(cid:${escapeRegExp(cid)}\\)`, 'g')
        // A replacer function is used so that '$' sequences in the attachment name
        // are inserted literally.
        const replacement = `![${a.name}](cid:${attachment.id})`
        content = content.replace(cidImage, () => replacement)
      }
    }
  }
  return { content, attachments }
}

/** Escapes every regular-expression metacharacter in `value` so it matches literally. */
function escapeRegExp (value: string): string {
  return value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
}
/**
 * Looks up a header value on an MTA message by name, ignoring case.
 *
 * @param mta - message whose `message.headers` entries are scanned in order
 * @param header - header name to look for
 * @returns the trimmed value of the first entry whose name matches, or
 * undefined when nothing matches or the matching entry carries no value
 */
export function getHeader (mta: MtaMessage, header: string): string | undefined {
  const wanted = header.toLowerCase()
  for (const entry of mta.message.headers) {
    if (entry[0].toLowerCase() === wanted) {
      return entry[1]?.trim()
    }
  }
  return undefined
}
/**
 * Parses the contents of an MTA message with the EML parser.
 *
 * When the contents carry no Content-Type header line, one is guessed from the
 * contents and placed in front before parsing. If the parser yields neither a
 * text nor an HTML body, the original contents are returned as the text body.
 *
 * @param mtaContent - message contents as received from the hook
 * @returns the parsed email; for null/undefined contents an object with empty
 * text, html and attachments
 * @throws Error when the parser reports an error or returns no result
 */
async function getEmailContent (mtaContent: string): Promise<ReadedEmlJson> {
  if (mtaContent == null) {
    // Only the fields read by the caller are provided, hence the cast
    return {
      text: '',
      html: '',
      attachments: []
    } as any
  }

  const content = hasContentTypeHeader(mtaContent)
    ? mtaContent
    : `Content-Type: ${guessContentType(mtaContent)}\r\n${mtaContent}`

  // Wrap the callback-style parser in a promise
  const email = await new Promise<ReadedEmlJson>((resolve, reject) => {
    readEml(content, (err, json) => {
      if (err !== undefined && err !== null) {
        reject(new Error(`Email parsing error: ${err.message}`))
      } else if (json === undefined) {
        reject(new Error('Email parser returned undefined result'))
      } else {
        resolve(json)
      }
    })
  })

  // Nothing usable was extracted: fall back to the unparsed contents as text
  if (isEmptyString(email.text) && isEmptyString(email.html)) {
    return {
      ...email,
      text: mtaContent
    }
  }
  return email
}

/**
 * Tells whether `content` contains a Content-Type header line, i.e. the header
 * name at the start of a line followed by a colon. A mere mention of the words
 * inside the body (for example in an HTML `<meta http-equiv="Content-Type">`
 * tag) does not count.
 */
function hasContentTypeHeader (content: string): boolean {
  return /^Content-Type[ \t]*:/im.test(content)
}
/**
 * Guesses a Content-Type header value for contents that do not declare one.
 *
 * @param content - message contents to inspect
 * @returns the HTML content type when the contents include something shaped
 * like a tag ('<' directly followed by a letter, with a '>' later on),
 * otherwise the plain-text content type
 */
function guessContentType (content: string): string {
  const looksLikeHtml = /<[a-z][\s\S]*>/i.test(content)
  return looksLikeHtml ? 'text/html; charset="UTF-8"' : 'text/plain; charset="UTF-8"'
}
/**
 * Tells whether a string carries no visible content.
 *
 * @param str - value to check
 * @returns true for null, undefined, and strings that are empty or consist of
 * whitespace only
 */
function isEmptyString (str: string | undefined): boolean {
  if (str == null) {
    return true
  }
  return str.trim().length === 0
}