UBERF-11004: Fix mta-hook email content parsing (#9066)

Signed-off-by: Artem Savchenko <armisav@gmail.com>
This commit is contained in:
Artyom Savchenko 2025-05-23 14:40:21 +07:00 committed by GitHub
parent 02f584811f
commit 8ae0b47364
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 572 additions and 102 deletions

View File

@ -16,10 +16,12 @@
import { Request, Response } from 'express'
import { MeasureContext } from '@hcengineering/core'
import { createMessages } from '@hcengineering/mail-common'
import { type MtaMessage, handleMtaHook } from '../handlerMta'
import * as client from '../client'
import { createRestTxOperations } from '@hcengineering/api-client'
import { handleMtaHook } from '../handlerMta'
import * as client from '../client'
import { type MtaMessage } from '../types'
// Mock dependencies
jest.mock('@hcengineering/mail-common', () => ({
createMessages: jest.fn(),
@ -388,4 +390,96 @@ describe('handleMtaHook', () => {
}
}
}
it('should process HTML email correctly', async () => {
  // Arrange: a message whose Content-Type header is text/html and whose content is an HTML document
  const htmlContent = '<html><body><h1>Hello</h1><p>This is an <b>HTML</b> test email</p></body></html>'
  const body = createValidMtaMessage('sender@example.com', ['recipient@example.com'], {
    subject: 'HTML Test Subject',
    contentType: 'text/html; charset=utf-8',
    content: htmlContent
  })
  mockReq = { headers: { 'x-hook-token': 'test-hook-token' }, body }

  // Act
  await handleMtaHook(mockReq as Request, mockRes as Response, mockCtx)

  // Assert: the hook answers 200 / accept
  expect(mockStatus).toHaveBeenCalledWith(200)
  expect(mockSend).toHaveBeenCalledWith({ action: 'accept' })

  // Assert: createMessages receives the parsed sender, recipient, subject and content, with no attachments
  const expectedMessage = expect.objectContaining({
    mailId: expect.any(String),
    from: { email: 'sender@example.com', firstName: 'sender', lastName: 'example.com' },
    to: [{ email: 'recipient@example.com', firstName: 'recipient', lastName: 'example.com' }],
    subject: 'HTML Test Subject',
    content: htmlContent,
    incoming: true
  })
  const expectedAttachments: unknown[] = []
  expect(createMessages).toHaveBeenCalledWith(
    client.baseConfig,
    mockCtx,
    mockTxOperations,
    {},
    {},
    client.mailServiceToken,
    mockLoginInfo,
    expectedMessage,
    expectedAttachments
  )
})
it('should process email plain/text content header', async () => {
  // Arrange: the message headers declare multipart/alternative, but the contents
  // carry their own inline "Content-Type: text/plain" line directly followed by the text
  const textContent = 'This is the plain text version'
  const headers = [
    ['Content-Type', 'multipart/alternative; boundary="boundary-string"'],
    ['Subject', 'Test Email'],
    ['From', 'Sender <sender@example.com>'],
    ['To', 'Recipient <recipient@example.com>']
  ]
  const envelope = {
    from: { address: 'sender@example.com' },
    to: [{ address: 'recipient@example.com' }]
  }
  const message = {
    envelope,
    message: {
      headers,
      contents: `Content-Type: text/plain; charset=utf-8 \r\n${textContent}`
    }
  }
  mockReq = { headers: { 'x-hook-token': 'test-hook-token' }, body: message }

  // Act
  await handleMtaHook(mockReq as Request, mockRes as Response, mockCtx)

  // Assert: the hook answers 200 / accept
  expect(mockStatus).toHaveBeenCalledWith(200)
  expect(mockSend).toHaveBeenCalledWith({ action: 'accept' })

  // Assert: the forwarded content is the bare text, without the inline Content-Type line
  const expectedMessage = expect.objectContaining({
    mailId: expect.any(String),
    from: { email: 'sender@example.com', firstName: 'Sender', lastName: 'example.com' },
    to: [{ email: 'recipient@example.com', firstName: 'Recipient', lastName: 'example.com' }],
    subject: 'Test Email',
    content: textContent,
    incoming: true
  })
  const expectedAttachments: unknown[] = []
  expect(createMessages).toHaveBeenCalledWith(
    client.baseConfig,
    mockCtx,
    mockTxOperations,
    {},
    {},
    client.mailServiceToken,
    mockLoginInfo,
    expectedMessage,
    expectedAttachments
  )
})
})

View File

@ -0,0 +1,311 @@
//
// Copyright © 2025 Hardcore Engineering Inc.
//
// Licensed under the Eclipse Public License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. You may
// obtain a copy of the License at https://www.eclipse.org/legal/epl-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//
// See the License for the specific language governing permissions and
// limitations under the License.
//
import { parseContent, getHeader, removeContentTypeHeader } from '../utils'
import { MeasureContext } from '@hcengineering/core'
import { MtaMessage } from '../types'
import { readEml } from 'eml-parse-js'
// Mock dependencies
// eml-parse-js is auto-mocked; the tests below install their own readEml implementation.
jest.mock('eml-parse-js')
// sanitize-html is replaced by an identity function, so HTML reaches the turndown mock unchanged.
jest.mock('sanitize-html', () => (html: string) => html)
// turndown is replaced by a minimal class whose turndown() only strips tags,
// which keeps the expected "markdown" in the assertions easy to predict.
jest.mock('turndown', () => {
return class TurndownService {
turndown (html: string): string {
return html.replace(/<\/?[^>]+(>|$)/g, '') // Simple HTML tag removal
}
}
})
// The config default export gets a defined (empty) storageConfig, so the
// `config.storageConfig !== undefined` check guarding attachment handling passes.
jest.mock('../config', () => ({
__esModule: true,
default: {
storageConfig: {}
}
}))
describe('utils.ts', () => {
  let mockCtx: MeasureContext

  // Builds a fully typed MtaMessage fixture with a fixed envelope, so the compiler
  // checks the fixture shape (no `as any` escape hatch needed).
  const createMessage = (headers: string[][], contents: string): MtaMessage => ({
    envelope: {
      from: { address: 'sender@example.com' },
      to: [{ address: 'recipient@example.com' }]
    },
    message: { headers, contents }
  })

  // Makes the mocked readEml report a successful parse with the given text/html and no attachments.
  const mockReadEmlResult = (text: string, html: string): void => {
    (readEml as jest.Mock).mockImplementation((content, callback) => {
      callback(null, { text, html, attachments: [] })
    })
  }

  beforeEach(() => {
    jest.clearAllMocks()

    mockCtx = {
      warn: jest.fn(),
      error: jest.fn(),
      info: jest.fn()
    } as unknown as MeasureContext

    // Default mock implementation for readEml
    mockReadEmlResult('Plain text content', '<p>HTML content</p>')
  })

  describe('parseContent', () => {
    it('should handle plain text emails', async () => {
      // Arrange
      const plainTextMessage = createMessage(
        [['Content-Type', 'text/plain; charset=utf-8']],
        'This is a plain text email'
      )

      // Act
      const result = await parseContent(mockCtx, plainTextMessage)

      // Assert: plain text is returned as-is and the EML parser is never involved
      expect(result).toEqual({
        content: 'This is a plain text email',
        attachments: []
      })
      expect(readEml).not.toHaveBeenCalled()
    })

    it('should handle HTML emails', async () => {
      // Arrange
      const htmlMessage = createMessage(
        [['Content-Type', 'text/html; charset=utf-8']],
        '<html><body><p>This is an HTML email</p></body></html>'
      )
      // Mock readEml to return HTML content only
      mockReadEmlResult('', '<p>This is an HTML email</p>')

      // Act
      const result = await parseContent(mockCtx, htmlMessage)

      // Assert
      expect(result).toEqual({
        content: 'This is an HTML email',
        attachments: []
      })
    })

    it('should handle multipart emails with text and HTML parts', async () => {
      // Arrange
      const multipartMessage = createMessage(
        [['Content-Type', 'multipart/alternative; boundary="boundary"']],
        '--boundary\r\nContent-Type: text/plain\r\n\r\nText part\r\n--boundary\r\nContent-Type: text/html\r\n\r\n<p>HTML part</p>\r\n--boundary--'
      )
      // Mock readEml to return both text and HTML content
      mockReadEmlResult('Text part', '<p>HTML part</p>')

      // Act
      const result = await parseContent(mockCtx, multipartMessage)

      // Assert: the HTML part wins over the text part
      expect(result).toEqual({
        content: 'HTML part',
        attachments: []
      })
    })

    it('should throw error when Content-Type header is not found', async () => {
      // Arrange
      const messageWithNoContentType = createMessage([['Subject', 'Test Email']], 'Email content')

      // Act & Assert
      await expect(parseContent(mockCtx, messageWithNoContentType)).rejects.toThrow('Content-Type header not found')
    })
  })

  describe('getHeader', () => {
    it('should return the value of the specified header', () => {
      // Arrange
      const message = createMessage(
        [
          ['Subject', 'Test Email'],
          ['Content-Type', 'text/plain'],
          ['X-Custom-Header', 'Custom Value']
        ],
        'Email content'
      )

      // Act & Assert
      expect(getHeader(message, 'Subject')).toBe('Test Email')
      expect(getHeader(message, 'Content-Type')).toBe('text/plain')
      expect(getHeader(message, 'X-Custom-Header')).toBe('Custom Value')
    })

    it('should be case-insensitive when looking for headers', () => {
      // Arrange
      const message = createMessage(
        [
          ['Subject', 'Test Email'],
          ['Content-Type', 'text/plain']
        ],
        'Email content'
      )

      // Act & Assert
      expect(getHeader(message, 'subject')).toBe('Test Email')
      expect(getHeader(message, 'CONTENT-TYPE')).toBe('text/plain')
    })

    it('should return undefined for non-existent headers', () => {
      // Arrange
      const message = createMessage([['Subject', 'Test Email']], 'Email content')

      // Act & Assert
      expect(getHeader(message, 'X-Not-Exists')).toBeUndefined()
    })

    it('should trim the header value', () => {
      // Arrange
      const message = createMessage([['Subject', ' Test Email ']], 'Email content')

      // Act & Assert
      expect(getHeader(message, 'Subject')).toBe('Test Email')
    })
  })

  describe('removeContentTypeHeader', () => {
    it('should remove Content-Type header from content', () => {
      // Arrange
      const content = 'Content-Type: text/plain; charset=utf-8\r\nHello world'

      // Act
      const result = removeContentTypeHeader(content)

      // Assert
      expect(result).toBe('Hello world')
    })

    it('should handle content with no Content-Type header', () => {
      // Arrange
      const content = 'Hello world'

      // Act
      const result = removeContentTypeHeader(content)

      // Assert
      expect(result).toBe('Hello world')
    })

    it('should handle content with Content-Type header in different case', () => {
      // Arrange
      const content = 'content-type: text/plain; charset=utf-8\r\nHello world'

      // Act
      const result = removeContentTypeHeader(content)

      // Assert
      expect(result).toBe('Hello world')
    })

    it('should handle content with multiple headers', () => {
      // Arrange
      const content =
        'Subject: Test Email\r\nContent-Type: text/plain; charset=utf-8\r\nFrom: test@example.com\r\n\r\nHello world'

      // Act
      const result = removeContentTypeHeader(content)

      // Assert: only the Content-Type line is dropped
      expect(result).toBe('Subject: Test Email\r\nFrom: test@example.com\r\n\r\nHello world')
    })

    it('should handle null or undefined content', () => {
      // Act & Assert
      expect(removeContentTypeHeader(null as any)).toBeNull()
      expect(removeContentTypeHeader(undefined as any)).toBeUndefined()
    })

    it('should handle different line endings', () => {
      // Arrange
      const crlfContent = 'Content-Type: text/plain\r\nHello world'
      const lfContent = 'Content-Type: text/plain\nHello world'
      const crContent = 'Content-Type: text/plain\rHello world'

      // Act & Assert
      expect(removeContentTypeHeader(crlfContent)).toBe('Hello world')
      expect(removeContentTypeHeader(lfContent)).toBe('Hello world')
      expect(removeContentTypeHeader(crContent)).toBe('Hello world')
    })
  })
})

View File

@ -12,44 +12,17 @@
// See the License for the specific language governing permissions and
// limitations under the License.
//
import { createHash, randomUUID } from 'crypto'
import { readEml, ReadedEmlJson } from 'eml-parse-js'
import { createHash } from 'crypto'
import { Request, Response } from 'express'
import TurndownService from 'turndown'
import sanitizeHtml from 'sanitize-html'
import { MeasureContext } from '@hcengineering/core'
import {
type Attachment,
type EmailContact,
type EmailMessage,
createMessages,
getProducer
} from '@hcengineering/mail-common'
import { type EmailContact, type EmailMessage, createMessages, getProducer } from '@hcengineering/mail-common'
import { getClient as getAccountClient } from '@hcengineering/account-client'
import { createRestTxOperations } from '@hcengineering/api-client'
import { mailServiceToken, baseConfig, kvsClient } from './client'
import config from './config'
/**
 * Shape of the message payload handled by the MTA hook.
 */
export interface MtaMessage {
  /** Envelope sender and recipients, each identified only by an address. */
  envelope: {
    from: { address: string }
    to: Array<{ address: string }>
  }
  /** Message headers and content. */
  message: {
    // Each entry is read as a [name, value] pair by getHeader
    headers: string[][]
    contents: string
  }
}
/**
 * Looks up a message header by name, ignoring case.
 *
 * @param mta - message whose `message.headers` entries are read as [name, value] pairs
 * @param header - header name to look for
 * @returns the trimmed value of the first matching header, or undefined when
 *   no entry matches or the matching entry has no value
 */
function getHeader (mta: MtaMessage, header: string): string | undefined {
  const wanted = header.toLowerCase()
  // Headers come from an external payload: tolerate entries without a name instead of throwing
  const entry = mta.message.headers.find((candidate) => candidate[0]?.toLowerCase() === wanted)
  return entry?.[1]?.trim()
}
import { MtaMessage } from './types'
import { getHeader, parseContent } from './utils'
export async function handleMtaHook (req: Request, res: Response, ctx: MeasureContext): Promise<void> {
try {
@ -142,75 +115,6 @@ export async function handleMtaHook (req: Request, res: Response, ctx: MeasureCo
}
}
/**
 * Extracts the message content and attachments from an MTA message.
 *
 * `text/plain` messages are returned as-is. Anything else is parsed as EML
 * (with the Content-Type header prepended to the contents); a non-empty HTML
 * part is sanitized and converted with Turndown and then takes precedence
 * over the text part.
 *
 * @param ctx - measure context, used to log HTML conversion failures
 * @param mta - the MTA message to parse
 * @returns the content plus the named attachments (attachments are only
 *   collected when `config.storageConfig` is defined)
 * @throws Error when the message has no Content-Type header or EML parsing fails
 */
async function parseContent (
  ctx: MeasureContext,
  mta: MtaMessage
): Promise<{ content: string, attachments: Attachment[] }> {
  const contentType = getHeader(mta, 'Content-Type')
  if (contentType === undefined) {
    throw new Error('Content-Type header not found')
  }

  if (contentType.toLowerCase().startsWith('text/plain')) {
    return { content: mta.message.contents, attachments: [] }
  }

  const contents = `Content-Type: ${contentType}\r\n${mta.message.contents}`
  const email = await new Promise<ReadedEmlJson>((resolve, reject) => {
    readEml(contents, (err, json) => {
      if (err !== undefined && err !== null) {
        reject(err)
      } else if (json === undefined) {
        reject(new Error('Failed to parse email'))
      } else {
        resolve(json)
      }
    })
  })

  let content = email.text ?? ''
  let isMarkdown = false
  // Only prefer HTML when there is some: an empty HTML part must not wipe out the text part
  if (email.html != null && email.html.trim() !== '') {
    try {
      const html = sanitizeHtml(email.html)
      const tds = new TurndownService()
      content = tds.turndown(html)
      isMarkdown = true
    } catch (error) {
      ctx.warn('Failed to parse html content', { error })
    }
  }

  const attachments: Attachment[] = []
  if (config.storageConfig !== undefined) {
    for (const a of email.attachments ?? []) {
      if (a.name === undefined || a.name.length === 0) {
        // EML parser returns attachments with empty name for parts of content
        // that do not have "Content-Disposition: attachment" e.g. for part
        // Content-Type: text/calendar; charset="UTF-8"; method=REQUEST
        continue
      }
      const attachment: Attachment = {
        id: randomUUID(),
        name: a.name,
        data: Buffer.from(a.data64, 'base64'),
        contentType: a.contentType.split(';')[0].trim()
      }
      attachments.push(attachment)

      // For inline images, replace the CID references with the blob id
      if (isMarkdown && a.inline && a.id !== undefined) {
        // Escape the CID: it is arbitrary sender-provided text, not a regex
        const cid = a.id.replace(/[<>]/g, '').replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
        // The alt text must not run across "](" so a match cannot start at an
        // earlier image on the same line and swallow it
        const cidImage = new RegExp(`!\\[(?:(?!\\]\\().)*\\]\\(cid:${cid}\\)`, 'g')
        const replacement = `![${a.name}](cid:${attachment.id})`
        // Replacer function keeps "$" sequences in the attachment name literal
        content = content.replaceAll(cidImage, () => replacement)
      }
    }
  }
  return { content, attachments }
}
function getEmailContact (email: string): EmailContact {
const parts = stripTags(email).split('@')
return {

View File

@ -0,0 +1,29 @@
//
// Copyright © 2025 Hardcore Engineering Inc.
//
// Licensed under the Eclipse Public License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. You may
// obtain a copy of the License at https://www.eclipse.org/legal/epl-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//
// See the License for the specific language governing permissions and
// limitations under the License.
//
/** A single envelope participant; only its address is declared. */
export interface MtaAddress {
  address: string
}

/**
 * Shape of the message payload handled by the MTA hook.
 */
export interface MtaMessage {
  /** Envelope sender and recipients. */
  envelope: {
    from: MtaAddress
    to: MtaAddress[]
  }
  /** Message headers and content. */
  message: {
    // Each entry is read as a [name, value] pair by getHeader
    headers: string[][]
    contents: string
  }
}

View File

@ -0,0 +1,132 @@
//
// Copyright © 2025 Hardcore Engineering Inc.
//
// Licensed under the Eclipse Public License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. You may
// obtain a copy of the License at https://www.eclipse.org/legal/epl-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//
// See the License for the specific language governing permissions and
// limitations under the License.
//
import { randomUUID } from 'crypto'
import { readEml, ReadedEmlJson } from 'eml-parse-js'
import TurndownService from 'turndown'
import sanitizeHtml from 'sanitize-html'
import { MeasureContext } from '@hcengineering/core'
import { type Attachment } from '@hcengineering/mail-common'
import { MtaMessage } from './types'
import config from './config'
/**
 * Extracts the message content and attachments from an MTA message.
 *
 * `text/plain` messages are returned as-is. Anything else is parsed through
 * getEmailContent; a non-empty HTML part is sanitized and converted with
 * Turndown and then takes precedence over the text part.
 *
 * @param ctx - measure context, used for logging
 * @param mta - the MTA message to parse
 * @returns the content plus the named attachments (attachments are only
 *   collected when `config.storageConfig` is defined)
 * @throws Error when the message has no Content-Type header or EML parsing fails
 */
export async function parseContent (
  ctx: MeasureContext,
  mta: MtaMessage
): Promise<{ content: string, attachments: Attachment[] }> {
  // TODO: UBERF-11029 - remove this logging after testing
  ctx.info('Parsing email content', { content: mta.message.contents })
  const contentType = getHeader(mta, 'Content-Type')
  if (contentType === undefined) {
    throw new Error('Content-Type header not found')
  }

  if (contentType.toLowerCase().startsWith('text/plain')) {
    return { content: mta.message.contents, attachments: [] }
  }

  const email = await getEmailContent(mta.message.contents, contentType)

  let content = email.text ?? ''
  let isMarkdown = false
  // Only prefer HTML when there is some: an empty HTML part must not wipe out
  // the text part (including the fallback text produced by getEmailContent)
  if (email.html != null && email.html.trim() !== '') {
    try {
      const html = sanitizeHtml(email.html)
      const tds = new TurndownService()
      content = tds.turndown(html)
      isMarkdown = true
    } catch (error) {
      ctx.warn('Failed to parse html content', { error })
    }
  }

  const attachments: Attachment[] = []
  if (config.storageConfig !== undefined) {
    for (const a of email.attachments ?? []) {
      if (a.name === undefined || a.name.length === 0) {
        // EML parser returns attachments with empty name for parts of content
        // that do not have "Content-Disposition: attachment" e.g. for part
        // Content-Type: text/calendar; charset="UTF-8"; method=REQUEST
        continue
      }
      const attachment: Attachment = {
        id: randomUUID(),
        name: a.name,
        data: Buffer.from(a.data64, 'base64'),
        contentType: a.contentType.split(';')[0].trim()
      }
      attachments.push(attachment)

      // For inline images, replace the CID references with the blob id
      if (isMarkdown && a.inline && a.id !== undefined) {
        // Escape the CID: it is arbitrary sender-provided text, not a regex
        const cid = a.id.replace(/[<>]/g, '').replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
        // The alt text must not run across "](" so a match cannot start at an
        // earlier image on the same line and swallow it
        const cidImage = new RegExp(`!\\[(?:(?!\\]\\().)*\\]\\(cid:${cid}\\)`, 'g')
        const replacement = `![${a.name}](cid:${attachment.id})`
        // Replacer function keeps "$" sequences in the attachment name literal
        content = content.replaceAll(cidImage, () => replacement)
      }
    }
  }
  return { content, attachments }
}
/**
 * Looks up a message header by name, ignoring case.
 *
 * @param mta - message whose `message.headers` entries are read as [name, value] pairs
 * @param header - header name to look for
 * @returns the trimmed value of the first matching header, or undefined when
 *   no entry matches or the matching entry has no value
 */
export function getHeader (mta: MtaMessage, header: string): string | undefined {
  const wanted = header.toLowerCase()
  // Headers come from an external payload: tolerate entries without a name instead of throwing
  const entry = mta.message.headers.find((candidate) => candidate[0]?.toLowerCase() === wanted)
  return entry?.[1]?.trim()
}
/**
 * Tells whether the content opens with a header block that contains a
 * Content-Type header. Scanning stops at the first blank line, MIME boundary
 * line or non-header line, so Content-Type lines belonging to multipart parts
 * or appearing in body text are not counted.
 */
function hasLeadingContentTypeHeader (content: string): boolean {
  const lineBreak = /\r\n|\n|\r/
  const foldedLine = /^[ \t]/
  const headerLine = /^[^\s:]+:/
  const contentTypeLine = /^content-type\s*:/i

  // Look at the text before the first blank line only, so large bodies are not split line by line
  const blankLineAt = content.search(/\r\n\r\n|\n\n|\r\r/)
  const head = blankLineAt === -1 ? content : content.slice(0, blankLineAt)

  for (const line of head.split(lineBreak)) {
    if (line.trim() === '') {
      return false
    }
    if (foldedLine.test(line)) {
      // Continuation of the previous header
      continue
    }
    if (line.startsWith('--') || !headerLine.test(line)) {
      return false
    }
    if (contentTypeLine.test(line)) {
      return true
    }
  }
  return false
}

/**
 * Parses the MTA content with the EML parser.
 *
 * The Content-Type taken from the message headers is prepended unless the
 * content already starts with its own Content-Type header. When the parser
 * yields neither text nor HTML, the content with its Content-Type line removed
 * is returned as the text.
 *
 * @param mtaContent - content as received from the MTA
 * @param contentType - value of the message's Content-Type header
 * @returns the parsed email; an empty result when `mtaContent` is null/undefined
 * @throws Error when the parser reports an error or returns no result
 */
async function getEmailContent (mtaContent: string, contentType: string): Promise<ReadedEmlJson> {
  if (mtaContent == null) {
    return {
      text: '',
      html: '',
      attachments: []
    } as any
  }
  // Checking for "Content-Type" anywhere would also match part headers inside a
  // multipart body, and the outer header carrying the boundary would then be lost
  const content = hasLeadingContentTypeHeader(mtaContent)
    ? mtaContent
    : `Content-Type: ${contentType}\r\n${mtaContent}`
  const email = await new Promise<ReadedEmlJson>((resolve, reject) => {
    readEml(content, (err, json) => {
      if (err !== undefined && err !== null) {
        // The parser's error is not guaranteed to be an Error instance
        const reason = err instanceof Error ? err.message : String(err)
        reject(new Error(`Email parsing error: ${reason}`))
      } else if (json === undefined) {
        reject(new Error('Email parser returned undefined result'))
      } else {
        resolve(json)
      }
    })
  })
  if (isEmptyString(email.text) && isEmptyString(email.html)) {
    return {
      ...email,
      text: removeContentTypeHeader(mtaContent)
    }
  }
  return email
}
/**
 * Removes the first Content-Type header line (matched case-insensitively at the
 * start of any line) from the content, together with its line terminator.
 *
 * @param content - text that may contain a Content-Type header line
 * @returns the content without that line; null/undefined input is returned unchanged
 */
export function removeContentTypeHeader (content: string): string {
  if (content == null) {
    return content
  }
  // The line ends at CRLF, LF or CR, or at the very end of the input
  // (`(?![\s\S])`), so a header that is the last line is removed as well
  const contentTypeRegex = /^Content-Type:.*(?:\r\n|\n|\r|(?![\s\S]))/im
  return content.replace(contentTypeRegex, '')
}
/**
 * Reports whether a string carries no visible content.
 *
 * @param str - value to inspect
 * @returns true for null, undefined, the empty string or whitespace-only text
 */
function isEmptyString (str: string | undefined): boolean {
  const normalized = (str ?? '').trim()
  return normalized.length === 0
}