mirror of
https://github.com/hcengineering/platform.git
synced 2025-06-09 09:20:54 +00:00
UBERF-11067: Fix html to md conversion for complex links
Signed-off-by: Artem Savchenko <armisav@gmail.com>
This commit is contained in:
parent
02f584811f
commit
04e9e1cebc
File diff suppressed because one or more lines are too long
120
services/mail/mail-common/src/__tests__/md.test.ts
Normal file
120
services/mail/mail-common/src/__tests__/md.test.ts
Normal file
@ -0,0 +1,120 @@
|
||||
//
|
||||
// Copyright © 2025 Hardcore Engineering Inc.
|
||||
//
|
||||
// Licensed under the Eclipse Public License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License. You may
|
||||
// obtain a copy of the License at https://www.eclipse.org/legal/epl-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
//
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
|
||||
import { MeasureContext } from '@hcengineering/core'
|
||||
import fs from 'fs'
|
||||
import path from 'path'
|
||||
|
||||
import { getMdContent } from '../utils'
|
||||
import { EmailMessage } from '../types'
|
||||
|
||||
describe('getMdContent', () => {
  // Sanitized meeting invitation used as the complex HTML sample
  const fixtureFile = path.join(__dirname, '__mocks__', 'meetingMail.html')

  let ctxStub: MeasureContext
  let invitationHtml: string

  /**
   * Assembles a test message. Only the sender, subject and both bodies vary
   * between cases; the id, recipient list and copy list are fixed.
   */
  const composeEmail = (
    from: Record<string, string>,
    subject: string,
    content: string | undefined,
    textContent: string
  ): EmailMessage => {
    const message = {
      mailId: 'test-mail-id',
      from,
      to: [{ email: 'recipient@example.com', firstName: 'Recipient', lastName: '' }],
      copy: [],
      subject,
      content,
      textContent,
      sendOn: Date.now()
    }
    return message as any
  }

  beforeEach(() => {
    // Fresh logging stubs for every case so call counts never leak between tests
    ctxStub = {
      info: jest.fn(),
      error: jest.fn(),
      warn: jest.fn()
    } as unknown as MeasureContext

    // Re-read the HTML fixture from disk
    invitationHtml = fs.readFileSync(fixtureFile, 'utf8')
  })

  it('should handle complex HTML emails with large guest lists', () => {
    const email = composeEmail(
      { email: 'jane.smith@example.com', firstName: 'Jane', lastName: 'Smith' },
      'Year End Meeting 2023',
      invitationHtml,
      'Simple text content'
    )

    // HTML -> Markdown
    const result = getMdContent(ctxStub, email)

    // Headline details of the invitation must be present in the output
    for (const fragment of ['Join Google Meet', 'When', 'Thursday, Dec 28, 2023', 'Guests', 'Jane Smith', 'organizer']) {
      expect(result).toContain(fragment)
    }

    // The conversion must not log any warning
    expect(ctxStub.warn).not.toHaveBeenCalled()

    // Meeting link from the sanitized fixture is preserved
    expect(result).toContain('meet.google.com/abc-defg-hij')

    // Dial-in details are present
    for (const fragment of ['+1 234 567-8910', 'PIN: 1234567891011']) {
      expect(result).toContain(fragment)
    }

    // Output is markdown: none of these HTML tags may remain
    for (const tag of ['<html>', '<body>', '<table']) {
      expect(result).not.toContain(tag)
    }

    // Guest entries are present
    for (const guest of ['John Doe', 'Sarah Jones']) {
      expect(result).toContain(guest)
    }
  })

  it('should handle undefined content gracefully', () => {
    const email = composeEmail(
      { email: 'sender@example.com', firstName: 'Sender', lastName: '' },
      'Test subject',
      undefined,
      'Text only content'
    )

    // With no HTML body the plain-text body is expected back unchanged
    const result = getMdContent(ctxStub, email)

    expect(result).toBe('Text only content')
    expect(ctxStub.warn).not.toHaveBeenCalled()
  })

  it('should handle empty content properly', () => {
    const email = composeEmail(
      { email: 'sender@example.com', firstName: 'Sender', lastName: '' },
      'Test subject',
      '',
      ''
    )

    // Both bodies empty: an empty string is expected and no warning
    const result = getMdContent(ctxStub, email)

    expect(result).toBe('')
    expect(ctxStub.warn).not.toHaveBeenCalled()
  })
})
|
@ -23,6 +23,24 @@ export function getMdContent (ctx: MeasureContext, email: EmailMessage): string
|
||||
try {
|
||||
const html = sanitizeHtml(email.content)
|
||||
const tds = new TurndownService()
|
||||
|
||||
// Custom conversion for every <a> element. Produces `[text](href)` or
// `[text](href "title")`, with blank lines removed from the link text.
tds.addRule('links', {
  filter: 'a',
  replacement: function (content, node: Node) {
    try {
      const element = node as HTMLElement
      const href = element.getAttribute('href')
      if (href === null || href === '') {
        // The 'a' filter also matches anchors without a target; emit their text
        // as-is instead of a broken `[text](null)` link
        return content
      }
      // Trim content to prevent empty lines inside links
      const trimmedContent = content.trim().replace(/\n\s*\n/g, ' ')
      // A markdown link title must be separated from the URL by a space and quoted;
      // appending it directly to the href would corrupt the link target
      const rawTitle = (element.getAttribute('title') ?? '').replace(/\s+/g, ' ').trim()
      const title = rawTitle === '' ? '' : ` "${rawTitle.replace(/"/g, '\\"')}"`
      return `[${trimmedContent}](${href}${title})`
    } catch (error: unknown) {
      // Fall back to the plain link text so one bad anchor does not abort the whole conversion
      ctx.warn('Failed to parse link', { error: error instanceof Error ? error.message : String(error) })
      return content
    }
  }
})
|
||||
|
||||
return tds.turndown(html)
|
||||
} catch (error) {
|
||||
ctx.warn('Failed to parse html content', { error })
|
||||
|
Loading…
Reference in New Issue
Block a user