import { convertToHtml, images } from 'mammoth'
import { contentType } from 'mime-types'
import { extname } from 'path'
import { DocumentExtractor } from './types'
import { convertString } from './html'
/**
 * Extractor for Word (.docx) documents: converts the document to HTML with
 * mammoth and passes that HTML through `convertString`.
 */
export const docxExtractor: DocumentExtractor = {
  /**
   * Decides whether this extractor should handle the given document.
   *
   * @param fileName - name of the document; only its extension is used, as a fallback
   * @param type - content type of the document, or `false` when it is unknown
   * @param data - document contents (not inspected here)
   * @returns `true` when either the content type or the file extension identifies a docx file
   */
  async isMatch (fileName: string, type: string | false, data): Promise<boolean> {
    if (type !== false && isType(type)) {
      return true
    }
    // Try detect by fileName. This must also run when the content type is
    // unknown (`false`), which is exactly when the fallback is needed.
    // Only the extension is passed: contentType() treats any argument that
    // contains '/' as a content type, so a name with a directory part would
    // never be resolved through its extension.
    const guessed = contentType(extname(fileName))
    return guessed !== false && isType(guessed)
  },

  /**
   * Converts the document to HTML and returns the result of `convertString`
   * applied to that HTML.
   *
   * @param fileName - name of the document (unused)
   * @param type - content type of the document (unused)
   * @param data - document contents, handed to mammoth as `buffer`
   * @returns whatever `convertString` produces for the generated HTML
   */
  // NOTE(review): return type assumes convertString yields a string — confirm against ./html
  async extract (fileName: string, type: string, data): Promise<string> {
    const htmlData = await convertToHtml(
      { buffer: data },
      {
        // Image contents are never used: emit an empty `src` without
        // decoding each embedded image to base64 first.
        convertImage: images.imgElement(async () => ({ src: '' }))
      }
    )
    return convertString(htmlData.value)
  }
}
/** Media type registered for Word (.docx) documents. */
const DOCX_MIME_TYPE = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'

/**
 * Tells whether a content type denotes a docx document.
 *
 * Media types are case-insensitive and may carry parameters
 * (e.g. `; charset=binary`), so only the part before the first `;` is
 * compared, after trimming and lower-casing.
 *
 * @param type - content type string, optionally with parameters
 * @returns `true` when the media type is the docx media type
 */
function isType (type: string): boolean {
  const mediaType = type.split(';')[0].trim().toLowerCase()
  return mediaType === DOCX_MIME_TYPE
}