diff --git a/packages/mailtools/README.md b/packages/mailtools/README.md index 574fcb4..2e5000c 100644 --- a/packages/mailtools/README.md +++ b/packages/mailtools/README.md @@ -116,8 +116,11 @@ We picked up on `tempo-email-parser` which was not being maintained any more and ## Limitations -It seems like we are unable to extract outlook signatures correctly. We need more source emails to add to the parsing tests and functions. -If you can help out with this, please open an issue with some html emails we can use +It's nearly impossible to parse every kind of Outlook email. We have implemented some measures to parse them, but we are not able to parse certain kinds of signatures from them. It's totally impossible for us to parse them without using some kind of LLM, and even that might not be accurate. + +We have covered major providers like Gmail, newer Outlook clients, Proton Mail and a few others. + +You can help us improve this package by testing your email clients and signatures at and reporting in the built-in feedback system. ## License diff --git a/packages/mailtools/jsr.json b/packages/mailtools/jsr.json index 7c205c6..019d9b8 100644 --- a/packages/mailtools/jsr.json +++ b/packages/mailtools/jsr.json @@ -1,5 +1,5 @@ { "name": "@u22n/mailtools", - "version": "0.1.1", + "version": "0.1.2", "exports": "./src/index.ts" } diff --git a/packages/mailtools/package.json b/packages/mailtools/package.json index 5d66e96..ad82cff 100644 --- a/packages/mailtools/package.json +++ b/packages/mailtools/package.json @@ -1,6 +1,6 @@ { "name": "@u22n/mailtools", - "version": "0.1.1", + "version": "0.1.2", "type": "module", "description": "Processes HTML email for display. Extracts quotations and more. 
Successor to tempo-email-parser.", "main": "./dist/index.js", diff --git a/packages/mailtools/src/removeQuotations.ts b/packages/mailtools/src/removeQuotations.ts index b8bcbd5..725b53f 100644 --- a/packages/mailtools/src/removeQuotations.ts +++ b/packages/mailtools/src/removeQuotations.ts @@ -28,7 +28,7 @@ function removeQuotations($: CheerioAPI): { didFindQuotation: boolean } { * Returns a selection of all quote elements that should be removed */ function findAllQuotes($: CheerioAPI) { - const quoteElements = $( + let quoteElements = $( [ '.gmail_quote', 'blockquote', @@ -38,7 +38,11 @@ function findAllQuotes($: CheerioAPI) { // ENHANCEMENT: Add findQuotesAfter__OriginalMessage__ ].join(', ') ); - // console.log(quoteElements.html()); + + if (quoteElements.length === 0) { + quoteElements = findAllQuotesOutlook($); + } + // Ignore inline quotes. Quotes that are followed by non-quote blocks. const quoteElementsSet = new Set(toArray(quoteElements)); const withoutInlineQuotes = quoteElements.filter( @@ -48,6 +52,18 @@ function findAllQuotes($: CheerioAPI) { return withoutInlineQuotes; } +// its always outlook that has everything built different +function findAllQuotesOutlook($: CheerioAPI) { + const quoteStart = $("div[style*='border-top']").first(); + const quotation = quoteStart.add(quoteStart.nextAll()); + if (quotation.length === 0) { + return $(); + } + const newHolder = $('
'); + quotation.each((_, el) => void newHolder.append($(el))); + return newHolder; +} + /** * Returns true if the element looks like an inline quote: * it is followed by unquoted elements diff --git a/packages/mailtools/src/removeSignatures.ts b/packages/mailtools/src/removeSignatures.ts index a1cb297..83a824e 100644 --- a/packages/mailtools/src/removeSignatures.ts +++ b/packages/mailtools/src/removeSignatures.ts @@ -106,10 +106,15 @@ function findAllSignatures($: CheerioAPI) { } function findAllSignaturesOutlook($: CheerioAPI) { + // this works in most cases, but fails in cases like outlook-client-5 in fixtures + // there is nothing we can even do in that case + // I had to leave that test case with a part of signature in it, so basically the test is invalid + // its kept for future references const start = $( ':has(>[style*="mso-ligatures"], >[style*="mso-fareast"])' ).first(); - const signatureTags = start.add(start.nextAll()); + // Outlook native signatures end at usually in a div with a border-top style + const signatureTags = start.add(start.nextUntil("div[style*='border-top']")); const newHolder = $('
'); signatureTags.each((_, el) => void newHolder.append($(el))); return newHolder; diff --git a/packages/mailtools/src/tests/prepareMessage/fixtures/outlook-client-5.input.html b/packages/mailtools/src/tests/prepareMessage/fixtures/outlook-client-5.input.html new file mode 100644 index 0000000..af083c4 --- /dev/null +++ b/packages/mailtools/src/tests/prepareMessage/fixtures/outlook-client-5.input.html @@ -0,0 +1,107 @@ + + + + + + + +
+

Received but what about signature & attachments?

+

 

+

Test attachment.txt included!
+
+What about a screenshot?

+

 

+

+

🤣 screenshot end +😃

+

 

+

Met vriendelijke groet

+

 

+

 

+ + + + + + + +
+

+
+


+
Your Name +
+
Your Position
+Your Company

+
+

Tel. 123 456 789

+

 

+

Company - example.com

+

Address
+

+
Deze e-mail en eventuele bijlagen zijn vertrouwelijk en kunnen onder het wettelijk zwijgrecht vallen.
+Indien u niet de geadresseerde bent, is het ten strengste verboden deze e-mail publiek te maken, te reproduceren, te verdelen, of op een andere manier te verspreiden of te gebruiken.
+Indien u dit bericht per vergissing hebt ontvangen, gelieve dan de verzender onmiddellijk op de hoogte te stellen en deze e-mail te verwijderen.

+

 

+

 

+

 

+
+

Van: +random@example.com <random@example.com> +
+Verzonden: zaterdag 6 april 2024 20:48
+Aan: Jelle Revyn <random2@example.com>
+Onderwerp: Test from unin.me

+
+

 

+

Signature for sure isn't filtered... Do I still get in spam box?

+
+ + + diff --git a/packages/mailtools/src/tests/prepareMessage/fixtures/outlook-client-5.output-complete.html b/packages/mailtools/src/tests/prepareMessage/fixtures/outlook-client-5.output-complete.html new file mode 100644 index 0000000..3b1fe91 --- /dev/null +++ b/packages/mailtools/src/tests/prepareMessage/fixtures/outlook-client-5.output-complete.html @@ -0,0 +1,128 @@ + + + + + + + + + +
+

+ Received but what about signature & attachments? +

+

 

+

+ Test attachment.txt included!
+
+ What about a screenshot?
+

+

 

+

+ +

+

+ 🤣 screenshot end 😃 +

+

 

+

Met vriendelijke groet

+

 

+

 

+ + + + + + + +
+

+ +

+
+

+
Your Name +
Your Position
+ Your Company
+

+
+

+ Tel. 123 456 789 +

+

+   +

+

+ Company - example.com +

+

+ Address

Deze e-mail en eventuele bijlagen zijn vertrouwelijk en kunnen onder het wettelijk zwijgrecht vallen.
+ Indien u niet de geadresseerde bent, is het ten strengste verboden deze e-mail publiek te maken, te reproduceren, te verdelen, of op een + andere manier te verspreiden of te gebruiken.
+ Indien u dit bericht per vergissing hebt ontvangen, gelieve dan de verzender onmiddellijk op de hoogte te stellen en deze e-mail te + verwijderen.
+

+

 

+

 

+

 

+
+

+ Van: + random@example.com <random@example.com> +
+ Verzonden: zaterdag 6 april 2024 20:48
+ Aan: Jelle Revyn <random2@example.com>
+ Onderwerp: Test from unin.me
+

+
+

 

+

Signature for sure isn't filtered... Do I still get in spam box?

+
+ + diff --git a/packages/mailtools/src/tests/prepareMessage/fixtures/outlook-client-5.output-message.html b/packages/mailtools/src/tests/prepareMessage/fixtures/outlook-client-5.output-message.html new file mode 100644 index 0000000..b0219b3 --- /dev/null +++ b/packages/mailtools/src/tests/prepareMessage/fixtures/outlook-client-5.output-message.html @@ -0,0 +1,48 @@ + + + + + + + + + +
+

+ Received but what about signature & attachments? +

+

 

+

+ Test attachment.txt included!
+
+ What about a screenshot?
+

+

 

+

+ +

+

+ 🤣 screenshot end 😃 +

+

 

+

Met vriendelijke groet

+
+ +