From 91849e55136c5d55a5a722faf3db56955fdb5921 Mon Sep 17 00:00:00 2001 From: eikek <eike.kettner@posteo.de> Date: Mon, 7 Nov 2022 09:37:16 +0100 Subject: [PATCH] Allow class attributes in sanitized html for emails --- .../joex/extract/JsoupSanitizer.scala | 4 +- .../joex/extract/JsoupSanitizerTest.scala | 133 ++++++++++++++++++ 2 files changed, 136 insertions(+), 1 deletion(-) create mode 100644 modules/joex/src/test/scala/docspell/joex/extract/JsoupSanitizerTest.scala diff --git a/modules/joex/src/main/scala/docspell/joex/extract/JsoupSanitizer.scala b/modules/joex/src/main/scala/docspell/joex/extract/JsoupSanitizer.scala index dc8817ed..ca675f02 100644 --- a/modules/joex/src/main/scala/docspell/joex/extract/JsoupSanitizer.scala +++ b/modules/joex/src/main/scala/docspell/joex/extract/JsoupSanitizer.scala @@ -13,9 +13,11 @@ import emil.jsoup._ import scodec.bits.ByteVector object JsoupSanitizer { + private val whitelist = + EmailWhitelist.default.addAttributes(":all", "class") private val change = - BodyClean.whitelistClean(EmailWhitelist.default) + BodyClean.whitelistClean(whitelist) def clean(html: String): String = BodyClean.modifyContent(change)(BodyContent(html)).asString diff --git a/modules/joex/src/test/scala/docspell/joex/extract/JsoupSanitizerTest.scala b/modules/joex/src/test/scala/docspell/joex/extract/JsoupSanitizerTest.scala new file mode 100644 index 00000000..c893ef8a --- /dev/null +++ b/modules/joex/src/test/scala/docspell/joex/extract/JsoupSanitizerTest.scala @@ -0,0 +1,133 @@ +/* + * Copyright 2020 Eike K. & Contributors + * + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +package docspell.joex.extract + +import java.nio.charset.StandardCharsets + +import munit.FunSuite +import org.jsoup.Jsoup + +class JsoupSanitizerTest extends FunSuite { + + test("keep interesting tags and attributes") { + val cleaned = JsoupSanitizer.clean(html) + val doc = Jsoup.parse(cleaned) + + assertEquals(doc.head().getElementsByTag("link").size(), 1) + assertEquals(doc.head().getElementsByTag("style").size(), 1) + assertEquals(doc.charset(), StandardCharsets.UTF_8) + assertEquals(doc.head().select("meta[charset]").attr("charset").toUpperCase, "UTF-8") + assert(doc.select("*[class]").size() > 0) + assert(doc.select("*[style]").size() > 0) + } + + def html = + """ + |<!DOCTYPE html> + |<html lang="en"> + | <head> + | <meta charset="utf-8" /> + | <meta name="viewport" content="width=device-width, initial-scale=1" /> + | <title>A simple, clean, and responsive HTML invoice template</title> + | <link rel="icon" href="./images/favicon.png" type="image/x-icon" /> + | <style> + | body { + | font-family: 'Helvetica Neue', 'Helvetica', Helvetica, Arial, sans-serif; + | } + | body h1 { + | font-weight: 300; + | } + | body h3 { + | font-weight: 300; + | margin-top: 10px; + | color: #555; + | } + | body a { + | color: #06f; + | } + | .invoice-box { + | max-width: 800px; + | margin: auto; + | padding: 30px; + | border: 1px solid #eee; + | box-shadow: 0 0 10px rgba(0, 0, 0, 0.15); + | font-size: 16px; + | line-height: 24px; + | font-family: 'Helvetica Neue', 'Helvetica', Helvetica, Arial, sans-serif; + | color: #555; + | } + | </style> + | </head> + | <body> + | <h1>Some html template for an invoice</h1> + | <h3>It is something simple.</h3> + | <div class="invoice-box"> + | <table> + | <tr class="top"> + | <td colspan="2"> + | <table> + | <tr> + | <td class="title"> + | <img src="./images/logo.png" alt="Company logo" style="width: 100%; max-width: 300px" /> + | </td> + | <td> + | Invoice #: 123<br /> + | Created: January 1, 2015<br /> + | Due: February 1, 2015 + | </td> + | </tr> + | </table> + | </td> + | </tr> + | <tr class="information"> + | <td colspan="2"> + | <table style="color: black;"> + | <tr> + | <td> + | Company, Inc.<br /> + | 456 Rosewood Road<br /> + | Flowerville, MI 12345 + | </td> + | <td> + | Acme Corp.<br /> + | John Doe<br /> + | john@example.com + | </td> + | </tr> + | </table> + | </td> + | </tr> + | <tr class="heading"> + | <td>Payment Method</td> + | <td>Check #</td> + | </tr> + | <tr class="details"> + | <td>Check</td> + | <td>1000</td> + | </tr> + | <tr class="heading"> + | <td>Item</td> + | <td>Price</td> + | </tr> + | <tr class="item"> + | <td>Website design</td> + | <td>$300.00</td> + | </tr> + | <tr class="item last"> + | <td>Domain name (1 year)</td> + | <td>$10.00</td> + | </tr> + | <tr class="total"> + | <td></td> + | <td>Total: $385.00</td> + | </tr> + | </table> + | </div> + | </body> + |</html> + |""".stripMargin +}