mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-06-05 22:55:58 +00:00
Simplify jsoup sanitizer to reuse from emil
This commit is contained in:
parent
dee697e466
commit
6747a86fea
@ -1,29 +1,18 @@
|
|||||||
package docspell.joex.extract
|
package docspell.joex.extract
|
||||||
|
|
||||||
import org.jsoup.Jsoup
|
import emil.BodyContent
|
||||||
import org.jsoup.nodes._
|
|
||||||
import emil.jsoup._
|
import emil.jsoup._
|
||||||
import scodec.bits.ByteVector
|
import scodec.bits.ByteVector
|
||||||
import java.io.ByteArrayInputStream
|
import java.nio.charset.Charset
|
||||||
import java.nio.charset.{Charset, StandardCharsets}
|
|
||||||
|
|
||||||
object JsoupSanitizer {
|
object JsoupSanitizer {
|
||||||
|
|
||||||
//BIG NOTE: this changes the input document
|
val change =
|
||||||
def apply(doc: Document): Document =
|
BodyClean.whitelistClean(EmailWhitelist.default)
|
||||||
BodyClean.whitelistClean(EmailWhitelist.default)(doc)
|
|
||||||
|
|
||||||
def clean(html: String): String = {
|
def clean(html: String): String =
|
||||||
//note: Jsoup.clean throws away the html head, which removes the
|
BodyClean.modifyContent(change)(BodyContent(html)).asString
|
||||||
//charset if present
|
|
||||||
val doc = Jsoup.parse(html)
|
|
||||||
apply(doc).outerHtml
|
|
||||||
}
|
|
||||||
|
|
||||||
def clean(html: ByteVector, cs: Option[Charset]): ByteVector = {
|
|
||||||
val in = new ByteArrayInputStream(html.toArray)
|
|
||||||
val doc = Jsoup.parse(in, cs.map(_.name).orNull, "")
|
|
||||||
ByteVector.view(apply(doc).outerHtml.getBytes(cs.getOrElse(StandardCharsets.UTF_8)))
|
|
||||||
}
|
|
||||||
|
|
||||||
|
def clean(html: ByteVector, cs: Option[Charset]): ByteVector =
|
||||||
|
BodyClean.modifyContent(change)(BodyContent(html, cs)).bytes
|
||||||
}
|
}
|
||||||
|
@ -35,7 +35,7 @@ object ReadMail {
|
|||||||
mail.body,
|
mail.body,
|
||||||
Some(mail.header),
|
Some(mail.header),
|
||||||
Some(MarkdownBody.makeHtml(markdownCfg)),
|
Some(MarkdownBody.makeHtml(markdownCfg)),
|
||||||
Some(JsoupSanitizer.apply)
|
Some(JsoupSanitizer.change)
|
||||||
).map(makeHtmlBinary[F] _).map(b => Some(b))
|
).map(makeHtmlBinary[F] _).map(b => Some(b))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user