diff --git a/modules/extract/src/main/scala/docspell/extract/Extraction.scala b/modules/extract/src/main/scala/docspell/extract/Extraction.scala
index d5604499..54d41a09 100644
--- a/modules/extract/src/main/scala/docspell/extract/Extraction.scala
+++ b/modules/extract/src/main/scala/docspell/extract/Extraction.scala
@@ -7,6 +7,7 @@ import docspell.extract.ocr.{OcrType, TextExtract}
 import docspell.extract.odf.{OdfExtract, OdfType}
 import docspell.extract.poi.{PoiExtract, PoiType}
 import docspell.extract.rtf.RtfExtract
+import docspell.extract.internal.Text
 import fs2.Stream
 import docspell.files.TikaMimetype
 import docspell.files.ImageSize
@@ -38,23 +39,30 @@ object Extraction {
           case MimeType.PdfMatch(_) =>
             PdfExtract
               .get(data, blocker, lang, cfg.pdf.minTextLen, cfg.ocr, logger)
+              .map(_.map(_.value))
               .map(ExtractResult.fromEither)
 
           case PoiType(mt) =>
-            PoiExtract.get(data, mt).map(ExtractResult.fromEither)
+            PoiExtract
+              .get(data, mt)
+              .map(_.map(_.value))
+              .map(ExtractResult.fromEither)
 
           case RtfExtract.rtfType =>
-            RtfExtract.get(data).map(ExtractResult.fromEither)
+            RtfExtract.get(data).map(_.map(_.value)).map(ExtractResult.fromEither)
 
           case OdfType(_) =>
-            OdfExtract.get(data).map(ExtractResult.fromEither)
+            OdfExtract
+              .get(data)
+              .map(_.map(_.value))
+              .map(ExtractResult.fromEither)
 
           case OcrType(mt) =>
             val doExtract = TextExtract
               .extractOCR(data, blocker, logger, lang.iso3, cfg.ocr)
               .compile
               .lastOrError
-              .map(_.trim)
+              .map(_.value)
               .attempt
               .map(ExtractResult.fromEither)
 
@@ -85,13 +93,16 @@ object Extraction {
               .info(
                 s"File detected as ${OdfType.container}. Try to read as OpenDocument file."
               ) *>
-              OdfExtract.get(data).map(ExtractResult.fromEither)
+              OdfExtract
+                .get(data)
+                .map(_.map(_.value))
+                .map(ExtractResult.fromEither)
 
           case MimeType.NonHtmlText(mt) =>
             val cs = mt.charsetOrUtf8
             logger.info(s"File detected as ${mt.asString}. Returning itself as text.") *>
               data.through(Binary.decode(cs)).foldMonoid.compile.last.map { txt =>
-                ExtractResult.success(txt.getOrElse("").trim)
+                ExtractResult.success(Text(txt).value)
               }
 
           case mt =>
diff --git a/modules/extract/src/main/scala/docspell/extract/PdfExtract.scala b/modules/extract/src/main/scala/docspell/extract/PdfExtract.scala
index 5eaad5f1..bcdd0226 100644
--- a/modules/extract/src/main/scala/docspell/extract/PdfExtract.scala
+++ b/modules/extract/src/main/scala/docspell/extract/PdfExtract.scala
@@ -6,6 +6,7 @@ import fs2.Stream
 import docspell.common.{Language, Logger}
 import docspell.extract.ocr.{OcrConfig, TextExtract}
 import docspell.extract.pdfbox.PdfboxExtract
+import docspell.extract.internal.Text
 
 object PdfExtract {
 
@@ -16,12 +17,12 @@ object PdfExtract {
       stripMinLen: Int,
       ocrCfg: OcrConfig,
       logger: Logger[F]
-  ): F[Either[Throwable, String]] = {
+  ): F[Either[Throwable, Text]] = {
 
     val runOcr =
       TextExtract.extractOCR(in, blocker, logger, lang.iso3, ocrCfg).compile.lastOrError
 
-    def chooseResult(ocrStr: String, strippedStr: String) =
+    def chooseResult(ocrStr: Text, strippedStr: Text) =
       if (ocrStr.length > strippedStr.length)
         logger.info(
           s"Using OCR text, as it is longer (${ocrStr.length} > ${strippedStr.length})"
diff --git a/modules/extract/src/main/scala/docspell/extract/internal/Text.scala b/modules/extract/src/main/scala/docspell/extract/internal/Text.scala
new file mode 100644
index 00000000..a42cb89e
--- /dev/null
+++ b/modules/extract/src/main/scala/docspell/extract/internal/Text.scala
@@ -0,0 +1,32 @@
+package docspell.extract.internal
+
+/** Text obtained from a document by one of the extractors.
+  *
+  * `raw` is kept exactly as delivered; `value` and `length` refer to
+  * the cleaned form: trimmed and without NUL (U+0000) characters.
+  *
+  * @param raw the unprocessed text; a `null` is treated like `""`
+  */
+case class Text(raw: String) {
+
+  // Cleaned once on first access. Option(raw) guards against a null
+  // handed over by a Java API through the primary constructor.
+  private lazy val textValue: String =
+    Option(raw).fold("")(_.trim.replace("\u0000", ""))
+
+  /** Number of characters of the cleaned text. */
+  def length: Int =
+    textValue.length
+
+  /** The cleaned text: trimmed and without NUL characters. */
+  def value: String =
+    textValue
+}
+
+object Text {
+
+  /** Creates a [[Text]] from an optional string; `None` yields the empty text. */
+  def apply(ot: Option[String]): Text =
+    Text(ot.getOrElse(""))
+
+}
diff --git a/modules/extract/src/main/scala/docspell/extract/ocr/TextExtract.scala b/modules/extract/src/main/scala/docspell/extract/ocr/TextExtract.scala
index 7246bb7c..4abc00e1 100644
--- a/modules/extract/src/main/scala/docspell/extract/ocr/TextExtract.scala
+++ b/modules/extract/src/main/scala/docspell/extract/ocr/TextExtract.scala
@@ -3,6 +3,7 @@ package docspell.extract.ocr
 import cats.effect.{Blocker, ContextShift, Sync}
 import docspell.common._
 import docspell.files._
+import docspell.extract.internal.Text
 import fs2.Stream
 
 object TextExtract {
@@ -13,7 +14,7 @@ object TextExtract {
       logger: Logger[F],
       lang: String,
       config: OcrConfig
-  ): Stream[F, String] =
+  ): Stream[F, Text] =
     extractOCR(in, blocker, logger, lang, config)
 
   def extractOCR[F[_]: Sync: ContextShift](
@@ -22,7 +23,7 @@ object TextExtract {
       logger: Logger[F],
       lang: String,
       config: OcrConfig
-  ): Stream[F, String] =
+  ): Stream[F, Text] =
     Stream
       .eval(TikaMimetype.detect(in, MimeTypeHint.none))
       .flatMap({
@@ -35,6 +36,7 @@ object TextExtract {
         case mt =>
           raiseError(s"File `$mt` not supported")
       })
+      .map(Text.apply)
 
   private def raiseError[F[_]: Sync](msg: String): Stream[F, Nothing] =
     Stream.raiseError[F](new Exception(msg))
diff --git a/modules/extract/src/main/scala/docspell/extract/odf/OdfExtract.scala b/modules/extract/src/main/scala/docspell/extract/odf/OdfExtract.scala
index 08b8e2fd..232be427 100644
--- a/modules/extract/src/main/scala/docspell/extract/odf/OdfExtract.scala
+++ b/modules/extract/src/main/scala/docspell/extract/odf/OdfExtract.scala
@@ -11,10 +11,11 @@ import org.apache.tika.parser.odf.OpenDocumentParser
 import org.apache.tika.sax.BodyContentHandler
 
 import scala.util.Try
+import docspell.extract.internal.Text
 
 object OdfExtract {
 
-  def get[F[_]: Sync](data: Stream[F, Byte]): F[Either[Throwable, String]] =
+  def get[F[_]: Sync](data: Stream[F, Byte]): F[Either[Throwable, Text]] = // buffers the whole stream, then parses it
     data.compile.to(Array).map(new ByteArrayInputStream(_)).map(get)
 
   def get(is: InputStream) =
@@ -24,7 +25,7 @@ object OdfExtract {
       val meta     = new Metadata()
       val ooparser = new OpenDocumentParser()
       ooparser.parse(is, handler, meta, pctx)
-      handler.toString.trim
+      Text(Option(handler.toString))
     }.toEither
 
 }
diff --git a/modules/extract/src/main/scala/docspell/extract/pdfbox/PdfboxExtract.scala b/modules/extract/src/main/scala/docspell/extract/pdfbox/PdfboxExtract.scala
index ac0f2f45..502db289 100644
--- a/modules/extract/src/main/scala/docspell/extract/pdfbox/PdfboxExtract.scala
+++ b/modules/extract/src/main/scala/docspell/extract/pdfbox/PdfboxExtract.scala
@@ -10,25 +10,26 @@ import org.apache.pdfbox.text.PDFTextStripper
 
 import scala.util.{Try, Using}
 import fs2.Stream
+import docspell.extract.internal.Text
 
 object PdfboxExtract {
 
-  def get[F[_]: Sync](data: Stream[F, Byte]): F[Either[Throwable, String]] =
+  def get[F[_]: Sync](data: Stream[F, Byte]): F[Either[Throwable, Text]] = // collects all bytes before loading the PDF
     data.compile
       .to(Array)
       .map(bytes => Using(PDDocument.load(bytes))(readText).toEither.flatten)
 
-  def get(is: InputStream): Either[Throwable, String] =
+  def get(is: InputStream): Either[Throwable, Text] = // Using closes the loaded document afterwards
     Using(PDDocument.load(is))(readText).toEither.flatten
 
-  def get(inFile: Path): Either[Throwable, String] =
+  def get(inFile: Path): Either[Throwable, Text] = // loads the PDF from a file path
     Using(PDDocument.load(inFile.toFile))(readText).toEither.flatten
 
-  private def readText(doc: PDDocument): Either[Throwable, String] =
+  private def readText(doc: PDDocument): Either[Throwable, Text] = // a failure inside Try ends up as Left
     Try {
       val stripper = new PDFTextStripper()
       stripper.setAddMoreFormatting(true)
       stripper.setLineSeparator("\n")
-      stripper.getText(doc).trim // trim here already
+      Text(Option(stripper.getText(doc))) // a null result becomes empty text; trimming is left to Text.value
     }.toEither
 }
diff --git a/modules/extract/src/main/scala/docspell/extract/poi/PoiExtract.scala b/modules/extract/src/main/scala/docspell/extract/poi/PoiExtract.scala
index 10c0c439..daae9aae 100644
--- a/modules/extract/src/main/scala/docspell/extract/poi/PoiExtract.scala
+++ b/modules/extract/src/main/scala/docspell/extract/poi/PoiExtract.scala
@@ -17,19 +17,20 @@ import fs2.Stream
 import scala.util.Try
 import docspell.common._
 import docspell.files.TikaMimetype
+import docspell.extract.internal.Text
 
 object PoiExtract {
 
   def get[F[_]: Sync](
       data: Stream[F, Byte],
       hint: MimeTypeHint
-  ): F[Either[Throwable, String]] =
+  ): F[Either[Throwable, Text]] = // detects the mime type first, then dispatches on it
     TikaMimetype.detect(data, hint).flatMap(mt => get(data, mt))
 
   def get[F[_]: Sync](
       data: Stream[F, Byte],
       mime: MimeType
-  ): F[Either[Throwable, String]] =
+  ): F[Either[Throwable, Text]] =
     mime match {
       case PoiType.doc =>
         getDoc(data)
@@ -55,40 +56,40 @@ object PoiExtract {
         Sync[F].pure(Left(new Exception(s"Unsupported content: ${mt.asString}")))
     }
 
-  def getDocx(is: InputStream): Either[Throwable, String] =
+  def getDocx(is: InputStream): Either[Throwable, Text] =
     Try {
       val xt = new XWPFWordExtractor(new XWPFDocument(is))
-      Option(xt.getText).map(_.trim).getOrElse("")
+      Text(Option(xt.getText)) // a null result becomes empty text; trimming is left to Text.value
     }.toEither
 
-  def getDoc(is: InputStream): Either[Throwable, String] =
+  def getDoc(is: InputStream): Either[Throwable, Text] =
     Try {
       val xt = new WordExtractor(is)
-      Option(xt.getText).map(_.trim).getOrElse("")
+      Text(Option(xt.getText)) // a null result becomes empty text; trimming is left to Text.value
     }.toEither
 
-  def getXlsx(is: InputStream): Either[Throwable, String] =
+  def getXlsx(is: InputStream): Either[Throwable, Text] =
     Try {
       val xt = new XSSFExcelExtractor(new XSSFWorkbook(is))
-      Option(xt.getText).map(_.trim).getOrElse("")
+      Text(Option(xt.getText)) // a null result becomes empty text; trimming is left to Text.value
     }.toEither
 
-  def getXls(is: InputStream): Either[Throwable, String] =
+  def getXls(is: InputStream): Either[Throwable, Text] =
     Try {
       val xt = new ExcelExtractor(new HSSFWorkbook(is))
-      Option(xt.getText).map(_.trim).getOrElse("")
+      Text(Option(xt.getText)) // a null result becomes empty text; trimming is left to Text.value
     }.toEither
 
-  def getDocx[F[_]: Sync](data: Stream[F, Byte]): F[Either[Throwable, String]] =
+  def getDocx[F[_]: Sync](data: Stream[F, Byte]): F[Either[Throwable, Text]] = // buffers all bytes, then parses
     data.compile.to(Array).map(new ByteArrayInputStream(_)).map(getDocx)
 
-  def getDoc[F[_]: Sync](data: Stream[F, Byte]): F[Either[Throwable, String]] =
+  def getDoc[F[_]: Sync](data: Stream[F, Byte]): F[Either[Throwable, Text]] = // buffers all bytes, then parses
     data.compile.to(Array).map(new ByteArrayInputStream(_)).map(getDoc)
 
-  def getXlsx[F[_]: Sync](data: Stream[F, Byte]): F[Either[Throwable, String]] =
+  def getXlsx[F[_]: Sync](data: Stream[F, Byte]): F[Either[Throwable, Text]] = // buffers all bytes, then parses
     data.compile.to(Array).map(new ByteArrayInputStream(_)).map(getXlsx)
 
-  def getXls[F[_]: Sync](data: Stream[F, Byte]): F[Either[Throwable, String]] =
+  def getXls[F[_]: Sync](data: Stream[F, Byte]): F[Either[Throwable, Text]] = // buffers all bytes, then parses
     data.compile.to(Array).map(new ByteArrayInputStream(_)).map(getXls)
 
 }
diff --git a/modules/extract/src/main/scala/docspell/extract/rtf/RtfExtract.scala b/modules/extract/src/main/scala/docspell/extract/rtf/RtfExtract.scala
index c4a37fec..6850ed33 100644
--- a/modules/extract/src/main/scala/docspell/extract/rtf/RtfExtract.scala
+++ b/modules/extract/src/main/scala/docspell/extract/rtf/RtfExtract.scala
@@ -5,6 +5,7 @@ import java.io.{ByteArrayInputStream, InputStream}
 import cats.implicits._
 import cats.effect.Sync
 import docspell.common.MimeType
+import docspell.extract.internal.Text
 import fs2.Stream
 import javax.swing.text.rtf.RTFEditorKit
 
@@ -14,14 +15,14 @@ object RtfExtract {
 
   val rtfType = MimeType.application("rtf")
 
-  def get(is: InputStream): Either[Throwable, String] =
+  def get(is: InputStream): Either[Throwable, Text] =
     Try {
       val kit = new RTFEditorKit()
       val doc = kit.createDefaultDocument()
       kit.read(is, doc, 0)
-      doc.getText(0, doc.getLength).trim
+      Text(doc.getText(0, doc.getLength)) // text of the whole document; trimming is left to Text.value
     }.toEither
 
-  def get[F[_]: Sync](data: Stream[F, Byte]): F[Either[Throwable, String]] =
+  def get[F[_]: Sync](data: Stream[F, Byte]): F[Either[Throwable, Text]] = // buffers the whole stream, then parses it
     data.compile.to(Array).map(new ByteArrayInputStream(_)).map(get)
 }
diff --git a/modules/extract/src/test/scala/docspell/extract/ocr/TextExtractionSuite.scala b/modules/extract/src/test/scala/docspell/extract/ocr/TextExtractionSuite.scala
index ac2998a8..4693fd6b 100644
--- a/modules/extract/src/test/scala/docspell/extract/ocr/TextExtractionSuite.scala
+++ b/modules/extract/src/test/scala/docspell/extract/ocr/TextExtractionSuite.scala
@@ -29,6 +29,6 @@ object TextExtractionSuite extends SimpleTestSuite {
       .lastOrError
       .unsafeRunSync()
 
-    assertEquals(extract.trim, expect.trim)
+    assertEquals(extract.value, expect)
   }
 }
diff --git a/modules/extract/src/test/scala/docspell/extract/pdfbox/PdfboxExtractTest.scala b/modules/extract/src/test/scala/docspell/extract/pdfbox/PdfboxExtractTest.scala
index 4d06be76..1f436b25 100644
--- a/modules/extract/src/test/scala/docspell/extract/pdfbox/PdfboxExtractTest.scala
+++ b/modules/extract/src/test/scala/docspell/extract/pdfbox/PdfboxExtractTest.scala
@@ -18,7 +18,7 @@ object PdfboxExtractTest extends SimpleTestSuite {
       case (file, txt) =>
         val url      = file.toJavaUrl.fold(sys.error, identity)
         val str      = PdfboxExtract.get(url.openStream()).fold(throw _, identity)
-        val received = removeFormatting(str)
+        val received = removeFormatting(str.value)
         val expect   = removeFormatting(txt)
         assertEquals(received, expect)
     }
@@ -29,7 +29,7 @@ object PdfboxExtractTest extends SimpleTestSuite {
       case (file, txt) =>
         val data     = file.readURL[IO](8192, blocker)
         val str      = PdfboxExtract.get(data).unsafeRunSync().fold(throw _, identity)
-        val received = removeFormatting(str)
+        val received = removeFormatting(str.value)
         val expect   = removeFormatting(txt)
         assertEquals(received, expect)
     }
@@ -40,7 +40,7 @@ object PdfboxExtractTest extends SimpleTestSuite {
 
     val str = PdfboxExtract.get(url.openStream()).fold(throw _, identity)
 
-    assertEquals(str, "")
+    assertEquals(str.value, "")
   }
 
   private def removeFormatting(str: String): String =